diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/compproto/compproto_ut.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/compproto/compproto_ut.cpp')
-rw-r--r-- | library/cpp/compproto/compproto_ut.cpp | 543 |
1 files changed, 543 insertions, 0 deletions
diff --git a/library/cpp/compproto/compproto_ut.cpp b/library/cpp/compproto/compproto_ut.cpp new file mode 100644 index 00000000000..9393be967a3 --- /dev/null +++ b/library/cpp/compproto/compproto_ut.cpp @@ -0,0 +1,543 @@ +#include "huff.h" +#include "metainfo.h" +#include "bit.h" + +#include <util/generic/vector.h> +#include <util/generic/map.h> +#include <util/system/protect.h> + +#include <library/cpp/testing/unittest/registar.h> + +static ui64 gSeed = 42; + +static void FlushPseudoRandom() { + gSeed = 42; +} + +static ui32 PseudoRandom(ui32 max) { + // stupid and non-threadsafe, but very predictable chaos generator + gSeed += 1; + gSeed *= 419; + gSeed = gSeed ^ (ui64(max) << 17); + return gSeed % max; +}; + +enum ECompMode { + CM_SINGLEPASS, + CM_TWOPASS +}; + +struct TTestParams { + size_t DataSize; + ui32 ValueArraySize; +}; + +template <typename X> +void TestSaveLoadMeta(NCompProto::TMetaInfo<X>& src) { + TStringStream ss; + src.Save(ss); + TString data = ss.Str(); + NCompProto::TMetaInfo<X> loadedMeta(data); + ss = TStringStream(); + loadedMeta.Save(ss); + UNIT_ASSERT_EQUAL(ss.Str(), data); +} + +template <typename TDecompressor, template <typename, typename> class TSerialize> +void TestWithParams(const TString& metainfo, const ECompMode mode, const TTestParams& params) { + using namespace NCompProto; + FlushPseudoRandom(); + + TStringInput stream(metainfo); + + THolder<TMetaInfo<THuff>> meta; + if (mode == CM_TWOPASS) { + TMetaInfo<THist> hist(stream); + TEmpty empty; + TSerialize<THist, TEmpty>::Serialize(hist, empty, params); + meta.Reset(new TMetaInfo<THuff>(hist, THistToHuff::Instance())); + } else { + meta.Reset(new TMetaInfo<THuff>(stream)); + } + TestSaveLoadMeta(*meta.Get()); + + TBitBuffer buffer; + TSerialize<THuff, TBitBuffer>::Serialize(*meta, buffer, params); + + ui64 codedSize = buffer.Position; + + TMetaInfo<TTable> decompressor(*meta, THuffToTable::Instance()); + + // verify that no memory read beyond buffer occurs + const size_t byteSize = buffer.ByteLength(); + const size_t PAGESIZEX = 4096; + const size_t busyPages = (byteSize + (PAGESIZEX - 1)) / PAGESIZEX; + const size_t allPages = busyPages + 1; + const size_t allocSize = (allPages + 1) * PAGESIZEX; + TVector<ui8> readBuffer(allocSize); + ui8* start = &readBuffer[0]; + ui8* pageStart = reinterpret_cast<ui8*>((size_t(start) + PAGESIZEX) & ~(PAGESIZEX - 1)); + // XX DATA DATA DATA DATA PROT + // | | | | | pages + // calculate dataStart so that data ends exactly at the page end + ui8* dataStart = pageStart + busyPages * PAGESIZEX - byteSize; + ui8* dataEnd = pageStart + busyPages * PAGESIZEX; + ProtectMemory(dataEnd, PAGESIZEX, PM_NONE); + // memory copying should be performed without any problems + memcpy(dataStart, buffer.Out.data(), byteSize); + + ui64 position = 0; + TMetaIterator<TDecompressor> instance; + // we should not read beyond dataEnd here + instance.Decompress(&decompressor, dataStart, position); + const ui64 decodedSize = position; + UNIT_ASSERT_EQUAL(codedSize, decodedSize); + // unprotect memory + ProtectMemory(dataEnd, PAGESIZEX, PM_READ | PM_WRITE | PM_EXEC); +} + +template <typename TDecompressor, template <typename, typename> class TSerialize> +void Test(const TString& metainfo, const ECompMode mode) { + for (size_t ds = 3; ds < 42; ds += (3 + PseudoRandom(5))) { + for (size_t vas = 5; vas < 42; vas += (4 + PseudoRandom(10))) { + TTestParams params; + params.DataSize = ds; + params.ValueArraySize = vas; + TestWithParams<TDecompressor, TSerialize>(metainfo, mode, params); + } + } +} + +Y_UNIT_TEST_SUITE(CompProtoTestBasic) { + using namespace NCompProto; + + const TString metainfo = + "\n\ + repeated data id 0\n\ + scalar clicks id 0 default const 0\n\ + scalar shows id 1 default const 0\n\ + repeated regClicks id 2\n\ + scalar clicks id 0 default const 0\n\ + scalar shows id 1 default const 0\n\ + end\n\ + scalar extra id 31 default const 0\n\ + end\n"; + + struct TRegInfo { + ui32 Clicks; + ui32 Shows; + }; + + struct TData { + ui32 Clicks; + ui32 Shows; + ui32 Extra; + TMap<ui32, TRegInfo> RegClicks; + }; + + TVector<TData> data; + + template <class TMeta, class TFunctor> + struct TSerialize { + static void Serialize(TMetaInfo<TMeta>& meta, TFunctor& functor, const TTestParams& params) { + FlushPseudoRandom(); + meta.BeginSelf(functor); + data.clear(); + data.resize(params.DataSize); + for (ui32 i = 0; i < params.DataSize; ++i) { + meta.BeginElement(i, functor); + + data[i].Clicks = PseudoRandom(16) + 100; + data[i].Shows = PseudoRandom(500) * PseudoRandom(16); + data[i].Extra = PseudoRandom(500) + (1UL << 31); // test also saving of big values + meta.SetScalar(0, data[i].Clicks, functor); + meta.SetScalar(1, data[i].Shows, functor); + + TMetaInfo<TMeta>& regClicks = meta.BeginRepeated(2, functor); + for (ui32 j = 0; j < PseudoRandom(200); j += 1 + PseudoRandom(10)) { + regClicks.BeginElement(j, functor); + TRegInfo& r = data[i].RegClicks[j]; + + r.Clicks = PseudoRandom(2); + r.Shows = PseudoRandom(800) * PseudoRandom(8) + 56; + regClicks.SetScalar(0, r.Clicks, functor); + regClicks.SetScalar(1, r.Shows, functor); + + regClicks.EndElement(functor); + } + regClicks.EndRepeated(functor); + meta.SetScalar(31, data[i].Extra, functor); + meta.EndElement(functor); + } + meta.EndRepeated(functor); + } + }; + + struct TMultiDecompressor: public TParentHold<TMultiDecompressor> { + struct TRegClicks: public TParentHold<TRegClicks> { + const TData* Data; + const TRegInfo* Elem; + TRegClicks() + : Data(nullptr) + , Elem(nullptr) + { + } + void BeginSelf(ui32 /*count*/, ui32 /*id*/) { + } + void EndSelf() { + } + void BeginElement(ui32 element) { + TMap<ui32, TRegInfo>::const_iterator it = Data->RegClicks.find(element); + if (it == Data->RegClicks.end()) { + UNIT_ASSERT(0); + } + Elem = &it->second; + } + void EndElement() { + } + void SetScalar(size_t index, ui32 val) { + if (index == 0) + UNIT_ASSERT_EQUAL(val, Elem->Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, Elem->Shows); + } + IDecompressor& GetDecompressor(size_t) { + UNIT_ASSERT(0); + return GetEmptyDecompressor(); + } + }; + + const TData* Elem; + TMetaIterator<TRegClicks> RegClicks; + void BeginSelf(ui32 /*count*/, ui32 /*id*/) { + } + void EndSelf() { + } + void BeginElement(ui32 element) { + UNIT_ASSERT(element < data.size()); + Elem = &data[element]; + } + void EndElement() { + } + void SetScalar(size_t index, ui32 val) { + if (index == 0) + UNIT_ASSERT_EQUAL(val, Elem->Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, Elem->Shows); + if (index == 31) + UNIT_ASSERT_EQUAL(val, Elem->Extra); + } + IDecompressor& GetDecompressor(size_t index) { + if (index == 2) { + RegClicks.Self.Data = Elem; + return RegClicks; + } + UNIT_ASSERT(0); + return GetEmptyDecompressor(); + } + TMultiDecompressor() + : Elem(nullptr) + { + } + }; + + struct TVerifyingDecompressor: public TParentHold<TVerifyingDecompressor> { + enum EState { + Startstop, + OutDataElem, + InDataElem, + InRegClicks, + }; + EState State; + + ui32 DataInd; + TMap<ui32, TRegInfo>::iterator RegIter; + + TMetaIterator<TVerifyingDecompressor>& GetDecompressor(size_t index) { + Y_UNUSED(index); + return *Parent; + } + + TVerifyingDecompressor() + : State(Startstop) + , DataInd(0) + { + } + void BeginSelf(ui32 /*count*/, ui32 id) { + switch (State) { + case Startstop: + UNIT_ASSERT_EQUAL(id, 0); + State = OutDataElem; + break; + case OutDataElem: + UNIT_ASSERT(0); + case InDataElem: + UNIT_ASSERT_EQUAL(id, 2); + State = InRegClicks; + RegIter = data[DataInd].RegClicks.begin(); + break; + case InRegClicks: + UNIT_ASSERT(0); + default: + UNIT_ASSERT(0); + } + } + void EndSelf() { + switch (State) { + case Startstop: + UNIT_ASSERT(0); + case OutDataElem: + State = Startstop; + break; + case InDataElem: + UNIT_ASSERT(0); + case InRegClicks: + UNIT_ASSERT_EQUAL(RegIter, data[DataInd].RegClicks.end()); + State = InDataElem; + break; + default: + UNIT_ASSERT(0); + } + } + void BeginElement(ui32 element) { + switch (State) { + case Startstop: + UNIT_ASSERT(0); + case OutDataElem: + UNIT_ASSERT(element < data.size()); + State = InDataElem; + break; + case InDataElem: + UNIT_ASSERT(0); + case InRegClicks: + UNIT_ASSERT_EQUAL(element, RegIter->first); + break; + } + } + void EndElement() { + switch (State) { + case Startstop: + UNIT_ASSERT(0); + case OutDataElem: + UNIT_ASSERT(0); + case InDataElem: + State = OutDataElem; + ++DataInd; + break; + case InRegClicks: + ++RegIter; + break; + } + } + + void SetScalar(size_t index, ui32 val) { + switch (State) { + case OutDataElem: + UNIT_ASSERT(0); + case InDataElem: + if (index == 0) + UNIT_ASSERT_EQUAL(val, data[DataInd].Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, data[DataInd].Shows); + if (index == 31) + UNIT_ASSERT_EQUAL(val, data[DataInd].Extra); + break; + case InRegClicks: + if (index == 0) + UNIT_ASSERT_EQUAL(val, RegIter->second.Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, RegIter->second.Shows); + break; + default: + UNIT_ASSERT(0); + } + } + }; + + Y_UNIT_TEST(VerifyDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_SINGLEPASS); + } + + Y_UNIT_TEST(VerifyHistDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_TWOPASS); + } + + Y_UNIT_TEST(VerifyDecompressionMulti) { + Test<TMultiDecompressor, TSerialize>(metainfo, CM_SINGLEPASS); + } + + Y_UNIT_TEST(VerifyHistDecompressionMulti) { + Test<TMultiDecompressor, TSerialize>(metainfo, CM_TWOPASS); + } +} + +Y_UNIT_TEST_SUITE(CompProtoTestExtended) { + using namespace NCompProto; + const TString metainfo = + "\n\ + repeated data id 0\n\ + repeated second id 3\n\ + scalar inner2 id 0 default const 0\n\ + end\n\ + repeated first id 2\n\ + scalar inner id 0 default const 0\n\ + end\n\ + end\n"; + TVector<std::pair<TVector<ui32>, TVector<ui32>>> data; + + template <class TMeta, class TFunctor> + struct TSerialize { + static void Serialize(TMetaInfo<TMeta>& meta, TFunctor& functor, const TTestParams& params) { + FlushPseudoRandom(); + meta.BeginSelf(functor); + data.clear(); + data.resize(params.DataSize); + for (size_t i = 0; i < params.DataSize; ++i) { + meta.BeginElement(i, functor); + TMetaInfo<TMeta>& first = meta.BeginRepeated(2, functor); + data[i].first.resize(params.ValueArraySize); + for (ui32 j = 0; j < params.ValueArraySize; j++) { + first.BeginElement(j, functor); + + ui32 val = PseudoRandom(42 * 42 * 42); + first.SetScalar(0, val, functor); + data[i].first[j] = val; + + first.EndElement(functor); + } + first.EndRepeated(functor); + + TMetaInfo<TMeta>& second = meta.BeginRepeated(3, functor); + data[i].second.resize(params.ValueArraySize); + for (ui32 j = 0; j < params.ValueArraySize; j++) { + second.BeginElement(j, functor); + + ui32 val = PseudoRandom(42 * 42 * 42); + second.SetScalar(0, val, functor); + data[i].second[j] = val; + + second.EndElement(functor); + } + second.EndRepeated(functor); + meta.EndElement(functor); + } + meta.EndRepeated(functor); + } + }; + + struct TVerifyingDecompressor: public TParentHold<TVerifyingDecompressor> { + enum EState { + Startstop, + OutDataElem, + InDataElemBeforeSecond, + InDataElemSecond, + InFirst, + InSecond, + }; + EState State; + + ui32 DataInd; + ui32 ArrayInd; + + TVerifyingDecompressor() + : State(Startstop) + , DataInd(0) + , ArrayInd(0) + { + } + + TMetaIterator<TVerifyingDecompressor>& GetDecompressor(size_t index) { + Y_UNUSED(index); + return *Parent; + } + + void BeginSelf(ui32 /*count*/, ui32 id) { + switch (State) { + case Startstop: + UNIT_ASSERT_EQUAL(id, 0); + State = OutDataElem; + break; + case InDataElemBeforeSecond: + UNIT_ASSERT_EQUAL(id, 2); + State = InFirst; + ArrayInd = 0; + break; + case InDataElemSecond: + UNIT_ASSERT_EQUAL(id, 3); + State = InSecond; + ArrayInd = 0; + break; + default: + UNIT_ASSERT(0); + } + } + void EndSelf() { + switch (State) { + case OutDataElem: + State = Startstop; + break; + case InFirst: + State = InDataElemSecond; + break; + case InSecond: + State = InDataElemSecond; + break; + default: + UNIT_ASSERT(0); + } + } + void BeginElement(ui32 element) { + switch (State) { + case OutDataElem: + UNIT_ASSERT(element < data.size()); + State = InDataElemBeforeSecond; + break; + case InFirst: + UNIT_ASSERT(element < data[DataInd].first.size()); + break; + case InSecond: + UNIT_ASSERT(element < data[DataInd].second.size()); + break; + default: + Cerr << (ui32)State << Endl; + UNIT_ASSERT(0); + } + } + void EndElement() { + switch (State) { + case InFirst: + case InSecond: + ++ArrayInd; + break; + case InDataElemSecond: + ++DataInd; + State = OutDataElem; + break; + default: + Cerr << (ui32)State << Endl; + UNIT_ASSERT(0); + } + } + + void SetScalar(size_t index, ui32 val) { + UNIT_ASSERT_EQUAL(index, 0); + switch (State) { + case InFirst: + UNIT_ASSERT_EQUAL(val, data[DataInd].first[ArrayInd]); + break; + case InSecond: + UNIT_ASSERT_EQUAL(val, data[DataInd].second[ArrayInd]); + break; + default: + UNIT_ASSERT(0); + } + } + }; + Y_UNIT_TEST(VerifyDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_SINGLEPASS); + } + + Y_UNIT_TEST(VerifyHistDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_TWOPASS); + } +} |