diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/packers | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/packers')
-rw-r--r-- | library/cpp/packers/README.md | 2 | ||||
-rw-r--r-- | library/cpp/packers/packers.cpp | 20 | ||||
-rw-r--r-- | library/cpp/packers/packers.h | 611 | ||||
-rw-r--r-- | library/cpp/packers/proto_packer.cpp | 1 | ||||
-rw-r--r-- | library/cpp/packers/proto_packer.h | 50 | ||||
-rw-r--r-- | library/cpp/packers/region_packer.cpp | 1 | ||||
-rw-r--r-- | library/cpp/packers/region_packer.h | 42 | ||||
-rw-r--r-- | library/cpp/packers/ut/packers_ut.cpp | 110 | ||||
-rw-r--r-- | library/cpp/packers/ut/proto_packer_ut.cpp | 104 | ||||
-rw-r--r-- | library/cpp/packers/ut/region_packer_ut.cpp | 40 | ||||
-rw-r--r-- | library/cpp/packers/ut/test.proto | 11 | ||||
-rw-r--r-- | library/cpp/packers/ut/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/packers/ya.make | 11 |
13 files changed, 1015 insertions, 0 deletions
diff --git a/library/cpp/packers/README.md b/library/cpp/packers/README.md new file mode 100644 index 0000000000..c635bf8c8a --- /dev/null +++ b/library/cpp/packers/README.md @@ -0,0 +1,2 @@ +A library of packers used to serialize data in the library/cpp/containers/comptrie. +It is specially excluded as separate library since many packers are of standalone interest. diff --git a/library/cpp/packers/packers.cpp b/library/cpp/packers/packers.cpp new file mode 100644 index 0000000000..54615a9e7f --- /dev/null +++ b/library/cpp/packers/packers.cpp @@ -0,0 +1,20 @@ +#include "packers.h" +#include "region_packer.h" + +namespace NPackers { +#define _X_4(X) X, X, X, X +#define _X_8(X) _X_4(X), _X_4(X) +#define _X_16(X) _X_8(X), _X_8(X) +#define _X_32(X) _X_16(X), _X_16(X) +#define _X_64(X) _X_32(X), _X_32(X) +#define _X_128(X) _X_64(X), _X_64(X) + + const ui8 SkipTable[256] = {_X_128(1), _X_64(2), _X_32(3), _X_16(4), _X_8(5), _X_4(6), 7, 7, 8, 9}; + +#undef _X_4 +#undef _X_8 +#undef _X_16 +#undef _X_32 +#undef _X_64 +#undef _X_128 +} diff --git a/library/cpp/packers/packers.h b/library/cpp/packers/packers.h new file mode 100644 index 0000000000..1bde1b59aa --- /dev/null +++ b/library/cpp/packers/packers.h @@ -0,0 +1,611 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/strbuf.h> +#include <util/generic/set.h> +#include <util/generic/list.h> +#include <util/generic/vector.h> +#include <util/generic/bitops.h> + +#include <array> +// Data serialization strategy class. +// Default realization can pack only limited range of types, but you can pack any data other using your own strategy class. + +template <class T> +class TNullPacker { // Very effective package class - pack any data into zero bytes :) +public: + void UnpackLeaf(const char*, T& t) const { + t = T(); + } + + void PackLeaf(char*, const T&, size_t) const { + } + + size_t MeasureLeaf(const T&) const { + return 0; + } + + size_t SkipLeaf(const char*) const { + return 0; + } +}; + +template <typename T> +class TAsIsPacker { // this packer is not really a packer... +public: + void UnpackLeaf(const char* p, T& t) const { + memcpy(&t, p, sizeof(T)); + } + void PackLeaf(char* buffer, const T& data, size_t computedSize) const { + Y_ASSERT(computedSize == sizeof(data)); + memcpy(buffer, &data, sizeof(T)); + } + size_t MeasureLeaf(const T& data) const { + Y_UNUSED(data); + return sizeof(T); + } + size_t SkipLeaf(const char*) const { + return sizeof(T); + } +}; + +// Implementation + +namespace NPackers { + template <class T> + inline ui64 ConvertIntegral(const T& data); + + template <> + inline ui64 ConvertIntegral(const i64& data) { + if (data < 0) { + return (static_cast<ui64>(-1 * data) << 1) | 1; + } else { + return static_cast<ui64>(data) << 1; + } + } + + namespace NImpl { + template <class T, bool isSigned> + struct TConvertImpl { + static inline ui64 Convert(const T& data); + }; + + template <class T> + struct TConvertImpl<T, true> { + static inline ui64 Convert(const T& data) { + return ConvertIntegral<i64>(static_cast<i64>(data)); + } + }; + + template <class T> + struct TConvertImpl<T, false> { + static inline ui64 Convert(const T& data) { + return data; + } + }; + } + + template <class T> + inline ui64 ConvertIntegral(const T& data) { + static_assert(std::is_integral<T>::value, "T must be integral type"); + return NImpl::TConvertImpl<T, std::is_signed<T>::value>::Convert(data); + } + + //--------------------------------- + // TIntegralPacker --- for integral types. + + template <class T> + class TIntegralPacker { // can pack only integral types <= ui64 + public: + void UnpackLeaf(const char* p, T& t) const; + void PackLeaf(char* buffer, const T& data, size_t size) const; + size_t MeasureLeaf(const T& data) const; + size_t SkipLeaf(const char* p) const; + }; + + template <> + inline size_t TIntegralPacker<ui64>::MeasureLeaf(const ui64& val) const { + constexpr size_t MAX_SIZE = sizeof(ui64) + sizeof(ui64) / 8; + + ui64 value = val; + size_t len = 1; + + value >>= 7; + for (; value && len < MAX_SIZE; value >>= 7) + ++len; + + return len; + } + + template <> + inline void TIntegralPacker<ui64>::PackLeaf(char* buffer, const ui64& val, size_t len) const { + ui64 value = val; + int lenmask = 0; + + for (size_t i = len - 1; i; --i) { + buffer[i] = (char)(value & 0xFF); + value >>= 8; + lenmask = ((lenmask >> 1) | (1 << 7)); + } + + buffer[0] = (char)(lenmask | value); + } + + extern const ui8 SkipTable[]; + + template <> + inline void TIntegralPacker<ui64>::UnpackLeaf(const char* p, ui64& result) const { + unsigned char ch = *(p++); + size_t taillen = SkipTable[ch] - 1; + + result = (ch & (0x7F >> taillen)); + + while (taillen--) + result = ((result << 8) | (*(p++) & 0xFF)); + } + + template <> + inline size_t TIntegralPacker<ui64>::SkipLeaf(const char* p) const { + return SkipTable[(ui8)*p]; + } + + namespace NImpl { + template <class T, bool isSigned> + struct TUnpackLeafImpl { + inline void UnpackLeaf(const char* p, T& t) const; + }; + template <class T> + struct TUnpackLeafImpl<T, true> { + inline void UnpackLeaf(const char* p, T& t) const { + ui64 val; + TIntegralPacker<ui64>().UnpackLeaf(p, val); + if (val & 1) { + t = -1 * static_cast<i64>(val >> 1); + } else { + t = static_cast<T>(val >> 1); + } + } + }; + template <class T> + struct TUnpackLeafImpl<T, false> { + inline void UnpackLeaf(const char* p, T& t) const { + ui64 tmp; + TIntegralPacker<ui64>().UnpackLeaf(p, tmp); + t = static_cast<T>(tmp); + } + }; + } + + template <class T> + inline void TIntegralPacker<T>::UnpackLeaf(const char* p, T& t) const { + NImpl::TUnpackLeafImpl<T, std::is_signed<T>::value>().UnpackLeaf(p, t); + } + + template <class T> + inline void TIntegralPacker<T>::PackLeaf(char* buffer, const T& data, size_t size) const { + TIntegralPacker<ui64>().PackLeaf(buffer, ConvertIntegral<T>(data), size); + } + + template <class T> + inline size_t TIntegralPacker<T>::MeasureLeaf(const T& data) const { + return TIntegralPacker<ui64>().MeasureLeaf(ConvertIntegral<T>(data)); + } + + template <class T> + inline size_t TIntegralPacker<T>::SkipLeaf(const char* p) const { + return TIntegralPacker<ui64>().SkipLeaf(p); + } + + //------------------------------------------- + // TFPPacker --- for float/double + namespace NImpl { + template <class TFloat, class TUInt> + class TFPPackerBase { + protected: + typedef TIntegralPacker<TUInt> TPacker; + + union THelper { + TFloat F; + TUInt U; + }; + + TFloat FromUInt(TUInt u) const { + THelper h; + h.U = ReverseBytes(u); + return h.F; + } + + TUInt ToUInt(TFloat f) const { + THelper h; + h.F = f; + return ReverseBytes(h.U); + } + + public: + void UnpackLeaf(const char* c, TFloat& t) const { + TUInt u = 0; + TPacker().UnpackLeaf(c, u); + t = FromUInt(u); + } + + void PackLeaf(char* c, const TFloat& t, size_t sz) const { + TPacker().PackLeaf(c, ToUInt(t), sz); + } + + size_t MeasureLeaf(const TFloat& t) const { + return TPacker().MeasureLeaf(ToUInt(t)); + } + + size_t SkipLeaf(const char* c) const { + return TPacker().SkipLeaf(c); + } + }; + } + + class TFloatPacker: public NImpl::TFPPackerBase<float, ui32> { + }; + + class TDoublePacker: public NImpl::TFPPackerBase<double, ui64> { + }; + + //------------------------------------------- + // TStringPacker --- for TString/TUtf16String and TStringBuf. + + template <class TStringType> + class TStringPacker { + public: + void UnpackLeaf(const char* p, TStringType& t) const; + void PackLeaf(char* buffer, const TStringType& data, size_t size) const; + size_t MeasureLeaf(const TStringType& data) const; + size_t SkipLeaf(const char* p) const; + }; + + template <class TStringType> + inline void TStringPacker<TStringType>::UnpackLeaf(const char* buf, TStringType& t) const { + size_t len; + TIntegralPacker<size_t>().UnpackLeaf(buf, len); + size_t start = TIntegralPacker<size_t>().SkipLeaf(buf); + t = TStringType((const typename TStringType::char_type*)(buf + start), len); + } + + template <class TStringType> + inline void TStringPacker<TStringType>::PackLeaf(char* buf, const TStringType& str, size_t size) const { + size_t len = str.size(); + size_t lenChar = len * sizeof(typename TStringType::char_type); + size_t start = size - lenChar; + TIntegralPacker<size_t>().PackLeaf(buf, len, TIntegralPacker<size_t>().MeasureLeaf(len)); + memcpy(buf + start, str.data(), lenChar); + } + + template <class TStringType> + inline size_t TStringPacker<TStringType>::MeasureLeaf(const TStringType& str) const { + size_t len = str.size(); + return TIntegralPacker<size_t>().MeasureLeaf(len) + len * sizeof(typename TStringType::char_type); + } + + template <class TStringType> + inline size_t TStringPacker<TStringType>::SkipLeaf(const char* buf) const { + size_t result = TIntegralPacker<size_t>().SkipLeaf(buf); + { + size_t len; + TIntegralPacker<size_t>().UnpackLeaf(buf, len); + result += len * sizeof(typename TStringType::char_type); + } + return result; + } + + template <class T> + class TPacker; + + // TContainerPacker --- for any container + // Requirements to class C: + // - has method size() (returns size_t) + // - has subclass C::value_type + // - has subclass C::const_iterator + // - has methods begin() and end() (return C::const_iterator) + // - has method insert(C::const_iterator, const C::value_type&) + // Examples: TVector, TList, TSet + // Requirements to class EP: has methods as in any packer (UnpackLeaf, PackLeaf, MeasureLeaf, SkipLeaf) that + // are applicable to C::value_type + + template <typename T> + struct TContainerInfo { + enum { + IsVector = 0 + }; + }; + + template <typename T> + struct TContainerInfo<std::vector<T>> { + enum { + IsVector = 1 + }; + }; + + template <typename T> + struct TContainerInfo<TVector<T>> { + enum { + IsVector = 1 + }; + }; + + template <bool IsVector> + class TContainerPackerHelper { + }; + + template <> + class TContainerPackerHelper<false> { + public: + template <class Packer, class Container> + static void UnpackLeaf(Packer& p, const char* buffer, Container& c) { + p.UnpackLeafSimple(buffer, c); + } + }; + + template <> + class TContainerPackerHelper<true> { + public: + template <class Packer, class Container> + static void UnpackLeaf(Packer& p, const char* buffer, Container& c) { + p.UnpackLeafVector(buffer, c); + } + }; + + template <class C, class EP = TPacker<typename C::value_type>> + class TContainerPacker { + private: + typedef C TContainer; + typedef EP TElementPacker; + typedef typename TContainer::const_iterator TElementIterator; + + void UnpackLeafSimple(const char* buffer, TContainer& c) const; + void UnpackLeafVector(const char* buffer, TContainer& c) const; + + friend class TContainerPackerHelper<TContainerInfo<C>::IsVector>; + + public: + void UnpackLeaf(const char* buffer, TContainer& c) const { + TContainerPackerHelper<TContainerInfo<C>::IsVector>::UnpackLeaf(*this, buffer, c); + } + void PackLeaf(char* buffer, const TContainer& data, size_t size) const; + size_t MeasureLeaf(const TContainer& data) const; + size_t SkipLeaf(const char* buffer) const; + }; + + template <class C, class EP> + inline void TContainerPacker<C, EP>::UnpackLeafSimple(const char* buffer, C& result) const { + size_t offset = TIntegralPacker<size_t>().SkipLeaf(buffer); // first value is the total size (not needed here) + size_t len; + TIntegralPacker<size_t>().UnpackLeaf(buffer + offset, len); + offset += TIntegralPacker<size_t>().SkipLeaf(buffer + offset); + + result.clear(); + + typename C::value_type value; + for (size_t i = 0; i < len; i++) { + TElementPacker().UnpackLeaf(buffer + offset, value); + result.insert(result.end(), value); + offset += TElementPacker().SkipLeaf(buffer + offset); + } + } + + template <class C, class EP> + inline void TContainerPacker<C, EP>::UnpackLeafVector(const char* buffer, C& result) const { + size_t offset = TIntegralPacker<size_t>().SkipLeaf(buffer); // first value is the total size (not needed here) + size_t len; + TIntegralPacker<size_t>().UnpackLeaf(buffer + offset, len); + offset += TIntegralPacker<size_t>().SkipLeaf(buffer + offset); + result.resize(len); + + for (size_t i = 0; i < len; i++) { + TElementPacker().UnpackLeaf(buffer + offset, result[i]); + offset += TElementPacker().SkipLeaf(buffer + offset); + } + } + + template <class C, class EP> + inline void TContainerPacker<C, EP>::PackLeaf(char* buffer, const C& data, size_t size) const { + size_t sizeOfSize = TIntegralPacker<size_t>().MeasureLeaf(size); + TIntegralPacker<size_t>().PackLeaf(buffer, size, sizeOfSize); + size_t len = data.size(); + size_t curSize = TIntegralPacker<size_t>().MeasureLeaf(len); + TIntegralPacker<size_t>().PackLeaf(buffer + sizeOfSize, len, curSize); + curSize += sizeOfSize; + for (TElementIterator p = data.begin(); p != data.end(); p++) { + size_t sizeChange = TElementPacker().MeasureLeaf(*p); + TElementPacker().PackLeaf(buffer + curSize, *p, sizeChange); + curSize += sizeChange; + } + Y_ASSERT(curSize == size); + } + + template <class C, class EP> + inline size_t TContainerPacker<C, EP>::MeasureLeaf(const C& data) const { + size_t curSize = TIntegralPacker<size_t>().MeasureLeaf(data.size()); + for (TElementIterator p = data.begin(); p != data.end(); p++) + curSize += TElementPacker().MeasureLeaf(*p); + size_t extraSize = TIntegralPacker<size_t>().MeasureLeaf(curSize); + + // Double measurement protects against sudden increases in extraSize, + // e.g. when curSize is 127 and stays in one byte, but curSize + 1 requires two bytes. + + extraSize = TIntegralPacker<size_t>().MeasureLeaf(curSize + extraSize); + Y_ASSERT(extraSize == TIntegralPacker<size_t>().MeasureLeaf(curSize + extraSize)); + return curSize + extraSize; + } + + template <class C, class EP> + inline size_t TContainerPacker<C, EP>::SkipLeaf(const char* buffer) const { + size_t value; + TIntegralPacker<size_t>().UnpackLeaf(buffer, value); + return value; + } + + // TPairPacker --- for std::pair<T1, T2> (any two types; can be nested) + // TPacker<T1> and TPacker<T2> should be valid classes + + template <class T1, class T2, class TPacker1 = TPacker<T1>, class TPacker2 = TPacker<T2>> + class TPairPacker { + private: + typedef std::pair<T1, T2> TMyPair; + + public: + void UnpackLeaf(const char* buffer, TMyPair& pair) const; + void PackLeaf(char* buffer, const TMyPair& data, size_t size) const; + size_t MeasureLeaf(const TMyPair& data) const; + size_t SkipLeaf(const char* buffer) const; + }; + + template <class T1, class T2, class TPacker1, class TPacker2> + inline void TPairPacker<T1, T2, TPacker1, TPacker2>::UnpackLeaf(const char* buffer, std::pair<T1, T2>& pair) const { + TPacker1().UnpackLeaf(buffer, pair.first); + size_t size = TPacker1().SkipLeaf(buffer); + TPacker2().UnpackLeaf(buffer + size, pair.second); + } + + template <class T1, class T2, class TPacker1, class TPacker2> + inline void TPairPacker<T1, T2, TPacker1, TPacker2>::PackLeaf(char* buffer, const std::pair<T1, T2>& data, size_t size) const { + size_t size1 = TPacker1().MeasureLeaf(data.first); + TPacker1().PackLeaf(buffer, data.first, size1); + size_t size2 = TPacker2().MeasureLeaf(data.second); + TPacker2().PackLeaf(buffer + size1, data.second, size2); + Y_ASSERT(size == size1 + size2); + } + + template <class T1, class T2, class TPacker1, class TPacker2> + inline size_t TPairPacker<T1, T2, TPacker1, TPacker2>::MeasureLeaf(const std::pair<T1, T2>& data) const { + size_t size1 = TPacker1().MeasureLeaf(data.first); + size_t size2 = TPacker2().MeasureLeaf(data.second); + return size1 + size2; + } + + template <class T1, class T2, class TPacker1, class TPacker2> + inline size_t TPairPacker<T1, T2, TPacker1, TPacker2>::SkipLeaf(const char* buffer) const { + size_t size1 = TPacker1().SkipLeaf(buffer); + size_t size2 = TPacker2().SkipLeaf(buffer + size1); + return size1 + size2; + } + + //------------------------------------------------------------------------------------------ + // Packer for fixed-size arrays, i.e. for std::array. + // Saves memory by not storing anything about their size. + // SkipLeaf skips every value, so can be slow for big arrays. + // Requires std::tuple_size<TValue>, TValue::operator[] and possibly TValue::value_type. + template <class TValue, class TElementPacker = TPacker<typename TValue::value_type>> + class TArrayPacker { + public: + using TElemPacker = TElementPacker; + + enum { + Size = std::tuple_size<TValue>::value + }; + + void UnpackLeaf(const char* p, TValue& t) const { + const char* buf = p; + for (size_t i = 0; i < Size; ++i) { + TElemPacker().UnpackLeaf(buf, t[i]); + buf += TElemPacker().SkipLeaf(buf); + } + } + + void PackLeaf(char* buffer, const TValue& data, size_t computedSize) const { + size_t remainingSize = computedSize; + char* pos = buffer; + for (size_t i = 0; i < Size; ++i) { + const size_t elemSize = TElemPacker().MeasureLeaf(data[i]); + TElemPacker().PackLeaf(pos, data[i], Min(elemSize, remainingSize)); + pos += elemSize; + remainingSize -= elemSize; + } + } + + size_t MeasureLeaf(const TValue& data) const { + size_t result = 0; + for (size_t i = 0; i < Size; ++i) { + result += TElemPacker().MeasureLeaf(data[i]); + } + return result; + } + + size_t SkipLeaf(const char* p) const // this function better be fast because it is very frequently used + { + const char* buf = p; + for (size_t i = 0; i < Size; ++i) { + buf += TElemPacker().SkipLeaf(buf); + } + return buf - p; + } + }; + + //------------------------------------ + // TPacker --- the generic packer. + + template <class T, bool IsIntegral> + class TPackerImpl; + + template <class T> + class TPackerImpl<T, true>: public TIntegralPacker<T> { + }; + // No implementation for non-integral types. + + template <class T> + class TPacker: public TPackerImpl<T, std::is_integral<T>::value> { + }; + + template <> + class TPacker<float>: public TAsIsPacker<float> { + }; + + template <> + class TPacker<double>: public TAsIsPacker<double> { + }; + + template <> + class TPacker<TString>: public TStringPacker<TString> { + }; + + template <> + class TPacker<TUtf16String>: public TStringPacker<TUtf16String> { + }; + + template <> + class TPacker<TStringBuf>: public TStringPacker<TStringBuf> { + }; + + template <> + class TPacker<TWtringBuf>: public TStringPacker<TWtringBuf> { + }; + + template <class T> + class TPacker<std::vector<T>>: public TContainerPacker<std::vector<T>> { + }; + + template <class T> + class TPacker<TVector<T>>: public TContainerPacker<TVector<T>> { + }; + + template <class T> + class TPacker<std::list<T>>: public TContainerPacker<std::list<T>> { + }; + + template <class T> + class TPacker<TList<T>>: public TContainerPacker<TList<T>> { + }; + + template <class T> + class TPacker<std::set<T>>: public TContainerPacker<std::set<T>> { + }; + + template <class T> + class TPacker<TSet<T>>: public TContainerPacker<TSet<T>> { + }; + + template <class T1, class T2> + class TPacker<std::pair<T1, T2>>: public TPairPacker<T1, T2> { + }; + + template <class T, size_t N> + class TPacker<std::array<T, N>>: public TArrayPacker<std::array<T, N>> { + }; + +} diff --git a/library/cpp/packers/proto_packer.cpp b/library/cpp/packers/proto_packer.cpp new file mode 100644 index 0000000000..ddca0d5b3b --- /dev/null +++ b/library/cpp/packers/proto_packer.cpp @@ -0,0 +1 @@ +#include "proto_packer.h" diff --git a/library/cpp/packers/proto_packer.h b/library/cpp/packers/proto_packer.h new file mode 100644 index 0000000000..5a3d008e29 --- /dev/null +++ b/library/cpp/packers/proto_packer.h @@ -0,0 +1,50 @@ +#pragma once + +#include "packers.h" + +#include <util/generic/yexception.h> + +namespace NPackers { + template <typename TProtoMessage> + class TProtoMessagePacker { + public: + void UnpackLeaf(const char* bufferPtr, TProtoMessage& protoMessage) const { + const size_t protoMessageByteSize = GetProtoMessageByteSize(bufferPtr); + const size_t skipBytesCount = ProtoMessageByteSizePacker.SkipLeaf(bufferPtr); + + if (!protoMessage.ParseFromArray(static_cast<const void*>(bufferPtr + skipBytesCount), protoMessageByteSize)) { + ythrow yexception() << "Cannot unpack leaf with proto message"; + } + } + + void PackLeaf(char* bufferPtr, const TProtoMessage& protoMessage, const size_t totalByteSize) const { + const size_t protoMessageByteSize = protoMessage.ByteSize(); + const size_t skipBytesCount = totalByteSize - protoMessageByteSize; + + ProtoMessageByteSizePacker.PackLeaf(bufferPtr, protoMessageByteSize, skipBytesCount); + + if (!protoMessage.SerializeToArray(static_cast<void*>(bufferPtr + skipBytesCount), protoMessageByteSize)) { + ythrow yexception() << "Cannot pack leaf with proto message"; + } + } + + size_t MeasureLeaf(const TProtoMessage& protoMessage) const { + const size_t protoMessageByteSize = protoMessage.ByteSize(); + return ProtoMessageByteSizePacker.MeasureLeaf(protoMessageByteSize) + protoMessageByteSize; + } + + size_t SkipLeaf(const char* bufferPtr) const { + const size_t protoMessageByteSize = GetProtoMessageByteSize(bufferPtr); + return ProtoMessageByteSizePacker.SkipLeaf(bufferPtr) + protoMessageByteSize; + } + + private: + TIntegralPacker<size_t> ProtoMessageByteSizePacker; + + size_t GetProtoMessageByteSize(const char* bufferPtr) const { + size_t result; + ProtoMessageByteSizePacker.UnpackLeaf(bufferPtr, result); + return result; + } + }; +} diff --git a/library/cpp/packers/region_packer.cpp b/library/cpp/packers/region_packer.cpp new file mode 100644 index 0000000000..3d8b20c371 --- /dev/null +++ b/library/cpp/packers/region_packer.cpp @@ -0,0 +1 @@ +#include "region_packer.h" diff --git a/library/cpp/packers/region_packer.h b/library/cpp/packers/region_packer.h new file mode 100644 index 0000000000..2c661cb5bc --- /dev/null +++ b/library/cpp/packers/region_packer.h @@ -0,0 +1,42 @@ +#pragma once + +#include "packers.h" + +#include <util/generic/array_ref.h> + +// Stores an array of PODs in the trie (copying them with memcpy). +// Byte order and alignment are your problem. + +template <class TRecord> +class TRegionPacker { +public: + typedef TArrayRef<TRecord> TRecords; + + void UnpackLeaf(const char* p, TRecords& result) const { + size_t len; + NPackers::TIntegralPacker<size_t>().UnpackLeaf(p, len); + size_t start = NPackers::TIntegralPacker<size_t>().SkipLeaf(p); + result = TRecords((TRecord*)(p + start), len); + } + + void PackLeaf(char* buf, const TRecords& data, size_t computedSize) const { + size_t len = data.size(); + size_t lenChar = len * sizeof(TRecord); + size_t start = computedSize - lenChar; + NPackers::TIntegralPacker<size_t>().PackLeaf(buf, len, NPackers::TIntegralPacker<size_t>().MeasureLeaf(len)); + memcpy(buf + start, data.data(), lenChar); + } + + size_t MeasureLeaf(const TRecords& data) const { + size_t len = data.size(); + return NPackers::TIntegralPacker<size_t>().MeasureLeaf(len) + len * sizeof(TRecord); + } + + size_t SkipLeaf(const char* p) const { + size_t result = NPackers::TIntegralPacker<size_t>().SkipLeaf(p); + size_t len; + NPackers::TIntegralPacker<size_t>().UnpackLeaf(p, len); + result += len * sizeof(TRecord); + return result; + } +}; diff --git a/library/cpp/packers/ut/packers_ut.cpp b/library/cpp/packers/ut/packers_ut.cpp new file mode 100644 index 0000000000..18ce2150d1 --- /dev/null +++ b/library/cpp/packers/ut/packers_ut.cpp @@ -0,0 +1,110 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/output.h> +#include <utility> + +#include <util/charset/wide.h> +#include <util/generic/algorithm.h> +#include <util/generic/buffer.h> +#include <util/generic/map.h> +#include <util/generic/vector.h> +#include <util/generic/ptr.h> +#include <util/generic/ylimits.h> + +#include <util/folder/dirut.h> + +#include <util/random/random.h> + +#include <util/string/hex.h> + +#include "packers.h" + +#include <array> +#include <iterator> + +class TPackersTest: public TTestBase { +private: + UNIT_TEST_SUITE(TPackersTest); + UNIT_TEST(TestPackers); + UNIT_TEST_SUITE_END(); + + template <class TData, class TPacker> + void TestPacker(const TData& data); + + template <class TData, class TPacker> + void TestPacker(const TData* test, size_t size); + +public: + void TestPackers(); +}; + +UNIT_TEST_SUITE_REGISTRATION(TPackersTest); + +template <class TData, class TPacker> +void TPackersTest::TestPacker(const TData& data) { + size_t len = TPacker().MeasureLeaf(data); + size_t bufLen = len * 3; + + TArrayHolder<char> buf(new char[bufLen]); + memset(buf.Get(), -1, bufLen); + + TPacker().PackLeaf(buf.Get(), data, len); + + UNIT_ASSERT(TPacker().SkipLeaf(buf.Get()) == len); + + TData dataTmp; + TPacker().UnpackLeaf(buf.Get(), dataTmp); + UNIT_ASSERT(data == dataTmp); +} + +template <class TData, class TPacker> +void TPackersTest::TestPacker(const TData* test, size_t size) { + for (size_t i = 0; i < size; ++i) { + TestPacker<TData, TPacker>(test[i]); + } +} + +void TPackersTest::TestPackers() { + { + const TString test[] = {"", + "a", "b", "c", "d", + "aa", "ab", "ac", "ad", + "aaa", "aab", "aac", "aad", + "aba", "abb", "abc", "abd", + "asdfjjmk.gjilsjgilsjilgjildsajgfilsjdfilgjm ldsa8oq43u 583uq4905 -q435 jiores u893q 5oiju fd-KE 89536 9Q2URE 12AI894T3 89 Q*(re43"}; + + TestPacker<TString, NPackers::TPacker<TString>>(test, Y_ARRAY_SIZE(test)); + + for (size_t i = 0; i != Y_ARRAY_SIZE(test); ++i) { + TestPacker<TUtf16String, NPackers::TPacker<TUtf16String>>(UTF8ToWide(test[i])); + } + } + { + const ui64 test[] = { + 0, 1, 2, 3, 4, 5, 6, 76, 100000, Max<ui64>()}; + + TestPacker<ui64, NPackers::TPacker<ui64>>(test, Y_ARRAY_SIZE(test)); + } + { + const int test[] = { + 0, 1, 2, 3, 4, 5, 6, 76, 100000, -1, -2, -3, -4, -5, -6, -76, -10000, Min<int>(), Max<int>()}; + + TestPacker<int, NPackers::TPacker<int>>(test, Y_ARRAY_SIZE(test)); + } + { + const float test[] = { + 2.f, 3.f, 4.f, 0.f, -0.f, 1.f, -1.f, 1.1f, -1.1f, + std::numeric_limits<float>::min(), -std::numeric_limits<float>::min(), + std::numeric_limits<float>::max(), -std::numeric_limits<float>::max()}; + + TestPacker<float, NPackers::TFloatPacker>(test, Y_ARRAY_SIZE(test)); + } + { + const double test[] = { + 0., -0., 1., -1., 1.1, -1.1, + std::numeric_limits<double>::min(), -std::numeric_limits<double>::min(), + std::numeric_limits<double>::max(), -std::numeric_limits<double>::max()}; + + TestPacker<double, NPackers::TDoublePacker>(test, Y_ARRAY_SIZE(test)); + } +} diff --git a/library/cpp/packers/ut/proto_packer_ut.cpp b/library/cpp/packers/ut/proto_packer_ut.cpp new file mode 100644 index 0000000000..e4151ba68c --- /dev/null +++ b/library/cpp/packers/ut/proto_packer_ut.cpp @@ -0,0 +1,104 @@ +#include "proto_packer.h" + +#include <library/cpp/packers/ut/test.pb.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/string.h> + +using namespace NPackers; +using namespace NProtoPackerTest; + +void FillRequiredFields(TTestMessage& msg) { + msg.SetRequiredString("required_string"); + msg.SetRequiredInt32(42); +} + +void FillOptionalFields(TTestMessage& msg) { + msg.SetOptionalString("optional_string"); + msg.SetOptionalInt32(43); +} + +void FillRepeatedFields(TTestMessage& msg) { + msg.ClearRepeatedStrings(); + for (ui32 idx = 0; idx < 5; ++idx) { + msg.AddRepeatedStrings("repeated_string" + ToString(idx)); + } +} + +// do not want to use google/protobuf/util/message_differencer because of warnings +bool operator==(const TTestMessage& lhs, const TTestMessage& rhs) { + if (lhs.GetRequiredString() != rhs.GetRequiredString() || + lhs.GetRequiredInt32() != rhs.GetRequiredInt32() || + lhs.HasOptionalString() != rhs.HasOptionalString() || + (lhs.HasOptionalString() && lhs.GetOptionalString() != rhs.GetOptionalString()) || + lhs.HasOptionalInt32() != rhs.HasOptionalInt32() || + (lhs.HasOptionalInt32() && lhs.GetOptionalInt32() != rhs.GetOptionalInt32()) || + lhs.RepeatedStringsSize() != rhs.RepeatedStringsSize()) + { + return false; + } + for (ui32 idx = 0; idx < lhs.RepeatedStringsSize(); ++idx) { + if (lhs.GetRepeatedStrings(idx) != rhs.GetRepeatedStrings(idx)) { + return false; + } + } + return true; +} + +Y_UNIT_TEST_SUITE(ProtoPackerTestSuite) { + TProtoMessagePacker<TTestMessage> Packer; + TString Buffer; + + void DoPackUnpackTest(const TTestMessage& msg) { + const ui32 msgByteSize = Packer.MeasureLeaf(msg); + Buffer.resize(msgByteSize); + + Packer.PackLeaf(Buffer.begin(), msg, msgByteSize); + + TTestMessage checkMsg; + Packer.UnpackLeaf(Buffer.begin(), checkMsg); + + UNIT_ASSERT_EQUAL(msg, checkMsg); + } + + Y_UNIT_TEST(TestPackUnpackOnlyRequired) { + TTestMessage msg; + FillRequiredFields(msg); + DoPackUnpackTest(msg); + } + + Y_UNIT_TEST(TestPackUnpackRequiredAndOptional) { + TTestMessage msg; + FillRequiredFields(msg); + FillOptionalFields(msg); + DoPackUnpackTest(msg); + } + + Y_UNIT_TEST(TestPackUnpackAll) { + TTestMessage msg; + FillRequiredFields(msg); + FillOptionalFields(msg); + FillRepeatedFields(msg); + DoPackUnpackTest(msg); + } + + Y_UNIT_TEST(TestSkipLeaf) { + TTestMessage msgFirst; + FillRequiredFields(msgFirst); + TTestMessage msgSecond; + FillRequiredFields(msgSecond); + FillOptionalFields(msgSecond); + + const ui32 msgFirstByteSize = Packer.MeasureLeaf(msgFirst); + const ui32 msgSecondByteSize = Packer.MeasureLeaf(msgSecond); + + Buffer.resize(msgFirstByteSize + msgSecondByteSize); + Packer.PackLeaf(Buffer.begin(), msgFirst, msgFirstByteSize); + Packer.PackLeaf(Buffer.begin() + msgFirstByteSize, msgSecond, msgSecondByteSize); + + TTestMessage checkMsg; + Packer.UnpackLeaf(Buffer.begin() + Packer.SkipLeaf(Buffer.begin()), checkMsg); + + UNIT_ASSERT_EQUAL(msgSecond, checkMsg); + } +} diff --git a/library/cpp/packers/ut/region_packer_ut.cpp b/library/cpp/packers/ut/region_packer_ut.cpp new file mode 100644 index 0000000000..0cb08ccf65 --- /dev/null +++ b/library/cpp/packers/ut/region_packer_ut.cpp @@ -0,0 +1,40 @@ +#include "region_packer.h" +#include <library/cpp/testing/unittest/registar.h> + +template <typename TValue> +void TestPacker() { + TValue values[] = {1, 2, 3, 42}; + TString buffer; + + TRegionPacker<TValue> p; + + using TValues = TArrayRef<TValue>; + TValues valueRegion = TValues(values, Y_ARRAY_SIZE(values)); + size_t sz = p.MeasureLeaf(valueRegion); + UNIT_ASSERT_VALUES_EQUAL(sz, 1 + sizeof(values)); + + buffer.resize(sz); + p.PackLeaf(buffer.begin(), valueRegion, sz); + UNIT_ASSERT_VALUES_EQUAL(buffer[0], 4); + + p.UnpackLeaf(buffer.data(), valueRegion); + UNIT_ASSERT_EQUAL(valueRegion.data(), (const TValue*)(buffer.begin() + 1)); + UNIT_ASSERT_EQUAL(valueRegion.size(), Y_ARRAY_SIZE(values)); + UNIT_ASSERT_EQUAL(0, memcmp(values, valueRegion.data(), sizeof(values))); +} + +Y_UNIT_TEST_SUITE(RegionPacker) { + Y_UNIT_TEST(Test0) { + TestPacker<char>(); + TestPacker<signed char>(); + TestPacker<unsigned char>(); + TestPacker<i8>(); + TestPacker<ui8>(); + TestPacker<i16>(); + TestPacker<ui16>(); + TestPacker<i32>(); + TestPacker<ui32>(); + TestPacker<i64>(); + TestPacker<ui64>(); + } +} diff --git a/library/cpp/packers/ut/test.proto b/library/cpp/packers/ut/test.proto new file mode 100644 index 0000000000..c872616bcc --- /dev/null +++ b/library/cpp/packers/ut/test.proto @@ -0,0 +1,11 @@ +package NProtoPackerTest; + +message TTestMessage { + required string RequiredString = 1; + optional string OptionalString = 2; + + required int32 RequiredInt32 = 3; + optional int32 OptionalInt32 = 4; + + repeated string RepeatedStrings = 5; +} diff --git a/library/cpp/packers/ut/ya.make b/library/cpp/packers/ut/ya.make new file mode 100644 index 0000000000..1c024ffd94 --- /dev/null +++ b/library/cpp/packers/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/packers) + +OWNER(velavokr) + +SRCS( + packers_ut.cpp + proto_packer_ut.cpp + region_packer_ut.cpp + test.proto +) + +END() diff --git a/library/cpp/packers/ya.make b/library/cpp/packers/ya.make new file mode 100644 index 0000000000..e1ec4972ed --- /dev/null +++ b/library/cpp/packers/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +OWNER(velavokr) + +SRCS( + packers.cpp + proto_packer.cpp + region_packer.cpp +) + +END() |