aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/delta_codec.h
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/codecs/delta_codec.h
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/codecs/delta_codec.h')
-rw-r--r--library/cpp/codecs/delta_codec.h143
1 files changed, 143 insertions, 0 deletions
diff --git a/library/cpp/codecs/delta_codec.h b/library/cpp/codecs/delta_codec.h
new file mode 100644
index 00000000000..21325825e6a
--- /dev/null
+++ b/library/cpp/codecs/delta_codec.h
@@ -0,0 +1,143 @@
+#pragma once
+
+#include "codecs.h"
+
+#include <util/generic/array_ref.h>
+#include <util/generic/typetraits.h>
+#include <util/generic/bitops.h>
+#include <util/string/cast.h>
+
+namespace NCodecs {
+ template <typename T = ui64, bool UnsignedDelta = true>
+ class TDeltaCodec: public ICodec {
+ static_assert(std::is_integral<T>::value, "expect std::is_integral<T>::value");
+
+ public:
+ using TUnsigned = std::make_unsigned_t<T>;
+ using TSigned = std::make_signed_t<T>;
+ using TDelta = std::conditional_t<UnsignedDelta, TUnsigned, TSigned>;
+
+ private:
+ const TDelta MinDelta{Min<TDelta>()};
+ const TDelta MaxDelta{Max<TDelta>() - 1};
+ const TDelta InvalidDelta{MaxDelta + 1};
+
+ Y_FORCE_INLINE static TDelta AddSafe(TUnsigned a, TUnsigned b) {
+ return a + b;
+ }
+
+ Y_FORCE_INLINE static TDelta SubSafe(TUnsigned a, TUnsigned b) {
+ return a - b;
+ }
+
+ public:
+ struct TDecoder {
+ const TDelta InvalidDelta{Max<TDelta>()};
+
+ T Last = 0;
+ T Result = 0;
+
+ bool First = true;
+ bool Invalid = false;
+
+ Y_FORCE_INLINE bool Decode(TDelta t) {
+ if (Y_UNLIKELY(First)) {
+ First = false;
+ Result = Last = t;
+ return true;
+ }
+
+ if (Y_UNLIKELY(Invalid)) {
+ Invalid = false;
+ Last = 0;
+ Result = t;
+ return true;
+ }
+
+ Result = (Last += t);
+ Invalid = t == InvalidDelta;
+
+ return !Invalid;
+ }
+ };
+
+ public:
+ static TStringBuf MyName();
+
+ TDeltaCodec() {
+ MyTraits.SizeOfInputElement = sizeof(T);
+ MyTraits.AssumesStructuredInput = true;
+ }
+
+ TString GetName() const override {
+ return ToString(MyName());
+ }
+
+ template <class TItem>
+ static void AppendTo(TBuffer& b, TItem t) {
+ b.Append((char*)&t, sizeof(t));
+ }
+
+ ui8 Encode(TStringBuf s, TBuffer& b) const override {
+ b.Clear();
+ if (s.empty()) {
+ return 0;
+ }
+
+ b.Reserve(s.size());
+ TArrayRef<const T> tin{(const T*)s.data(), s.size() / sizeof(T)};
+
+ const T* it = tin.begin();
+ TDelta last = *(it++);
+ AppendTo(b, last);
+
+ TDelta maxt = SubSafe(MaxDelta, last);
+ TDelta mint = AddSafe(MinDelta, last);
+
+ for (; it != tin.end(); ++it) {
+ TDelta t = *it;
+
+ if (Y_LIKELY((t >= mint) & (t <= maxt))) {
+ AppendTo(b, t - last);
+ last = t;
+ maxt = SubSafe(MaxDelta, last);
+ mint = AddSafe(MinDelta, last);
+ } else {
+ // delta overflow
+ AppendTo(b, InvalidDelta);
+ AppendTo(b, t);
+ last = 0;
+ maxt = MaxDelta;
+ mint = MinDelta;
+ }
+ }
+
+ return 0;
+ }
+
+ void Decode(TStringBuf s, TBuffer& b) const override {
+ b.Clear();
+ if (s.empty()) {
+ return;
+ }
+
+ b.Reserve(s.size());
+ TArrayRef<const T> tin{(const T*)s.data(), s.size() / sizeof(T)};
+
+ TDecoder dec;
+
+ for (const T* it = tin.begin(); it != tin.end(); ++it) {
+ T tmp;
+ memcpy(&tmp, it, sizeof(tmp));
+ if (dec.Decode(tmp)) {
+ AppendTo(b, dec.Result);
+ }
+ }
+ }
+
+ protected:
+ void DoLearn(ISequenceReader&) override {
+ }
+ };
+
+}