summaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/pfor_codec.h
diff options
context:
space:
mode:
authorRuslan Kovalev <[email protected]>2022-02-10 16:46:45 +0300
committerDaniil Cherednik <[email protected]>2022-02-10 16:46:45 +0300
commit9123176b341b6f2658cff5132482b8237c1416c8 (patch)
tree49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/codecs/pfor_codec.h
parent59e19371de37995fcb36beb16cd6ec030af960bc (diff)
Restoring authorship annotation for Ruslan Kovalev <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/codecs/pfor_codec.h')
-rw-r--r--library/cpp/codecs/pfor_codec.h124
1 files changed, 62 insertions, 62 deletions
diff --git a/library/cpp/codecs/pfor_codec.h b/library/cpp/codecs/pfor_codec.h
index b0207512acb..d7d4bb8bf48 100644
--- a/library/cpp/codecs/pfor_codec.h
+++ b/library/cpp/codecs/pfor_codec.h
@@ -1,48 +1,48 @@
-#pragma once
-
-#include "codecs.h"
-
-#include "delta_codec.h"
-#include "tls_cache.h"
-
+#pragma once
+
+#include "codecs.h"
+
+#include "delta_codec.h"
+#include "tls_cache.h"
+
#include <library/cpp/bit_io/bitinput.h>
#include <library/cpp/bit_io/bitoutput.h>
#include <util/string/cast.h>
-
-namespace NCodecs {
+
+namespace NCodecs {
template <typename T, bool WithDelta = false>
class TPForCodec: public ICodec {
using TUnsigned = std::make_unsigned_t<T>;
typedef TDeltaCodec<TUnsigned> TDCodec;
-
+
typedef std::conditional_t<WithDelta, typename TDCodec::TDelta, T> TValue;
static_assert(std::is_unsigned<TValue>::value, "expect std:is_unsigned<TValue>::value");
-
+
static const ui64 BitsInT = sizeof(TUnsigned) * 8;
-
+
TDCodec DeltaCodec;
-
+
public:
static TStringBuf MyName();
-
+
TPForCodec() {
MyTraits.AssumesStructuredInput = true;
MyTraits.SizeOfInputElement = sizeof(T);
MyTraits.SizeOnDecodeMultiplier = sizeof(T);
}
-
+
TString GetName() const override {
return ToString(MyName());
}
-
+
ui8 Encode(TStringBuf s, TBuffer& b) const override {
b.Clear();
if (s.empty()) {
return 0;
}
-
+
b.Reserve(2 * s.size() + b.Size());
-
+
if (WithDelta) {
auto buffer = TBufferTlsCache::TlsInstance().Item();
TBuffer& db = buffer.Get();
@@ -51,50 +51,50 @@ namespace NCodecs {
DeltaCodec.Encode(s, db);
s = TStringBuf{db.data(), db.size()};
}
-
+
TArrayRef<const TValue> tin{(const TValue*)s.data(), s.size() / sizeof(TValue)};
-
+
const ui64 sz = tin.size();
ui64 bitcounts[BitsInT + 1];
Zero(bitcounts);
-
+
ui32 zeros = 0;
-
+
for (const TValue* it = tin.begin(); it != tin.end(); ++it) {
TUnsigned v = 1 + (TUnsigned)*it;
ui64 l = MostSignificantBit(v) + 1;
++bitcounts[l];
-
+
if (!v) {
++zeros;
}
}
-
+
// cumulative bit counts
for (ui64 i = 0; i < BitsInT; ++i) {
bitcounts[i + 1] += bitcounts[i];
- }
-
+ }
+
bool hasexceptions = zeros;
ui64 optimalbits = BitsInT;
-
+
{
ui64 excsize = 0;
ui64 minsize = sz * BitsInT;
-
+
for (ui64 current = BitsInT; current; --current) {
ui64 size = bitcounts[current] * current + (sz - bitcounts[current]) * (current + 6 + excsize) + zeros * (current + 6);
-
+
excsize += current * bitcounts[current];
-
+
if (size < minsize) {
minsize = size;
optimalbits = current;
hasexceptions = zeros || sz - bitcounts[current];
}
- }
- }
-
+ }
+ }
+
if (!optimalbits || BitsInT == optimalbits) {
b.Append((ui8)-1);
b.Append(s.data(), s.size());
@@ -104,7 +104,7 @@ namespace NCodecs {
bout.Write(0, 1);
bout.Write(hasexceptions, 1);
bout.Write(optimalbits, 6);
-
+
for (const TValue* it = tin.begin(); it != tin.end(); ++it) {
TUnsigned word = 1 + (TUnsigned)*it;
ui64 len = MostSignificantBit(word) + 1;
@@ -116,29 +116,29 @@ namespace NCodecs {
} else {
bout.Write(word, optimalbits);
}
- }
-
+ }
+
return bout.GetByteReminder();
} // the rest of the last byte is zero padded. BitsInT is always > 7.
- }
-
+ }
+
void Decode(TStringBuf s, TBuffer& b) const override {
b.Clear();
if (s.empty()) {
return;
}
-
+
b.Reserve(s.size() * sizeof(T) + b.Size());
-
+
ui64 isplain = 0;
ui64 hasexceptions = 0;
ui64 bits = 0;
-
+
NBitIO::TBitInput bin(s);
bin.ReadK<1>(isplain);
bin.ReadK<1>(hasexceptions);
bin.ReadK<6>(bits);
-
+
if (Y_UNLIKELY(isplain)) {
s.Skip(1);
@@ -147,17 +147,17 @@ namespace NCodecs {
} else {
b.Append(s.data(), s.size());
}
- } else {
+ } else {
typename TDCodec::TDecoder decoder;
-
+
if (hasexceptions) {
ui64 word = 0;
while (bin.Read(word, bits)) {
if (word || (bin.ReadK<6>(word) && bin.Read(word, word))) {
--word;
-
+
TValue t = word;
-
+
if (WithDelta) {
if (decoder.Decode(t)) {
TStringBuf r{(char*)&decoder.Result, sizeof(decoder.Result)};
@@ -166,46 +166,46 @@ namespace NCodecs {
} else {
TStringBuf r{(char*)&t, sizeof(t)};
b.Append(r.data(), r.size());
- }
- }
- }
+ }
+ }
+ }
} else {
ui64 word = 0;
T outarr[256 / sizeof(T)];
ui32 cnt = 0;
while (true) {
ui64 v = bin.Read(word, bits);
-
+
if ((!v) | (!word))
break;
-
+
--word;
TValue t = word;
-
+
if (WithDelta) {
if (decoder.Decode(t)) {
outarr[cnt++] = decoder.Result;
}
} else {
outarr[cnt++] = t;
- }
+ }
if (cnt == Y_ARRAY_SIZE(outarr)) {
b.Append((const char*)outarr, sizeof(outarr));
cnt = 0;
}
- }
-
+ }
+
if (cnt) {
b.Append((const char*)outarr, cnt * sizeof(T));
- }
- }
- }
- }
-
+ }
+ }
+ }
+ }
+
protected:
void DoLearn(ISequenceReader&) override {
}
};
-
-}
+
+}