aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/compproto/huff.h
diff options
context:
space:
mode:
authorironpeter <ironpeter@yandex-team.ru>2022-02-10 16:49:52 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:49:52 +0300
commitedee5b99e1eec042f46725b89dcd81ea7e41d663 (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/compproto/huff.h
parentff97837ecc5972a00cb395483d8856566738375c (diff)
downloadydb-edee5b99e1eec042f46725b89dcd81ea7e41d663.tar.gz
Restoring authorship annotation for <ironpeter@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/compproto/huff.h')
-rw-r--r--library/cpp/compproto/huff.h136
1 files changed, 68 insertions, 68 deletions
diff --git a/library/cpp/compproto/huff.h b/library/cpp/compproto/huff.h
index da2b8ca4f6..fa5c139189 100644
--- a/library/cpp/compproto/huff.h
+++ b/library/cpp/compproto/huff.h
@@ -1,5 +1,5 @@
-#pragma once
-
+#pragma once
+
#include <util/system/defaults.h>
#include <util/generic/yexception.h>
#include <util/generic/ptr.h>
@@ -9,8 +9,8 @@
#include <queue>
-#include "compressor.h"
-
+#include "compressor.h"
+
namespace NCompProto {
template <size_t CacheSize, typename TEntry>
struct TCache {
@@ -47,17 +47,17 @@ namespace NCompProto {
, Start(start)
, Bits(bits)
{
- }
-
+ }
+
bool operator<(const TCode& code) const {
return Probability < code.Probability;
}
-
+
bool operator>(const TCode& code) const {
return Probability > code.Probability;
}
};
-
+
struct TAccum {
struct TTable {
TAutoPtr<TTable> Tables[16];
@@ -74,25 +74,25 @@ namespace NCompProto {
for (auto& count : Counts)
count = 0;
}
-
+
i64 GetCellCount(size_t i) {
i64 count = Counts[i];
if (Tables[i].Get()) {
for (size_t j = 0; j < 16; ++j) {
count += Tables[i]->GetCellCount(j);
}
- }
+ }
return count;
- }
-
+ }
+
i64 GetCount() {
i64 count = 0;
- for (size_t j = 0; j < 16; ++j) {
+ for (size_t j = 0; j < 16; ++j) {
count += GetCellCount(j);
- }
+ }
return count;
- }
-
+ }
+
void GenerateFreqs(TVector<std::pair<i64, TCode>>& codes, int depth, int termDepth, ui32 code, i64 cnt) {
if (depth == termDepth) {
for (size_t i = 0; i < 16; ++i) {
@@ -101,7 +101,7 @@ namespace NCompProto {
Counts[i] = iCount;
Tables[i].Reset(nullptr);
}
-
+
if (iCount > cnt || (termDepth == 0 && iCount > 0)) {
std::pair<i64, TCode> codep;
codep.first = iCount;
@@ -113,32 +113,32 @@ namespace NCompProto {
}
}
}
- for (size_t i = 0; i < 16; ++i) {
+ for (size_t i = 0; i < 16; ++i) {
if (Tables[i].Get()) {
Tables[i]->GenerateFreqs(codes, depth + 4, termDepth, code + (i << (28 - depth)), cnt);
- }
- }
- }
+ }
+ }
+ }
};
-
+
TTable Root;
int TableCount;
i64 Total;
ui64 Max;
-
+
TAccum() {
TableCount = 0;
Total = 0;
Max = 0;
}
-
+
void GenerateFreqs(TVector<std::pair<i64, TCode>>& codes, int mul) const {
TTable root(Root);
-
+
for (int i = 28; i > 0; i -= 4) {
root.GenerateFreqs(codes, 0, i, 0, Total / mul);
}
-
+
i64 iCount = root.GetCount();
if (iCount == 0)
return;
@@ -154,10 +154,10 @@ namespace NCompProto {
}
codep.second.Bits = bits;
codes.push_back(codep);
- }
-
+ }
+
TCache<256, i64*> Cache;
-
+
void AddMap(ui32 value, i64 weight = 1) {
ui32 index = Cache.Hash(value);
if (Cache.CacheKey[index] == value) {
@@ -177,22 +177,22 @@ namespace NCompProto {
root->Counts[index2] += weight;
return;
}
- }
+ }
root = root->Tables[index2].Get();
- }
+ }
Cache.CacheKey[index] = value;
Cache.CacheVal[index] = &root->Counts[value & 0xf];
root->Counts[value & 0xf] += weight;
- }
-
+ }
+
void Add(ui32 value, i64 weight = 1) {
Max = ::Max(Max, (ui64)value);
Total += weight;
AddMap(value, weight);
};
- };
-
+ };
+
struct THuffNode {
i64 Weight;
i64 Priority;
@@ -206,7 +206,7 @@ namespace NCompProto {
Nodes[0] = nullptr;
Nodes[1] = nullptr;
}
-
+
void BuildPrefixes(ui32 depth, ui32 prefix) {
if (Code) {
Code->Prefix = prefix;
@@ -216,44 +216,44 @@ namespace NCompProto {
Nodes[0]->BuildPrefixes(depth + 1, prefix + (0UL << depth));
Nodes[1]->BuildPrefixes(depth + 1, prefix + (1UL << depth));
}
-
+
i64 Iterate(size_t depth) const {
if (Code) {
return (depth + Code->Bits) * Code->Probability;
}
return Nodes[0]->Iterate(depth + 1) + Nodes[1]->Iterate(depth + 1);
- }
-
+ }
+
size_t Depth() const {
if (Code) {
return 0;
}
return Max(Nodes[0]->Depth(), Nodes[1]->Depth()) + 1;
- }
+ }
};
-
+
struct THLess {
bool operator()(const THuffNode* a, const THuffNode* b) {
if (a->Weight > b->Weight)
return 1;
if (a->Weight == b->Weight && a->Priority > b->Priority)
return 1;
- return 0;
- }
+ return 0;
+ }
};
-
+
inline i64 BuildHuff(TVector<TCode>& codes) {
TVector<TSimpleSharedPtr<THuffNode>> hold;
std::priority_queue<THuffNode*, TVector<THuffNode*>, THLess> nodes;
i64 ret = 0;
-
+
int priority = 0;
for (size_t i = 0; i < codes.size(); ++i) {
TSimpleSharedPtr<THuffNode> node(new THuffNode(codes[i].Probability, priority++, &codes[i]));
hold.push_back(node);
nodes.push(node.Get());
}
-
+
while (nodes.size() > 1) {
THuffNode* nodea = nodes.top();
nodes.pop();
@@ -265,27 +265,27 @@ namespace NCompProto {
hold.push_back(node);
nodes.push(node.Get());
}
-
+
if (nodes.size()) {
THuffNode* node = nodes.top();
node->BuildPrefixes(0, 0);
ret = node->Iterate(0);
}
-
+
return ret;
};
-
+
struct TCoderEntry {
ui32 MinValue;
ui16 Prefix;
ui8 PrefixBits;
ui8 AllBits;
-
+
ui64 MaxValue() const {
return MinValue + (1ULL << (AllBits - PrefixBits));
}
};
-
+
inline i64 Analyze(const TAccum& acc, TVector<TCoderEntry>& retCodes) {
i64 ret;
for (int k = 256; k > 0; --k) {
@@ -296,9 +296,9 @@ namespace NCompProto {
for (size_t i = 0; i < pairs.size(); ++i) {
codes.push_back(pairs[i].second);
}
-
+
StableSort(codes.begin(), codes.end(), std::greater<TCode>());
-
+
ret = BuildHuff(codes);
bool valid = true;
for (size_t i = 0; i < codes.size(); ++i) {
@@ -313,17 +313,17 @@ namespace NCompProto {
}
if (valid)
return ret;
- }
-
+ }
+
return ret;
}
-
+
struct TComparer {
bool operator()(const TCoderEntry& e0, const TCoderEntry& e1) const {
return e0.AllBits < e1.AllBits;
- }
+ }
};
-
+
struct TCoder {
TVector<TCoderEntry> Entries;
void Normalize() {
@@ -355,14 +355,14 @@ namespace NCompProto {
}
};
}
-
+
TCache<1024, TCoderEntry> Cache;
-
+
ui64 RealCode(ui32 value, const TCoderEntry& entry, size_t& length) {
length = entry.AllBits;
return (ui64(value - entry.MinValue) << entry.PrefixBits) + entry.Prefix;
}
-
+
bool Empty() const {
return Entries.empty();
}
@@ -374,17 +374,17 @@ namespace NCompProto {
id = ui8(i);
return entry;
}
- }
+ }
ythrow yexception() << "bad entry";
return Entries[0];
- }
-
+ }
+
ui64 Code(ui32 entry, size_t& length) {
ui32 index = Cache.Hash(entry);
if (Cache.CacheKey[index] == entry) {
++Cache.Hits;
- return RealCode(entry, Cache.CacheVal[index], length);
- }
+ return RealCode(entry, Cache.CacheVal[index], length);
+ }
++Cache.Misses;
for (size_t i = 0; i < Entries.size(); ++i) {
if (entry >= Entries[i].MinValue && entry < Entries[i].MaxValue()) {
@@ -396,7 +396,7 @@ namespace NCompProto {
ythrow yexception() << "bad huff tree";
return 0;
- }
+ }
};
-
+
}