diff options
author | ironpeter <ironpeter@yandex-team.ru> | 2022-02-10 16:49:52 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:52 +0300 |
commit | edee5b99e1eec042f46725b89dcd81ea7e41d663 (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/compproto/huff.h | |
parent | ff97837ecc5972a00cb395483d8856566738375c (diff) | |
download | ydb-edee5b99e1eec042f46725b89dcd81ea7e41d663.tar.gz |
Restoring authorship annotation for <ironpeter@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/compproto/huff.h')
-rw-r--r-- | library/cpp/compproto/huff.h | 136 |
1 files changed, 68 insertions, 68 deletions
diff --git a/library/cpp/compproto/huff.h b/library/cpp/compproto/huff.h index da2b8ca4f6..fa5c139189 100644 --- a/library/cpp/compproto/huff.h +++ b/library/cpp/compproto/huff.h @@ -1,5 +1,5 @@ -#pragma once - +#pragma once + #include <util/system/defaults.h> #include <util/generic/yexception.h> #include <util/generic/ptr.h> @@ -9,8 +9,8 @@ #include <queue> -#include "compressor.h" - +#include "compressor.h" + namespace NCompProto { template <size_t CacheSize, typename TEntry> struct TCache { @@ -47,17 +47,17 @@ namespace NCompProto { , Start(start) , Bits(bits) { - } - + } + bool operator<(const TCode& code) const { return Probability < code.Probability; } - + bool operator>(const TCode& code) const { return Probability > code.Probability; } }; - + struct TAccum { struct TTable { TAutoPtr<TTable> Tables[16]; @@ -74,25 +74,25 @@ namespace NCompProto { for (auto& count : Counts) count = 0; } - + i64 GetCellCount(size_t i) { i64 count = Counts[i]; if (Tables[i].Get()) { for (size_t j = 0; j < 16; ++j) { count += Tables[i]->GetCellCount(j); } - } + } return count; - } - + } + i64 GetCount() { i64 count = 0; - for (size_t j = 0; j < 16; ++j) { + for (size_t j = 0; j < 16; ++j) { count += GetCellCount(j); - } + } return count; - } - + } + void GenerateFreqs(TVector<std::pair<i64, TCode>>& codes, int depth, int termDepth, ui32 code, i64 cnt) { if (depth == termDepth) { for (size_t i = 0; i < 16; ++i) { @@ -101,7 +101,7 @@ namespace NCompProto { Counts[i] = iCount; Tables[i].Reset(nullptr); } - + if (iCount > cnt || (termDepth == 0 && iCount > 0)) { std::pair<i64, TCode> codep; codep.first = iCount; @@ -113,32 +113,32 @@ namespace NCompProto { } } } - for (size_t i = 0; i < 16; ++i) { + for (size_t i = 0; i < 16; ++i) { if (Tables[i].Get()) { Tables[i]->GenerateFreqs(codes, depth + 4, termDepth, code + (i << (28 - depth)), cnt); - } - } - } + } + } + } }; - + TTable Root; int TableCount; i64 Total; ui64 Max; - + TAccum() { TableCount = 0; Total = 0; Max = 0; } - + void GenerateFreqs(TVector<std::pair<i64, TCode>>& codes, int mul) const { TTable root(Root); - + for (int i = 28; i > 0; i -= 4) { root.GenerateFreqs(codes, 0, i, 0, Total / mul); } - + i64 iCount = root.GetCount(); if (iCount == 0) return; @@ -154,10 +154,10 @@ namespace NCompProto { } codep.second.Bits = bits; codes.push_back(codep); - } - + } + TCache<256, i64*> Cache; - + void AddMap(ui32 value, i64 weight = 1) { ui32 index = Cache.Hash(value); if (Cache.CacheKey[index] == value) { @@ -177,22 +177,22 @@ namespace NCompProto { root->Counts[index2] += weight; return; } - } + } root = root->Tables[index2].Get(); - } + } Cache.CacheKey[index] = value; Cache.CacheVal[index] = &root->Counts[value & 0xf]; root->Counts[value & 0xf] += weight; - } - + } + void Add(ui32 value, i64 weight = 1) { Max = ::Max(Max, (ui64)value); Total += weight; AddMap(value, weight); }; - }; - + }; + struct THuffNode { i64 Weight; i64 Priority; @@ -206,7 +206,7 @@ namespace NCompProto { Nodes[0] = nullptr; Nodes[1] = nullptr; } - + void BuildPrefixes(ui32 depth, ui32 prefix) { if (Code) { Code->Prefix = prefix; @@ -216,44 +216,44 @@ namespace NCompProto { Nodes[0]->BuildPrefixes(depth + 1, prefix + (0UL << depth)); Nodes[1]->BuildPrefixes(depth + 1, prefix + (1UL << depth)); } - + i64 Iterate(size_t depth) const { if (Code) { return (depth + Code->Bits) * Code->Probability; } return Nodes[0]->Iterate(depth + 1) + Nodes[1]->Iterate(depth + 1); - } - + } + size_t Depth() const { if (Code) { return 0; } return Max(Nodes[0]->Depth(), Nodes[1]->Depth()) + 1; - } + } }; - + struct THLess { bool operator()(const THuffNode* a, const THuffNode* b) { if (a->Weight > b->Weight) return 1; if (a->Weight == b->Weight && a->Priority > b->Priority) return 1; - return 0; - } + return 0; + } }; - + inline i64 BuildHuff(TVector<TCode>& codes) { TVector<TSimpleSharedPtr<THuffNode>> hold; std::priority_queue<THuffNode*, TVector<THuffNode*>, THLess> nodes; i64 ret = 0; - + int priority = 0; for (size_t i = 0; i < codes.size(); ++i) { TSimpleSharedPtr<THuffNode> node(new THuffNode(codes[i].Probability, priority++, &codes[i])); hold.push_back(node); nodes.push(node.Get()); } - + while (nodes.size() > 1) { THuffNode* nodea = nodes.top(); nodes.pop(); @@ -265,27 +265,27 @@ namespace NCompProto { hold.push_back(node); nodes.push(node.Get()); } - + if (nodes.size()) { THuffNode* node = nodes.top(); node->BuildPrefixes(0, 0); ret = node->Iterate(0); } - + return ret; }; - + struct TCoderEntry { ui32 MinValue; ui16 Prefix; ui8 PrefixBits; ui8 AllBits; - + ui64 MaxValue() const { return MinValue + (1ULL << (AllBits - PrefixBits)); } }; - + inline i64 Analyze(const TAccum& acc, TVector<TCoderEntry>& retCodes) { i64 ret; for (int k = 256; k > 0; --k) { @@ -296,9 +296,9 @@ namespace NCompProto { for (size_t i = 0; i < pairs.size(); ++i) { codes.push_back(pairs[i].second); } - + StableSort(codes.begin(), codes.end(), std::greater<TCode>()); - + ret = BuildHuff(codes); bool valid = true; for (size_t i = 0; i < codes.size(); ++i) { @@ -313,17 +313,17 @@ namespace NCompProto { } if (valid) return ret; - } - + } + return ret; } - + struct TComparer { bool operator()(const TCoderEntry& e0, const TCoderEntry& e1) const { return e0.AllBits < e1.AllBits; - } + } }; - + struct TCoder { TVector<TCoderEntry> Entries; void Normalize() { @@ -355,14 +355,14 @@ namespace NCompProto { } }; } - + TCache<1024, TCoderEntry> Cache; - + ui64 RealCode(ui32 value, const TCoderEntry& entry, size_t& length) { length = entry.AllBits; return (ui64(value - entry.MinValue) << entry.PrefixBits) + entry.Prefix; } - + bool Empty() const { return Entries.empty(); } @@ -374,17 +374,17 @@ namespace NCompProto { id = ui8(i); return entry; } - } + } ythrow yexception() << "bad entry"; return Entries[0]; - } - + } + ui64 Code(ui32 entry, size_t& length) { ui32 index = Cache.Hash(entry); if (Cache.CacheKey[index] == entry) { ++Cache.Hits; - return RealCode(entry, Cache.CacheVal[index], length); - } + return RealCode(entry, Cache.CacheVal[index], length); + } ++Cache.Misses; for (size_t i = 0; i < Entries.size(); ++i) { if (entry >= Entries[i].MinValue && entry < Entries[i].MaxValue()) { @@ -396,7 +396,7 @@ namespace NCompProto { ythrow yexception() << "bad huff tree"; return 0; - } + } }; - + } |