aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/greedy_dict/gd_entry.cpp
diff options
context:
space:
mode:
author Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:15 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:15 +0300
commit 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
tree da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/codecs/greedy_dict/gd_entry.cpp
parent 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
download ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_entry.cpp')
-rw-r--r-- library/cpp/codecs/greedy_dict/gd_entry.cpp 126
1 files changed, 63 insertions, 63 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_entry.cpp b/library/cpp/codecs/greedy_dict/gd_entry.cpp
index 2c315c7f7c..0603a9fca8 100644
--- a/library/cpp/codecs/greedy_dict/gd_entry.cpp
+++ b/library/cpp/codecs/greedy_dict/gd_entry.cpp
@@ -5,94 +5,94 @@
#include <util/generic/singleton.h>
namespace NGreedyDict {
- class TAlphas {
- char Memory[512];
-
- public:
- TStringBufs Alphas;
-
- TAlphas() {
- for (ui32 i = 0; i < 256; ++i) {
- Memory[2 * i] = (char)i;
- Memory[2 * i + 1] = 0;
-
- Alphas.push_back(TStringBuf(&Memory[2 * i], 1));
- }
+ class TAlphas {
+ char Memory[512];
+
+ public:
+ TStringBufs Alphas;
+
+ TAlphas() {
+ for (ui32 i = 0; i < 256; ++i) {
+ Memory[2 * i] = (char)i;
+ Memory[2 * i + 1] = 0;
+
+ Alphas.push_back(TStringBuf(&Memory[2 * i], 1));
+ }
+ }
+ };
+
+ void TEntrySet::InitWithAlpha() {
+ Pool.ClearKeepFirstChunk();
+ const TStringBufs& a = Singleton<TAlphas>()->Alphas;
+ for (auto it : a) {
+ Add(it);
}
- };
-
- void TEntrySet::InitWithAlpha() {
- Pool.ClearKeepFirstChunk();
- const TStringBufs& a = Singleton<TAlphas>()->Alphas;
- for (auto it : a) {
- Add(it);
- }
- BuildHierarchy();
+ BuildHierarchy();
}
- void TEntrySet::BuildHierarchy() {
- Sort(begin(), end(), TEntry::StrLess);
+ void TEntrySet::BuildHierarchy() {
+ Sort(begin(), end(), TEntry::StrLess);
- TCompactTrieBuilder<char, ui32, TAsIsPacker<ui32>> builder(CTBF_PREFIX_GROUPED);
+ TCompactTrieBuilder<char, ui32, TAsIsPacker<ui32>> builder(CTBF_PREFIX_GROUPED);
- for (iterator it = begin(); it != end(); ++it) {
- it->Number = (it - begin());
- TStringBuf suff = it->Str;
- size_t len = 0;
- ui32 val = 0;
+ for (iterator it = begin(); it != end(); ++it) {
+ it->Number = (it - begin());
+ TStringBuf suff = it->Str;
+ size_t len = 0;
+ ui32 val = 0;
if (builder.FindLongestPrefix(suff.data(), suff.size(), &len, &val) && len) {
- it->NearestPrefix = val;
- }
+ it->NearestPrefix = val;
+ }
builder.Add(suff.data(), suff.size(), it->Number);
}
- TBufferOutput bout;
- builder.Save(bout);
- Trie.Init(TBlob::FromBuffer(bout.Buffer()));
+ TBufferOutput bout;
+ builder.Save(bout);
+ Trie.Init(TBlob::FromBuffer(bout.Buffer()));
}
- TEntry* TEntrySet::FindPrefix(TStringBuf& str) {
- size_t len = 0;
- ui32 off = 0;
+ TEntry* TEntrySet::FindPrefix(TStringBuf& str) {
+ size_t len = 0;
+ ui32 off = 0;
- if (!Trie.FindLongestPrefix(str, &len, &off)) {
- return nullptr;
- }
+ if (!Trie.FindLongestPrefix(str, &len, &off)) {
+ return nullptr;
+ }
- str.Skip(len);
- return &Get(off);
+ str.Skip(len);
+ return &Get(off);
}
- void TEntrySet::SetModelP() {
- for (iterator it = begin(); it != end(); ++it) {
- TEntry& e = *it;
+ void TEntrySet::SetModelP() {
+ for (iterator it = begin(); it != end(); ++it) {
+ TEntry& e = *it;
- if (!e.HasPrefix()) {
- e.ModelP = 0;
- continue;
- }
+ if (!e.HasPrefix()) {
+ e.ModelP = 0;
+ continue;
+ }
- TStringBuf suff = e.Str;
- const TEntry& p = Get(e.NearestPrefix);
- suff.Skip(p.Len());
+ TStringBuf suff = e.Str;
+ const TEntry& p = Get(e.NearestPrefix);
+ suff.Skip(p.Len());
- float modelp = float(p.Count + e.Count) / TotalCount;
+ float modelp = float(p.Count + e.Count) / TotalCount;
- while (!!suff) {
- TEntry* pp = FindPrefix(suff);
- modelp *= float(pp->Count + e.Count) / TotalCount;
- }
+ while (!!suff) {
+ TEntry* pp = FindPrefix(suff);
+ modelp *= float(pp->Count + e.Count) / TotalCount;
+ }
- e.ModelP = modelp;
+ e.ModelP = modelp;
}
}
- void TEntrySet::SetScores(EEntryScore s) {
- for (auto& it : *this) {
- it.Score = Score(s, it.Len(), it.ModelP, it.Count, TotalCount);
- }
+ void TEntrySet::SetScores(EEntryScore s) {
+ for (auto& it : *this) {
+ it.Score = Score(s, it.Len(), it.ModelP, it.Count, TotalCount);
+ }
}
}