aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/greedy_dict/gd_entry.h
diff options
context:
space:
mode:
author: Ruslan Kovalev <ruslan.a.kovalev@gmail.com>, 2022-02-10 16:46:44 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru>, 2022-02-10 16:46:44 +0300
commit: 59e19371de37995fcb36beb16cd6ec030af960bc (patch)
tree: fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/greedy_dict/gd_entry.h
parent: 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff)
download: ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_entry.h')
-rw-r--r-- library/cpp/codecs/greedy_dict/gd_entry.h 66
1 files changed, 33 insertions, 33 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_entry.h b/library/cpp/codecs/greedy_dict/gd_entry.h
index 18b5be0e15..0362fd9f99 100644
--- a/library/cpp/codecs/greedy_dict/gd_entry.h
+++ b/library/cpp/codecs/greedy_dict/gd_entry.h
@@ -1,42 +1,42 @@
-#pragma once
-
-#include "gd_stats.h"
-
+#pragma once
+
+#include "gd_stats.h"
+
#include <library/cpp/containers/comptrie/comptrie.h>
-
-#include <util/generic/ptr.h>
-#include <util/generic/strbuf.h>
-#include <util/generic/vector.h>
-
-#include <util/memory/pool.h>
-
-namespace NGreedyDict {
+
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+
+#include <util/memory/pool.h>
+
+namespace NGreedyDict {
using TStringBufs = TVector<TStringBuf>;
-
+
struct TEntry {
static const i32 NoPrefix = -1;
-
+
TStringBuf Str;
-
+
// Index handed to TEntrySet::Get by FirstPrefix; holds NoPrefix when unset.
i32 NearestPrefix = NoPrefix;
// Initialised from the constructor's `cnt` argument.
ui32 Count = 0;
ui32 Number = 0;
// NOTE(review): presumably filled in by TEntrySet::SetModelP — confirm.
float ModelP = 0;
// NOTE(review): presumably filled in by TEntrySet::SetScores — confirm.
float Score = 0;
-
+
TEntry(TStringBuf b = TStringBuf(), ui32 cnt = 0)
: Str(b)
, Count(cnt)
{
}
-
+
// True when NearestPrefix holds anything other than the NoPrefix sentinel.
bool HasPrefix() const {
    const bool unset = (NearestPrefix == NoPrefix);
    return !unset;
}
// Size of Str, converted to ui32.
ui32 Len() const {
    const auto bytes = Str.size();
    return static_cast<ui32>(bytes);
}
-
+
static bool StrLess(const TEntry& a, const TEntry& b) {
return a.Str < b.Str;
}
@@ -47,20 +47,20 @@ namespace NGreedyDict {
return a.Score > b.Score;
}
};
-
+
class TEntrySet: public TVector<TEntry>, TNonCopyable {
TMemoryPool Pool{8112};
TCompactTrie<char, ui32, TAsIsPacker<ui32>> Trie;
-
+
public:
ui32 TotalCount = 0;
-
+
void InitWithAlpha();
-
+
// Hands the bytes of `a` to Pool.Append and appends an entry that views the
// buffer returned by the pool (not the caller's buffer).
void Add(TStringBuf a) {
    const char* const stored = Pool.Append(a.data(), a.size());
    const TStringBuf pooled(stored, a.size());
    push_back(pooled);
}
-
+
void Add(TStringBuf a, TStringBuf b) {
size_t sz = a.size() + b.size();
char* p = (char*)Pool.Allocate(sz);
@@ -68,36 +68,36 @@ namespace NGreedyDict {
memcpy(p + a.size(), b.data(), b.size());
push_back(TStringBuf(p, sz));
}
-
+
// Mutable access to the entry at position `idx`; forwards to the inherited
// vector operator[].
TEntry& Get(ui32 idx) {
    return this->operator[](idx);
}
-
+
// Read-only access to the entry at position `idx`; forwards to the inherited
// vector operator[].
const TEntry& Get(ui32 idx) const {
    return this->operator[](idx);
}
-
+
void BuildHierarchy();
-
+
// longest prefix
TEntry* FindPrefix(TStringBuf& str);
-
+
// Const overload of the longest-prefix lookup: forwards to the non-const
// FindPrefix and exposes its result as a pointer-to-const.
// The constness is removed with an explicit const_cast (instead of a C-style
// cast) so the intent is visible and greppable.
// NOTE(review): this is only well-defined for a const TEntrySet if the
// non-const overload does not modify the set — confirm against its definition.
const TEntry* FindPrefix(TStringBuf& str) const {
    return const_cast<TEntrySet*>(this)->FindPrefix(str);
}
-
+
// Returns the entry stored at index e.NearestPrefix, or nullptr when
// e.HasPrefix() is false. On success `suff` is assigned e.Str and then
// advanced with Skip(p.Str.size()); when nullptr is returned `suff` is
// left untouched.
const TEntry* FirstPrefix(const TEntry& e, TStringBuf& suff) {
if (!e.HasPrefix())
return nullptr;
-
+
const TEntry& p = Get(e.NearestPrefix);
suff = e.Str;
suff.Skip(p.Str.size());
return &p;
}
-
+
void SetModelP();
void SetScores(EEntryScore);
};
-
-}
+
+}