diff options
author | Ruslan Kovalev <ruslan.a.kovalev@gmail.com> | 2022-02-10 16:46:44 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:44 +0300 |
commit | 59e19371de37995fcb36beb16cd6ec030af960bc (patch) | |
tree | fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/greedy_dict/gd_entry.h | |
parent | 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff) | |
download | ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz |
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_entry.h')
-rw-r--r-- | library/cpp/codecs/greedy_dict/gd_entry.h | 66 |
1 files changed, 33 insertions, 33 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_entry.h b/library/cpp/codecs/greedy_dict/gd_entry.h index 18b5be0e15..0362fd9f99 100644 --- a/library/cpp/codecs/greedy_dict/gd_entry.h +++ b/library/cpp/codecs/greedy_dict/gd_entry.h @@ -1,42 +1,42 @@ -#pragma once - -#include "gd_stats.h" - +#pragma once + +#include "gd_stats.h" + #include <library/cpp/containers/comptrie/comptrie.h> - -#include <util/generic/ptr.h> -#include <util/generic/strbuf.h> -#include <util/generic/vector.h> - -#include <util/memory/pool.h> - -namespace NGreedyDict { + +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/generic/vector.h> + +#include <util/memory/pool.h> + +namespace NGreedyDict { using TStringBufs = TVector<TStringBuf>; - + struct TEntry { static const i32 NoPrefix = -1; - + TStringBuf Str; - + i32 NearestPrefix = NoPrefix; ui32 Count = 0; ui32 Number = 0; float ModelP = 0; float Score = 0; - + TEntry(TStringBuf b = TStringBuf(), ui32 cnt = 0) : Str(b) , Count(cnt) { } - + bool HasPrefix() const { return NearestPrefix != NoPrefix; } ui32 Len() const { return Str.size(); } - + static bool StrLess(const TEntry& a, const TEntry& b) { return a.Str < b.Str; } @@ -47,20 +47,20 @@ namespace NGreedyDict { return a.Score > b.Score; } }; - + class TEntrySet: public TVector<TEntry>, TNonCopyable { TMemoryPool Pool{8112}; TCompactTrie<char, ui32, TAsIsPacker<ui32>> Trie; - + public: ui32 TotalCount = 0; - + void InitWithAlpha(); - + void Add(TStringBuf a) { push_back(TStringBuf(Pool.Append(a.data(), a.size()), a.size())); } - + void Add(TStringBuf a, TStringBuf b) { size_t sz = a.size() + b.size(); char* p = (char*)Pool.Allocate(sz); @@ -68,36 +68,36 @@ namespace NGreedyDict { memcpy(p + a.size(), b.data(), b.size()); push_back(TStringBuf(p, sz)); } - + TEntry& Get(ui32 idx) { return (*this)[idx]; } - + const TEntry& Get(ui32 idx) const { return (*this)[idx]; } - + void BuildHierarchy(); - + // longest prefix TEntry* FindPrefix(TStringBuf& str); - + const TEntry* FindPrefix(TStringBuf& str) const { return ((TEntrySet*)this)->FindPrefix(str); } - + const TEntry* FirstPrefix(const TEntry& e, TStringBuf& suff) { if (!e.HasPrefix()) return nullptr; - + const TEntry& p = Get(e.NearestPrefix); suff = e.Str; suff.Skip(p.Str.size()); return &p; } - + void SetModelP(); void SetScores(EEntryScore); }; - -} + +} |