about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/codecs/greedy_dict/gd_builder.h
diff options
context:
space:
mode:
author Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:15 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:15 +0300
commit 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
tree da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/codecs/greedy_dict/gd_builder.h
parent 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
download ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_builder.h')
-rw-r--r-- library/cpp/codecs/greedy_dict/gd_builder.h 168
1 files changed, 84 insertions, 84 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_builder.h b/library/cpp/codecs/greedy_dict/gd_builder.h
index b8e9a5e37b..ab0057e1ca 100644
--- a/library/cpp/codecs/greedy_dict/gd_builder.h
+++ b/library/cpp/codecs/greedy_dict/gd_builder.h
@@ -6,89 +6,89 @@
#include <util/random/fast.h>
namespace NGreedyDict {
- struct TBuildSettings {
- EEntryStatTest StatTest = EST_SIMPLE_NORM;
- EEntryScore Score = ES_LEN_SIMPLE;
-
- float MinPValue = 0.75;
- ui32 MinAbsCount = 10;
- ui32 GrowLimit = 10; // times of maxentries
- bool Verbose = false;
- };
-
- class TDictBuilder {
- using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>;
- using TCandidate = std::pair<float, ui64>;
- using TCandidates = TVector<TCandidate>;
-
- private:
- TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb};
- TStringBufs Input;
-
- THolder<TEntrySet> Current;
-
- TMemoryPool CompoundCountsPool;
- THolder<TCompoundCounts> CompoundCounts;
-
- TCandidates Candidates;
-
- TBuildSettings Settings;
-
- public:
- TDictBuilder(const TBuildSettings& s = TBuildSettings())
- : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
- , Settings(s)
- {
- }
-
- void SetInput(const TStringBufs& in) {
- Input = in;
- }
-
- const TBuildSettings& GetSettings() const {
- return Settings;
- }
-
- TBuildSettings& GetSettings() {
- return Settings;
- }
-
- void SetSettings(const TBuildSettings& s) {
- Settings = s;
- }
-
- TEntrySet& EntrySet() {
- return *Current;
- }
-
- const TEntrySet& EntrySet() const {
- return *Current;
- }
-
- THolder<TEntrySet> ReleaseEntrySet() {
- return std::move(Current);
- }
-
- ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10);
-
- public:
- void RebuildCounts(ui32 maxcand, bool final);
- ui32 /*diff size*/ BuildNextGeneration(ui32 maxent);
-
- static bool IsCompound(ui64 ent) {
- return ent & 0xFFFFFFFF00000000ULL;
- }
-
- static ui32 Next(ui64 ent) {
- return ent;
- }
- static ui32 Prev(ui64 ent) {
- return (ent >> 32) - 1;
- }
-
- static ui64 Compose(ui32 prev, ui32 next) {
- return ((prev + 1ULL) << 32) | next;
- }
- };
+ struct TBuildSettings { // Settings bundle held by TDictBuilder; every field has an in-class default.
+ EEntryStatTest StatTest = EST_SIMPLE_NORM; // statistical-test selector (enum declared outside this header)
+ EEntryScore Score = ES_LEN_SIMPLE; // score selector (enum declared outside this header)
+
+ float MinPValue = 0.75; // NOTE(review): presumably a p-value threshold used with StatTest - confirm in the implementation
+ ui32 MinAbsCount = 10; // NOTE(review): presumably a minimum absolute occurrence count - confirm in the implementation
+ ui32 GrowLimit = 10; // limit expressed as a multiple of maxentries (how it is applied is not visible in this header)
+ bool Verbose = false; // NOTE(review): presumably enables diagnostic output - confirm in the implementation
+ };
+
+ class TDictBuilder { // Holds the input, settings and working state for producing a TEntrySet; Build() and helpers are defined out of line.
+ using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>; // ui64 -> ui32 hash map that uses TPoolAllocator
+ using TCandidate = std::pair<float, ui64>; // NOTE(review): presumably (score, entry key) - confirm in the implementation
+ using TCandidates = TVector<TCandidate>;
+
+ private:
+ TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb}; // constructed from hard-coded constants, not from a caller-supplied seed
+ TStringBufs Input; // assigned by SetInput()
+
+ THolder<TEntrySet> Current; // owned entry set; the constructor below does not assign it
+
+ TMemoryPool CompoundCountsPool; // constructed with (8112, TLinearGrow) in the constructor below
+ THolder<TCompoundCounts> CompoundCounts; // NOTE(review): presumably allocates from CompoundCountsPool - confirm in the implementation
+
+ TCandidates Candidates;
+
+ TBuildSettings Settings; // copied from the constructor argument; replaceable via SetSettings()/GetSettings()
+
+ public:
+ TDictBuilder(const TBuildSettings& s = TBuildSettings()) // not explicit: also usable as a default constructor and as a conversion from TBuildSettings
+ : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
+ , Settings(s)
+ {
+ }
+
+ void SetInput(const TStringBufs& in) { // replaces the stored input with a copy of `in`
+ Input = in;
+ }
+
+ const TBuildSettings& GetSettings() const { // read-only view of the current settings
+ return Settings;
+ }
+
+ TBuildSettings& GetSettings() { // mutable reference: callers can edit settings in place
+ return Settings;
+ }
+
+ void SetSettings(const TBuildSettings& s) { // overwrites all settings with a copy of `s`
+ Settings = s;
+ }
+
+ TEntrySet& EntrySet() { // dereferences Current with no null check; the caller must ensure an entry set exists
+ return *Current;
+ }
+
+ const TEntrySet& EntrySet() const { // const overload; same precondition as the non-const one
+ return *Current;
+ }
+
+ THolder<TEntrySet> ReleaseEntrySet() { // transfers ownership of Current to the caller; Current is left moved-from
+ return std::move(Current);
+ }
+
+ ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10); // declared only; the inline note says the return value is an iteration count
+
+ public:
+ void RebuildCounts(ui32 maxcand, bool final); // declared only; defined outside this header
+ ui32 /*diff size*/ BuildNextGeneration(ui32 maxent); // declared only; the inline note says the return value is a diff size
+
+ static bool IsCompound(ui64 ent) { // true iff any of the upper 32 bits of `ent` is set
+ return ent & 0xFFFFFFFF00000000ULL;
+ }
+
+ static ui32 Next(ui64 ent) { // lower 32 bits of `ent` (implicit narrowing to ui32)
+ return ent;
+ }
+ static ui32 Prev(ui64 ent) { // upper 32 bits minus 1, undoing the +1 applied by Compose(); a zero upper half wraps to 0xFFFFFFFF
+ return (ent >> 32) - 1;
+ }
+
+ static ui64 Compose(ui32 prev, ui32 next) { // packs prev + 1 into the upper 32 bits and `next` into the lower 32
+ return ((prev + 1ULL) << 32) | next; // the +1 makes the upper half non-zero so IsCompound() holds, except for prev == 0xFFFFFFFF, which shifts out to zero
+ }
+ };
}