about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/codecs/greedy_dict/gd_builder.h
diff options
context:
space:
mode:
author: Ruslan Kovalev <ruslan.a.kovalev@gmail.com> 2022-02-10 16:46:44 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:44 +0300
commit: 59e19371de37995fcb36beb16cd6ec030af960bc (patch)
tree: fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/greedy_dict/gd_builder.h
parent: 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff)
download: ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_builder.h')
-rw-r--r-- library/cpp/codecs/greedy_dict/gd_builder.h | 60
1 file changed, 30 insertions, 30 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_builder.h b/library/cpp/codecs/greedy_dict/gd_builder.h
index b8e9a5e37b..7f3cea88cb 100644
--- a/library/cpp/codecs/greedy_dict/gd_builder.h
+++ b/library/cpp/codecs/greedy_dict/gd_builder.h
@@ -1,94 +1,94 @@
-#pragma once
-
-#include "gd_entry.h"
-
-#include <util/generic/hash.h>
-#include <util/random/fast.h>
-
-namespace NGreedyDict {
+#pragma once
+
+#include "gd_entry.h"
+
+#include <util/generic/hash.h>
+#include <util/random/fast.h>
+
+namespace NGreedyDict {
// Tunable parameters for TDictBuilder. Every field has an in-class default,
// so a default-constructed TBuildSettings is immediately usable.
struct TBuildSettings {
// Selects the statistical test; the enum and its meaning are declared
// outside this file (presumably in gd_entry.h -- confirm).
EEntryStatTest StatTest = EST_SIMPLE_NORM;
// Selects the entry scoring function; enum declared outside this file.
EEntryScore Score = ES_LEN_SIMPLE;
-
+
// NOTE(review): presumably the minimum p-value (from StatTest) a candidate
// must reach to be accepted -- the consuming code is not visible here.
float MinPValue = 0.75;
// NOTE(review): presumably the minimum absolute occurrence count for a
// candidate -- confirm against the builder implementation.
ui32 MinAbsCount = 10;
ui32 GrowLimit = 10; // times of maxentries
// Defaults to off; NOTE(review): presumably enables diagnostic output
// during the build -- confirm.
bool Verbose = false;
};
-
+
// Builds a TEntrySet from a set of input strings according to TBuildSettings.
// Only the interface and a few inline helpers live in this header; Build(),
// RebuildCounts() and BuildNextGeneration() are defined elsewhere.
//
// Pairs of 32-bit values are packed into a single ui64 key by Compose() and
// unpacked by Prev()/Next(); IsCompound() tells packed keys apart from plain
// 32-bit values. The comments on those helpers assume ui32/ui64 are 32-/64-bit
// unsigned integers.
class TDictBuilder {
// Hash map from a ui64 key to a ui32 count, allocating through TPoolAllocator.
// NOTE(review): keys are presumably values produced by Compose() -- confirm.
using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>;
// NOTE(review): presumably (score, compound key) -- confirm in the .cpp.
using TCandidate = std::pair<float, ui64>;
using TCandidates = TVector<TCandidate>;
-
+
private:
// Random generator seeded with fixed literal constants, so every builder
// starts from the same seed values.
TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb};
// Copy of the input passed to SetInput().
TStringBufs Input;
-
+
// Owned entry set exposed via EntrySet() / ReleaseEntrySet().
// NOTE(review): the constructor does not assign it; presumably Build()
// populates it -- confirm before calling EntrySet().
THolder<TEntrySet> Current;
-
+
// Declared before CompoundCounts, so it is constructed first and destroyed
// after it. NOTE(review): presumably the pool backs the map's
// TPoolAllocator -- the wiring is not visible in this header.
TMemoryPool CompoundCountsPool;
THolder<TCompoundCounts> CompoundCounts;
-
+
TCandidates Candidates;
-
+
TBuildSettings Settings;
-
+
public:
// Creates a builder with the given settings (defaults if omitted) and a
// memory pool constructed with 8112 and the linear grow policy.
// Not marked explicit, so a TBuildSettings converts implicitly.
TDictBuilder(const TBuildSettings& s = TBuildSettings())
: CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
, Settings(s)
{
}
-
+
// Stores a copy of `in` as the builder input.
// NOTE(review): if TStringBufs holds non-owning string views, the
// referenced character data must outlive the builder's use -- confirm.
void SetInput(const TStringBufs& in) {
Input = in;
}
-
+
// Read-only access to the current settings.
const TBuildSettings& GetSettings() const {
return Settings;
}
-
+
// Mutable access to the current settings; changes apply in place.
TBuildSettings& GetSettings() {
return Settings;
}
-
+
// Replaces the current settings with a copy of `s`.
void SetSettings(const TBuildSettings& s) {
Settings = s;
}
-
+
// Returns the owned entry set. Dereferences Current without a null check,
// so Current must hold an object when this is called.
TEntrySet& EntrySet() {
return *Current;
}
-
+
// Const overload of EntrySet(); same precondition on Current.
const TEntrySet& EntrySet() const {
return *Current;
}
-
+
// Transfers ownership of the entry set to the caller by moving Current out.
// Current is left in a moved-from state, so EntrySet() must not be called
// afterwards until Current is assigned again.
THolder<TEntrySet> ReleaseEntrySet() {
return std::move(Current);
}
-
+
// Runs the build (defined elsewhere). Per the inline annotation the return
// value is the number of iterations performed.
// NOTE(review): presumably `maxentries` caps the dictionary size, `maxiters`
// caps the iteration count and `mindiff` is a convergence threshold on the
// per-generation diff size -- confirm in the implementation.
ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10);
-
+
public:
// Individual build steps, defined elsewhere and exposed publicly.
void RebuildCounts(ui32 maxcand, bool final);
ui32 /*diff size*/ BuildNextGeneration(ui32 maxent);
-
+
// True iff any of the upper 32 bits of `ent` is set, i.e. the value carries
// a non-zero "prev + 1" half as produced by Compose().
static bool IsCompound(ui64 ent) {
return ent & 0xFFFFFFFF00000000ULL;
}
-
+
// Lower 32 bits of `ent` (implicit narrowing from ui64 to ui32).
static ui32 Next(ui64 ent) {
return ent;
}
// Upper 32 bits of `ent` minus one, undoing the +1 applied by Compose().
// For a value whose upper half is zero the result wraps to 0xFFFFFFFF.
static ui32 Prev(ui64 ent) {
return (ent >> 32) - 1;
}
-
+
// Packs (prev, next) into one ui64: `prev + 1` in the upper 32 bits, `next`
// in the lower 32. The +1 keeps the upper half non-zero even for prev == 0,
// so IsCompound() holds for the result.
// NOTE(review): for prev == 0xFFFFFFFF the upper half shifts out to zero and
// the result is just `next` (not compound) -- callers presumably never pass
// that value; confirm.
static ui64 Compose(ui32 prev, ui32 next) {
return ((prev + 1ULL) << 32) | next;
}
};
-
-}
+
+}