diff options
author | Ruslan Kovalev <ruslan.a.kovalev@gmail.com> | 2022-02-10 16:46:44 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:44 +0300 |
commit | 59e19371de37995fcb36beb16cd6ec030af960bc (patch) | |
tree | fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/greedy_dict/gd_builder.h | |
parent | 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff) | |
download | ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz |
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_builder.h')
-rw-r--r-- | library/cpp/codecs/greedy_dict/gd_builder.h | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_builder.h b/library/cpp/codecs/greedy_dict/gd_builder.h
index b8e9a5e37b..7f3cea88cb 100644
--- a/library/cpp/codecs/greedy_dict/gd_builder.h
+++ b/library/cpp/codecs/greedy_dict/gd_builder.h
@@ -1,94 +1,94 @@
-#pragma once
-
-#include "gd_entry.h"
-
-#include <util/generic/hash.h>
-#include <util/random/fast.h>
-
-namespace NGreedyDict {
+#pragma once
+
+#include "gd_entry.h"
+
+#include <util/generic/hash.h>
+#include <util/random/fast.h>
+
+namespace NGreedyDict {
     struct TBuildSettings {
         EEntryStatTest StatTest = EST_SIMPLE_NORM;
         EEntryScore Score = ES_LEN_SIMPLE;
-
+
         float MinPValue = 0.75;
         ui32 MinAbsCount = 10;
         ui32 GrowLimit = 10; // times of maxentries
         bool Verbose = false;
     };
-
+
     class TDictBuilder {
         using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>;
         using TCandidate = std::pair<float, ui64>;
         using TCandidates = TVector<TCandidate>;
-
+
     private:
         TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb};
         TStringBufs Input;
-
+
         THolder<TEntrySet> Current;
-
+
         TMemoryPool CompoundCountsPool;
         THolder<TCompoundCounts> CompoundCounts;
-
+
         TCandidates Candidates;
-
+
         TBuildSettings Settings;
-
+
     public:
         TDictBuilder(const TBuildSettings& s = TBuildSettings())
             : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
             , Settings(s)
         {
         }
-
+
         void SetInput(const TStringBufs& in) {
             Input = in;
         }
-
+
         const TBuildSettings& GetSettings() const {
             return Settings;
         }
-
+
         TBuildSettings& GetSettings() {
             return Settings;
         }
-
+
         void SetSettings(const TBuildSettings& s) {
             Settings = s;
         }
-
+
         TEntrySet& EntrySet() {
             return *Current;
         }
-
+
         const TEntrySet& EntrySet() const {
             return *Current;
         }
-
+
         THolder<TEntrySet> ReleaseEntrySet() {
             return std::move(Current);
         }
-
+
         ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10);
-
+
     public:
         void RebuildCounts(ui32 maxcand, bool final);
         ui32 /*diff size*/ BuildNextGeneration(ui32 maxent);
-
+
         static bool IsCompound(ui64 ent) {
             return ent & 0xFFFFFFFF00000000ULL;
         }
-
+
         static ui32 Next(ui64 ent) {
             return ent;
         }
         static ui32 Prev(ui64 ent) {
             return (ent >> 32) - 1;
         }
-
+
         static ui64 Compose(ui32 prev, ui32 next) {
             return ((prev + 1ULL) << 32) | next;
         }
     };
-
-}
+
+}