diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/codecs/greedy_dict/gd_builder.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict/gd_builder.h')
-rw-r--r-- | library/cpp/codecs/greedy_dict/gd_builder.h | 168 |
1 files changed, 84 insertions, 84 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_builder.h b/library/cpp/codecs/greedy_dict/gd_builder.h index b8e9a5e37b..ab0057e1ca 100644 --- a/library/cpp/codecs/greedy_dict/gd_builder.h +++ b/library/cpp/codecs/greedy_dict/gd_builder.h @@ -6,89 +6,89 @@ #include <util/random/fast.h> namespace NGreedyDict { - struct TBuildSettings { - EEntryStatTest StatTest = EST_SIMPLE_NORM; - EEntryScore Score = ES_LEN_SIMPLE; - - float MinPValue = 0.75; - ui32 MinAbsCount = 10; - ui32 GrowLimit = 10; // times of maxentries - bool Verbose = false; - }; - - class TDictBuilder { - using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>; - using TCandidate = std::pair<float, ui64>; - using TCandidates = TVector<TCandidate>; - - private: - TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb}; - TStringBufs Input; - - THolder<TEntrySet> Current; - - TMemoryPool CompoundCountsPool; - THolder<TCompoundCounts> CompoundCounts; - - TCandidates Candidates; - - TBuildSettings Settings; - - public: - TDictBuilder(const TBuildSettings& s = TBuildSettings()) - : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance()) - , Settings(s) - { - } - - void SetInput(const TStringBufs& in) { - Input = in; - } - - const TBuildSettings& GetSettings() const { - return Settings; - } - - TBuildSettings& GetSettings() { - return Settings; - } - - void SetSettings(const TBuildSettings& s) { - Settings = s; - } - - TEntrySet& EntrySet() { - return *Current; - } - - const TEntrySet& EntrySet() const { - return *Current; - } - - THolder<TEntrySet> ReleaseEntrySet() { - return std::move(Current); - } - - ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10); - - public: - void RebuildCounts(ui32 maxcand, bool final); - ui32 /*diff size*/ BuildNextGeneration(ui32 maxent); - - static bool IsCompound(ui64 ent) { - return ent & 0xFFFFFFFF00000000ULL; - } - - static ui32 Next(ui64 ent) { - return ent; - } - static ui32 Prev(ui64 ent) { - return (ent >> 32) - 1; - } - - static ui64 Compose(ui32 prev, ui32 next) { - return ((prev + 1ULL) << 32) | next; - } - }; + struct TBuildSettings { + EEntryStatTest StatTest = EST_SIMPLE_NORM; + EEntryScore Score = ES_LEN_SIMPLE; + + float MinPValue = 0.75; + ui32 MinAbsCount = 10; + ui32 GrowLimit = 10; // times of maxentries + bool Verbose = false; + }; + + class TDictBuilder { + using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>; + using TCandidate = std::pair<float, ui64>; + using TCandidates = TVector<TCandidate>; + + private: + TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb}; + TStringBufs Input; + + THolder<TEntrySet> Current; + + TMemoryPool CompoundCountsPool; + THolder<TCompoundCounts> CompoundCounts; + + TCandidates Candidates; + + TBuildSettings Settings; + + public: + TDictBuilder(const TBuildSettings& s = TBuildSettings()) + : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance()) + , Settings(s) + { + } + + void SetInput(const TStringBufs& in) { + Input = in; + } + + const TBuildSettings& GetSettings() const { + return Settings; + } + + TBuildSettings& GetSettings() { + return Settings; + } + + void SetSettings(const TBuildSettings& s) { + Settings = s; + } + + TEntrySet& EntrySet() { + return *Current; + } + + const TEntrySet& EntrySet() const { + return *Current; + } + + THolder<TEntrySet> ReleaseEntrySet() { + return std::move(Current); + } + + ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10); + + public: + void RebuildCounts(ui32 maxcand, bool final); + ui32 /*diff size*/ BuildNextGeneration(ui32 maxent); + + static bool IsCompound(ui64 ent) { + return ent & 0xFFFFFFFF00000000ULL; + } + + static ui32 Next(ui64 ent) { + return ent; + } + static ui32 Prev(ui64 ent) { + return (ent >> 32) - 1; + } + + static ui64 Compose(ui32 prev, ui32 next) { + return ((prev + 1ULL) << 32) | next; + } + }; } |