diff options
author | alzobnin <alzobnin@yandex-team.com> | 2022-12-15 18:44:25 +0300 |
---|---|---|
committer | alzobnin <alzobnin@yandex-team.com> | 2022-12-15 18:44:25 +0300 |
commit | 6b780718b1af069992f4f7311c1cb753c8a68d05 (patch) | |
tree | b6904be74d2722cf575508e05be1bb81184785e3 /library/cpp/codecs/solar_codec.cpp | |
parent | feb341993178f4dc73afc5930dcb1442ec306bfd (diff) | |
download | ydb-6b780718b1af069992f4f7311c1cb753c8a68d05.tar.gz |
Restrict max length of learned prefixes and fix solar codec
Diffstat (limited to 'library/cpp/codecs/solar_codec.cpp')
-rw-r--r-- | library/cpp/codecs/solar_codec.cpp | 7 |
1 file changed, 5 insertions, 2 deletions
diff --git a/library/cpp/codecs/solar_codec.cpp b/library/cpp/codecs/solar_codec.cpp
index d0692fe2a4..916bbbd5d1 100644
--- a/library/cpp/codecs/solar_codec.cpp
+++ b/library/cpp/codecs/solar_codec.cpp
@@ -17,6 +17,8 @@ namespace NCodecs {
     void TSolarCodec::DoLearn(ISequenceReader& r) {
         using namespace NGreedyDict;
 
+        const ui32 maxlen = Max<ui32>() / Max<ui32>(MaxEntries, 1);
+
         Decoder.clear();
         Pool.Clear();
 
@@ -34,7 +36,7 @@ namespace NCodecs {
         {
             TDictBuilder b(Settings);
             b.SetInput(bufs);
-            b.Build(MaxEntries, MaxIterations);
+            b.Build(MaxEntries, MaxIterations, maxlen);
             set = b.ReleaseEntrySet();
         }
 
@@ -47,7 +49,8 @@ namespace NCodecs {
         tmp.reserve(set->size());
 
         for (const auto& it : *set) {
-            tmp.push_back(std::make_pair(-it.Score, TStringBuf(it.Str).Trunc(Max<ui32>() / Max<ui32>(MaxEntries, 1))));
+            Y_ENSURE(it.Str.Size() <= maxlen);
+            tmp.push_back(std::make_pair(-it.Score, it.Str));
         }
 
         Sort(tmp.begin(), tmp.end());