From 6b780718b1af069992f4f7311c1cb753c8a68d05 Mon Sep 17 00:00:00 2001
From: alzobnin <alzobnin@yandex-team.com>
Date: Thu, 15 Dec 2022 18:44:25 +0300
Subject: Restrict max length of learned prefixes and fix solar codec

---
 library/cpp/codecs/greedy_dict/gd_builder.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'library/cpp/codecs/greedy_dict/gd_builder.cpp')

diff --git a/library/cpp/codecs/greedy_dict/gd_builder.cpp b/library/cpp/codecs/greedy_dict/gd_builder.cpp
index 844e07d5a0..33e104926e 100644
--- a/library/cpp/codecs/greedy_dict/gd_builder.cpp
+++ b/library/cpp/codecs/greedy_dict/gd_builder.cpp
@@ -53,7 +53,7 @@ namespace NGreedyDict {
         Current->SetModelP();
     }
 
-    ui32 TDictBuilder::BuildNextGeneration(ui32 maxent) {
+    ui32 TDictBuilder::BuildNextGeneration(ui32 maxent, ui32 maxlen) {
         TAutoPtr<TEntrySet> newset = new TEntrySet;
         newset->InitWithAlpha();
         maxent -= newset->size();
@@ -86,7 +86,7 @@ namespace NGreedyDict {
                     const TEntry& next = set.Get(Next(it->first));
                     float modelp = ModelP(prev.Count, next.Count, total);
                     ui32 cnt = it->second;
-                    if (cnt > mincnt && StatTest(test, modelp, cnt, total) > minpval)
+                    if (cnt > mincnt && StatTest(test, modelp, cnt, total) > minpval && prev.Len() + next.Len() <= maxlen)
                         Candidates.push_back(TCandidate(-Score(score, prev.Len() + next.Len(), modelp, cnt, total), it->first));
                 }
             }
@@ -113,7 +113,7 @@ namespace NGreedyDict {
         return deletions + additions;
     }
 
-    ui32 TDictBuilder::Build(ui32 maxentries, ui32 maxiters, ui32 mindiff) {
+    ui32 TDictBuilder::Build(ui32 maxentries, ui32 maxiters, ui32 maxlen, ui32 mindiff) {
         /* size_t totalsz = 0;
         for (auto it : Input)
             totalsz += it.size();*/
@@ -128,7 +128,7 @@ namespace NGreedyDict {
                 Clog << Sprintf("%-110s RSS=%" PRIu32 "M", mess.data(), (ui32)(TRusage::Get().MaxRss >> 20)) << Endl;
             }
 
-            ui32 diff = BuildNextGeneration(maxentries);
+            ui32 diff = BuildNextGeneration(maxentries, maxlen);
 
             if (Current->size() == maxentries && diff < mindiff)
                 break;
-- 
cgit v1.2.3