aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/greedy_dict
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:15 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:15 +0300
commit72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
treeda2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/codecs/greedy_dict
parent778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
downloadydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/greedy_dict')
-rw-r--r--library/cpp/codecs/greedy_dict/gd_builder.cpp190
-rw-r--r--library/cpp/codecs/greedy_dict/gd_builder.h168
-rw-r--r--library/cpp/codecs/greedy_dict/gd_entry.cpp126
-rw-r--r--library/cpp/codecs/greedy_dict/gd_entry.h126
-rw-r--r--library/cpp/codecs/greedy_dict/gd_stats.h116
-rw-r--r--library/cpp/codecs/greedy_dict/ut/greedy_dict_ut.cpp8
6 files changed, 367 insertions, 367 deletions
diff --git a/library/cpp/codecs/greedy_dict/gd_builder.cpp b/library/cpp/codecs/greedy_dict/gd_builder.cpp
index 561bfbca01..802c721753 100644
--- a/library/cpp/codecs/greedy_dict/gd_builder.cpp
+++ b/library/cpp/codecs/greedy_dict/gd_builder.cpp
@@ -9,134 +9,134 @@
#include <util/system/rusage.h>
namespace NGreedyDict {
- void TDictBuilder::RebuildCounts(ui32 maxcand, bool final) {
- if (!Current) {
+ void TDictBuilder::RebuildCounts(ui32 maxcand, bool final) {
+ if (!Current) {
Current = MakeHolder<TEntrySet>();
- Current->InitWithAlpha();
- }
+ Current->InitWithAlpha();
+ }
- TEntrySet& set = *Current;
+ TEntrySet& set = *Current;
- for (auto& it : set)
- it.Count = 0;
+ for (auto& it : set)
+ it.Count = 0;
- CompoundCounts = nullptr;
- CompoundCountsPool.Clear();
+ CompoundCounts = nullptr;
+ CompoundCountsPool.Clear();
- if (!final) {
+ if (!final) {
CompoundCounts = MakeHolder<TCompoundCounts>(&CompoundCountsPool);
- CompoundCounts->reserve(maxcand);
- }
+ CompoundCounts->reserve(maxcand);
+ }
- Shuffle(Input.begin(), Input.end(), Rng);
+ Shuffle(Input.begin(), Input.end(), Rng);
- for (auto str : Input) {
- if (!final && CompoundCounts->size() > maxcand)
- break;
+ for (auto str : Input) {
+ if (!final && CompoundCounts->size() > maxcand)
+ break;
- i32 prev = -1;
+ i32 prev = -1;
- while (!!str) {
- TEntry* e = set.FindPrefix(str);
- ui32 num = e->Number;
+ while (!!str) {
+ TEntry* e = set.FindPrefix(str);
+ ui32 num = e->Number;
- e->Count += 1;
- if (!final && prev >= 0) {
- (*CompoundCounts)[Compose(prev, num)] += 1;
- }
+ e->Count += 1;
+ if (!final && prev >= 0) {
+ (*CompoundCounts)[Compose(prev, num)] += 1;
+ }
- prev = num;
- ++set.TotalCount;
+ prev = num;
+ ++set.TotalCount;
}
- }
+ }
- Current->SetModelP();
+ Current->SetModelP();
}
- ui32 TDictBuilder::BuildNextGeneration(ui32 maxent) {
- TAutoPtr<TEntrySet> newset = new TEntrySet;
- newset->InitWithAlpha();
- maxent -= newset->size();
-
- ui32 additions = 0;
- ui32 deletions = 0;
-
- {
- const TEntrySet& set = *Current;
-
- Candidates.clear();
- const ui32 total = set.TotalCount;
- const float minpval = Settings.MinPValue;
- const EEntryStatTest test = Settings.StatTest;
- const EEntryScore score = Settings.Score;
- const ui32 mincnt = Settings.MinAbsCount;
-
- for (const auto& it : set) {
- const TEntry& e = it;
- float modelp = e.ModelP;
- ui32 cnt = e.Count;
-
- if (e.HasPrefix() && e.Count > mincnt && StatTest(test, modelp, cnt, total) > minpval)
- Candidates.push_back(TCandidate(-Score(score, e.Len(), modelp, cnt, total), it.Number));
- }
-
- if (!!CompoundCounts) {
- for (TCompoundCounts::const_iterator it = CompoundCounts->begin(); it != CompoundCounts->end(); ++it) {
- const TEntry& prev = set.Get(Prev(it->first));
- const TEntry& next = set.Get(Next(it->first));
- float modelp = ModelP(prev.Count, next.Count, total);
- ui32 cnt = it->second;
- if (cnt > mincnt && StatTest(test, modelp, cnt, total) > minpval)
- Candidates.push_back(TCandidate(-Score(score, prev.Len() + next.Len(), modelp, cnt, total), it->first));
- }
+ ui32 TDictBuilder::BuildNextGeneration(ui32 maxent) {
+ TAutoPtr<TEntrySet> newset = new TEntrySet;
+ newset->InitWithAlpha();
+ maxent -= newset->size();
+
+ ui32 additions = 0;
+ ui32 deletions = 0;
+
+ {
+ const TEntrySet& set = *Current;
+
+ Candidates.clear();
+ const ui32 total = set.TotalCount;
+ const float minpval = Settings.MinPValue;
+ const EEntryStatTest test = Settings.StatTest;
+ const EEntryScore score = Settings.Score;
+ const ui32 mincnt = Settings.MinAbsCount;
+
+ for (const auto& it : set) {
+ const TEntry& e = it;
+ float modelp = e.ModelP;
+ ui32 cnt = e.Count;
+
+ if (e.HasPrefix() && e.Count > mincnt && StatTest(test, modelp, cnt, total) > minpval)
+ Candidates.push_back(TCandidate(-Score(score, e.Len(), modelp, cnt, total), it.Number));
+ }
+
+ if (!!CompoundCounts) {
+ for (TCompoundCounts::const_iterator it = CompoundCounts->begin(); it != CompoundCounts->end(); ++it) {
+ const TEntry& prev = set.Get(Prev(it->first));
+ const TEntry& next = set.Get(Next(it->first));
+ float modelp = ModelP(prev.Count, next.Count, total);
+ ui32 cnt = it->second;
+ if (cnt > mincnt && StatTest(test, modelp, cnt, total) > minpval)
+ Candidates.push_back(TCandidate(-Score(score, prev.Len() + next.Len(), modelp, cnt, total), it->first));
+ }
}
- Sort(Candidates.begin(), Candidates.end());
+ Sort(Candidates.begin(), Candidates.end());
- if (Candidates.size() > maxent)
- Candidates.resize(maxent);
+ if (Candidates.size() > maxent)
+ Candidates.resize(maxent);
- for (const auto& candidate : Candidates) {
- if (IsCompound(candidate.second)) {
- additions++;
- newset->Add(set.Get(Prev(candidate.second)).Str, set.Get(Next(candidate.second)).Str);
- } else {
- newset->Add(set.Get(candidate.second).Str);
- }
+ for (const auto& candidate : Candidates) {
+ if (IsCompound(candidate.second)) {
+ additions++;
+ newset->Add(set.Get(Prev(candidate.second)).Str, set.Get(Next(candidate.second)).Str);
+ } else {
+ newset->Add(set.Get(candidate.second).Str);
+ }
}
-
- deletions = set.size() - (newset->size() - additions);
+
+ deletions = set.size() - (newset->size() - additions);
}
- Current = newset;
- Current->BuildHierarchy();
- return deletions + additions;
+ Current = newset;
+ Current->BuildHierarchy();
+ return deletions + additions;
}
- ui32 TDictBuilder::Build(ui32 maxentries, ui32 maxiters, ui32 mindiff) {
- size_t totalsz = 0;
- for (auto it : Input)
+ ui32 TDictBuilder::Build(ui32 maxentries, ui32 maxiters, ui32 mindiff) {
+ size_t totalsz = 0;
+ for (auto it : Input)
totalsz += it.size();
- while (maxiters) {
- maxiters--;
+ while (maxiters) {
+ maxiters--;
- RebuildCounts(maxentries * Settings.GrowLimit, false);
+ RebuildCounts(maxentries * Settings.GrowLimit, false);
- if (Settings.Verbose) {
- TString mess = Sprintf("iter:%" PRIu32 " sz:%" PRIu32 " pend:%" PRIu32, maxiters, (ui32)Current->size(), (ui32)CompoundCounts->size());
+ if (Settings.Verbose) {
+ TString mess = Sprintf("iter:%" PRIu32 " sz:%" PRIu32 " pend:%" PRIu32, maxiters, (ui32)Current->size(), (ui32)CompoundCounts->size());
Clog << Sprintf("%-110s RSS=%" PRIu32 "M", mess.data(), (ui32)(TRusage::Get().MaxRss >> 20)) << Endl;
- }
-
- ui32 diff = BuildNextGeneration(maxentries);
+ }
- if (Current->size() == maxentries && diff < mindiff)
- break;
+ ui32 diff = BuildNextGeneration(maxentries);
+
+ if (Current->size() == maxentries && diff < mindiff)
+ break;
}
- RebuildCounts(0, true);
- Current->SetScores(Settings.Score);
- return maxiters;
+ RebuildCounts(0, true);
+ Current->SetScores(Settings.Score);
+ return maxiters;
}
}
diff --git a/library/cpp/codecs/greedy_dict/gd_builder.h b/library/cpp/codecs/greedy_dict/gd_builder.h
index b8e9a5e37b..ab0057e1ca 100644
--- a/library/cpp/codecs/greedy_dict/gd_builder.h
+++ b/library/cpp/codecs/greedy_dict/gd_builder.h
@@ -6,89 +6,89 @@
#include <util/random/fast.h>
namespace NGreedyDict {
- struct TBuildSettings {
- EEntryStatTest StatTest = EST_SIMPLE_NORM;
- EEntryScore Score = ES_LEN_SIMPLE;
-
- float MinPValue = 0.75;
- ui32 MinAbsCount = 10;
- ui32 GrowLimit = 10; // times of maxentries
- bool Verbose = false;
- };
-
- class TDictBuilder {
- using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>;
- using TCandidate = std::pair<float, ui64>;
- using TCandidates = TVector<TCandidate>;
-
- private:
- TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb};
- TStringBufs Input;
-
- THolder<TEntrySet> Current;
-
- TMemoryPool CompoundCountsPool;
- THolder<TCompoundCounts> CompoundCounts;
-
- TCandidates Candidates;
-
- TBuildSettings Settings;
-
- public:
- TDictBuilder(const TBuildSettings& s = TBuildSettings())
- : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
- , Settings(s)
- {
- }
-
- void SetInput(const TStringBufs& in) {
- Input = in;
- }
-
- const TBuildSettings& GetSettings() const {
- return Settings;
- }
-
- TBuildSettings& GetSettings() {
- return Settings;
- }
-
- void SetSettings(const TBuildSettings& s) {
- Settings = s;
- }
-
- TEntrySet& EntrySet() {
- return *Current;
- }
-
- const TEntrySet& EntrySet() const {
- return *Current;
- }
-
- THolder<TEntrySet> ReleaseEntrySet() {
- return std::move(Current);
- }
-
- ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10);
-
- public:
- void RebuildCounts(ui32 maxcand, bool final);
- ui32 /*diff size*/ BuildNextGeneration(ui32 maxent);
-
- static bool IsCompound(ui64 ent) {
- return ent & 0xFFFFFFFF00000000ULL;
- }
-
- static ui32 Next(ui64 ent) {
- return ent;
- }
- static ui32 Prev(ui64 ent) {
- return (ent >> 32) - 1;
- }
-
- static ui64 Compose(ui32 prev, ui32 next) {
- return ((prev + 1ULL) << 32) | next;
- }
- };
+ struct TBuildSettings {
+ EEntryStatTest StatTest = EST_SIMPLE_NORM;
+ EEntryScore Score = ES_LEN_SIMPLE;
+
+ float MinPValue = 0.75;
+ ui32 MinAbsCount = 10;
+ ui32 GrowLimit = 10; // times of maxentries
+ bool Verbose = false;
+ };
+
+ class TDictBuilder {
+ using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>;
+ using TCandidate = std::pair<float, ui64>;
+ using TCandidates = TVector<TCandidate>;
+
+ private:
+ TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb};
+ TStringBufs Input;
+
+ THolder<TEntrySet> Current;
+
+ TMemoryPool CompoundCountsPool;
+ THolder<TCompoundCounts> CompoundCounts;
+
+ TCandidates Candidates;
+
+ TBuildSettings Settings;
+
+ public:
+ TDictBuilder(const TBuildSettings& s = TBuildSettings())
+ : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
+ , Settings(s)
+ {
+ }
+
+ void SetInput(const TStringBufs& in) {
+ Input = in;
+ }
+
+ const TBuildSettings& GetSettings() const {
+ return Settings;
+ }
+
+ TBuildSettings& GetSettings() {
+ return Settings;
+ }
+
+ void SetSettings(const TBuildSettings& s) {
+ Settings = s;
+ }
+
+ TEntrySet& EntrySet() {
+ return *Current;
+ }
+
+ const TEntrySet& EntrySet() const {
+ return *Current;
+ }
+
+ THolder<TEntrySet> ReleaseEntrySet() {
+ return std::move(Current);
+ }
+
+ ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 mindiff = 10);
+
+ public:
+ void RebuildCounts(ui32 maxcand, bool final);
+ ui32 /*diff size*/ BuildNextGeneration(ui32 maxent);
+
+ static bool IsCompound(ui64 ent) {
+ return ent & 0xFFFFFFFF00000000ULL;
+ }
+
+ static ui32 Next(ui64 ent) {
+ return ent;
+ }
+ static ui32 Prev(ui64 ent) {
+ return (ent >> 32) - 1;
+ }
+
+ static ui64 Compose(ui32 prev, ui32 next) {
+ return ((prev + 1ULL) << 32) | next;
+ }
+ };
}
diff --git a/library/cpp/codecs/greedy_dict/gd_entry.cpp b/library/cpp/codecs/greedy_dict/gd_entry.cpp
index 2c315c7f7c..0603a9fca8 100644
--- a/library/cpp/codecs/greedy_dict/gd_entry.cpp
+++ b/library/cpp/codecs/greedy_dict/gd_entry.cpp
@@ -5,94 +5,94 @@
#include <util/generic/singleton.h>
namespace NGreedyDict {
- class TAlphas {
- char Memory[512];
-
- public:
- TStringBufs Alphas;
-
- TAlphas() {
- for (ui32 i = 0; i < 256; ++i) {
- Memory[2 * i] = (char)i;
- Memory[2 * i + 1] = 0;
-
- Alphas.push_back(TStringBuf(&Memory[2 * i], 1));
- }
+ class TAlphas {
+ char Memory[512];
+
+ public:
+ TStringBufs Alphas;
+
+ TAlphas() {
+ for (ui32 i = 0; i < 256; ++i) {
+ Memory[2 * i] = (char)i;
+ Memory[2 * i + 1] = 0;
+
+ Alphas.push_back(TStringBuf(&Memory[2 * i], 1));
+ }
+ }
+ };
+
+ void TEntrySet::InitWithAlpha() {
+ Pool.ClearKeepFirstChunk();
+ const TStringBufs& a = Singleton<TAlphas>()->Alphas;
+ for (auto it : a) {
+ Add(it);
}
- };
-
- void TEntrySet::InitWithAlpha() {
- Pool.ClearKeepFirstChunk();
- const TStringBufs& a = Singleton<TAlphas>()->Alphas;
- for (auto it : a) {
- Add(it);
- }
- BuildHierarchy();
+ BuildHierarchy();
}
- void TEntrySet::BuildHierarchy() {
- Sort(begin(), end(), TEntry::StrLess);
+ void TEntrySet::BuildHierarchy() {
+ Sort(begin(), end(), TEntry::StrLess);
- TCompactTrieBuilder<char, ui32, TAsIsPacker<ui32>> builder(CTBF_PREFIX_GROUPED);
+ TCompactTrieBuilder<char, ui32, TAsIsPacker<ui32>> builder(CTBF_PREFIX_GROUPED);
- for (iterator it = begin(); it != end(); ++it) {
- it->Number = (it - begin());
- TStringBuf suff = it->Str;
- size_t len = 0;
- ui32 val = 0;
+ for (iterator it = begin(); it != end(); ++it) {
+ it->Number = (it - begin());
+ TStringBuf suff = it->Str;
+ size_t len = 0;
+ ui32 val = 0;
if (builder.FindLongestPrefix(suff.data(), suff.size(), &len, &val) && len) {
- it->NearestPrefix = val;
- }
+ it->NearestPrefix = val;
+ }
builder.Add(suff.data(), suff.size(), it->Number);
}
- TBufferOutput bout;
- builder.Save(bout);
- Trie.Init(TBlob::FromBuffer(bout.Buffer()));
+ TBufferOutput bout;
+ builder.Save(bout);
+ Trie.Init(TBlob::FromBuffer(bout.Buffer()));
}
- TEntry* TEntrySet::FindPrefix(TStringBuf& str) {
- size_t len = 0;
- ui32 off = 0;
+ TEntry* TEntrySet::FindPrefix(TStringBuf& str) {
+ size_t len = 0;
+ ui32 off = 0;
- if (!Trie.FindLongestPrefix(str, &len, &off)) {
- return nullptr;
- }
+ if (!Trie.FindLongestPrefix(str, &len, &off)) {
+ return nullptr;
+ }
- str.Skip(len);
- return &Get(off);
+ str.Skip(len);
+ return &Get(off);
}
- void TEntrySet::SetModelP() {
- for (iterator it = begin(); it != end(); ++it) {
- TEntry& e = *it;
+ void TEntrySet::SetModelP() {
+ for (iterator it = begin(); it != end(); ++it) {
+ TEntry& e = *it;
- if (!e.HasPrefix()) {
- e.ModelP = 0;
- continue;
- }
+ if (!e.HasPrefix()) {
+ e.ModelP = 0;
+ continue;
+ }
- TStringBuf suff = e.Str;
- const TEntry& p = Get(e.NearestPrefix);
- suff.Skip(p.Len());
+ TStringBuf suff = e.Str;
+ const TEntry& p = Get(e.NearestPrefix);
+ suff.Skip(p.Len());
- float modelp = float(p.Count + e.Count) / TotalCount;
+ float modelp = float(p.Count + e.Count) / TotalCount;
- while (!!suff) {
- TEntry* pp = FindPrefix(suff);
- modelp *= float(pp->Count + e.Count) / TotalCount;
- }
+ while (!!suff) {
+ TEntry* pp = FindPrefix(suff);
+ modelp *= float(pp->Count + e.Count) / TotalCount;
+ }
- e.ModelP = modelp;
+ e.ModelP = modelp;
}
}
- void TEntrySet::SetScores(EEntryScore s) {
- for (auto& it : *this) {
- it.Score = Score(s, it.Len(), it.ModelP, it.Count, TotalCount);
- }
+ void TEntrySet::SetScores(EEntryScore s) {
+ for (auto& it : *this) {
+ it.Score = Score(s, it.Len(), it.ModelP, it.Count, TotalCount);
+ }
}
}
diff --git a/library/cpp/codecs/greedy_dict/gd_entry.h b/library/cpp/codecs/greedy_dict/gd_entry.h
index 18b5be0e15..e123c66b4a 100644
--- a/library/cpp/codecs/greedy_dict/gd_entry.h
+++ b/library/cpp/codecs/greedy_dict/gd_entry.h
@@ -11,93 +11,93 @@
#include <util/memory/pool.h>
namespace NGreedyDict {
- using TStringBufs = TVector<TStringBuf>;
+ using TStringBufs = TVector<TStringBuf>;
- struct TEntry {
- static const i32 NoPrefix = -1;
+ struct TEntry {
+ static const i32 NoPrefix = -1;
- TStringBuf Str;
+ TStringBuf Str;
- i32 NearestPrefix = NoPrefix;
- ui32 Count = 0;
- ui32 Number = 0;
- float ModelP = 0;
- float Score = 0;
+ i32 NearestPrefix = NoPrefix;
+ ui32 Count = 0;
+ ui32 Number = 0;
+ float ModelP = 0;
+ float Score = 0;
- TEntry(TStringBuf b = TStringBuf(), ui32 cnt = 0)
- : Str(b)
- , Count(cnt)
- {
- }
+ TEntry(TStringBuf b = TStringBuf(), ui32 cnt = 0)
+ : Str(b)
+ , Count(cnt)
+ {
+ }
- bool HasPrefix() const {
- return NearestPrefix != NoPrefix;
- }
- ui32 Len() const {
+ bool HasPrefix() const {
+ return NearestPrefix != NoPrefix;
+ }
+ ui32 Len() const {
return Str.size();
- }
+ }
- static bool StrLess(const TEntry& a, const TEntry& b) {
- return a.Str < b.Str;
- }
- static bool NumberLess(const TEntry& a, const TEntry& b) {
- return a.Number < b.Number;
- }
- static bool ScoreMore(const TEntry& a, const TEntry& b) {
- return a.Score > b.Score;
- }
- };
+ static bool StrLess(const TEntry& a, const TEntry& b) {
+ return a.Str < b.Str;
+ }
+ static bool NumberLess(const TEntry& a, const TEntry& b) {
+ return a.Number < b.Number;
+ }
+ static bool ScoreMore(const TEntry& a, const TEntry& b) {
+ return a.Score > b.Score;
+ }
+ };
- class TEntrySet: public TVector<TEntry>, TNonCopyable {
- TMemoryPool Pool{8112};
- TCompactTrie<char, ui32, TAsIsPacker<ui32>> Trie;
+ class TEntrySet: public TVector<TEntry>, TNonCopyable {
+ TMemoryPool Pool{8112};
+ TCompactTrie<char, ui32, TAsIsPacker<ui32>> Trie;
- public:
- ui32 TotalCount = 0;
+ public:
+ ui32 TotalCount = 0;
- void InitWithAlpha();
+ void InitWithAlpha();
- void Add(TStringBuf a) {
+ void Add(TStringBuf a) {
push_back(TStringBuf(Pool.Append(a.data(), a.size()), a.size()));
- }
+ }
- void Add(TStringBuf a, TStringBuf b) {
+ void Add(TStringBuf a, TStringBuf b) {
size_t sz = a.size() + b.size();
- char* p = (char*)Pool.Allocate(sz);
+ char* p = (char*)Pool.Allocate(sz);
memcpy(p, a.data(), a.size());
memcpy(p + a.size(), b.data(), b.size());
- push_back(TStringBuf(p, sz));
- }
+ push_back(TStringBuf(p, sz));
+ }
- TEntry& Get(ui32 idx) {
- return (*this)[idx];
- }
+ TEntry& Get(ui32 idx) {
+ return (*this)[idx];
+ }
- const TEntry& Get(ui32 idx) const {
- return (*this)[idx];
- }
+ const TEntry& Get(ui32 idx) const {
+ return (*this)[idx];
+ }
- void BuildHierarchy();
+ void BuildHierarchy();
- // longest prefix
- TEntry* FindPrefix(TStringBuf& str);
+ // longest prefix
+ TEntry* FindPrefix(TStringBuf& str);
- const TEntry* FindPrefix(TStringBuf& str) const {
- return ((TEntrySet*)this)->FindPrefix(str);
- }
+ const TEntry* FindPrefix(TStringBuf& str) const {
+ return ((TEntrySet*)this)->FindPrefix(str);
+ }
- const TEntry* FirstPrefix(const TEntry& e, TStringBuf& suff) {
- if (!e.HasPrefix())
- return nullptr;
+ const TEntry* FirstPrefix(const TEntry& e, TStringBuf& suff) {
+ if (!e.HasPrefix())
+ return nullptr;
- const TEntry& p = Get(e.NearestPrefix);
- suff = e.Str;
+ const TEntry& p = Get(e.NearestPrefix);
+ suff = e.Str;
suff.Skip(p.Str.size());
- return &p;
- }
+ return &p;
+ }
- void SetModelP();
- void SetScores(EEntryScore);
- };
+ void SetModelP();
+ void SetScores(EEntryScore);
+ };
}
diff --git a/library/cpp/codecs/greedy_dict/gd_stats.h b/library/cpp/codecs/greedy_dict/gd_stats.h
index b63c4c38d2..90f46a0fb9 100644
--- a/library/cpp/codecs/greedy_dict/gd_stats.h
+++ b/library/cpp/codecs/greedy_dict/gd_stats.h
@@ -1,78 +1,78 @@
#pragma once
-#include <util/generic/ymath.h>
+#include <util/generic/ymath.h>
#include <util/generic/algorithm.h>
#include <util/generic/yexception.h>
namespace NGreedyDict {
- enum EEntryScore {
- ES_COUNT,
- ES_LEN_COUNT,
- ES_SIMPLE,
- ES_LEN_SIMPLE,
- ES_SOLAR
- };
+ enum EEntryScore {
+ ES_COUNT,
+ ES_LEN_COUNT,
+ ES_SIMPLE,
+ ES_LEN_SIMPLE,
+ ES_SOLAR
+ };
- enum EEntryStatTest {
- EST_NONE = 0,
- EST_SIMPLE_NORM = 2
- };
+ enum EEntryStatTest {
+ EST_NONE = 0,
+ EST_SIMPLE_NORM = 2
+ };
- inline float ModelP(ui32 countA, ui32 countB, ui32 total) {
- return float(countA) * countB / total / total;
- }
+ inline float ModelP(ui32 countA, ui32 countB, ui32 total) {
+ return float(countA) * countB / total / total;
+ }
- // P (ab | dependent)
- inline float SimpleTest(float modelp, ui32 countAB, ui32 total) {
- float realp = float(countAB) / total;
- return modelp >= realp ? 0 : (realp - modelp);
- }
+ // P (ab | dependent)
+ inline float SimpleTest(float modelp, ui32 countAB, ui32 total) {
+ float realp = float(countAB) / total;
+ return modelp >= realp ? 0 : (realp - modelp);
+ }
- inline float SolarTest(float modelp, ui32 countAB, ui32 total) {
- float realp = float(countAB) / total;
- return modelp >= realp ? 0 : (modelp + realp * (log(realp / modelp) - 1));
- }
+ inline float SolarTest(float modelp, ui32 countAB, ui32 total) {
+ float realp = float(countAB) / total;
+ return modelp >= realp ? 0 : (modelp + realp * (log(realp / modelp) - 1));
+ }
- // P (ab | dependent) / P (ab)
- inline float SimpleTestNorm(float modelp, ui32 countAB, ui32 total) {
- float realp = float(countAB) / total;
- return modelp >= realp ? 0 : (realp - modelp) / realp;
- }
+ // P (ab | dependent) / P (ab)
+ inline float SimpleTestNorm(float modelp, ui32 countAB, ui32 total) {
+ float realp = float(countAB) / total;
+ return modelp >= realp ? 0 : (realp - modelp) / realp;
+ }
- inline float StatTest(EEntryStatTest test, float modelp, ui32 countAB, ui32 total) {
- if (!total) {
- return 0;
- }
- switch (test) {
- case EST_NONE:
- return 1;
- case EST_SIMPLE_NORM:
- return SimpleTestNorm(modelp, countAB, total);
- }
- Y_FAIL("no way!");
+ inline float StatTest(EEntryStatTest test, float modelp, ui32 countAB, ui32 total) {
+ if (!total) {
+ return 0;
+ }
+ switch (test) {
+ case EST_NONE:
+ return 1;
+ case EST_SIMPLE_NORM:
+ return SimpleTestNorm(modelp, countAB, total);
+ }
+ Y_FAIL("no way!");
return 0;
}
- inline float Score(EEntryScore score, ui32 len, float modelp, ui32 count, ui32 total) {
- if (!total) {
- return 0;
- }
- ui32 m = 1;
- switch (score) {
- case ES_LEN_COUNT:
- m = len;
+ inline float Score(EEntryScore score, ui32 len, float modelp, ui32 count, ui32 total) {
+ if (!total) {
+ return 0;
+ }
+ ui32 m = 1;
+ switch (score) {
+ case ES_LEN_COUNT:
+ m = len;
[[fallthrough]];
- case ES_COUNT:
- return m * count;
- case ES_LEN_SIMPLE:
- m = len;
+ case ES_COUNT:
+ return m * count;
+ case ES_LEN_SIMPLE:
+ m = len;
[[fallthrough]];
- case ES_SIMPLE:
- return m * SimpleTest(modelp, count, total);
- case ES_SOLAR:
- return SolarTest(modelp, count, total);
- }
- Y_FAIL("no way!");
+ case ES_SIMPLE:
+ return m * SimpleTest(modelp, count, total);
+ case ES_SOLAR:
+ return SolarTest(modelp, count, total);
+ }
+ Y_FAIL("no way!");
return 0;
}
diff --git a/library/cpp/codecs/greedy_dict/ut/greedy_dict_ut.cpp b/library/cpp/codecs/greedy_dict/ut/greedy_dict_ut.cpp
index 679089a11b..e33976d333 100644
--- a/library/cpp/codecs/greedy_dict/ut/greedy_dict_ut.cpp
+++ b/library/cpp/codecs/greedy_dict/ut/greedy_dict_ut.cpp
@@ -6,11 +6,11 @@
#include <util/generic/ymath.h>
class TGreedyDictTest: public TTestBase {
- UNIT_TEST_SUITE(TGreedyDictTest);
+ UNIT_TEST_SUITE(TGreedyDictTest);
UNIT_TEST(TestEntrySet)
UNIT_TEST(TestBuilder0)
UNIT_TEST(TestBuilder)
- UNIT_TEST_SUITE_END();
+ UNIT_TEST_SUITE_END();
void TestEntrySet() {
using namespace NGreedyDict;
@@ -120,7 +120,7 @@ class TGreedyDictTest: public TTestBase {
}
void FillData(NGreedyDict::TStringBufs& data) {
- static const char* urls[] = {"http://53.ru/car/motors/foreign/opel/tigra/", "http://abakan.24au.ru/tender/85904/", "http://anm15.gulaig.com/", "http://avto-parts.com/mercedes-benz/mercedes-benz-w220-1998-2005/category-442/category-443/", "http://ballooncousin.co.uk/", "http://benzol.ru/equipment/?id=1211&parent=514", "http://blazingseorank.com/blazing-seo-rank-free-website-analysis-to-increase-rank-and-traffic-450.html", "http://blogblaugrana.contadorwebmasters.com/", "http://bristolhash.org.uk/bh3cntct.php", "http://broker.borovichi.ru/category/item/3/1/0/8/28/257", "http://canoncompactcamerax.blogspot.com/", "http://classifieds.smashits.com/p,107881,email-to-friend.htm", "http://conferences.ksde.org/Portals/132/FallAssessment/SAVETHEDAY-FA09.pdf", "http://eway.vn/raovat/325-dien-tu-gia-dung/337-dieu-hoa/98041-b1-sua-may-lanh-quan-binh-tan-sua-may-lanh-quan-binh-chanh-hh-979676119-toan-quoc.html", "http://gallery.e2bn.org/asset73204_8-.html", "http://goplay.nsw.gov.au/activities-for-kids/by/historic-houses-trust/?startdate=2012-07-10", "http://grichards19067.multiply.com/", "http://hotkovo.egent.ru/user/89262269084/", "http://howimetyourself.com/?redirect_to=http://gomiso.com/m/suits/seasons/2/episodes/2", "http://islamqa.com/hi/ref/9014/DEAD%20PEOPLE%20GOOD%20DEEDS", "http://lapras.rutube.ru/", "http://nceluiko.ya.ru/", "http://nyanyanyanyaa.beon.ru/", "http://ozbo.com/Leaf-River-DV-7SS-7-0-MP-Game-Camera-K1-32541.html", "http://sbantom.ru/catalog/chasy/632753.html", "http://shopingoff.com/index.php?option=com_virtuemart&Itemid=65&category_id=&page=shop.browse&manufacturer_id=122&limit=32&limitstart=96", "http://shopingoff.com/katalog-odezhdy/manufacturer/62-christian-audigier.html?limit=32&start=448", "https://webwinkel.ah.nl/process?fh_location=//ecommerce/nl_NL/categories%3C%7Becommerce_shoc1%7D/it_show_product_code_1384%3E%7B10%3B20%7D/pr_startdate%3C20120519/pr_enddate%3E20120519/pr_ltc_allowed%3E%7Bbowi%7D/categories%3C%7Becommerce_shoc1_1al%7D/categories%3C%7Becommerce_shoc1_1al_1ahal%7D&&action=albert_noscript.modules.build", "http://top100.rambler.ru/navi/?theme=208/210/371&rgn=17", "http://volgogradskaya-oblast.extra-m.ru/classifieds/rabota/vakansii/banki-investicii/901467/", "http://wikien4.appspot.com/wiki/Warburg_hypothesis", "http://wola_baranowska.kamerzysta24.com.pl/", "http://www.10dot0dot0dot1.com/", "http://www.anima-redux.ru/index.php?key=gifts+teenage+girls", "http://www.aquaticabyseaworld.com/Calendar.aspx/CP/CP/CP/sp-us/CP/CP/ParkMap/Tickets/Weather.aspx", "http://www.autousa.com/360spin/2012_cadillac_ctssportwagon_3.6awdpremiumcollection.htm", "http://www.booking.com/city/gb/paignton-aireborough.html?inac=0&lang=pl", "http://www.booking.com/city/it/vodo-cadore.en.html", "http://www.booking.com/district/us/new-york/rockefeller-center.html&lang=no", "http://www.booking.com/hotel/bg/crown-fort-club.lv.html", "http://www.booking.com/hotel/ca/gouverneur-rimouski.ar.html", "http://www.booking.com/hotel/ch/l-auberge-du-chalet-a-gobet.fi.html", "http://www.booking.com/hotel/de/mark-garni.ru.html?aid=337384;label=yandex-hotel-mark-garni-68157-%7Bparam1%7D", "http://www.booking.com/hotel/de/mercure-goldschmieding-castrop-rauxel.ro.html", "http://www.booking.com/hotel/de/zollenspieker-fahrhaus.fr.html", "http://www.booking.com/hotel/es/jardin-metropolitano.ca.html", "http://www.booking.com/hotel/fr/clim.fr.html", "http://www.booking.com/hotel/fr/radisson-sas-toulouse-airport.et.html", "http://www.booking.com/hotel/gb/stgileshotel.ro.html?srfid=68c7fe42a03653a8796c84435c5299e4X16?tab=4", "http://www.booking.com/hotel/gr/rodos-park-suites.ru.html", "http://www.booking.com/hotel/id/le-grande-suites-bali.ru.html", "http://www.booking.com/hotel/it/mozart.it.html?aid=321655", "http://www.booking.com/hotel/ni/bahia-del-sol-villas.ru.html?dcid=1;dva=0", "http://www.booking.com/hotel/nl/cpschiphol.ro.html.ro.html?tab=4", "http://www.booking.com/hotel/th/laem-din.en-gb.html", "http://www.booking.com/hotel/th/tinidee-ranong.en.html", "http://www.booking.com/hotel/us/best-western-plus-merrimack-valley.hu.html", "http://www.booking.com/hotel/vn/tan-hai-long.km.html", "http://www.booking.com/landmark/au/royal-brisbane-women-s-hospital.vi.html", "http://www.booking.com/landmark/hk/nam-cheong-station.html&lang=id", "http://www.booking.com/landmark/it/spanish-steps.ca.html", "http://www.booking.com/landmark/sg/asian-civilisations-museum.html&lang=fi", "http://www.booking.com/place/fi-1376029.pt.html", "http://www.booking.com/place/tn257337.pl.html", "http://www.booking.com/region/ca/niagarafalls.ar.html&selected_currency=PLN", "http://www.booking.com/region/mx/queretaro.pt-pt.html&selected_currency=AUD", "http://www.booking.com/searchresults.en.html?city=20063074", "http://www.booking.com/searchresults.et.html?checkin=;checkout=;city=-394632", "http://www.booking.com/searchresults.lv.html?region=3936", "http://www.cevredanismanlari.com/index.php/component/k2/index.php/mevzuat/genel-yazlar/item/dosyalar/index.php?option=com_k2&view=item&id=16:iso-14001-%C3%A7evre-y%C3%B6netim-sistemi&Itemid=132&limitstart=107120", "http://www.dh-wholesaler.com/MENS-POLO-RACING-TEE-RL-p-417.html", "http://www.employabilityonline.net/", "http://www.esso.inc.ru/board/tools.php?event=profile&pname=Invinerrq", "http://www.filesurgery.ru/searchfw/kids_clothes-3.html", "http://www.furnitureandcarpetsource.com/Item.aspx?ItemID=-2107311899&ItemNum=53-T3048", "http://www.gets.cn/product/Gold-Sand-Lampwork-Glass-Beads--Flat-round--28x28x13mm_p260717.html", "http://www.gets.cn/wholesale-Sterling-Silver-Pendant-Findings-3577_S--L-Star-P-1.html?view=1&by=1", "http://www.homeandgardenadvice.com/diy/Mortgages_Loans_and_Financing/9221.html", "http://www.hongkongairport.com/eng/index.html/passenger/passenger/transport/to-from-airport/business/about-the-airport/transport/shopping/entertainment/t2/passenger/interactive-map.html", "http://www.hongkongairport.com/eng/index.html/shopping/insideshopping/all/passenger/transfer-transit/all/airline-information/shopping/entertainment/t2/business/about-the-airport/welcome.html", "http://www.hongkongairport.com/eng/index.html/transport/business/about-the-airport/transport/business/airport-authority/passenger/shopping/dining/all/dining.html", "http://www.idedge.com/index.cfm/fuseaction/category.display/category_id/298/index.cfm", "http://www.istanbulburda.com/aramalar.php", "http://www.jewelryinthenet.com/ads/AdDetail.aspx?AdID=1-0311002490689&stid=22-0111001020877", "http://www.johnnydepp.ru/forum/index.php?showtopic=1629&mode=linearplus&view=findpost&p=186977", "http://www.johnnydepp.ru/forum/index.php?showtopic=476&st=60&p=87379&", "http://www.joseleano.com/joomla/index.php/audio", "http://www.kaplicarehberi.com/tag/sakar-ilicali-kaplicalari/feed", "http://www.khaber.com.tr/arama.html?key=%C3%A7avdar", "http://www.kiz-oyunlari1.com/1783/4437/4363/1056/4170/Bump-Copter2-.html", "http://www.kiz-oyunlari1.com/3752/2612/4175/1166/3649/1047/Angelina-Oyunu.html", "http://www.kiz-oyunlari1.com/4266/3630/3665/3286/4121/301/3274/Sinir-Sinekler-.html", "http://www.kuldiga.lv/index.php?f=8&cat=371", "http://www.kuldiga.lv/index.php/img/index.php?l=lv&art_id=1836&show_c=&cat=85", "http://www.patronessa.ru/remontiruemsya/kuzovnie30raboti.html", "http://www.rapdict.org/Nu_Money?title=Talk:Nu_Money&action=edit", "http://www.serafin-phu.tabor24.com/?page=8", "http://www.shoes-store.org/brand1/Kids/Minnetonka.html", "http://www.shoes-store.org/shoes-store.xml", "http://www.way2allah.com/khotab-download-34695.htm"};
+ static const char* urls[] = {"http://53.ru/car/motors/foreign/opel/tigra/", "http://abakan.24au.ru/tender/85904/", "http://anm15.gulaig.com/", "http://avto-parts.com/mercedes-benz/mercedes-benz-w220-1998-2005/category-442/category-443/", "http://ballooncousin.co.uk/", "http://benzol.ru/equipment/?id=1211&parent=514", "http://blazingseorank.com/blazing-seo-rank-free-website-analysis-to-increase-rank-and-traffic-450.html", "http://blogblaugrana.contadorwebmasters.com/", "http://bristolhash.org.uk/bh3cntct.php", "http://broker.borovichi.ru/category/item/3/1/0/8/28/257", "http://canoncompactcamerax.blogspot.com/", "http://classifieds.smashits.com/p,107881,email-to-friend.htm", "http://conferences.ksde.org/Portals/132/FallAssessment/SAVETHEDAY-FA09.pdf", "http://eway.vn/raovat/325-dien-tu-gia-dung/337-dieu-hoa/98041-b1-sua-may-lanh-quan-binh-tan-sua-may-lanh-quan-binh-chanh-hh-979676119-toan-quoc.html", "http://gallery.e2bn.org/asset73204_8-.html", "http://goplay.nsw.gov.au/activities-for-kids/by/historic-houses-trust/?startdate=2012-07-10", "http://grichards19067.multiply.com/", "http://hotkovo.egent.ru/user/89262269084/", "http://howimetyourself.com/?redirect_to=http://gomiso.com/m/suits/seasons/2/episodes/2", "http://islamqa.com/hi/ref/9014/DEAD%20PEOPLE%20GOOD%20DEEDS", "http://lapras.rutube.ru/", "http://nceluiko.ya.ru/", "http://nyanyanyanyaa.beon.ru/", "http://ozbo.com/Leaf-River-DV-7SS-7-0-MP-Game-Camera-K1-32541.html", "http://sbantom.ru/catalog/chasy/632753.html", "http://shopingoff.com/index.php?option=com_virtuemart&Itemid=65&category_id=&page=shop.browse&manufacturer_id=122&limit=32&limitstart=96", "http://shopingoff.com/katalog-odezhdy/manufacturer/62-christian-audigier.html?limit=32&start=448", "https://webwinkel.ah.nl/process?fh_location=//ecommerce/nl_NL/categories%3C%7Becommerce_shoc1%7D/it_show_product_code_1384%3E%7B10%3B20%7D/pr_startdate%3C20120519/pr_enddate%3E20120519/pr_ltc_allowed%3E%7Bbowi%7D/categories%3C%7Becommerce_shoc1_1al%7D/categories%3C%7Becommerce_shoc1_1al_1ahal%7D&&action=albert_noscript.modules.build", "http://top100.rambler.ru/navi/?theme=208/210/371&rgn=17", "http://volgogradskaya-oblast.extra-m.ru/classifieds/rabota/vakansii/banki-investicii/901467/", "http://wikien4.appspot.com/wiki/Warburg_hypothesis", "http://wola_baranowska.kamerzysta24.com.pl/", "http://www.10dot0dot0dot1.com/", "http://www.anima-redux.ru/index.php?key=gifts+teenage+girls", "http://www.aquaticabyseaworld.com/Calendar.aspx/CP/CP/CP/sp-us/CP/CP/ParkMap/Tickets/Weather.aspx", "http://www.autousa.com/360spin/2012_cadillac_ctssportwagon_3.6awdpremiumcollection.htm", "http://www.booking.com/city/gb/paignton-aireborough.html?inac=0&lang=pl", "http://www.booking.com/city/it/vodo-cadore.en.html", "http://www.booking.com/district/us/new-york/rockefeller-center.html&lang=no", "http://www.booking.com/hotel/bg/crown-fort-club.lv.html", "http://www.booking.com/hotel/ca/gouverneur-rimouski.ar.html", "http://www.booking.com/hotel/ch/l-auberge-du-chalet-a-gobet.fi.html", "http://www.booking.com/hotel/de/mark-garni.ru.html?aid=337384;label=yandex-hotel-mark-garni-68157-%7Bparam1%7D", "http://www.booking.com/hotel/de/mercure-goldschmieding-castrop-rauxel.ro.html", "http://www.booking.com/hotel/de/zollenspieker-fahrhaus.fr.html", "http://www.booking.com/hotel/es/jardin-metropolitano.ca.html", "http://www.booking.com/hotel/fr/clim.fr.html", "http://www.booking.com/hotel/fr/radisson-sas-toulouse-airport.et.html", "http://www.booking.com/hotel/gb/stgileshotel.ro.html?srfid=68c7fe42a03653a8796c84435c5299e4X16?tab=4", "http://www.booking.com/hotel/gr/rodos-park-suites.ru.html", "http://www.booking.com/hotel/id/le-grande-suites-bali.ru.html", "http://www.booking.com/hotel/it/mozart.it.html?aid=321655", "http://www.booking.com/hotel/ni/bahia-del-sol-villas.ru.html?dcid=1;dva=0", "http://www.booking.com/hotel/nl/cpschiphol.ro.html.ro.html?tab=4", "http://www.booking.com/hotel/th/laem-din.en-gb.html", "http://www.booking.com/hotel/th/tinidee-ranong.en.html", "http://www.booking.com/hotel/us/best-western-plus-merrimack-valley.hu.html", "http://www.booking.com/hotel/vn/tan-hai-long.km.html", "http://www.booking.com/landmark/au/royal-brisbane-women-s-hospital.vi.html", "http://www.booking.com/landmark/hk/nam-cheong-station.html&lang=id", "http://www.booking.com/landmark/it/spanish-steps.ca.html", "http://www.booking.com/landmark/sg/asian-civilisations-museum.html&lang=fi", "http://www.booking.com/place/fi-1376029.pt.html", "http://www.booking.com/place/tn257337.pl.html", "http://www.booking.com/region/ca/niagarafalls.ar.html&selected_currency=PLN", "http://www.booking.com/region/mx/queretaro.pt-pt.html&selected_currency=AUD", "http://www.booking.com/searchresults.en.html?city=20063074", "http://www.booking.com/searchresults.et.html?checkin=;checkout=;city=-394632", "http://www.booking.com/searchresults.lv.html?region=3936", "http://www.cevredanismanlari.com/index.php/component/k2/index.php/mevzuat/genel-yazlar/item/dosyalar/index.php?option=com_k2&view=item&id=16:iso-14001-%C3%A7evre-y%C3%B6netim-sistemi&Itemid=132&limitstart=107120", "http://www.dh-wholesaler.com/MENS-POLO-RACING-TEE-RL-p-417.html", "http://www.employabilityonline.net/", "http://www.esso.inc.ru/board/tools.php?event=profile&pname=Invinerrq", "http://www.filesurgery.ru/searchfw/kids_clothes-3.html", "http://www.furnitureandcarpetsource.com/Item.aspx?ItemID=-2107311899&ItemNum=53-T3048", "http://www.gets.cn/product/Gold-Sand-Lampwork-Glass-Beads--Flat-round--28x28x13mm_p260717.html", "http://www.gets.cn/wholesale-Sterling-Silver-Pendant-Findings-3577_S--L-Star-P-1.html?view=1&by=1", "http://www.homeandgardenadvice.com/diy/Mortgages_Loans_and_Financing/9221.html", "http://www.hongkongairport.com/eng/index.html/passenger/passenger/transport/to-from-airport/business/about-the-airport/transport/shopping/entertainment/t2/passenger/interactive-map.html", "http://www.hongkongairport.com/eng/index.html/shopping/insideshopping/all/passenger/transfer-transit/all/airline-information/shopping/entertainment/t2/business/about-the-airport/welcome.html", "http://www.hongkongairport.com/eng/index.html/transport/business/about-the-airport/transport/business/airport-authority/passenger/shopping/dining/all/dining.html", "http://www.idedge.com/index.cfm/fuseaction/category.display/category_id/298/index.cfm", "http://www.istanbulburda.com/aramalar.php", "http://www.jewelryinthenet.com/ads/AdDetail.aspx?AdID=1-0311002490689&stid=22-0111001020877", "http://www.johnnydepp.ru/forum/index.php?showtopic=1629&mode=linearplus&view=findpost&p=186977", "http://www.johnnydepp.ru/forum/index.php?showtopic=476&st=60&p=87379&", "http://www.joseleano.com/joomla/index.php/audio", "http://www.kaplicarehberi.com/tag/sakar-ilicali-kaplicalari/feed", "http://www.khaber.com.tr/arama.html?key=%C3%A7avdar", "http://www.kiz-oyunlari1.com/1783/4437/4363/1056/4170/Bump-Copter2-.html", "http://www.kiz-oyunlari1.com/3752/2612/4175/1166/3649/1047/Angelina-Oyunu.html", "http://www.kiz-oyunlari1.com/4266/3630/3665/3286/4121/301/3274/Sinir-Sinekler-.html", "http://www.kuldiga.lv/index.php?f=8&cat=371", "http://www.kuldiga.lv/index.php/img/index.php?l=lv&art_id=1836&show_c=&cat=85", "http://www.patronessa.ru/remontiruemsya/kuzovnie30raboti.html", "http://www.rapdict.org/Nu_Money?title=Talk:Nu_Money&action=edit", "http://www.serafin-phu.tabor24.com/?page=8", "http://www.shoes-store.org/brand1/Kids/Minnetonka.html", "http://www.shoes-store.org/shoes-store.xml", "http://www.way2allah.com/khotab-download-34695.htm"};
data.clear();
data.insert(data.begin(), urls, urls + Y_ARRAY_SIZE(urls));
}
@@ -128,7 +128,7 @@ class TGreedyDictTest: public TTestBase {
typedef THashMap<TStringBuf, NGreedyDict::TEntry> TDict;
TAutoPtr<NGreedyDict::TEntrySet> DoTestBuilder(const NGreedyDict::TBuildSettings& s,
- TDict& res) {
+ TDict& res) {
using namespace NGreedyDict;
TStringBufs data;