about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/codecs/solar_codec.cpp
diff options
context:
space:
mode:
author: Anton Samokhvalov <pg83@yandex.ru>, 2022-02-10 16:45:17 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru>, 2022-02-10 16:45:17 +0300
commit: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree: dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/codecs/solar_codec.cpp
parent: 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download: ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/codecs/solar_codec.cpp')
-rw-r--r-- library/cpp/codecs/solar_codec.cpp | 170
1 files changed, 85 insertions, 85 deletions
diff --git a/library/cpp/codecs/solar_codec.cpp b/library/cpp/codecs/solar_codec.cpp
index 6c08b9e7bd..d0692fe2a4 100644
--- a/library/cpp/codecs/solar_codec.cpp
+++ b/library/cpp/codecs/solar_codec.cpp
@@ -9,125 +9,125 @@
#include <util/ysaveload.h>
namespace NCodecs {
- static inline ui32 Append(TBuffer& pool, TStringBuf data) {
+ static inline ui32 Append(TBuffer& pool, TStringBuf data) {
pool.Append(data.data(), data.size());
- return pool.Size();
- }
+ return pool.Size();
+ }
+
+ void TSolarCodec::DoLearn(ISequenceReader& r) {
+ using namespace NGreedyDict;
- void TSolarCodec::DoLearn(ISequenceReader& r) {
- using namespace NGreedyDict;
+ Decoder.clear();
+ Pool.Clear();
- Decoder.clear();
- Pool.Clear();
+ THolder<TEntrySet> set;
- THolder<TEntrySet> set;
+ {
+ TMemoryPool pool(8112, TMemoryPool::TLinearGrow::Instance());
+ TStringBufs bufs;
- {
- TMemoryPool pool(8112, TMemoryPool::TLinearGrow::Instance());
- TStringBufs bufs;
+ TStringBuf m;
+ while (r.NextRegion(m)) {
+ bufs.push_back(pool.AppendString(m));
+ }
- TStringBuf m;
- while (r.NextRegion(m)) {
- bufs.push_back(pool.AppendString(m));
- }
+ {
+ TDictBuilder b(Settings);
+ b.SetInput(bufs);
+ b.Build(MaxEntries, MaxIterations);
- {
- TDictBuilder b(Settings);
- b.SetInput(bufs);
- b.Build(MaxEntries, MaxIterations);
-
- set = b.ReleaseEntrySet();
- }
+ set = b.ReleaseEntrySet();
+ }
}
- set->SetScores(ES_LEN_COUNT);
-
+ set->SetScores(ES_LEN_COUNT);
+
{
- TVector<std::pair<float, TStringBuf>> tmp;
- tmp.reserve(set->size());
+ TVector<std::pair<float, TStringBuf>> tmp;
+ tmp.reserve(set->size());
- for (const auto& it : *set) {
- tmp.push_back(std::make_pair(-it.Score, TStringBuf(it.Str).Trunc(Max<ui32>() / Max<ui32>(MaxEntries, 1))));
- }
+ for (const auto& it : *set) {
+ tmp.push_back(std::make_pair(-it.Score, TStringBuf(it.Str).Trunc(Max<ui32>() / Max<ui32>(MaxEntries, 1))));
+ }
- Sort(tmp.begin(), tmp.end());
+ Sort(tmp.begin(), tmp.end());
- Decoder.reserve(tmp.size() + 1);
- Decoder.push_back(0);
+ Decoder.reserve(tmp.size() + 1);
+ Decoder.push_back(0);
- for (const auto& it : tmp) {
- Y_ENSURE(Decoder.back() == Pool.Size(), "learning invariant failed");
- ui32 endoff = Append(Pool, it.second);
- Decoder.push_back(endoff);
- }
+ for (const auto& it : tmp) {
+ Y_ENSURE(Decoder.back() == Pool.Size(), "learning invariant failed");
+ ui32 endoff = Append(Pool, it.second);
+ Decoder.push_back(endoff);
+ }
}
- Pool.ShrinkToFit();
- Decoder.shrink_to_fit();
+ Pool.ShrinkToFit();
+ Decoder.shrink_to_fit();
- TBufferOutput bout;
+ TBufferOutput bout;
- {
- TVector<std::pair<TStringBuf, ui32>> tmp2;
- tmp2.reserve(Decoder.size());
+ {
+ TVector<std::pair<TStringBuf, ui32>> tmp2;
+ tmp2.reserve(Decoder.size());
- for (ui32 i = 1, sz = Decoder.size(); i < sz; ++i) {
- TStringBuf s = DoDecode(i);
- tmp2.push_back(std::make_pair(s, i - 1));
+ for (ui32 i = 1, sz = Decoder.size(); i < sz; ++i) {
+ TStringBuf s = DoDecode(i);
+ tmp2.push_back(std::make_pair(s, i - 1));
Y_ENSURE(s.size() == (Decoder[i] - Decoder[i - 1]), "learning invariant failed");
- }
+ }
- Sort(tmp2.begin(), tmp2.end());
+ Sort(tmp2.begin(), tmp2.end());
- {
- TEncoder::TBuilder builder(CTBF_PREFIX_GROUPED);
- for (const auto& it : tmp2) {
+ {
+ TEncoder::TBuilder builder(CTBF_PREFIX_GROUPED);
+ for (const auto& it : tmp2) {
builder.Add(it.first.data(), it.first.size(), it.second);
- }
+ }
- builder.Save(bout);
+ builder.Save(bout);
}
}
- Encoder.Init(TBlob::FromBuffer(bout.Buffer()));
+ Encoder.Init(TBlob::FromBuffer(bout.Buffer()));
+ }
+
+ void TSolarCodec::Save(IOutputStream* out) const {
+ TBlob b = Encoder.Data();
+ ::Save(out, (ui32)b.Size());
+ out->Write(b.Data(), b.Size());
}
- void TSolarCodec::Save(IOutputStream* out) const {
- TBlob b = Encoder.Data();
- ::Save(out, (ui32)b.Size());
- out->Write(b.Data(), b.Size());
- }
-
- void TSolarCodec::Load(IInputStream* in) {
- ui32 sz;
- ::Load(in, sz);
- TLengthLimitedInput lin(in, sz);
- Encoder.Init(TBlob::FromStream(lin));
- Pool.Clear();
- Decoder.clear();
-
- TVector<std::pair<ui32, TString>> tmp;
-
- ui32 poolsz = 0;
- for (TEncoder::TConstIterator it = Encoder.Begin(); it != Encoder.End(); ++it) {
- const TString& s = it.GetKey();
- tmp.push_back(std::make_pair(it.GetValue(), !s ? TString("\0", 1) : s));
+ void TSolarCodec::Load(IInputStream* in) {
+ ui32 sz;
+ ::Load(in, sz);
+ TLengthLimitedInput lin(in, sz);
+ Encoder.Init(TBlob::FromStream(lin));
+ Pool.Clear();
+ Decoder.clear();
+
+ TVector<std::pair<ui32, TString>> tmp;
+
+ ui32 poolsz = 0;
+ for (TEncoder::TConstIterator it = Encoder.Begin(); it != Encoder.End(); ++it) {
+ const TString& s = it.GetKey();
+ tmp.push_back(std::make_pair(it.GetValue(), !s ? TString("\0", 1) : s));
poolsz += Max<ui32>(s.size(), 1);
- }
+ }
- Sort(tmp.begin(), tmp.end());
+ Sort(tmp.begin(), tmp.end());
- Pool.Reserve(poolsz);
- Decoder.reserve(tmp.size() + 1);
- Decoder.push_back(0);
+ Pool.Reserve(poolsz);
+ Decoder.reserve(tmp.size() + 1);
+ Decoder.push_back(0);
- for (ui32 i = 0, sz2 = tmp.size(); i < sz2; ++i) {
- Y_ENSURE(i == tmp[i].first, "oops! " << i << " " << tmp[i].first);
- Decoder.push_back(Append(Pool, tmp[i].second));
- }
+ for (ui32 i = 0, sz2 = tmp.size(); i < sz2; ++i) {
+ Y_ENSURE(i == tmp[i].first, "oops! " << i << " " << tmp[i].first);
+ Decoder.push_back(Append(Pool, tmp[i].second));
+ }
- Pool.ShrinkToFit();
- Decoder.shrink_to_fit();
+ Pool.ShrinkToFit();
+ Decoder.shrink_to_fit();
}
}