aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/on_disk
diff options
context:
space:
mode:
authormonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
committermonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
commit06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/on_disk
parent03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
downloadydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'library/cpp/on_disk')
-rw-r--r--library/cpp/on_disk/aho_corasick/common.h14
-rw-r--r--library/cpp/on_disk/aho_corasick/helpers.h17
-rw-r--r--library/cpp/on_disk/aho_corasick/reader.h341
-rw-r--r--library/cpp/on_disk/aho_corasick/writer.h354
4 files changed, 726 insertions, 0 deletions
diff --git a/library/cpp/on_disk/aho_corasick/common.h b/library/cpp/on_disk/aho_corasick/common.h
new file mode 100644
index 00000000000..0aad93041a7
--- /dev/null
+++ b/library/cpp/on_disk/aho_corasick/common.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+class TAhoCorasickCommon {
+public:
+ static ui32 GetVersion() {
+ return 3;
+ }
+
+ static size_t GetBlockCount() {
+ return 4;
+ }
+};
diff --git a/library/cpp/on_disk/aho_corasick/helpers.h b/library/cpp/on_disk/aho_corasick/helpers.h
new file mode 100644
index 00000000000..1e413c08a6c
--- /dev/null
+++ b/library/cpp/on_disk/aho_corasick/helpers.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "reader.h"
+#include "writer.h"
+
+template <bool>
+struct TDefaultAhoCorasickG;
+
+template <>
+struct TDefaultAhoCorasickG<false> {
+ typedef TDefaultMappedAhoCorasick T;
+};
+
+template <>
+struct TDefaultAhoCorasickG<true> {
+ typedef TDefaultAhoCorasickBuilder T;
+};
diff --git a/library/cpp/on_disk/aho_corasick/reader.h b/library/cpp/on_disk/aho_corasick/reader.h
new file mode 100644
index 00000000000..e5db58685bc
--- /dev/null
+++ b/library/cpp/on_disk/aho_corasick/reader.h
@@ -0,0 +1,341 @@
+#pragma once
+
+#include <util/generic/deque.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/yexception.h>
+#include <util/memory/blob.h>
+#include <util/stream/buffer.h>
+#include <util/stream/mem.h>
+#include <util/system/unaligned_mem.h>
+#include <utility>
+
+#include <library/cpp/on_disk/chunks/chunked_helpers.h>
+
+#include "common.h"
+
+template <class O>
+class TAhoSearchResult: public TDeque<std::pair<ui32, O>> {
+};
+
+/*
+ * Mapped-declaraion
+ */
+
+template <class O>
+class TMappedDefaultOutputContainer {
+private:
+ TGeneralVector<O> List_;
+
+public:
+ TMappedDefaultOutputContainer(const char* data)
+ : List_(TBlob::NoCopy(data, (size_t)-1))
+ {
+ }
+
+ bool IsEmpty() const {
+ return List_.GetSize() == 0;
+ }
+
+ void FillAnswer(TAhoSearchResult<O>& answer, ui32 pos) const {
+ for (ui32 i = 0; i < List_.GetSize(); ++i) {
+ answer.push_back(std::make_pair(pos, O()));
+ List_.Get(i, answer.back().second);
+ }
+ }
+
+ size_t CheckData() const {
+ return List_.RealSize();
+ }
+};
+
+template <class O>
+class TMappedSingleOutputContainer {
+ const ui32* Data;
+
+ ui32 Size() const {
+ return ReadUnaligned<ui32>(Data);
+ }
+
+public:
+ TMappedSingleOutputContainer(const char* data)
+ : Data((const ui32*)data)
+ {
+ }
+
+ bool IsEmpty() const {
+ return Size() == 0;
+ }
+
+ void FillAnswer(TAhoSearchResult<O>& answer, ui32 pos) const {
+ if (!IsEmpty()) {
+ answer.push_back(std::make_pair(pos, O()));
+ TMemoryInput input(Data + 1, Size());
+ TSaveLoadVectorNonPodElement<O>::Load(&input, answer.back().second, Size());
+ }
+ }
+
+ size_t CheckData() const {
+ return sizeof(ui32) + ReadUnaligned<ui32>(Data);
+ }
+};
+
+template <class TStringType, class O, class C>
+class TMappedAhoCorasick;
+
+template <typename TKey, typename TValue>
+class TEmptyMapData : TNonCopyable {
+private:
+ TBufferStream Stream;
+
+public:
+ const char* P;
+
+ TEmptyMapData() {
+ TPlainHashWriter<TKey, TValue> hash;
+ hash.Save(Stream);
+ P = Stream.Buffer().Begin();
+ }
+};
+
+/*
+ * каждая вершина имеет свой ui32-номер
+ * блок данных для вершины:
+ * ui32, ui32, ui32, ui32, степень*char, данные контейнера
+ * fail, suff, степень, самый левый сын, лексикографический список меток исходящих рёбер.
+ * если степень нулевая, то в блоке только 3 инта
+ */
+template <class TStringType, class O, class C>
+class TMappedAhoVertex {
+public:
+ typedef typename TStringType::value_type TCharType;
+ friend class TMappedAhoCorasick<TStringType, O, C>;
+
+private:
+ const char* Data;
+ typedef TPlainHash<TCharType, ui32> TGotoMap;
+ TGotoMap GotoMap;
+ static const TEmptyMapData<TCharType, ui32> EmptyData;
+
+ static const size_t GENERAL_SHIFT = 3 * sizeof(ui32);
+
+private:
+ const ui32* DataAsInt() const {
+ return (const ui32*)Data;
+ }
+
+ ui32 Power() const {
+ return ReadUnaligned<ui32>(DataAsInt() + 2);
+ }
+
+protected:
+ const C Output() const {
+ return C(Power() ? GotoMap.ByteEnd() : Data + GENERAL_SHIFT);
+ }
+
+ ui32 Fail() const {
+ return ReadUnaligned<ui32>(DataAsInt());
+ }
+
+ ui32 Suffix() const {
+ return ReadUnaligned<ui32>(DataAsInt() + 1);
+ }
+
+ bool GotoFunction(const TCharType c, ui32* result) const {
+ if (0 == Power())
+ return false;
+ return GotoMap.Find(c, result);
+ }
+
+ bool operator==(const TMappedAhoVertex& rhs) const {
+ return Data == rhs.Data;
+ }
+
+ size_t CheckData(ui32 totalVertices) const; /// throws yexception in case of bad data
+
+public:
+ TMappedAhoVertex(const char* data)
+ : Data(data)
+ , GotoMap(Power() ? (Data + GENERAL_SHIFT) : EmptyData.P)
+ {
+ }
+};
+
+/*
+ * блок данных для бора:
+ * количество вершин N, ui32
+ * суммарный размер блоков для вершин, ui32
+ * блоки данных для каждой вершины
+ * отображение id->offset для блока вершины id, N*ui32
+ */
+template <class TStringType, class O, class C = TMappedDefaultOutputContainer<O>>
+class TMappedAhoCorasick : TNonCopyable {
+public:
+ typedef TAhoSearchResult<O> TSearchResult;
+ typedef TMappedAhoVertex<TStringType, O, C> TAhoVertexType;
+ typedef typename TStringType::value_type TCharType;
+ typedef TBasicStringBuf<TCharType> TSample;
+
+private:
+ const TBlob Blob;
+ const char* const AhoVertexes;
+ const ui32 VertexAmount;
+ const ui32* const Id2Offset;
+ const TAhoVertexType Root;
+
+private:
+ bool ValidVertex(ui32 id) const {
+ return id < VertexAmount;
+ }
+
+ TAhoVertexType GetVertexAt(ui32 id) const {
+ if (!ValidVertex(id))
+ ythrow yexception() << "TMappedAhoCorasick fatal error: invalid id " << id;
+ return TAhoVertexType(AhoVertexes + Id2Offset[id]);
+ }
+
+public:
+ TMappedAhoCorasick(const TBlob& blob)
+ : Blob(blob)
+ , AhoVertexes(GetBlock(blob, 1).AsCharPtr())
+ , VertexAmount(TSingleValue<ui32>(GetBlock(blob, 2)).Get())
+ , Id2Offset((const ui32*)(GetBlock(Blob, 3).AsCharPtr()))
+ , Root(GetVertexAt(0))
+ {
+ {
+ const ui32 version = TSingleValue<ui32>(GetBlock(blob, 0)).Get();
+ if (version != TAhoCorasickCommon::GetVersion())
+ ythrow yexception() << "Unknown version " << version << " instead of " << TAhoCorasickCommon::GetVersion();
+ }
+ {
+ TChunkedDataReader reader(blob);
+ if (reader.GetBlocksCount() != TAhoCorasickCommon::GetBlockCount())
+ ythrow yexception() << "wrong block count " << reader.GetBlocksCount();
+ }
+ }
+
+ bool AhoContains(const TSample& str) const;
+ TSearchResult AhoSearch(const TSample& str) const;
+ void AhoSearch(const TSample& str, TSearchResult* result) const;
+ size_t CheckData() const; /// throws yexception in case of bad data
+};
+
+using TSimpleMappedAhoCorasick = TMappedAhoCorasick<TString, ui32, TMappedSingleOutputContainer<ui32>>;
+using TDefaultMappedAhoCorasick = TMappedAhoCorasick<TString, ui32>;
+
+/*
+ * Mapped-implementation
+ */
+template <class TStringType, class O, class C>
+bool TMappedAhoCorasick<TStringType, O, C>::AhoContains(const TSample& str) const {
+ TAhoVertexType current = Root;
+ const size_t len = str.size();
+ for (size_t i = 0; i < len; ++i) {
+ bool outer = false;
+ ui32 gotoVertex;
+ while (!current.GotoFunction(str[i], &gotoVertex)) {
+ if (current == Root) { /// nowhere to go
+ outer = true;
+ break;
+ }
+ current = GetVertexAt(current.Fail());
+ }
+ if (outer)
+ continue;
+ current = GetVertexAt(gotoVertex);
+
+ TAhoVertexType v = current;
+ while (true) {
+ if (!v.Output().IsEmpty())
+ return true;
+ if ((ui32)-1 == v.Suffix())
+ break;
+ v = GetVertexAt(v.Suffix());
+ }
+ }
+ return false;
+}
+
+template <class TStringType, class O, class C>
+void TMappedAhoCorasick<TStringType, O, C>::AhoSearch(const TSample& str, typename TMappedAhoCorasick<TStringType, O, C>::TSearchResult* answer) const {
+ answer->clear();
+ TAhoVertexType current = Root;
+ const size_t len = str.length();
+ for (size_t i = 0; i < len; ++i) {
+ bool outer = false;
+ ui32 gotoVertex;
+ while (!current.GotoFunction(str[i], &gotoVertex)) {
+ if (current == Root) { /// nowhere to go
+ outer = true;
+ break;
+ }
+ current = GetVertexAt(current.Fail());
+ }
+ if (outer)
+ continue;
+ current = GetVertexAt(gotoVertex);
+
+ TAhoVertexType v = current;
+ while (true) {
+ v.Output().FillAnswer(*answer, (ui32)i);
+ if ((ui32)-1 == v.Suffix())
+ break;
+ v = GetVertexAt(v.Suffix());
+ }
+ }
+}
+
+template <class TStringType, class O, class C>
+typename TMappedAhoCorasick<TStringType, O, C>::TSearchResult TMappedAhoCorasick<TStringType, O, C>::AhoSearch(const TSample& str) const {
+ TAhoSearchResult<O> answer;
+ AhoSearch(str, &answer);
+ return answer;
+}
+
+/*
+ * implementation of CheckData in Mapped-classes
+ */
+
+static inline void CheckRange(ui32 id, ui32 strictUpperBound) {
+ if (id >= strictUpperBound) {
+ throw yexception() << id << " of " << strictUpperBound << " - index is invalid";
+ }
+}
+
+template <class TStringType, class O, class C>
+const TEmptyMapData<typename TStringType::value_type, ui32> TMappedAhoVertex<TStringType, O, C>::EmptyData;
+
+template <class TStringType, class O, class C>
+size_t TMappedAhoVertex<TStringType, O, C>::CheckData(ui32 totalVertices) const {
+ size_t bytesNeeded = GENERAL_SHIFT;
+ CheckRange(Fail(), totalVertices);
+ if (Suffix() != (ui32)(-1))
+ CheckRange(Suffix(), totalVertices);
+ if (Power()) {
+ for (typename TGotoMap::TConstIterator toItem = GotoMap.Begin(); toItem != GotoMap.End(); ++toItem)
+ CheckRange(toItem->Second(), totalVertices);
+ bytesNeeded += GotoMap.ByteSize();
+ }
+ bytesNeeded += Output().CheckData();
+ return bytesNeeded;
+}
+
+template <class TStringType, class O, class C>
+size_t TMappedAhoCorasick<TStringType, O, C>::CheckData() const {
+ try {
+ size_t bytesNeeded = 0;
+ for (ui32 id = 0; id < VertexAmount; ++id) {
+ if (Id2Offset[id] != bytesNeeded) {
+ ythrow yexception() << "wrong offset[" << id << "]: " << Id2Offset[id];
+ }
+ bytesNeeded += GetVertexAt(id).CheckData(VertexAmount);
+ }
+ bytesNeeded += VertexAmount * sizeof(ui32);
+ const size_t realsize = GetBlock(Blob, 1).Size() + GetBlock(Blob, 3).Size();
+ if (realsize != bytesNeeded) {
+ ythrow yexception() << "extra information " << bytesNeeded << " " << realsize;
+ }
+ return bytesNeeded;
+ } catch (const yexception& e) {
+ ythrow yexception() << "Bad data: " << e.what();
+ }
+}
diff --git a/library/cpp/on_disk/aho_corasick/writer.h b/library/cpp/on_disk/aho_corasick/writer.h
new file mode 100644
index 00000000000..e866200e1c9
--- /dev/null
+++ b/library/cpp/on_disk/aho_corasick/writer.h
@@ -0,0 +1,354 @@
+#pragma once
+
+#include <util/generic/deque.h>
+#include <util/generic/hash.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/stream/buffer.h>
+#include <util/memory/blob.h>
+#include <utility>
+
+#include <library/cpp/on_disk/chunks/writer.h>
+#include <library/cpp/on_disk/chunks/chunked_helpers.h>
+
+#include "common.h"
+
+template <class I>
+struct meta_iterator_pair {
+ typedef typename std::iterator_traits<I>::value_type::first_type first_type;
+ typedef typename std::iterator_traits<I>::value_type::second_type second_type;
+};
+
+/*
+ * Builder implementation
+ */
+
+/*
+ * Builder declaration
+ */
+
+template <class O>
+class TDefaultContainerBuilder {
+ TGeneralVectorWriter<O> List_;
+
+public:
+ void AddOut(const O& o) {
+ List_.PushBack(o);
+ }
+
+ bool IsEmpty() const {
+ return List_.Size() == 0;
+ }
+
+ void SaveContent(IOutputStream* stream) const {
+ List_.Save(*stream);
+ }
+};
+
+template <class O>
+class TSingleContainerBuilder {
+ bool Empty;
+ O Out_;
+
+public:
+ TSingleContainerBuilder()
+ : Empty(true)
+ {
+ }
+
+ void AddOut(const O& o) {
+ Empty = false;
+ Out_ = o;
+ }
+
+ bool IsEmpty() const {
+ return Empty;
+ }
+
+ void SaveContent(IOutputStream* stream) const {
+ if (IsEmpty()) {
+ WriteBin<ui32>(stream, 0);
+ } else {
+ TBuffer buf;
+ {
+ TBufferOutput tempStream(buf);
+ TSaveLoadVectorNonPodElement<O>::Save(&tempStream, Out_);
+ }
+ WriteBin<ui32>(stream, buf.Size());
+ stream->Write(buf.Data(), buf.Size());
+ }
+ }
+};
+
+template <class TStringType, class O, class C>
+class TAhoCorasickBuilder;
+
+template <class TStringType, class O, class C>
+class TAhoVertex : TNonCopyable {
+ typedef TAhoVertex<TStringType, O, C> TMyself;
+ typedef TAhoCorasickBuilder<TStringType, O, C> TParent;
+ typedef typename TStringType::value_type TCharType;
+
+ friend class TAhoCorasickBuilder<TStringType, O, C>;
+
+private:
+ typedef THashMap<TCharType, TMyself*> TGotoMap;
+
+ TGotoMap GotoMap_;
+ C Output_;
+ TMyself* FailVertex_;
+ TMyself* SuffixVertex_;
+
+protected:
+ const C& Output() const {
+ return Output_;
+ }
+
+ void AddVertex(TMyself* v, TCharType c) {
+ GotoMap_.insert(std::make_pair(c, v));
+ }
+
+ TMyself* Fail() const {
+ return FailVertex_;
+ }
+
+ TMyself* Suffix() const {
+ return SuffixVertex_;
+ }
+
+ TMyself* GotoFunction(const TCharType c) {
+ typename TGotoMap::iterator it;
+ it = GotoMap_.find(c);
+ return it != GotoMap_.end() ? it->second : nullptr;
+ }
+
+ TMyself const* GotoFunction(const TCharType c) const {
+ typename TGotoMap::const_iterator it = GotoMap_.find(c);
+ return it != GotoMap_.end() ? it->second : NULL;
+ }
+
+ TMyself* AddString(TParent* ahoCorasick, const TStringType& s, const ui32 position, const O& o) {
+ if (position >= s.size()) {
+ Output_.AddOut(o);
+ return nullptr;
+ } else {
+ TCharType c = s[position];
+ TMyself* v = GotoFunction(c);
+ if (!v) {
+ v = ahoCorasick->CreateAhoVertex();
+ AddVertex(v, c);
+ }
+ return v;
+ }
+ }
+
+ void SetFail(TMyself* v) {
+ FailVertex_ = v;
+ }
+
+ void SetSuffix(TMyself* v) {
+ SuffixVertex_ = v;
+ }
+
+ const TGotoMap& GotoMap() const {
+ return GotoMap_;
+ }
+
+public:
+ TAhoVertex()
+ : FailVertex_(nullptr)
+ , SuffixVertex_(nullptr)
+ {
+ }
+};
+
+template <class TStringType, class O, class C = TDefaultContainerBuilder<O>>
+class TAhoCorasickBuilder : TNonCopyable {
+public:
+ typedef TAhoVertex<TStringType, O, C> TAhoVertexType;
+ typedef typename TStringType::value_type TCharType;
+
+ friend class TAhoVertex<TStringType, O, C>;
+ friend class TTestMappedAhoCorasick;
+
+private:
+ TDeque<TAhoVertexType*> AhoVertexes;
+
+private:
+ TAhoVertexType* GetRoot() {
+ return AhoVertexes.front();
+ }
+
+ TAhoVertexType const* GetRoot() const {
+ return AhoVertexes.front();
+ }
+
+ TAhoVertexType* CreateAhoVertex() {
+ AhoVertexes.push_back(new TAhoVertexType());
+ return AhoVertexes.back();
+ }
+
+ void ConstructFail();
+
+public:
+ TAhoCorasickBuilder()
+ : AhoVertexes(1, new TAhoVertexType())
+ {
+ }
+
+ ~TAhoCorasickBuilder() {
+ for (size_t i = 0; i < AhoVertexes.size(); ++i) {
+ delete AhoVertexes[i];
+ }
+ }
+
+ void AddString(const TStringType& s, const O& value) {
+ TAhoVertexType* c = GetRoot();
+ for (ui32 i = 0; i <= s.size(); ++i) {
+ c = c->AddString(this, s, i, value);
+ }
+ }
+
+ const TBlob Save();
+ const TBlob AtomicSave();
+ void SaveToStream(IOutputStream* stream);
+};
+
+using TSimpleAhoCorasickBuilder = TAhoCorasickBuilder<TString, ui32, TSingleContainerBuilder<ui32>>;
+using TDefaultAhoCorasickBuilder = TAhoCorasickBuilder<TString, ui32>;
+
+template <class AhoCorasick, class Iterator>
+const TBlob BuildAho(AhoCorasick& ahoCorasick, Iterator begin, Iterator end) {
+ for (Iterator it = begin; it != end; ++it)
+ ahoCorasick.AddString(*it, it->size());
+ return ahoCorasick.Save();
+}
+
+template <class TStringType, class Iterator>
+const TBlob BuildAhoIndex(TAhoCorasickBuilder<TStringType, ui32>& ahoCorasick, Iterator begin, Iterator end) {
+ ui32 index = 0;
+ for (Iterator it = begin; it != end; ++it, ++index)
+ ahoCorasick.AddString(*it, index);
+ return ahoCorasick.Save();
+}
+
+template <class TStringType, class Iterator>
+const TBlob BuildAhoObject(TAhoCorasickBuilder<TStringType, typename meta_iterator_pair<Iterator>::second_type>& ahoCorasick, Iterator begin, Iterator end) {
+ for (Iterator it = begin; it != end; ++it)
+ ahoCorasick.AddString(it->first, it->second);
+ return ahoCorasick.Save();
+}
+
+template <class TStringType, class O, class C>
+void TAhoCorasickBuilder<TStringType, O, C>::ConstructFail() {
+ TAhoVertexType* root = GetRoot();
+ root->SetFail(root);
+ TDeque<TAhoVertexType*> q;
+ typename TAhoVertexType::TGotoMap::const_iterator it;
+ for (it = root->GotoMap().begin(); it != root->GotoMap().end(); ++it) {
+ TAhoVertexType* v = it->second;
+ v->SetFail(root);
+ q.push_back(v);
+ }
+ while (!q.empty()) {
+ TAhoVertexType* c = q.front();
+ q.pop_front();
+ for (it = c->GotoMap().begin(); it != c->GotoMap().end(); ++it) {
+ TAhoVertexType* v = it->second;
+ TCharType a = it->first;
+ q.push_back(v);
+ TAhoVertexType* h = c->Fail();
+ bool outer = false;
+ while (!h->GotoFunction(a)) {
+ if (h->Fail() == h) {
+ v->SetFail(h);
+ outer = true;
+ break;
+ }
+ h = h->Fail();
+ }
+ if (outer)
+ continue;
+ TAhoVertexType* fail = h->GotoFunction(a);
+ v->SetFail(fail);
+ if (!fail->Output().IsEmpty())
+ v->SetSuffix(fail);
+ else
+ v->SetSuffix(fail->Suffix());
+ }
+ }
+}
+
+template <class TStringType, class O, class C>
+void TAhoCorasickBuilder<TStringType, O, C>::SaveToStream(IOutputStream* out) {
+ ConstructFail(); /// the reason of non-const declaration
+
+ Y_ASSERT(AhoVertexes.size() < Max<ui32>());
+ const ui32 vertexAmount = (ui32)AhoVertexes.size();
+
+ TChunkedDataWriter writer(*out);
+ {
+ TSingleValueWriter<ui32> versionWriter(TAhoCorasickCommon::GetVersion());
+ WriteBlock(writer, versionWriter);
+ }
+ writer.NewBlock();
+
+ TVector<TAhoVertexType const*> q(1, GetRoot());
+ THashMap<TAhoVertexType const*, ui32> vertex2id(vertexAmount + 1);
+ TVector<ui32> id2offset(vertexAmount, 0);
+
+ TAhoVertexType* vt = nullptr;
+ vertex2id[vt] = (ui32)-1;
+ q.reserve(vertexAmount);
+
+ for (ui32 curId = 0; curId < vertexAmount; ++curId) {
+ TAhoVertexType const* c = q[curId];
+ vertex2id[c] = curId;
+ id2offset[curId] = (ui32)writer.GetCurrentBlockOffset();
+
+ WriteBin<ui32>(&writer, vertex2id[c->Fail()]);
+ WriteBin<ui32>(&writer, vertex2id[c->Suffix()]);
+
+ typedef TVector<std::pair<TCharType, TAhoVertexType const*>> TChildren;
+ TChildren children(c->GotoMap().begin(), c->GotoMap().end());
+ WriteBin<ui32>(&writer, static_cast<ui32>(children.size()));
+
+ if (!children.empty()) {
+ TPlainHashWriter<TCharType, ui32> hashWriter;
+ const ui32 id = static_cast<ui32>(q.size());
+ for (size_t i = 0; i < children.size(); ++i) {
+ hashWriter.Add(children[i].first, ui32(id + i));
+ q.push_back(children[i].second);
+ }
+
+ hashWriter.Save(writer);
+ }
+
+ c->Output().SaveContent(&writer);
+ }
+
+ {
+ Y_ASSERT(id2offset.size() < Max<ui32>());
+ TSingleValueWriter<ui32> lenWriter((ui32)id2offset.size());
+ WriteBlock(writer, lenWriter);
+ }
+ writer.NewBlock();
+ writer.Write((const char*)id2offset.data(), id2offset.size() * sizeof(ui32));
+ writer.WriteFooter();
+ Y_ASSERT(TAhoCorasickCommon::GetBlockCount() == writer.GetBlockCount());
+}
+
+template <class TStringType, class O, class C>
+const TBlob TAhoCorasickBuilder<TStringType, O, C>::Save() {
+ TBufferStream buffer;
+ SaveToStream(&buffer);
+ return TBlob::FromStream(buffer);
+}
+
+template <class TStringType, class O, class C>
+const TBlob TAhoCorasickBuilder<TStringType, O, C>::AtomicSave() {
+ TBufferStream buffer;
+ SaveToStream(&buffer);
+ return TBlob::FromStream(buffer);
+}