aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/erasure/codec.h
diff options
context:
space:
mode:
authormax42 <max42@yandex-team.com>2023-06-30 03:37:03 +0300
committermax42 <max42@yandex-team.com>2023-06-30 03:37:03 +0300
commitfac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a (patch)
treeb8cbc1deb00309c7f1a7ab6df520a76cf0b5c6d7 /library/cpp/erasure/codec.h
parent7bf166b1a7ed0af927f230022b245af618e998c1 (diff)
downloadydb-fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a.tar.gz
YT-19324: move YT provider to ydb/library/yql
This commit is formed by the following script: https://paste.yandex-team.ru/6f92e4b8-efc5-4d34-948b-15ee2accd7e7/text. This commit has zero effect on all projects that depend on YQL. The summary of changes: - `yql/providers/yt -> ydb/library/yql/providers/yt` - the whole implementation of YT provider is moved into YDB code base for further export as a part of YT YQL plugin shared library; - `yql/providers/stat/{expr_nodes,uploader} -> ydb/library/yql/providers/stat/{expr_nodes,uploader}` - a small interface without implementation and the description of stat expr nodes; - `yql/core/extract_predicate/ut -> ydb/library/yql/core/extract_predicate/ut`; - `yql/core/{ut,ut_common} -> ydb/library/yql/core/{ut,ut_common}`; - `yql/core` is gone; - `yql/library/url_preprocessing -> ydb/library/yql/core/url_preprocessing`. **NB**: all new targets inside `ydb/` are under `IF (NOT CMAKE_EXPORT)` clause which disables them from open-source cmake generation and ya make build. They will be enabled in the subsequent commits.
Diffstat (limited to 'library/cpp/erasure/codec.h')
-rw-r--r--library/cpp/erasure/codec.h80
1 files changed, 80 insertions, 0 deletions
diff --git a/library/cpp/erasure/codec.h b/library/cpp/erasure/codec.h
new file mode 100644
index 0000000000..c03d25e9c8
--- /dev/null
+++ b/library/cpp/erasure/codec.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "public.h"
+
+#include <optional>
+#include <vector>
+
+namespace NErasure {
+
+//! Describes a generic way to generate parity blocks from data blocks and
+//! to recover (repair) missing blocks.
+/*!
+ * Given N data blocks (numbered from 0 to N - 1) one can call #Encode to generate
+ * another M parity blocks (numbered from N to N + M - 1).
+ *
+ * If some of the resulting N + M blocks ever become missing one can attempt to
+ * repair the missing blocks by calling #Decode.
+ *
+ * Here N and M are fixed (codec-specific) parameters.
+ * Call #GetDataPartCount and #GetParityPartCount to figure out
+ * the values for N and M, respectively.
+ *
+ */
+template <class TBlobType>
+struct ICodec {
+ //! Computes a sequence of parity blocks for the given data blocks.
+ /*!
+ * The size of #blocks must be equal to #GetDataPartCount.
+ * The size of the returned array is equal to #GetParityPartCount.
+ */
+ virtual std::vector<TBlobType> Encode(const std::vector<TBlobType>& blocks) const = 0;
+
+ //! Decodes (repairs) missing blocks.
+ /*!
+ * #erasedIndices must contain the set of erased block indices.
+ * #blocks must contain known blocks (in the order specified by #GetRepairIndices).
+ * \returns The repaired blocks.
+ */
+ virtual std::vector<TBlobType> Decode(
+ const std::vector<TBlobType>& blocks,
+ const TPartIndexList& erasedIndices) const = 0;
+
+ //! Given a set of missing block indices, returns |true| if missing blocks can be repaired.
+ //! For performance reasons the elements of #erasedIndices must be unique and sorted.
+ virtual bool CanRepair(const TPartIndexList& erasedIndices) const = 0;
+
+ //! Rapid version that works with a set instead of a list.
+ virtual bool CanRepair(const TPartIndexSet& erasedIndices) const = 0;
+
+ //! Given a set of missing block indices, checks if missing blocks can be repaired.
+ /*!
+ * \returns
+ * If repair is not possible, returns |std::nullopt|.
+ * Otherwise returns the indices of blocks (both data and parity) to be passed to #Decode
+ * (in this very order). Not all known blocks may be needed for repair.
+ */
+ virtual std::optional<TPartIndexList> GetRepairIndices(const TPartIndexList& erasedIndices) const = 0;
+
+ //! Returns the number of data blocks this codec can handle.
+ virtual int GetDataPartCount() const = 0;
+
+ //! Returns the number of parity blocks this codec can handle.
+ virtual int GetParityPartCount() const = 0;
+
+ //! Returns the maximum number of blocks that can always be repaired when missing.
+ virtual int GetGuaranteedRepairablePartCount() const = 0;
+
+ //! Every block passed to this codec must have a size divisible by the result of #GetWordSize.
+ virtual int GetWordSize() const = 0;
+
+ // Extension methods
+
+ //! Returns the sum of #GetDataPartCount and #GetParityPartCount.
+ int GetTotalPartCount() const {
+ return GetDataPartCount() + GetParityPartCount();
+ }
+};
+
+} // namespace NErasure
+