aboutsummaryrefslogtreecommitdiffstats
path: root/yt/cpp/mapreduce/interface/common.h
diff options
context:
space:
mode:
authormax42 <max42@yandex-team.com>2023-07-29 00:02:16 +0300
committermax42 <max42@yandex-team.com>2023-07-29 00:02:16 +0300
commit73b89de71748a21e102d27b9f3ed1bf658766cb5 (patch)
tree188bbd2d622fa91cdcbb1b6d6d77fbc84a0646f5 /yt/cpp/mapreduce/interface/common.h
parent528e321bcc2a2b67b53aeba58c3bd88305a141ee (diff)
downloadydb-73b89de71748a21e102d27b9f3ed1bf658766cb5.tar.gz
YT-19210: expose YQL shared library for YT.
After this, a new target libyqlplugin.so appears. in open-source cmake build. Diff in open-source YDB repo looks like the following: https://paste.yandex-team.ru/f302bdb4-7ef2-4362-91c7-6ca45f329264
Diffstat (limited to 'yt/cpp/mapreduce/interface/common.h')
-rw-r--r--yt/cpp/mapreduce/interface/common.h1301
1 files changed, 1301 insertions, 0 deletions
diff --git a/yt/cpp/mapreduce/interface/common.h b/yt/cpp/mapreduce/interface/common.h
new file mode 100644
index 00000000000..b1754ade70d
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/common.h
@@ -0,0 +1,1301 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/common.h
+///
+/// Header containing miscellaneous structs and classes used in library.
+
+#include "fwd.h"
+
+#include <library/cpp/type_info/type_info.h>
+#include <library/cpp/yson/node/node.h>
+
+#include <util/generic/guid.h>
+#include <util/generic/map.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+#include <util/system/type_name.h>
+#include <util/generic/vector.h>
+
+#include <google/protobuf/message.h>
+
+#include <initializer_list>
+#include <type_traits>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @cond Doxygen_Suppress
+/// Declares member `name_` of `type` (with whatever access specifier is in effect at the use site)
+/// and a fluent setter `name(value)` that assigns the member and returns `*this` cast to `TSelf&`,
+/// so that setter calls can be chained. The enclosing class must provide the `TSelf` alias.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_FIELD(type, name) \
+ type name##_; \
+ TSelf& name(const type& value) \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ static_assert(true)
+
+/// Declares private member `name_` of `type` together with public ref-qualified accessors `name`:
+/// - setter on lvalue returns `TSelf&`, setter on rvalue returns `TSelf` by value;
+/// - getter on lvalue returns `const type&`, getter on rvalue returns `type` by value.
+/// Rvalue overloads move out of the expiring object instead of copying it
+/// (the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does).
+/// The macro leaves `public:` in effect; the enclosing class must provide the `TSelf` alias.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_FIELD_ENCAPSULATED(type, name) \
+private: \
+ type name##_; \
+public: \
+ TSelf& name(const type& value) & \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf name(const type& value) && \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&&>(*this); \
+ } \
+ const type& name() const & \
+ { \
+ return name##_; \
+ } \
+ type name() && \
+ { \
+ return std::move(name##_); \
+ } \
+ static_assert(true)
+
+/// Same as FLUENT_FIELD, but the member is stored as `TMaybe<type>`:
+/// it holds no value until the setter `name(value)` is called.
+/// The setter returns `*this` cast to `TSelf&` so that calls can be chained.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_FIELD_OPTION(type, name) \
+ TMaybe<type> name##_; \
+ TSelf& name(const type& value) \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ static_assert(true)
+
+/// Declares private member `name_` of type `TMaybe<type>` together with public ref-qualified methods:
+/// - `name(value)` sets the value;
+/// - `Reset<name>()` resets the member to `Nothing()`;
+/// - `name()` returns the stored `TMaybe<type>`.
+/// Overloads called on lvalue return references (`TSelf&` / `const TMaybe<type>&`),
+/// overloads called on rvalue return by value and move out of the expiring object instead of copying it
+/// (the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does).
+/// The macro leaves `public:` in effect; the enclosing class must provide the `TSelf` alias.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \
+private: \
+ TMaybe<type> name##_; \
+public: \
+ TSelf& name(const type& value) & \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf name(const type& value) && \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&&>(*this); \
+ } \
+ TSelf& Reset##name() & \
+ { \
+ name##_ = Nothing(); \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf Reset##name() && \
+ { \
+ name##_ = Nothing(); \
+ return static_cast<TSelf&&>(*this); \
+ } \
+ const TMaybe<type>& name() const& \
+ { \
+ return name##_; \
+ } \
+ TMaybe<type> name() && \
+ { \
+ return std::move(name##_); \
+ } \
+ static_assert(true)
+
+/// Same as FLUENT_FIELD, but the member `name_` gets `defaultValue` as its default member initializer.
+/// The setter returns `*this` cast to `TSelf&` so that calls can be chained.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \
+ type name##_ = defaultValue; \
+ TSelf& name(const type& value) \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ static_assert(true)
+
+/// Declares private member `name_` of `type` initialized with `defaultValue`
+/// together with public ref-qualified accessors `name`:
+/// - setter on lvalue returns `TSelf&`, setter on rvalue returns `TSelf` by value;
+/// - getter on lvalue returns `const type&`, getter on rvalue returns `type` by value.
+/// Rvalue overloads move out of the expiring object instead of copying it
+/// (the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does).
+/// The macro leaves `public:` in effect; the enclosing class must provide the `TSelf` alias.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \
+private: \
+ type name##_ = defaultValue; \
+public: \
+ TSelf& name(const type& value) & \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf name(const type& value) && \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&&>(*this); \
+ } \
+ const type& name() const & \
+ { \
+ return name##_; \
+ } \
+ type name() && \
+ { \
+ return std::move(name##_); \
+ } \
+ static_assert(true)
+
+/// Declares member `<name>s_` of type `TVector<type>` and two fluent methods:
+/// - `Add<name>(value)` appends single item to the vector;
+/// - `<name>s(values)` replaces the whole vector (argument is taken by value and moved into the member).
+/// Both methods return `*this` cast to `TSelf&` so that calls can be chained.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_VECTOR_FIELD(type, name) \
+ TVector<type> name##s_; \
+ TSelf& Add##name(const type& value) \
+ { \
+ name##s_.push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf& name##s(TVector<type> values) \
+ { \
+ name##s_ = std::move(values); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ static_assert(true)
+
+/// Declares private member `<name>s_` of type `TMaybe<TVector<type>>` together with public ref-qualified methods:
+/// - `<name>s()` returns the stored optional vector (const reference, mutable reference or moved-out value
+/// depending on value category and constness of the object);
+/// - `Add<name>(value)` appends an item, constructing an empty vector first if the member holds no value;
+/// - `<name>s(values)` replaces the whole vector;
+/// - `<name>s(TNothing)` and `Reset<name>s()` reset the member to `Nothing()`.
+/// Modifying overloads called on lvalue return `TSelf&`, overloads called on rvalue return `TSelf`
+/// move-constructed from the expiring object.
+/// The macro leaves `public:` in effect; the enclosing class must provide the `TSelf` alias.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \
+private: \
+ TMaybe<TVector<type>> name##s_; \
+public: \
+ const TMaybe<TVector<type>>& name##s() const & { \
+ return name##s_; \
+ } \
+ TMaybe<TVector<type>>& name##s() & { \
+ return name##s_; \
+ } \
+ TMaybe<TVector<type>> name##s() && { \
+ return std::move(name##s_); \
+ } \
+ TSelf& Add##name(const type& value) & \
+ { \
+ if (name##s_.Empty()) { \
+ name##s_.ConstructInPlace(); \
+ } \
+ name##s_->push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf Add##name(const type& value) && \
+ { \
+ if (name##s_.Empty()) { \
+ name##s_.ConstructInPlace(); \
+ } \
+ name##s_->push_back(value); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& name##s(TVector<type> values) & \
+ { \
+ name##s_ = std::move(values); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf name##s(TVector<type> values) && \
+ { \
+ name##s_ = std::move(values); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& name##s(TNothing) & \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf name##s(TNothing) && \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& Reset##name##s() & \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf Reset##name##s() && \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ static_assert(true)
+
+/// Declares private member `<name>s_` of type `TVector<type>` together with public ref-qualified methods:
+/// - `Add<name>(value)` appends single item to the vector;
+/// - `<name>s(value)` replaces the whole vector (argument is taken by value and moved into the member);
+/// - `<name>s()` returns the stored vector.
+/// Overloads called on lvalue return references (`TSelf&` / `const TVector<type>&`),
+/// overloads called on rvalue return by value and move out of the expiring object instead of copying it
+/// (the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does).
+/// The macro leaves `public:` in effect; the enclosing class must provide the `TSelf` alias.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \
+private: \
+ TVector<type> name##s_; \
+public: \
+ TSelf& Add##name(const type& value) & \
+ { \
+ name##s_.push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf Add##name(const type& value) && \
+ { \
+ name##s_.push_back(value); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& name##s(TVector<type> value) & \
+ { \
+ name##s_ = std::move(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf name##s(TVector<type> value) && \
+ { \
+ name##s_ = std::move(value); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ const TVector<type>& name##s() const & \
+ { \
+ return name##s_; \
+ } \
+ TVector<type> name##s() && \
+ { \
+ return std::move(name##s_); \
+ } \
+ static_assert(true)
+
+/// Declares member `name_` of type `TMap<keytype, valuetype>` and fluent method `Add<name>(key, value)`
+/// that inserts the pair with `emplace` and returns `*this` cast to `TSelf&`.
+/// NOTE(review): since `emplace` is used, an already existing key presumably keeps its old value
+/// (std::map semantics) - confirm for TMap.
+/// Trailing `static_assert(true)` makes the macro invocation require a terminating `;`.
+#define FLUENT_MAP_FIELD(keytype, valuetype, name) \
+ TMap<keytype,valuetype> name##_; \
+ TSelf& Add##name(const keytype& key, const valuetype& value) \
+ { \
+ name##_.emplace(key, value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ static_assert(true)
+
+/// @endcond
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Convenience class that keeps sequence of items.
+///
+/// Designed to be used as function parameter.
+///
+/// Users of such function can then pass:
+/// - single item,
+/// - initializer list of items,
+/// - vector of items;
+/// as argument to this function.
+///
+/// Example:
+/// ```
+/// void Foo(const TOneOrMany<int>& arg);
+/// ...
+/// Foo(1); // ok
+/// Foo({1, 2, 3}); // ok
+/// ```
+///
+/// @tparam T Type of the stored items.
+/// @tparam TDerived Type returned by fluent `Add` methods; if it is `void`, `TOneOrMany` itself is returned.
+template <class T, class TDerived>
+struct TOneOrMany
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>;
+ /// @endcond
+
+ /// Initialize with empty sequence.
+ TOneOrMany() = default;
+
+ /// Initialize from initializer list.
+ template<class U>
+ TOneOrMany(std::initializer_list<U> il)
+ {
+ Parts_.assign(il.begin(), il.end());
+ }
+
+ /// Put arguments to sequence
+ template <class U, class... TArgs>
+ requires std::is_convertible_v<U, T>
+ TOneOrMany(U&& arg, TArgs&&... args)
+ {
+ // The first argument is forwarded as well as the rest of them:
+ // rvalues are moved into the sequence instead of being copied.
+ Add(std::forward<U>(arg), std::forward<TArgs>(args)...);
+ }
+
+ /// Initialize from vector.
+ TOneOrMany(TVector<T> args)
+ : Parts_(std::move(args))
+ { }
+
+ /// @brief Order is defined the same way as in TVector
+ bool operator==(const TOneOrMany& rhs) const
+ {
+ // N.B. We would like to make this method to be `= default`,
+ // but this breaks MSVC compiler for the cases when T doesn't
+ // support comparison.
+ return Parts_ == rhs.Parts_;
+ }
+
+ ///
+ /// @{
+ ///
+ /// @brief Add all arguments to sequence
+ ///
+ /// Arguments are appended in the order they are passed.
+ /// Overload for lvalue returns reference to the object, overload for rvalue returns the object by value.
+ template <class U, class... TArgs>
+ requires std::is_convertible_v<U, T>
+ TSelf& Add(U&& part, TArgs&&... args) &
+ {
+ Parts_.push_back(std::forward<U>(part));
+ // Comma fold appends the rest of arguments left to right; it expands to nothing for empty pack.
+ (Parts_.push_back(std::forward<TArgs>(args)), ...);
+ return static_cast<TSelf&>(*this);
+ }
+
+ template <class U, class... TArgs>
+ requires std::is_convertible_v<U, T>
+ TSelf Add(U&& part, TArgs&&... args) &&
+ {
+ // `*this` is lvalue inside the method, so lvalue overload is called here and its result is moved out.
+ return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...));
+ }
+ /// @}
+
+ /// Content of sequence.
+ TVector<T> Parts_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Type of the value that can occur in YT table.
+///
+/// Enumerators have no explicit values, so they are numbered sequentially starting from zero
+/// in the order of declaration.
+///
+/// @ref NYT::TTableSchema
+/// https://yt.yandex-team.ru/docs/description/storage/data_types
+enum EValueType : int
+{
+ /// Int64, signed integer of 64 bits.
+ VT_INT64,
+
+ /// Uint64, unsigned integer of 64 bits.
+ VT_UINT64,
+
+ /// Double, floating point number of double precision (64 bits).
+ VT_DOUBLE,
+ /// Boolean, `true` or `false`.
+ VT_BOOLEAN,
+
+ /// String, arbitrary byte sequence.
+ VT_STRING,
+
+ /// Any, arbitrary yson document.
+ VT_ANY,
+
+ /// Int8, signed integer of 8 bits.
+ VT_INT8,
+ /// Int16, signed integer of 16 bits.
+ VT_INT16,
+ /// Int32, signed integer of 32 bits.
+ VT_INT32,
+
+ /// Uint8, unsigned integer of 8 bits.
+ VT_UINT8,
+ /// Uint16, unsigned integer of 16 bits.
+ VT_UINT16,
+ /// Uint32, unsigned integer of 32 bits.
+ VT_UINT32,
+
+ /// Utf8, byte sequence that is valid utf8.
+ VT_UTF8,
+
+ /// Null, absence of value (almost never used in schemas)
+ VT_NULL,
+ /// Void, absence of value (almost never used in schemas); the difference between null and void is yql-specific.
+ VT_VOID,
+
+ /// Date, number of days since Unix epoch (unsigned)
+ VT_DATE,
+ /// Datetime, number of seconds since Unix epoch (unsigned)
+ VT_DATETIME,
+ /// Timestamp, number of milliseconds since Unix epoch (unsigned)
+ /// NOTE(review): YT data type documentation seems to describe `timestamp` as microseconds - confirm the unit.
+ VT_TIMESTAMP,
+ /// Interval, difference between two timestamps (signed)
+ VT_INTERVAL,
+
+ /// Float, floating point number (32 bits)
+ VT_FLOAT,
+ /// Json, sequence of bytes that is valid json.
+ VT_JSON,
+};
+
+///
+/// @brief Sort order.
+///
+/// NOTE(review): quoted strings in the trailing comments look like textual names of the enumerators,
+/// presumably consumed by enum serialization tooling - keep them intact; confirm.
+///
+/// @ref NYT::TTableSchema
+enum ESortOrder : int
+{
+ /// Ascending sort order.
+ SO_ASCENDING /* "ascending" */,
+ /// Descending sort order.
+ SO_DESCENDING /* "descending" */,
+};
+
+///
+/// @brief Value of "optimize_for" attribute.
+///
+/// NOTE(review): quoted strings in the trailing comments look like textual values of the attribute,
+/// presumably consumed by enum serialization tooling - keep them intact; confirm.
+///
+/// @ref NYT::TRichYPath
+enum EOptimizeForAttr : i8
+{
+ /// Optimize for scan
+ OF_SCAN_ATTR /* "scan" */,
+
+ /// Optimize for lookup
+ OF_LOOKUP_ATTR /* "lookup" */,
+};
+
+///
+/// @brief Value of "erasure_codec" attribute.
+///
+/// Enumerators are hidden from doxygen; the quoted string next to each of them
+/// looks like the textual value of the attribute.
+/// NOTE(review): these strings are presumably consumed by enum serialization tooling - keep them intact; confirm.
+///
+/// @ref NYT::TRichYPath
+enum EErasureCodecAttr : i8
+{
+ /// @cond Doxygen_Suppress
+ EC_NONE_ATTR /* "none" */,
+ EC_REED_SOLOMON_6_3_ATTR /* "reed_solomon_6_3" */,
+ EC_LRC_12_2_2_ATTR /* "lrc_12_2_2" */,
+ EC_ISA_LRC_12_2_2_ATTR /* "isa_lrc_12_2_2" */,
+ /// @endcond
+};
+
+///
+/// @brief Value of "schema_modification" attribute.
+///
+/// NOTE(review): quoted strings in the trailing comments look like textual values of the attribute,
+/// presumably consumed by enum serialization tooling - keep them intact; confirm.
+///
+/// @ref NYT::TRichYPath
+enum ESchemaModificationAttr : i8
+{
+ /// No schema modification.
+ SM_NONE_ATTR /* "none" */,
+ /// "unversioned_update" schema modification.
+ SM_UNVERSIONED_UPDATE /* "unversioned_update" */,
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Table key column description.
+///
+/// The description includes column name and sort order.
+///
+/// @anchor TSortOrder_backward_compatibility
+/// @note
+/// Many functions that use `TSortColumn` as argument used to take `TString`
+/// (the only allowed sort order was "ascending" and user didn't have to specify it).
+/// @note
+/// This class is designed to provide backward compatibility for such code and therefore
+/// objects of this class can be constructed and assigned from TString-like objects only.
+///
+/// @see NYT::TSortOperationSpec
+class TSortColumn
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TSortColumn;
+ /// @endcond
+
+ /// Column name
+ FLUENT_FIELD_ENCAPSULATED(TString, Name);
+
+ /// Sort order (ascending by default)
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING);
+
+ ///
+ /// @{
+ ///
+ /// @brief Construct object from name and sort order
+ ///
+ /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code.
+ /// @ref TSortOrder_backward_compatibility
+ TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
+ TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
+ TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
+ /// @}
+
+ /// Check that sort order is ascending, throw exception otherwise.
+ const TSortColumn& EnsureAscending() const;
+
+ /// @brief Convert sort column to yson representation as YT API expects it.
+ TNode ToNode() const;
+
+ /// @brief Comparison is default and checks both name and sort order.
+ bool operator == (const TSortColumn& rhs) const = default;
+
+ ///
+ /// @{
+ ///
+ /// @brief Assign object from column name, and set sort order to `ascending`.
+ ///
+ /// These are backward compatibility methods.
+ ///
+ /// @ref TSortOrder_backward_compatibility
+ TSortColumn& operator = (TStringBuf name);
+ TSortColumn& operator = (const TString& name);
+ TSortColumn& operator = (const char* name);
+ /// @}
+
+ /// @{
+ ///
+ /// @brief Comparison with bare column name.
+ ///
+ /// NOTE(review): definitions are not in this header; how sort order is treated by these operators
+ /// cannot be told from here - confirm against the implementation.
+ bool operator == (const TStringBuf rhsName) const;
+ bool operator != (const TStringBuf rhsName) const;
+ bool operator == (const TString& rhsName) const;
+ bool operator != (const TString& rhsName) const;
+ bool operator == (const char* rhsName) const;
+ bool operator != (const char* rhsName) const;
+ /// @}
+
+ // Intentionally implicit conversions.
+ operator TString() const;
+ operator TStringBuf() const;
+ operator std::string() const;
+
+ // Serialization covers both column name and sort order.
+ Y_SAVELOAD_DEFINE(Name_, SortOrder_);
+};
+
+///
+/// @brief List of @ref TSortColumn
+///
+/// Contains a bunch of helper methods such as constructing from single object.
+/// Constructors of the base @ref TOneOrMany are inherited, so the list can also be built
+/// from a single column, an initializer list or a vector of columns.
+class TSortColumns
+ : public TOneOrMany<TSortColumn, TSortColumns>
+{
+public:
+ using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany;
+
+ /// Construct empty list.
+ TSortColumns();
+
+ ///
+ /// @{
+ ///
+ /// @brief Construct list of ascending sort order columns by their names.
+ ///
+ /// Required for backward compatibility.
+ ///
+ /// @ref TSortOrder_backward_compatibility
+ TSortColumns(const TVector<TString>& names);
+ TSortColumns(const TColumnNames& names);
+ /// @}
+
+
+ ///
+ /// @brief Implicit conversion to column list.
+ ///
+ /// If all columns have ascending sort order, return list of their names.
+ /// Throw exception otherwise.
+ ///
+ /// Required for backward compatibility.
+ ///
+ /// @ref TSortOrder_backward_compatibility
+ operator TColumnNames() const;
+
+ /// Make sure that all columns are of ascending sort order.
+ const TSortColumns& EnsureAscending() const;
+
+ /// Get list of column names.
+ TVector<TString> GetNames() const;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Helper function to create new style type (@ref NTi::TTypePtr) from old style one (@ref NYT::EValueType).
+/// NOTE(review): presumably the result is wrapped into Optional<...> when `required` is false - confirm.
+NTi::TTypePtr ToTypeV3(EValueType type, bool required);
+
+///
+/// @brief Single column description
+///
+/// Each field describing column has setter and getter.
+///
+/// Example reading field:
+/// ```
+/// ... columnSchema.Name() ...
+/// ```
+///
+/// Example setting field:
+/// ```
+/// columnSchema.Name("my-column").Type(VT_INT64); // set name and type
+/// ```
+///
+/// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema
+class TColumnSchema
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TColumnSchema;
+ /// @endcond
+
+ ///
+ /// @brief Construct empty column schema
+ ///
+ /// @note
+ /// Such column schema cannot be used in table schema as it doesn't have name.
+ TColumnSchema();
+
+ ///
+ /// @{
+ ///
+ /// @brief Copy constructor and copy assignment are default.
+ TColumnSchema(const TColumnSchema&) = default;
+ TColumnSchema& operator=(const TColumnSchema&) = default;
+ /// @}
+
+
+ /// Column name
+ FLUENT_FIELD_ENCAPSULATED(TString, Name);
+
+ ///
+ /// @brief Functions to work with type in old manner.
+ ///
+ /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library.
+ TColumnSchema& Type(EValueType type) &;
+ TColumnSchema Type(EValueType type) &&;
+ EValueType Type() const;
+
+ /// @brief Set and get column type.
+ /// @{
+ TColumnSchema& Type(const NTi::TTypePtr& type) &;
+ TColumnSchema Type(const NTi::TTypePtr& type) &&;
+
+ TColumnSchema& TypeV3(const NTi::TTypePtr& type) &;
+ TColumnSchema TypeV3(const NTi::TTypePtr& type) &&;
+ NTi::TTypePtr TypeV3() const;
+ /// @}
+
+ ///
+ /// @brief Raw yson representation of column type
+ /// @deprecated Prefer to use `TypeV3` methods.
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3);
+
+ /// Column sort order
+ FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder);
+
+ ///
+ /// @brief Lock group name
+ ///
+ /// @ref https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#blokirovka-stroki
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock);
+
+ /// Expression defining column value
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression);
+
+ /// Aggregating function name
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate);
+
+ ///
+ /// @brief Storage group name
+ ///
+ /// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group);
+
+ ///
+ /// @brief Column requiredness.
+ ///
+ /// Required columns don't accept NULL values.
+ /// Usually if column is not required it means that it has Optional<...> type
+ bool Required() const;
+
+ ///
+ /// @{
+ ///
+ /// @brief Set type in old-style manner
+ TColumnSchema& Type(EValueType type, bool required) &;
+ TColumnSchema Type(EValueType type, bool required) &&;
+ /// @}
+
+private:
+ // Deserialization from yson node needs access to private fields.
+ friend void Deserialize(TColumnSchema& columnSchema, const TNode& node);
+ NTi::TTypePtr TypeV3_;
+ bool Required_ = false;
+};
+
+/// Equality check compares all fields of column schemas.
+bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs);
+
+///
+/// @brief Description of table schema
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema
+class TTableSchema
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TTableSchema;
+ /// @endcond
+
+ /// Column schema
+ FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column);
+
+ ///
+ /// @brief Strictness of the schema
+ ///
+ /// Strict schemas are not allowed to have columns not described in schema.
+ /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have
+ /// (NOTE(review): the sentence above is cut off in the original comment; presumably "type `any`" - confirm).
+ /// Schema is strict by default.
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true);
+
+ ///
+ /// @brief Whether keys are unique
+ ///
+ /// This flag can be set only for schemas that have sorted columns.
+ /// If flag is set table cannot have multiple rows with same key.
+ /// Flag is not set by default.
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false);
+
+ /// Get modifiable column list
+ TVector<TColumnSchema>& MutableColumns();
+
+ /// Check if schema has any described column
+ [[nodiscard]] bool Empty() const;
+
+ /// Add column
+ TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&;
+
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
+ TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&;
+
+ /// Add optional column of specified type
+ TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&;
+
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
+ TTableSchema& AddColumn(const TString& name, EValueType type) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, EValueType type) &&;
+
+ ///
+ /// @brief Make table schema sorted by specified columns
+ ///
+ /// Resets old key columns if any
+ TTableSchema& SortBy(const TSortColumns& columns) &;
+
+ /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&;
+ TTableSchema SortBy(const TSortColumns& columns) &&;
+
+ /// Get yson description of table schema
+ [[nodiscard]] TNode ToNode() const;
+
+ /// Parse schema from yson node
+ static NYT::TTableSchema FromNode(const TNode& node);
+
+ // Deserialization from yson node needs access to private fields.
+ friend void Deserialize(TTableSchema& tableSchema, const TNode& node);
+};
+
+/// Check for equality of all columns and all schema attributes
+bool operator==(const TTableSchema& lhs, const TTableSchema& rhs);
+
+///
+/// @brief Create table schema by protobuf message descriptor
+///
+/// @param messageDescriptor Descriptor of protobuf message that describes table row.
+/// @param sortColumns Sort columns of the schema (no sort columns by default).
+/// @param keepFieldsWithoutExtension NOTE(review): presumably controls whether message fields
+/// that have no YT-specific extension get into the schema - confirm against the implementation.
+TTableSchema CreateTableSchema(
+ const ::google::protobuf::Descriptor& messageDescriptor,
+ const TSortColumns& sortColumns = TSortColumns(),
+ bool keepFieldsWithoutExtension = true);
+
+///
+/// @brief Create table schema by protobuf message type
+///
+/// Takes descriptor of `TProtoType` and passes it to the descriptor based overload
+/// together with both arguments unchanged.
+///
+/// @tparam TProtoType Class derived from `::google::protobuf::Message`.
+template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>>
+inline TTableSchema CreateTableSchema(
+ const TSortColumns& sortColumns = TSortColumns(),
+ bool keepFieldsWithoutExtension = true)
+{
+ static_assert(
+ std::is_base_of_v<::google::protobuf::Message, TProtoType>,
+ "Template argument must be derived from ::google::protobuf::Message");
+
+ const auto& messageDescriptor = *TProtoType::descriptor();
+ return CreateTableSchema(messageDescriptor, sortColumns, keepFieldsWithoutExtension);
+}
+
+///
+/// @brief Create strict table schema from `struct` type.
+///
+/// Names and types of columns are taken from struct member names and types.
+/// `Strict` flag is set to true, all other attributes of schema and columns
+/// are left with default values.
+TTableSchema CreateTableSchema(NTi::TTypePtr type);
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Enumeration describing comparison operation used in key bound.
+///
+/// ERelation is a part of @ref NYT::TKeyBound that can be used as
+/// lower or upper key limit in @ref TReadLimit.
+///
+/// Relations `Less` and `LessOrEqual` are for upper limit and
+/// relations `Greater` and `GreaterOrEqual` are for lower limit.
+///
+/// It is an error to use relation in the limit of wrong kind.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+enum class ERelation
+{
+ ///
+ /// @brief Relation "less"
+ ///
+ /// Specifies range of keys that are before specified key.
+ /// Can only be used in upper limit.
+ Less /* "<" */,
+
+ ///
+ /// @brief Relation "less or equal"
+ ///
+ /// Specifies range of keys that are before or equal to specified key.
+ /// Can only be used in upper limit.
+ LessOrEqual /* "<=" */,
+
+ ///
+ /// @brief Relation "greater"
+ ///
+ /// Specifies range of keys that are after specified key.
+ /// Can only be used in lower limit.
+ Greater /* ">" */,
+
+ ///
+ /// @brief Relation "greater or equal"
+ ///
+ /// Specifies range of keys that are after or equal to specified key.
+ /// Can only be used in lower limit.
+ GreaterOrEqual /* ">=" */,
+};
+
+///
+/// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange
+///
+/// Both parts have accessors generated by fluent macros: `Relation` (defaults to `ERelation::Less`)
+/// and `Key` (defaults to empty `TKey{}`).
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TKeyBound
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TKeyBound;
+
+ // Constructor is explicit; its default arguments repeat default values of the fields below.
+ explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{});
+
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less);
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{});
+ /// @endcond
+};
+
+///
+/// @brief Description of the read limit.
+///
+/// It is actually a variant and must store exactly one field.
+/// The struct itself does not enforce this: every field below is an independent optional value.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TReadLimit
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TReadLimit;
+ /// @endcond
+
+ ///
+ /// @brief KeyBound specifies table key and whether to include it
+ ///
+ /// It can be used in lower or upper limit when reading tables.
+ FLUENT_FIELD_OPTION(TKeyBound, KeyBound);
+
+ ///
+ /// @brief Table key
+ ///
+ /// It can be used in exact, lower or upper limit when reading tables.
+ FLUENT_FIELD_OPTION(TKey, Key);
+
+ ///
+ /// @brief Row index
+ ///
+ /// It can be used in exact, lower or upper limit when reading tables.
+ FLUENT_FIELD_OPTION(i64, RowIndex);
+
+ ///
+ /// @brief File offset
+ ///
+ /// It can be used in lower or upper limit when reading files.
+ FLUENT_FIELD_OPTION(i64, Offset);
+
+ ///
+ /// @brief Tablet index
+ ///
+ /// It can be used in lower or upper limit in dynamic table operations
+ FLUENT_FIELD_OPTION(i64, TabletIndex);
+};
+
+///
+/// @brief Range of a table or a file
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TReadRange
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TReadRange;
+ /// @endcond
+
+ ///
+ /// @brief Lower limit of the range
+ ///
+ /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used).
+ FLUENT_FIELD(TReadLimit, LowerLimit);
+
+ ///
+ /// @brief Upper limit of the range
+ ///
+ /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used).
+ FLUENT_FIELD(TReadLimit, UpperLimit);
+
+ /// Exact key or row index.
+ FLUENT_FIELD(TReadLimit, Exact);
+
+ /// Create read range from row indexes: both limits get only `RowIndex` set.
+ static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit)
+ {
+ return TReadRange()
+ .LowerLimit(TReadLimit().RowIndex(lowerLimit))
+ .UpperLimit(TReadLimit().RowIndex(upperLimit));
+ }
+
+ /// Create read range from keys: both limits get only `Key` set.
+ static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive)
+ {
+ return TReadRange()
+ .LowerLimit(TReadLimit().Key(lowerKeyInclusive))
+ .UpperLimit(TReadLimit().Key(upperKeyExclusive));
+ }
+};
+
+///
+/// @brief Path with additional attributes.
+///
+/// Allows to specify additional attributes for path used in some operations.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TRichYPath
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TRichYPath;
+ /// @endcond
+
+ /// Path itself.
+ FLUENT_FIELD(TYPath, Path);
+
+ /// Specifies that path should be appended not overwritten
+ FLUENT_FIELD_OPTION(bool, Append);
+
+ /// @deprecated Deprecated attribute.
+ FLUENT_FIELD_OPTION(bool, PartiallySorted);
+
+ /// Specifies that path is expected to be sorted by these columns.
+ FLUENT_FIELD(TSortColumns, SortedBy);
+
+ /// Add range to read (range list is created if it was not set before).
+ TRichYPath& AddRange(TReadRange range)
+ {
+ if (!Ranges_) {
+ Ranges_.ConstructInPlace();
+ }
+ Ranges_->push_back(std::move(range));
+ return *this;
+ }
+
+ /// Drop range list, i.e. switch back to the state where no ranges are set.
+ TRichYPath& ResetRanges()
+ {
+ Ranges_.Clear();
+ return *this;
+ }
+
+ ///
+ /// @{
+ ///
+ /// Return ranges to read.
+ ///
+ /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges.
+ /// Nothing represents universal range (reader reads all table rows).
+ /// Empty TVector represents empty range (reader returns empty set of rows).
+ const TMaybe<TVector<TReadRange>>& GetRanges() const
+ {
+ return Ranges_;
+ }
+
+ TMaybe<TVector<TReadRange>>& MutableRanges()
+ {
+ return Ranges_;
+ }
+ /// @}
+
+ ///
+ /// @{
+ ///
+ /// Get range view, that is convenient way to iterate through all ranges.
+ /// The view is empty when ranges are not set.
+ TArrayRef<TReadRange> MutableRangesView()
+ {
+ if (Ranges_.Defined()) {
+ return TArrayRef(Ranges_->data(), Ranges_->size());
+ } else {
+ return {};
+ }
+ }
+
+ TArrayRef<const TReadRange> GetRangesView() const
+ {
+ if (Ranges_.Defined()) {
+ return TArrayRef(Ranges_->data(), Ranges_->size());
+ } else {
+ return {};
+ }
+ }
+ /// @}
+
+ /// @{
+ ///
+ /// Get range by index.
+ ///
+ /// NOTE(review): range list is accessed through `GetRef()` and index is not checked here,
+ /// so caller presumably must make sure that ranges are set and index is valid - confirm.
+ const TReadRange& GetRange(ssize_t i) const
+ {
+ return Ranges_.GetRef()[i];
+ }
+
+ TReadRange& MutableRange(ssize_t i)
+ {
+ return Ranges_.GetRef()[i];
+ }
+ /// @}
+
+ ///
+ /// @brief Specifies columns that should be read.
+ ///
+ /// If it's set to Nothing then all columns will be read.
+ /// If empty TColumnNames is specified then each read row will be empty.
+ FLUENT_FIELD_OPTION(TColumnNames, Columns);
+
+ FLUENT_FIELD_OPTION(bool, Teleport);
+ FLUENT_FIELD_OPTION(bool, Primary);
+ FLUENT_FIELD_OPTION(bool, Foreign);
+ FLUENT_FIELD_OPTION(i64, RowCountLimit);
+
+ FLUENT_FIELD_OPTION(TString, FileName);
+
+ /// Specifies original path to be shown in Web UI
+ FLUENT_FIELD_OPTION(TYPath, OriginalPath);
+
+ ///
+ /// @brief Specifies that this path points to executable file
+ ///
+ /// Used in operation specs.
+ FLUENT_FIELD_OPTION(bool, Executable);
+
+ ///
+ /// @brief Specify format to use when loading table.
+ ///
+ /// Used in operation specs.
+ FLUENT_FIELD_OPTION(TNode, Format);
+
+ /// @brief Specifies table schema that will be set on the path
+ FLUENT_FIELD_OPTION(TTableSchema, Schema);
+
+ /// Specifies compression codec that will be set on the path
+ FLUENT_FIELD_OPTION(TString, CompressionCodec);
+
+ /// Specifies erasure codec that will be set on the path
+ FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec);
+
+ /// Specifies schema modification that will be set on the path
+ FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification);
+
+ /// Specifies optimize_for attribute that will be set on the path
+ FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor);
+
+ ///
+ /// @brief Do not put file used in operation into node cache
+ ///
+ /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node.
+ /// It helps jobs that use tmpfs to start faster,
+ /// because files will be loaded into tmpfs directly bypassing disk cache
+ FLUENT_FIELD_OPTION(bool, BypassArtifactCache);
+
+ ///
+ /// @brief Timestamp of dynamic table.
+ ///
+ /// NOTE: it is _not_ unix timestamp
+ /// (instead it's transaction timestamp, that is more complex structure).
+ FLUENT_FIELD_OPTION(i64, Timestamp);
+
+ ///
+ /// @brief Specify transaction that should be used to access this path.
+ ///
+ /// Allows to start cross-transactional operations.
+ FLUENT_FIELD_OPTION(TTransactionId, TransactionId);
+
+ /// Mapping from one column name to another, used by `RenameColumns` attribute.
+ using TRenameColumnsDescriptor = THashMap<TString, TString>;
+
+ /// Specifies columnar mapping which will be applied to columns before transfer to job.
+ FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns);
+
+ /// Create empty path with no attributes
+ TRichYPath()
+ { }
+
+ ///
+ /// @{
+ ///
+ /// @brief Create path from string
+ ///
+ /// Constructors are implicit, so plain strings can be passed where `TRichYPath` is expected.
+ TRichYPath(const char* path)
+ : Path_(path)
+ { }
+
+ TRichYPath(const TYPath& path)
+ : Path_(path)
+ { }
+ /// @}
+
+private:
+ // Nothing means that ranges are not set at all, which is different from empty list of ranges.
+ TMaybe<TVector<TReadRange>> Ranges_;
+};
+
+///
+/// @brief Create copy of @ref NYT::TRichYPath with schema derived from proto message.
+///
+/// Schema is derived from `TProtoType` (with `sortBy` as sort columns) only if the path has no schema yet;
+/// schema that is already set on the path is left untouched.
+template <typename TProtoType>
+TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
+{
+ static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message");
+
+ TRichYPath result(path);
+ const bool hasSchema = result.Schema_.Defined();
+ if (!hasSchema) {
+ result.Schema(CreateTableSchema<TProtoType>(sortBy));
+ }
+ return result;
+}
+
+///
+/// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible.
+///
+/// If TRowType is a protobuf message, the result is the same as that of `WithSchema<TRowType>(path, sortBy)`.
+/// For any other TRowType plain copy of the path is returned, so its schema is left as is (and probably unset).
+template <typename TRowType>
+TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
+{
+ if constexpr (!std::is_base_of_v<::google::protobuf::Message, TRowType>) {
+ return path;
+ } else {
+ return WithSchema<TRowType>(path, sortBy);
+ }
+}
+
+///
+/// @brief Get the list of ranges related to path in compatibility mode.
+///
+/// - If path is missing ranges, empty list is returned.
+/// - If path has associated range list and the list is not empty, function returns this list.
+/// - If path has associated range list and this list is empty, exception is thrown.
+///
+/// Before YT-17683 RichYPath didn't support empty range list and empty range list actually meant universal range.
+/// This function emulates this old behavior.
+///
+/// NOTE(review): the result is returned by const reference; its lifetime is determined by the implementation,
+/// which is not visible in this header — TODO confirm before storing the reference.
+///
+/// @see https://st.yandex-team.ru/YT-17683
+const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path);
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Statistics about table columns.
+///
+/// Plain data holder: it has no methods and no invariants of its own.
+struct TTableColumnarStatistics
+{
+ /// Total data weight for all chunks for each of requested columns.
+ /// NOTE(review): presumably the map is keyed by column name — TODO confirm.
+ THashMap<TString, i64> ColumnDataWeight;
+
+ /// Total weight of all old chunks that don't keep columnar statistics (0 by default).
+ i64 LegacyChunksDataWeight = 0;
+
+ /// Timestamps total weight (only for dynamic tables).
+ /// Optional value: it is a TMaybe, so it may be left empty.
+ TMaybe<i64> TimestampTotalWeight;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Description of a partition.
+struct TMultiTablePartition
+{
+ /// Numeric statistics of a partition; every counter is 0 by default.
+ struct TStatistics
+ {
+ /// NOTE(review): presumably the number of chunks covered by the partition — TODO confirm.
+ i64 ChunkCount = 0;
+ /// NOTE(review): presumably the total data weight of the partition — TODO confirm units.
+ i64 DataWeight = 0;
+ /// NOTE(review): presumably the number of rows covered by the partition — TODO confirm.
+ i64 RowCount = 0;
+ };
+
+ /// Ranges of input tables for this partition.
+ TVector<TRichYPath> TableRanges;
+
+ /// Aggregate statistics of all the table ranges in the partition.
+ TStatistics AggregateStatistics;
+};
+
+///
+/// @brief Table partitions from GetTablePartitions command.
+///
+/// Thin wrapper around a vector of @ref NYT::TMultiTablePartition.
+struct TMultiTablePartitions
+{
+ /// Disjoint partitions into which the input tables were divided.
+ TVector<TMultiTablePartition> Partitions;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Contains information about tablet
+///
+/// Every field has a default member initializer, so a default-constructed object
+/// never holds indeterminate values.
+///
+/// @see NYT::IClient::GetTabletInfos
+struct TTabletInfo
+{
+    ///
+    /// @brief Indicates the total number of rows added to the tablet (including trimmed ones).
+    ///
+    /// Currently only provided for ordered tablets.
+    i64 TotalRowCount = 0;
+
+    ///
+    /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible.
+    ///
+    /// Only makes sense for ordered tablet.
+    i64 TrimmedRowCount = 0;
+
+    ///
+    /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp
+    ///
+    /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed;
+    /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount).
+    /// Mostly makes sense for ordered tablets.
+    ///
+    /// Initialized to 0 like the other fields: without an initializer, reading this field
+    /// from a default-initialized `TTabletInfo` would be undefined behavior.
+    ui64 BarrierTimestamp = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get
+struct TAttributeFilter
+{
+ /// @cond Doxygen_Suppress
+ // TSelf is the name that the FLUENT_* macros use for the enclosing type.
+ using TSelf = TAttributeFilter;
+ /// @endcond
+
+ /// List of attributes.
+ /// NOTE(review): the storage member and the fluent method are generated by FLUENT_VECTOR_FIELD;
+ /// their exact names are defined by that macro — TODO confirm against its definition.
+ FLUENT_VECTOR_FIELD(TString, Attribute);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Check if none of the fields of @ref NYT::TReadLimit is set.
+///
+/// @return true if no field of readLimit is set and false otherwise.
+bool IsTrivial(const TReadLimit& readLimit);
+
+/// Convert yson node type to table schema type
+///
+/// NOTE(review): handling of node types that have no schema counterpart is defined by the implementation,
+/// which is not visible in this header — TODO confirm.
+EValueType NodeTypeToValueType(TNode::EType nodeType);
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Enumeration for specifying how reading from master is performed.
+///
+/// Used in operations like NYT::ICypressClient::Get
+///
+/// NOTE(review): the trailing block comments with quoted names after each value look like serialized
+/// names of the values; presumably they are consumed by enum serialization tooling,
+/// so keep them intact — TODO confirm.
+enum class EMasterReadKind : int
+{
+ ///
+ /// @brief Reading from leader.
+ ///
+ /// Should almost never be used since it's expensive and for regular uses has no difference from
+ /// "follower" read.
+ Leader /* "leader" */,
+
+ /// @brief Reading from master follower (default).
+ Follower /* "follower" */,
+
+ /// @brief Reading from cache.
+ ///
+ /// NOTE(review): which cache is meant and how stale the data may be is not visible here — TODO confirm.
+ Cache /* "cache" */,
+
+ /// @brief Reading from master cache.
+ ///
+ /// NOTE(review): the difference from the plain cache read kind is not visible here — TODO confirm.
+ MasterCache /* "master_cache" */,
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @cond Doxygen_Suppress
+// Internal helpers of the library; nothing in this namespace is part of the public interface.
+namespace NDetail {
+
+// Convert EValueType to its string representation.
+// MUST NOT BE USED BY CLIENTS
+// TODO: we should use default GENERATE_ENUM_SERIALIZATION
+TString ToString(EValueType type);
+
+} // namespace NDetail
+/// @endcond
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT