aboutsummaryrefslogtreecommitdiffstats
path: root/yt/cpp/mapreduce/interface/format.h
blob: 920b9589a2fecac426493a5f53bf11f36620588d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#pragma once

///
/// @file yt/cpp/mapreduce/interface/format.h
///
/// Header containing the class for working with raw [YT formats](https://ytsaurus.tech/docs/en/user-guide/storage/formats).

#include "node.h"

#include <google/protobuf/descriptor.h>

namespace NYT {

////////////////////////////////////////////////////////////////////////////////

/// @brief Structured attributes returned by @ref NYT::TFormat::GetYamredDsvAttributes.
///
/// Plain aggregate holding two lists of column names.
///
/// @deprecated
struct TYamredDsvAttributes
{
    /// Names of key columns.
    TVector<TString> KeyColumnNames;

    /// Names of subkey columns.
    TVector<TString> SubkeyColumnNames;
};

////////////////////////////////////////////////////////////////////////////////

/// @brief Class representing YT data format.
///
/// Normally the user does not need to use it.
/// However, the class is handy for "raw" operations and table reading and writing,
/// e.g. @ref NYT::IOperationClient::RawMap and other raw operations,
/// @ref NYT::IIOClient::CreateRawReader and @ref NYT::IIOClient::CreateRawWriter.
/// Anyway, the static factory methods should be preferred to the constructor.
///
/// @see [YT doc](https://ytsaurus.tech/docs/en/user-guide/storage/formats).
struct TFormat
{
public:
    /// Format representation understandable by YT.
    TNode Config;

public:
    /// @brief Construct format from given YT format representation.
    ///
    /// @param config Format representation; a default-constructed @ref NYT::TNode is used when omitted.
    ///
    /// @note The constructor is explicit, so a `TNode` is never implicitly converted to `TFormat`.
    /// @note Prefer using static factory methods (e.g. @ref NYT::TFormat::YsonBinary, @ref NYT::TFormat::YsonText, @ref NYT::TFormat::Protobuf).
    explicit TFormat(const TNode& config = TNode());

    /// @brief Create text YSON format.
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/user-guide/storage/formats#yson)
    static TFormat YsonText();

    /// @brief Create binary YSON format.
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/user-guide/storage/formats#yson)
    static TFormat YsonBinary();

    /// @brief Create YaMR format.
    ///
    /// NOTE(review): the method name suggests the "lenval" flavour of YaMR; confirm against the implementation.
    ///
    /// @deprecated
    static TFormat YaMRLenval();

    /// @brief Create protobuf format from protobuf message descriptors.
    ///
    /// @param descriptors Descriptors of the protobuf messages the format is built from.
    /// @param withDescriptors Defaults to `false`.
    ///     NOTE(review): the exact effect of this flag is defined by the implementation, which is not visible in this header.
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/api/c++/protobuf.html).
    static TFormat Protobuf(
        const TVector<const ::google::protobuf::Descriptor*>& descriptors,
        bool withDescriptors = false);

    /// @brief Create JSON format.
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/user-guide/storage/formats#json)
    static TFormat Json();

    /// @brief Create DSV (TSKV) format.
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/user-guide/storage/formats#dsv)
    static TFormat Dsv();

    /// @brief Create protobuf format for the message specified in template parameter.
    ///
    /// `T` must be inherited from `Message`.
    ///
    /// @tparam T Protobuf message type; its static `descriptor()` is used to build the format.
    /// @param withDescriptors Forwarded unchanged to the descriptor-based @ref NYT::TFormat::Protobuf overload.
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/api/c++/protobuf.html).
    template<typename T>
    static inline TFormat Protobuf(bool withDescriptors = false);

    /// @brief Is the format text YSON?
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/user-guide/storage/formats#yson)
    bool IsTextYson() const;

    /// @brief Is the format protobuf?
    ///
    /// @see [the doc](https://ytsaurus.tech/docs/en/api/c++/protobuf.html)
    bool IsProtobuf() const;

    /// @brief Is the format YAMRed DSV?
    ///
    /// NOTE(review): this brief previously read "YaMR"; the method name and @ref NYT::TYamredDsvAttributes
    /// suggest the YAMRed DSV format instead. Confirm against the implementation.
    ///
    /// @deprecated
    bool IsYamredDsv() const;

    /// @brief Return the attributes of the format in a structured way.
    ///
    /// The result carries the key and subkey column names (see @ref NYT::TYamredDsvAttributes).
    ///
    /// NOTE(review): presumably meaningful only when @ref NYT::TFormat::IsYamredDsv is true; behaviour for
    /// other formats is defined by the implementation and should be verified there.
    ///
    /// @deprecated
    TYamredDsvAttributes GetYamredDsvAttributes() const;
};

////////////////////////////////////////////////////////////////////////////////

/// Out-of-class definition of the single-message protobuf factory.
///
/// Takes the descriptor of `T` via its static `descriptor()` and delegates to the
/// descriptor-list overload, passing `withDescriptors` through unchanged.
template <typename T>
TFormat TFormat::Protobuf(bool withDescriptors)
{
    const auto messageDescriptor = T::descriptor();
    return Protobuf({messageDescriptor}, withDescriptors);
}

/// @brief Create table schema from protobuf message descriptor.
///
/// @param messageDescriptor Descriptor of the protobuf message the schema is derived from.
/// @param keepFieldsWithoutExtension If true, fields without "column_name" or "key_column_name" extensions
///     are added to the schema as well.
///
/// @return Table schema built from the given message descriptor.
TTableSchema CreateTableSchema(
    const ::google::protobuf::Descriptor& messageDescriptor,
    bool keepFieldsWithoutExtension);

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT