aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yt/string/format_analyser.h
blob: 9f194144dcb685a2b0e6cc96aa85aab45efdcee4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#pragma once

#include "format_arg.h"

#include <util/generic/strbuf.h>

#include <algorithm>
#include <array>
#include <string_view>

namespace NYT::NDetail {

////////////////////////////////////////////////////////////////////////////////

struct TFormatAnalyser
{
public:
    // Position of a single flag sequence within the format string:
    // the index of its introductory '%' and the index one past its
    // conversion symbol.
    // NB(arkady-e1ppa): A location with both components equal to zero
    // is considered invalid (i.e. not filled).
    using TMarkerLocation = std::tuple<int, int>;

    // One marker location per formatted argument.
    template <class... TArgs>
    using TMarkerLocations = std::array<TMarkerLocation, sizeof...(TArgs)>;

    // Start indices of '%%' escapes.
    // NB(arkady-e1ppa): Recording every escape would require dynamic storage
    // for the coordinates, and there is no constexpr context large enough
    // to deallocate dynamic memory at the correct time. Hence only the
    // first 5 positions are stored and scanning past them is pessimized.
    // |-1| means no position at all.
    // |-2| is used to imply runtime format.
    using TEscapeLocations = std::array<int, 5>;

    // Checks |fmt| against the specifiers declared for |TArgs...| and returns
    // a tuple of (TMarkerLocations<TArgs...>, TEscapeLocations).
    // A malformed format reaches a |throw|, which is not permitted during
    // constant evaluation, so the offending call fails to compile.
    //
    // TODO(arkady-e1ppa): Before clang-19, consteval functions
    // defined out of line produce symbols in rare cases,
    // which causes the linker to crash.
    template <class... TArgs>
    static consteval auto AnalyzeFormat(std::string_view fmt)
    {
        return DoAnalyzeFormat<TArgs...>(fmt);
    }

private:
    // Returns true iff |symbol| occurs somewhere in |sv|.
    static consteval bool Contains(std::string_view sv, char symbol)
    {
        return std::ranges::find(sv, symbol) != sv.end();
    }

    // Symbols accepted inside a flag sequence for one argument type.
    struct TSpecifiers
    {
        // Symbols that terminate the flag sequence.
        std::string_view Conversion;
        // Symbols allowed between '%' and the conversion symbol.
        std::string_view Flags;
    };

    // Collects |ConversionSpecifiers| and |FlagSpecifiers| of
    // |TFormatArg<TArg>| into string views.
    template <class TArg>
    static consteval auto GetSpecifiers()
    {
        using TTraits = TFormatArg<TArg>;

        const std::string_view conversion{
            std::data(TTraits::ConversionSpecifiers),
            std::size(TTraits::ConversionSpecifiers)};
        const std::string_view flags{
            std::data(TTraits::FlagSpecifiers),
            std::size(TTraits::FlagSpecifiers)};

        return TSpecifiers{
            .Conversion = conversion,
            .Flags = flags,
        };
    }

    // Symbol that opens every flag sequence and, when doubled, denotes a literal '%'.
    static constexpr char IntroductorySymbol = '%';

    // Single left-to-right pass over |format| which tracks whether the scan
    // is currently inside a flag sequence. The i-th flag sequence is validated
    // against the specifiers of the i-th type in |TArgs...|.
    template <class... TArgs>
    static consteval auto DoAnalyzeFormat(std::string_view format)
    {
        // Value of |markerBegin| while scanning verbatim text.
        constexpr int NoMarker = -1;
        constexpr int argumentCount = static_cast<int>(sizeof...(TArgs));

        const std::array<TSpecifiers, sizeof...(TArgs)> specifiers{GetSpecifiers<TArgs>()...};

        // Value-initialized, i.e. every location starts out as (0, 0).
        TMarkerLocations<TArgs...> markerLocations = {};

        TEscapeLocations escapeLocations = {};
        escapeLocations.fill(-1);

        int escapesCount = 0;
        int markerCount = 0;
        int markerBegin = NoMarker;

        for (int position = 0; position < std::ssize(format); ++position) {
            const char symbol = format[position];
            const bool isIntroductory = (symbol == IntroductorySymbol);

            if (markerBegin == NoMarker) {
                // Verbatim text: the only symbol of interest is the one
                // which possibly opens a marker.
                if (isIntroductory) {
                    markerBegin = position;
                }
                continue;
            }

            // NB: '%%' must be handled before any specifier lookup since
            // the lookup requires |markerCount| to be a valid index into
            // |specifiers|, and that is only verified further below.
            if (isIntroductory) {
                const bool isEscape = (position == markerBegin + 1);
                if (!isEscape) {
                    // Something like '%a%'.
                    throw "You may not terminate flag sequence other than %% with \'%\' symbol";
                }

                // Escapes beyond the capacity of |escapeLocations| are not recorded.
                if (escapesCount < std::ssize(escapeLocations)) {
                    escapeLocations[escapesCount++] = markerBegin;
                }

                markerBegin = NoMarker;
                continue;
            }

            // From here on we are inside a genuine marker.
            if (markerCount == argumentCount) {
                // More markers than arguments.
                throw "Number of arguments supplied to format is smaller than the number of flag sequences";
            }

            const TSpecifiers& allowed = specifiers[markerCount];

            if (Contains(allowed.Conversion, symbol)) {
                // Conversion symbol closes the marker.
                markerLocations[markerCount++] = TMarkerLocation{markerBegin, position + 1};
                markerBegin = NoMarker;
            } else if (!Contains(allowed.Flags, symbol)) {
                throw "Symbol is not a valid flag specifier; See FlagSpecifiers";
            }
        }

        if (markerBegin != NoMarker) {
            // Format ended in the middle of a marker.
            throw "Unterminated flag sequence detected; Use \'%%\' to type plain %";
        }

        if (markerCount < argumentCount) {
            // Fewer markers than arguments.
            throw "Number of arguments supplied to format is greater than the number of flag sequences";
        }

        // TODO(arkady-e1ppa): Consider per-type verification
        // of markers.
        return std::tuple{markerLocations, escapeLocations};
    }
};

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT::NDetail