aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/uri/encode.h
blob: aff7a76c4d7e3e88f3e79d1ee767142bbf2426dd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#pragma once

#include "common.h"

#include <util/stream/output.h>

namespace NUri {
    namespace NEncode {
// Single source of truth for the basic character classes. Both enums below are
// generated from this list, so a bit index and its mask can never drift apart.
#define CHAR_TYPE_LIST(XX) \
    XX(Digit)              \
    XX(Lower)              \
    XX(Upper)              \
    XX(Unres)              \
    XX(Stdrd)
#define CHAR_TYPE_NAME(f) _ECT##f,
#define CHAR_TYPE_FLAG(f) ECF##f = 1u << _ECT##f,

        // Bit index of every basic character class (_ECTDigit == 0, _ECTLower == 1, ...).
        // NOTE(review): identifiers starting with an underscore followed by an
        // upper-case letter are reserved to the implementation in C++; the names
        // are kept unchanged here for source compatibility.
        enum ECharType {
            CHAR_TYPE_LIST(CHAR_TYPE_NAME)
        };

        // Bit mask of every basic character class (ECF<Name> == 1u << _ECT<Name>),
        // followed by cumulative group masks built from them.
        enum ECharFlag {
            CHAR_TYPE_LIST(CHAR_TYPE_FLAG)
            // compound group flags: each group is the previous one plus one more class
            ECGAlpha = ECFUpper | ECFLower,
            ECGAlnum = ECGAlpha | ECFDigit,
            ECGUnres = ECGAlnum | ECFUnres,
            ECGStdrd = ECGUnres | ECFStdrd,
        };

#undef CHAR_TYPE_FLAG
#undef CHAR_TYPE_NAME
#undef CHAR_TYPE_LIST

        // Classification record for a single character: its class bits, feature
        // bits and two per-field bit masks.
        struct TCharFlags {
            ui32 TypeFlags; // character class bits (tested against ECharFlag masks)
            ui64 FeatFlags; // feature bits; presumably TFeature flags -- confirm against the grammar tables
            ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed
            ui32 EncodeFld; // encode if shouldn't be treated as delimiter

            // Feature-only record: no class bits and empty field masks.
            TCharFlags(ui64 feat = 0)
                : TCharFlags(0, feat)
            {
            }

            // Fully specified record; both field masks default to empty.
            TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0)
                : TypeFlags(type)
                , FeatFlags(feat)
                , DecodeFld(decmask)
                , EncodeFld(encmask)
            {
            }

            // Merge every bit set in val into this record; returns *this for chaining.
            TCharFlags& Add(const TCharFlags& val) {
                EncodeFld |= val.EncodeFld;
                DecodeFld |= val.DecodeFld;
                FeatFlags |= val.FeatFlags;
                TypeFlags |= val.TypeFlags;
                return *this;
            }

            // True when the character carries any ECGUnres class bit, or when at
            // least one field selected by fldmask has its DecodeFld bit set while
            // its EncodeFld bit is clear.
            bool IsAllowed(ui32 fldmask) const {
                if (TypeFlags & ECGUnres) {
                    return true;
                }
                return 0 != (DecodeFld & ~EncodeFld & fldmask);
            }

            // should we decode an encoded character
            bool IsDecode(ui32 fldmask, ui64 flags) const;
        };

        // State shared by the encode mappers: the feature flags, the bit mask of
        // the selected field and the cached "decode any" switch. Construction is
        // restricted to derived classes.
        class TEncodeMapperBase {
        protected:
            // Empty mapper: every member keeps its in-class default (no flags, no field).
            TEncodeMapperBase()
            {
            }

            // Mapper bound to a single field; FldMask becomes the bit 1u << fld.
            TEncodeMapperBase(ui64 flags, TField::EField fld)
                : Flags(flags)
                , FldMask(1u << fld)
                , Q_DecodeAny(0 != (flags & TFeature::FeatureDecodeANY))
            {
            }

        public:
            // True when this mapper was created for the given field.
            bool Is(TField::EField fld) const {
                return 0 != (FldMask & (1u << fld));
            }

        protected:
            const ui64 Flags = 0;
            const ui32 FldMask = 0;
            const bool Q_DecodeAny = false; // this is a special option for username/password
        };

        // Maps a literal ("sym") or percent-decoded ("hex") character and reports
        // whether it has to be encoded.
        class TEncodeMapper: public TEncodeMapperBase {
        public:
            // Deliberately not explicit: TEncoder::Decode() hands a bare ui64 flag
            // word to a parameter of this type and relies on the implicit conversion.
            TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
                : TEncodeMapperBase(flags, fld)
                , Q_EncodeSpcAsPlus(0 != (flags & TFeature::FeatureEncodeSpaceAsPlus))
            {
            }

            // Result convention: negative=sym, positive=hex, zero=maybesym.
            // The character is taken by non-const reference, so the mapper is
            // able to substitute a different character for the caller to emit.
            int EncodeSym(unsigned char&) const;
            int EncodeHex(unsigned char&) const;

        protected:
            // Cached state of TFeature::FeatureEncodeSpaceAsPlus in the flags.
            const bool Q_EncodeSpcAsPlus;
        };

        // Tells whether a character has to be encoded when it is copied into a
        // destination field.
        class TEncodeToMapper: public TEncodeMapperBase {
        public:
            // Mapper without a destination field; Enabled() returns false for it.
            TEncodeToMapper() {
            }

            // Mapper for the destination field fld.
            TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
                : TEncodeMapperBase(flags, fld)
            {
            }

            // True once a destination field has been selected (non-empty field mask).
            bool Enabled() const {
                return FldMask != 0;
            }

            bool Encode(unsigned char) const;
        };

        // Writes URI text to an output stream, using a source-field mapper (and an
        // optional destination-field mapper) to decide per character whether it is
        // emitted literally or percent-encoded. Also hosts the static character
        // classification helpers and the grammar table behind them.
        class TEncoder {
        public:
            // The mappers are copied into the encoder; the stream is held by reference.
            TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper());

            // Re-encode url into the stream. The result is a ui64 flag word
            // (presumably the accumulated OutFlags -- confirm in the implementation).
            ui64 ReEncode(const TStringBuf& url);

            // Pointer-and-length convenience overload.
            ui64 ReEncode(const char* str, size_t len) {
                const TStringBuf view(str, len);
                return ReEncode(view);
            }

        protected:
            // True when the grammar entry of c has any of the given class bits.
            static bool IsType(unsigned char c, ui64 flags) {
                return 0 != (GetFlags(c).TypeFlags & flags);
            }

        public:
            // Character class predicates backed by the shared grammar table.
            static bool IsDigit(unsigned char c) {
                return IsType(c, ECFDigit);
            }
            static bool IsUpper(unsigned char c) {
                return IsType(c, ECFUpper);
            }
            static bool IsLower(unsigned char c) {
                return IsType(c, ECFLower);
            }
            static bool IsAlpha(unsigned char c) {
                return IsType(c, ECGAlpha);
            }
            static bool IsAlnum(unsigned char c) {
                return IsType(c, ECGAlnum);
            }
            static bool IsUnres(unsigned char c) {
                return IsType(c, ECGUnres);
            }

            // Grammar record of a single character.
            static const TCharFlags& GetFlags(unsigned char c) {
                return Grammar().Get(c);
            }

        public:
            // process an encoded string, decoding safe chars and encoding unsafe
            static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) {
                TEncoder encoder(out, srcfld);
                encoder.ReEncode(val);
                return out;
            }

            // Same as above, additionally applying the destination-field mapper.
            static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) {
                TEncoder encoder(out, srcfld, dstfld);
                encoder.ReEncode(val);
                return out;
            }

            // Re-encode with FeatureDecodeANY forced on.
            // see also UrlUnescape() from string/quote.h
            static IOutputStream& Decode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
                const TEncodeMapper decodeAny(flags | TFeature::FeatureDecodeANY);
                return ReEncode(out, val, decodeAny);
            }

        public:
            // process a raw string or char, encode as needed
            static IOutputStream& Hex(IOutputStream& out, unsigned char val);

            // Write '%' followed by whatever Hex() emits for val.
            static IOutputStream& Encode(IOutputStream& out, unsigned char val) {
                out << '%';
                return Hex(out, val);
            }
            static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val);
            static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val);

            static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld);
            static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags);

            // Field-agnostic shortcuts: forward to EncodeField with TField::FieldAllMAX.
            static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) {
                return EncodeField(out, val, TField::FieldAllMAX);
            }

            static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
                return EncodeField(out, val, TField::FieldAllMAX, flags);
            }

        public:
            // Lookup table holding one TCharFlags record per possible byte value.
            class TGrammar {
                TCharFlags Map_[256];

            public:
                TGrammar();

                // Read-only record of ch.
                const TCharFlags& Get(unsigned char ch) const {
                    return Map_[ch];
                }

                // Writable record of ch.
                TCharFlags& GetMutable(unsigned char ch) {
                    return Map_[ch];
                }

                // OR val into the record of ch and return that record.
                TCharFlags& Add(unsigned char ch, const TCharFlags& val) {
                    TCharFlags& entry = GetMutable(ch);
                    return entry.Add(val);
                }

                // OR val into every record of the inclusive range [lo, hi].
                void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) {
                    // The counter is wider than unsigned char so that hi == 255 still terminates.
                    unsigned code = lo;
                    while (code <= hi) {
                        Add(static_cast<unsigned char>(code), val);
                        ++code;
                    }
                }
                void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
                    const TCharFlags record(type, feat, decmask, encmask);
                    AddRng(lo, hi, record);
                }

                // OR val into the record of every character contained in set.
                void Add(const TStringBuf& set, const TCharFlags& val) {
                    const size_t count = set.length();
                    for (size_t pos = 0; pos < count; ++pos) {
                        Add(set[pos], val);
                    }
                }
                void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
                    const TCharFlags record(type, feat, decmask, encmask);
                    Add(set, record);
                }
            };

            // Shared grammar table used by the static helpers above.
            static const TGrammar& Grammar();

        protected:
            IOutputStream& Out;
            const TEncodeMapper FldSrc;
            const TEncodeToMapper FldDst;
            ui64 OutFlags;
            int HexValue; // value accumulated from hex digits, 4 bits per digit

        protected:
            // Discard any partially accumulated hex value.
            void HexReset() {
                HexValue = 0;
            }

            // Append one hex digit taken from the matching character range.
            void HexDigit(char c) {
                HexAdd(c - '0');
            }
            void HexUpper(char c) {
                HexAdd(10 + (c - 'A'));
            }
            void HexLower(char c) {
                HexAdd(10 + (c - 'a'));
            }

            // Shift the accumulator by one hex digit and add the new digit value.
            void HexAdd(int val) {
                HexValue = (HexValue << 4) + val;
            }

        protected:
            // The mapper receives ch by non-const reference, so it must run as a
            // separate statement before ch is handed on to Do().
            void DoSym(unsigned char ch) {
                const int verdict = FldSrc.EncodeSym(ch);
                Do(ch, verdict);
            }
            void DoHex(unsigned char ch) {
                const int verdict = FldSrc.EncodeHex(ch);
                Do(ch, verdict);
            }

            // Process the accumulated hex value as a character, then clear it.
            void DoHex() {
                DoHex(static_cast<unsigned char>(HexValue));
                HexReset();
            }
            void Do(unsigned char, int);
        };
    }

    // Re-export the encoder at NUri scope so that it can be named NUri::TEncoder.
    using TEncoder = NEncode::TEncoder;

}