aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yson_pull/detail/cescape_decode.h
blob: 2ee5dd950073f1ab20acc2d24f85e7b25f4dc479 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#pragma once

#include <util/system/types.h>

#include <algorithm>
#include <cstring>

namespace NYsonPull {
    namespace NDetail {
        namespace NCEscape {
            namespace NImpl {
                // Converts an ASCII digit character to its numeric value by
                // subtracting '0'. No range check is performed: the difference is
                // simply converted back to ui8, so callers must compare the result
                // against their radix (presumably ui8 is an unsigned 8-bit type, in
                // which case bytes below '0' wrap around to large values).
                inline ui8 as_digit(ui8 c) {
                    return static_cast<ui8>(c - '0');
                }

                // Maps a byte to the value of the hexadecimal digit it encodes:
                // '0'-'9' -> 0..9, 'A'-'F' and 'a'-'f' -> 10..15.
                // Every other byte maps to 255, which callers treat as "not a digit".
                inline ui8 as_hexdigit(ui8 c) {
                    // Marker stored in the table for bytes that are not hex digits.
                    static constexpr ui8 X = 255;

                    // 256-entry lookup table indexed by byte value, 16 entries per
                    // row; the trailing comment is the index of the row's first entry.
                    static constexpr ui8 value_of_byte[256] = {
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x00
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x10
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x20
                        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,       // 0x30: '0'-'9'
                        X, 10, 11, 12, 13, 14, 15, X, X, X, X, X, X, X, X, X, // 0x40: 'A'-'F'
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x50
                        X, 10, 11, 12, 13, 14, 15, X, X, X, X, X, X, X, X, X, // 0x60: 'a'-'f'
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x70
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x80
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0x90
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0xA0
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0xB0
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0xC0
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0xD0
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,       // 0xE0
                        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X};      // 0xF0

                    return value_of_byte[c];
                }

                // Accumulates up to `n` octal digits starting at `p` into `result`
                // (result = result * 8 + digit for each digit consumed).
                // Stops early at the first byte that is not an octal digit.
                // `result` is not reset here; the caller chooses the initial value.
                // Returns a pointer to the first byte that was not consumed.
                // No byte is read when `n` is 0.
                inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) {
                    for (; n != 0; --n, ++p) {
                        const ui8 value = as_digit(*p);
                        if (value >= 8) {
                            break; // not an octal digit
                        }
                        result = result * 8 + value;
                    }
                    return p;
                }

                // Accumulates up to `n` hexadecimal digits starting at `p` into
                // `result` (result = result * 16 + digit for each digit consumed).
                // Stops early at the first byte that is not a hex digit.
                // `result` is not reset here; the caller chooses the initial value.
                // Returns a pointer to the first byte that was not consumed.
                // No byte is read when `n` is 0.
                inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) {
                    for (; n != 0; --n, ++p) {
                        const ui8 value = as_hexdigit(*p);
                        if (value >= 16) {
                            break; // as_hexdigit yields 255 for non-hex bytes
                        }
                        result = result * 16 + value;
                    }
                    return p;
                }

                // Decodes one escape sequence whose body (the part after the
                // backslash) starts at `p`, storing the decoded byte in `result`.
                //
                // Recognised bodies:
                //   b f n r t       -> the matching control character
                //   x + 1..2 hex    -> the byte with that value; an 'x' followed by
                //                      no hex digit decodes to a literal 'x'
                //   0-3 + up to 2 more octal digits (3 digits total)
                //   4-7 + up to 1 more octal digit  (2 digits total)
                //   anything else   -> that byte itself
                //
                // Digits are never read at or beyond `end`. `*p` itself is read
                // unconditionally, so the caller must guarantee p < end.
                // Returns a pointer to the first byte after the consumed sequence.
                inline const ui8* unescape_char_and_advance(
                    ui8& result,
                    const ui8* p,
                    const ui8* end) {
                    const ui8 code = *p;

                    // Octal escape: the leading digit limits how many digits are read.
                    if (code >= '0' && code <= '7') {
                        const ptrdiff_t max_digits = (code <= '3') ? 3 : 2;
                        result = 0;
                        return read_oct(
                            result,
                            p, std::min<ptrdiff_t>(max_digits, end - p));
                    }

                    // Hexadecimal escape: at most two digits after the 'x'.
                    if (code == 'x') {
                        const ui8* const digits = p + 1;
                        result = 0;
                        const ui8* const after = read_hex(
                            result,
                            digits, std::min<ptrdiff_t>(2, end - digits));
                        if (after == digits) {
                            // No hex digit followed: keep the 'x' as a plain byte.
                            result = 'x';
                        }
                        return after;
                    }

                    // Single-character escapes; unknown ones decode to themselves.
                    switch (code) {
                        case 'b':
                            result = '\b';
                            break;
                        case 'f':
                            result = '\f';
                            break;
                        case 'n':
                            result = '\n';
                            break;
                        case 'r':
                            result = '\r';
                            break;
                        case 't':
                            result = '\t';
                            break;
                        default:
                            result = code;
                            break;
                    }
                    return p + 1;
                }

                template <typename T, typename U>
                inline void unescape_impl(
                    const ui8* p,
                    const ui8* end,
                    T&& consume_one,
                    U&& consume_span) {
                    while (p < end) {
                        auto* escaped = static_cast<const ui8*>(
                            ::memchr(p, '\\', end - p));
                        if (escaped == nullptr) {
                            consume_span(p, end - p);
                            return;
                        } else {
                            consume_span(p, escaped - p);
                            auto c = ui8{'\\'};
                            p = escaped + 1;
                            if (p < end) {
                                p = unescape_char_and_advance(c, p, end);
                            }
                            consume_one(c);
                        }
                    }
                }
            }
        }     // namespace NCEscape
    }         // namespace NDetail
}