aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yson_pull/detail/cescape_decode.h
blob: 8e10cf3892208002fd20b2bfe84b51ef086aad35 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#pragma once

#include <util/system/types.h>

#include <algorithm>
#include <cstring>

namespace NYsonPull { 
    namespace NDetail { 
        namespace NCEscape { 
            namespace NImpl { 
                // Returns the distance of `c` from the character '0'.
                // No range check is performed here: callers compare the result
                // against their radix. Assuming ui8 is an unsigned 8-bit type,
                // characters below '0' wrap around to large values, so every
                // non-decimal-digit input produces a result of 10 or more.
                inline ui8 as_digit(ui8 c) {
                    constexpr ui8 zero = '0';
                    return c - zero;
                }

                // Converts one hexadecimal digit character to its numeric value.
                // '0'..'9' give 0..9; 'A'..'F' and 'a'..'f' give 10..15.
                // Every other input yields 255, which callers treat as
                // "not a hex digit" by testing the result against 16.
                inline ui8 as_hexdigit(ui8 c) {
                    constexpr ui8 not_a_digit = 255;

                    if (c >= '0' && c <= '9') {
                        return static_cast<ui8>(c - '0');
                    }
                    if (c >= 'A' && c <= 'F') {
                        return static_cast<ui8>(c - 'A' + 10);
                    }
                    if (c >= 'a' && c <= 'f') {
                        return static_cast<ui8>(c - 'a' + 10);
                    }
                    return not_a_digit;
                }

                // Accumulates up to `n` octal digits starting at `p` into `result`
                // (result = result * 8 + digit for each digit consumed).
                // Stops early at the first character that is not an octal digit.
                // `result` is not reset here; the caller provides the initial value.
                // Returns a pointer to the first character that was not consumed.
                // At most `n` characters are read from `p`.
                inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) {
                    for (; n != 0; --n, ++p) {
                        const ui8 value = as_digit(*p);
                        if (value >= 8) {
                            break;
                        }
                        result = result * 8 + value;
                    }
                    return p;
                }

                // Accumulates up to `n` hexadecimal digits starting at `p` into
                // `result` (result = result * 16 + digit for each digit consumed).
                // Stops early at the first character that is not a hex digit.
                // `result` is not reset here; the caller provides the initial value.
                // Returns a pointer to the first character that was not consumed;
                // the return value equals `p` when no digit was read.
                inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) {
                    for (; n != 0; --n, ++p) {
                        const ui8 value = as_hexdigit(*p);
                        if (value >= 16) {
                            break;
                        }
                        result = result * 16 + value;
                    }
                    return p;
                }

                // Decodes the escape sequence whose body starts at `p`, i.e. the
                // character right after a backslash, and stores the decoded byte
                // in `result`.
                //
                // `p` is dereferenced unconditionally, so the caller must ensure
                // p < end. `end` only limits how many digits numeric escapes read.
                //
                // Recognized forms:
                //   b, f, n, r, t  - the corresponding control characters;
                //   x + 1..2 hex digits - the byte with that value; an 'x' with
                //                    no hex digit after it decodes to 'x' itself;
                //   0..3 + up to 2 more octal digits (3 digits in total);
                //   4..7 + up to 1 more octal digit (2 digits in total);
                //   anything else  - the character itself (this includes 'a'
                //                    and 'v', which are not special-cased).
                //
                // Returns a pointer just past the consumed characters.
                inline const ui8* unescape_char_and_advance(
                    ui8& result,
                    const ui8* p,
                    const ui8* end) {
                    const ui8 head = *p;

                    // Hexadecimal escape: skip the 'x', then read at most two digits.
                    if (head == 'x') {
                        const ui8* digits = p + 1;
                        result = 0;
                        const ui8* after = read_hex(
                            result,
                            digits, std::min<ptrdiff_t>(2, end - digits));
                        if (after == digits) {
                            // No hex digit followed: keep the literal 'x'.
                            result = 'x';
                        }
                        return after;
                    }

                    // Octal escape: the leading digit is part of the value. A leading
                    // digit of 0..3 allows three digits, 4..7 allows only two, which
                    // keeps the decoded value within a single byte.
                    if (head >= '0' && head <= '7') {
                        const ptrdiff_t max_digits = (head <= '3') ? 3 : 2;
                        result = 0;
                        return read_oct(
                            result,
                            p, std::min<ptrdiff_t>(max_digits, end - p));
                    }

                    // Single-character escapes; unknown ones decode to themselves.
                    switch (head) {
                        case 'b':
                            result = '\b';
                            break;
                        case 'f':
                            result = '\f';
                            break;
                        case 'n':
                            result = '\n';
                            break;
                        case 'r':
                            result = '\r';
                            break;
                        case 't':
                            result = '\t';
                            break;
                        default:
                            result = head;
                            break;
                    }
                    return p + 1;
                }

                template <typename T, typename U> 
                inline void unescape_impl( 
                    const ui8* p, 
                    const ui8* end, 
                    T&& consume_one, 
                    U&& consume_span) { 
                    while (p < end) { 
                        auto* escaped = static_cast<const ui8*>( 
                            ::memchr(p, '\\', end - p)); 
                        if (escaped == nullptr) { 
                            consume_span(p, end - p); 
                            return; 
                        } else { 
                            consume_span(p, escaped - p); 
                            auto c = ui8{'\\'}; 
                            p = escaped + 1; 
                            if (p < end) { 
                                p = unescape_char_and_advance(c, p, end); 
                            } 
                            consume_one(c); 
                        } 
                    } 
                } 
            }
        }     // namespace NCEscape 
    }         // namespace NDetail 
}