1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
#include <sstream>
#include "exp.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
namespace YAML {
struct Mark;
} // namespace YAML
namespace YAML {
namespace Exp {
unsigned ParseHex(const std::string& str, const Mark& mark) {
unsigned value = 0;
for (char ch : str) {
int digit = 0;
if ('a' <= ch && ch <= 'f')
digit = ch - 'a' + 10;
else if ('A' <= ch && ch <= 'F')
digit = ch - 'A' + 10;
else if ('0' <= ch && ch <= '9')
digit = ch - '0';
else
throw ParserException(mark, ErrorMsg::INVALID_HEX);
value = (value << 4) + digit;
}
return value;
}
std::string Str(unsigned ch) { return std::string(1, static_cast<char>(ch)); }
// Escape
// . Translates the next 'codeLength' characters into a hex number and returns
// the result.
// . Throws if it's not actually hex.
std::string Escape(Stream& in, int codeLength) {
// grab string
std::string str;
for (int i = 0; i < codeLength; i++)
str += in.get();
// get the value
unsigned value = ParseHex(str, in.mark());
// legal unicode?
if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
std::stringstream msg;
msg << ErrorMsg::INVALID_UNICODE << value;
throw ParserException(in.mark(), msg.str());
}
// now break it up into chars
if (value <= 0x7F)
return Str(value);
if (value <= 0x7FF)
return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
if (value <= 0xFFFF)
return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) +
Str(0x80 + (value & 0x3F));
return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
}
// Escape
// . Escapes the sequence starting 'in' (it must begin with a '\' or single
// quote)
// and returns the result.
// . Throws if it's an unknown escape character.
std::string Escape(Stream& in) {
// eat slash
char escape = in.get();
// switch on escape character
char ch = in.get();
// first do single quote, since it's easier
if (escape == '\'' && ch == '\'')
return "\'";
// now do the slash (we're not gonna check if it's a slash - you better pass
// one!)
switch (ch) {
case '0':
return std::string(1, '\x00');
case 'a':
return "\x07";
case 'b':
return "\x08";
case 't':
case '\t':
return "\x09";
case 'n':
return "\x0A";
case 'v':
return "\x0B";
case 'f':
return "\x0C";
case 'r':
return "\x0D";
case 'e':
return "\x1B";
case ' ':
return R"( )";
case '\"':
return "\"";
case '\'':
return "\'";
case '\\':
return "\\";
case '/':
return "/";
case 'N':
return "\x85";
case '_':
return "\xA0";
case 'L':
return "\xE2\x80\xA8"; // LS (#x2028)
case 'P':
return "\xE2\x80\xA9"; // PS (#x2029)
case 'x':
return Escape(in, 2);
case 'u':
return Escape(in, 4);
case 'U':
return Escape(in, 8);
}
std::stringstream msg;
throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
}
} // namespace Exp
} // namespace YAML
|