1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/utf8_fix.h"
#include <google/protobuf/stubs/port.h>
#include <algorithm>
#include <cassert>
namespace protobuf_mutator {
namespace {
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
while (--size) {
*(--e) = 0x80 | (code & 0x3F);
code >>= 6;
}
*(--e) = prefix | code;
}
char* FixCode(char* b, const char* e, RandomEngine* random) {
const char* start = b;
assert(b < e);
e = std::min<const char*>(e, b + 4);
char32_t c = *b++;
for (; b < e && (*b & 0xC0) == 0x80; ++b) {
c = (c << 6) + (*b & 0x3F);
}
uint8_t size = b - start;
switch (size) {
case 1:
c &= 0x7F;
StoreCode(b, c, size, 0);
break;
case 2:
c &= 0x7FF;
if (c < 0x80) {
// Use uint32_t because uniform_int_distribution does not support
// char32_t on Windows.
c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random);
}
StoreCode(b, c, size, 0xC0);
break;
case 3:
c &= 0xFFFF;
// [0xD800, 0xE000) are reserved for UTF-16 surrogate halves.
if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
uint32_t halves = 0xE000 - 0xD800;
c = std::uniform_int_distribution<uint32_t>(0x800,
0xFFFF - halves)(*random);
if (c >= 0xD800) c += halves;
}
StoreCode(b, c, size, 0xE0);
break;
case 4:
c &= 0x1FFFFF;
if (c < 0x10000 || c > 0x10FFFF) {
c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random);
}
StoreCode(b, c, size, 0xF0);
break;
default:
assert(false && "Unexpected size of UTF-8 sequence");
}
return b;
}
} // namespace
void FixUtf8String(TProtoStringType* str, RandomEngine* random) {
if (str->empty()) return;
char* b = &(*str)[0];
const char* e = b + str->size();
while (b < e) {
b = FixCode(b, e, random);
}
}
} // namespace protobuf_mutator
|