1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
|
#ifndef SIMDJSON_SRC_ICELAKE_CPP
#define SIMDJSON_SRC_ICELAKE_CPP
#ifndef SIMDJSON_CONDITIONAL_INCLUDE
#include <base.h>
#endif // SIMDJSON_CONDITIONAL_INCLUDE
#include <simdjson/icelake.h>
#include <simdjson/icelake/implementation.h>
// defining SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write
#define SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER
#include <simdjson/icelake/begin.h>
#include <generic/amalgamated.h>
#include <generic/stage1/amalgamated.h>
#include <generic/stage2/amalgamated.h>
#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER
//
// Stage 1
//
namespace simdjson {
namespace icelake {
/**
 * Allocates an icelake dom_parser_implementation, hands ownership to dst, and
 * then configures it with the requested capacity and maximum depth.
 *
 * Returns MEMALLOC if the allocation yields a null pointer, otherwise the first
 * error reported by set_capacity() or set_max_depth(), otherwise SUCCESS.
 * dst keeps the newly allocated parser even when a configuration step fails.
 */
simdjson_warn_unused error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  // Non-throwing new: an allocation failure surfaces as a null pointer.
  dst.reset( new (std::nothrow) dom_parser_implementation() );
  if (dst == nullptr) {
    return MEMALLOC;
  }

  const auto capacity_status = dst->set_capacity(capacity);
  if (capacity_status) {
    return capacity_status;
  }

  const auto depth_status = dst->set_max_depth(max_depth);
  if (depth_status) {
    return depth_status;
  }

  return SUCCESS;
}
namespace {
using namespace simd;
// Classifies the bytes of one input block.
//
// This identifies structural characters (comma, colon, braces, brackets),
// and ASCII white-space ('\r','\n','\t',' ').
//
// Returns a json_character_block built from two 64-bit masks, { whitespace, op }.
// Each mask is the result of an eq() comparison between (a variant of) the input
// and a per-byte table lookup driven by the input itself.
simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
// These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
// we can't use the generic lookup_16.
//
// Whitespace table (assuming repeat_16 takes its arguments in index order, as the layout of
// op_table below also implies): positions 0, 9, 10 and 13 hold ' ' (0x20), '\t' (0x09),
// '\n' (0x0A) and '\r' (0x0D), i.e. each whitespace character sits at the position given by
// its own low nibble. Every other entry is a filler value (100, 17, 113, 2, 112) whose low
// nibble differs from its position, so a byte that selects a filler can never equal it.
const auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
// The 6 operators (:,[]{}) have these values:
//
// , 2C
// : 3A
// [ 5B
// { 7B
// ] 5D
// } 7D
//
// If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
// We exploit this, using a simd 4-bit lookup to tell us which character to match against, and
// then match it (against | 0x20).
//
// To prevent recognizing other characters, everything else gets compared with 0, which cannot
// match due to the | 0x20.
//
// NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
// and :. This gets caught in stage 2, which checks the actual character to ensure the right
// operators are in the right places.
const auto op_table = simd8<uint8_t>::repeat_16(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B
',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D
);
// We compute whitespace and op separately. If later code only uses one or the
// other, given the fact that all functions are aggressively inlined, we can
// hope that useless computations will be omitted. This is notably the case when
// minifying (we only need whitespace).
//
// Whitespace mask: look up a candidate character for every input byte and compare it
// with the input byte itself.
const uint64_t whitespace = in.eq({
_mm512_shuffle_epi8(whitespace_table, in.chunks[0])
});
// Turn [ and ] into { and }
const simd8x64<uint8_t> curlified{
in.chunks[0] | 0x20
};
// Operator mask: the lookup is still indexed by the ORIGINAL bytes, but the comparison is
// made against the curlified bytes (which always have bit 0x20 set and so never equal 0).
const uint64_t op = curlified.eq({
_mm512_shuffle_epi8(op_table, in.chunks[0])
});
return { whitespace, op };
}
// Reports whether the block is pure ASCII: all chunks are OR-reduced into a
// single vector, and that vector is then asked whether it is ASCII.
simdjson_inline bool is_ascii(const simd8x64<uint8_t>& input) {
  auto combined = input.reduce_or();
  return combined.is_ascii();
}
// Builds a per-byte boolean vector that is true wherever one of the previous
// bytes was a UTF-8 lead byte that still demands a continuation byte here:
//   prev1 >= 0xc0 (11______), prev2 >= 0xe0 (111_____) or prev3 >= 0xf0 (1111____).
simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
  // Saturating subtraction of (threshold - 1) leaves a non-zero byte exactly
  // where the input byte reaches the threshold.
  simd8<uint8_t> needs_4th = prev3.saturating_sub(0xf0u-1); // non-zero only for 1111____
  simd8<uint8_t> needs_3rd = prev2.saturating_sub(0xe0u-1); // non-zero only for 111_____
  simd8<uint8_t> needs_2nd = prev1.saturating_sub(0xc0u-1); // non-zero only for 11______
  simd8<uint8_t> any_lead = needs_2nd | needs_3rd | needs_4th;
  // The caller wants a bool vector (all 1's). Every difference above is at most
  // 64, so reinterpreting the bytes as signed and testing > 0 is safe.
  return simd8<int8_t>(any_lead) > int8_t(0);
}
// Flags positions that must be the 2nd or 3rd continuation byte of a 3- or
// 4-byte UTF-8 sequence. The subtrahends are chosen so that the high bit (0x80)
// of a result byte is set exactly when prev2 >= 0xe0 (111_____) or
// prev3 >= 0xf0 (1111____); the two results are OR-ed together.
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
  return prev2.saturating_sub(0xe0u-0x80) | prev3.saturating_sub(0xf0u-0x80);
}
} // unnamed namespace
} // namespace icelake
} // namespace simdjson
/**
* We provide a custom version of bit_indexer::write using
* naked intrinsics.
* TODO: make this code more elegant.
*/
// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized' warnings.
// As a workaround, we disable warnings within the following function.
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
namespace simdjson { namespace icelake { namespace { namespace stage1 {
// Appends the positions of the set bits of `bits`, each offset by `idx`, as
// 32-bit values at this->tail, then advances this->tail by the number of set bits.
//
// idx:  value added to every bit position (broadcast to all 32-bit lanes).
// bits: 64-bit mask; each set bit contributes one output entry.
//
// NOTE(review): every store below writes a full 512-bit vector (16 entries) even
// when fewer entries are valid, so the buffer behind this->tail presumably needs
// slack past the last valid entry - confirm against the code that allocates it.
simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0) { return; }
// The constant vector holds the byte values 0x00..0x3f in order (byte k == k).
// The masked compress keeps only the bytes whose bit is set in `bits` and packs
// them contiguously at the low end, zeroing the remainder: the result is the
// list of set-bit positions as 8-bit values.
const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32(
0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130,
0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120,
0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110,
0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100
));
const __m512i start_index = _mm512_set1_epi32(idx);
const auto count = count_ones(bits);
// First 16 positions: widen the low 128 bits from 8-bit to 32-bit lanes, add idx,
// and store unconditionally (bits != 0, so at least one entry is valid).
__m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes));
_mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index));
// Each further group of 16 positions is only processed when count says it
// contains at least one valid entry.
if(count > 16) {
const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1));
_mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index));
if(count > 32) {
const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2));
_mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index));
if(count > 48) {
const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3));
_mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index));
}
}
}
// Only `count` entries are valid; anything stored beyond them is overwritten by
// the next write, which starts at the advanced tail.
this->tail += count;
}
}}}}
SIMDJSON_POP_DISABLE_WARNINGS
//
// Stage 2
//
//
// Implementation-specific overrides
//
namespace simdjson {
namespace icelake {
// Minifies buf[0..len) into dst by forwarding to the generic stage 1
// json_minifier, instantiated with the template argument 128. dst_len is
// passed through by reference (presumably receiving the output length -
// confirm against json_minifier::minify).
simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  const auto status = icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
  return status;
}
// Stage 1 entry point: records the input buffer and its length on the parser,
// then runs the generic json_structural_indexer (template argument 128) over it,
// forwarding the requested stage1_mode.
simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept {
  // Store the input on the parser before indexing; *this is handed to the indexer.
  this->len = _len;
  this->buf = _buf;
  const auto status = icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
  return status;
}
// Validates buf[0..len) as UTF-8 by delegating to the generic stage 1 validator
// and returning its verdict unchanged.
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool verdict = icelake::stage1::generic_validate_utf8(buf,len);
  return verdict;
}
// Stage 2 entry point: runs the tape builder over this parser, writing into
// _doc, with parse_document's boolean template argument set to false.
simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
  const auto status = stage2::tape_builder::parse_document<false>(*this, _doc);
  return status;
}
// Same as stage2(), except that parse_document's boolean template argument is
// true (presumably the streaming / next-document variant - confirm against
// tape_builder::parse_document).
simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
  const auto status = stage2::tape_builder::parse_document<true>(*this, _doc);
  return status;
}
// Forwards to the icelake stringparsing::parse_string routine with the same
// source, destination and replacement_char flag, and returns its pointer result
// unchanged.
simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept {
  uint8_t *const result = icelake::stringparsing::parse_string(src, dst, replacement_char);
  return result;
}
// Forwards to the icelake stringparsing::parse_wobbly_string routine with the
// same source and destination, and returns its pointer result unchanged.
simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept {
  uint8_t *const result = icelake::stringparsing::parse_wobbly_string(src, dst);
  return result;
}
// Full parse: runs stage 1 in regular mode over the input and, only if it
// reports no error, runs stage 2 into _doc. The first error encountered is
// returned; otherwise the result of stage 2 is returned.
simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  if (auto stage1_error = stage1(_buf, _len, stage1_mode::regular)) {
    return stage1_error;
  }
  return stage2(_doc);
}
} // namespace icelake
} // namespace simdjson
#include <simdjson/icelake/end.h>
#endif // SIMDJSON_SRC_ICELAKE_CPP
|