blob: bbfb8848fd4cbdbd4e2878a66cd4dae6f2b56ef5 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
|
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/Interval.h"
#include "Exceptions.h"
#include "support/Utf8.h"
#include "UnbufferedCharStream.h"
using namespace antlrcpp;
using namespace antlr4;
using namespace antlr4::misc;
// Creates a stream that reads from `input`.
//
// The buffer offset, marker count, last-symbol bookkeeping and absolute index
// all start at zero, and `_input` is bound to the supplied stream. One symbol
// is loaded right away so the buffer holds a symbol at offset 0 before the
// first lookahead.
UnbufferedCharStream::UnbufferedCharStream(std::wistream &input)
  : _p(0),
    _numMarkers(0),
    _lastChar(0),
    _lastCharBufferStart(0),
    _currentCharIndex(0),
    _input(input) {
  // _data.size() takes the place of the `n` counter of the Java code.
  fill(1); // Prime the buffer with the first symbol.
}
// Advances the stream by one symbol.
//
// Throws IllegalStateException when the current symbol is EOF. The consumed
// symbol is remembered in _lastChar so LA(-1) can report it. If the consumed
// symbol was the last one buffered and no marker is outstanding, the buffer is
// emptied (keeping its capacity) and the offset restarts at 0; otherwise the
// offset simply moves forward. Finally one symbol of lookahead is ensured.
void UnbufferedCharStream::consume() {
  if (LA(1) == EOF) {
    throw IllegalStateException("cannot consume EOF");
  }

  // The constructor primed the buffer, so _data[_p] exists here.
  _lastChar = _data[_p];

  const bool onLastBufferedSymbol = _p == _data.size() - 1;
  const bool noMarkersOutstanding = _numMarkers == 0;
  if (!(onLastBufferedSymbol && noMarkersOutstanding)) {
    ++_p;
  } else {
    // Nothing needs to be kept: recycle the buffer without giving up capacity.
    const size_t keptCapacity = _data.capacity();
    _data.clear();
    _data.reserve(keptCapacity);

    _p = 0;
    _lastCharBufferStart = _lastChar;
  }

  ++_currentCharIndex;
  sync(1);
}
// Makes sure the buffer holds `want` symbols starting at the current offset,
// asking fill() for exactly the number that are missing. Does nothing when
// enough symbols are already buffered.
void UnbufferedCharStream::sync(size_t want) {
  const size_t required = _p + want;
  const size_t buffered = _data.size();
  if (required > buffered) {
    fill(required - buffered);
  }
}
// Appends up to `n` symbols obtained from nextChar() to the buffer.
//
// Returns the number of symbols appended: `n` normally, or fewer when the
// last buffered symbol is already the end-of-input sentinel, in which case
// reading stops immediately.
//
// The sentinel is recognized both as 0xFFFF (the historical check) and as the
// value std::char_traits<wchar_t>::eof() takes once converted to char32_t.
// The latter is what LA() tests for, and it differs from 0xFFFF on platforms
// where the wide stream's int_type is wider than 16 bits; without it the
// buffer would keep growing with repeated end-of-input values.
//
// An IOException raised while reading is rethrown as-is on MSVC older than
// VS 2015 and is nested inside a RuntimeException everywhere else.
size_t UnbufferedCharStream::fill(size_t n) {
  const char32_t streamEof = static_cast<char32_t>(std::char_traits<wchar_t>::eof());

  for (size_t i = 0; i < n; i++) {
    if (!_data.empty() && (_data.back() == 0xFFFF || _data.back() == streamEof)) {
      return i;
    }

    try {
      add(nextChar());
    } catch (IOException & /*ioe*/) {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
      // throw_with_nested is not available before VS 2015. Rethrow the
      // in-flight exception itself so its dynamic type is not sliced.
      throw;
#else
      std::throw_with_nested(RuntimeException());
#endif
    }
  }

  return n;
}
// Reads one symbol from the underlying input stream via get() and returns it
// converted to char32_t. Whatever get() yields at end of input is converted
// the same way.
char32_t UnbufferedCharStream::nextChar() {
  const auto rawSymbol = _input.get();
  return static_cast<char32_t>(rawSymbol);
}
// Appends the symbol `c` to the end of the buffer.
void UnbufferedCharStream::add(char32_t c) {
  _data.push_back(c);
}
// Returns the symbol at offset `i` relative to the current position.
//
//   i == -1 : the most recently consumed symbol (_lastChar).
//   i == 1  : the symbol at the current position; larger values look further
//             ahead, loading more input on demand.
//   other   : a symbol located in the buffer before the current position.
//
// Throws IndexOutOfBoundsException when the requested position falls before
// the start of the buffer. Returns EOF when the position lies beyond the
// buffered data or holds the wide stream's end-of-file value.
size_t UnbufferedCharStream::LA(ssize_t i) {
  if (i == -1) {
    return _lastChar;
  }

  // Looking back is limited to what is still buffered.
  const ssize_t signedOffset = static_cast<ssize_t>(_p) + i - 1;
  if (signedOffset < 0) {
    throw IndexOutOfBoundsException();
  }

  // Only forward lookahead can require more input.
  if (i > 0) {
    sync(static_cast<size_t>(i));
  }

  const size_t offset = static_cast<size_t>(signedOffset);
  const bool beyondBuffer = offset >= _data.size();
  if (beyondBuffer || _data[offset] == std::char_traits<wchar_t>::eof()) {
    return EOF;
  }

  return _data[offset];
}
// Registers a new marker and returns its handle.
//
// Handles are negative: -1 for the first outstanding marker, -2 for the next,
// and so on. When no marker was outstanding, the current _lastChar is saved as
// the last symbol preceding the buffer start.
ssize_t UnbufferedCharStream::mark() {
  const bool firstMarker = _numMarkers == 0;
  if (firstMarker) {
    _lastCharBufferStart = _lastChar;
  }

  const ssize_t handle = -1 - static_cast<ssize_t>(_numMarkers);
  ++_numMarkers;
  return handle;
}
// Releases the most recently issued marker.
//
// `marker` must equal -(number of outstanding markers), which is the handle
// mark() returned for the newest unreleased marker; any other value throws
// IllegalStateException. Calling this with no marker outstanding throws as
// well instead of decrementing the marker count below zero.
//
// Once the last marker is gone and the current offset is past the start of
// the buffer, the symbols before the current offset are dropped and the
// offset restarts at 0.
void UnbufferedCharStream::release(ssize_t marker) {
  // With nothing outstanding the expected handle would be 0, a value mark()
  // never returns; accepting it would push _numMarkers below zero.
  if (_numMarkers == 0 || marker != -static_cast<ssize_t>(_numMarkers)) {
    throw IllegalStateException("release() called with an invalid marker.");
  }

  --_numMarkers;
  if (_numMarkers == 0 && _p > 0) {
    _data.erase(0, _p);
    _p = 0;
    _lastCharBufferStart = _lastChar;
  }
}
// Returns _currentCharIndex, the position counter that consume() increments
// and seek() overwrites.
size_t UnbufferedCharStream::index() {
  const size_t currentPosition = _currentCharIndex;
  return currentPosition;
}
// Moves the current position to the absolute index `index`.
//
// Seeking to the current index is a no-op. For a forward seek the buffer is
// synced by the distance to travel and the target is then clamped to the last
// buffered position. Throws IllegalArgumentException when the target lies
// before the start of the buffer and UnsupportedOperationException when it
// lies at or past the end of the buffered data. On success the offset, the
// absolute index and _lastChar are updated to match the new position.
void UnbufferedCharStream::seek(size_t index) {
  if (index == _currentCharIndex) {
    return;
  }

  if (index > _currentCharIndex) {
    sync(index - _currentCharIndex);
    const size_t lastBufferedIndex = getBufferStartIndex() + _data.size() - 1;
    if (lastBufferedIndex < index) {
      index = lastBufferedIndex;
    }
  }

  // A target equal to the buffer start maps to offset 0.
  const size_t bufferStart = getBufferStartIndex();
  const ssize_t offset = static_cast<ssize_t>(index) - static_cast<ssize_t>(bufferStart);
  if (offset < 0) {
    throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index));
  }
  if (offset >= static_cast<ssize_t>(_data.size())) {
    throw UnsupportedOperationException("Seek to index outside buffer: " + std::to_string(index) +
                                        " not in " + std::to_string(bufferStart) + ".." +
                                        std::to_string(bufferStart + _data.size()));
  }

  _p = static_cast<size_t>(offset);
  _currentCharIndex = index;

  // Restore what LA(-1) should report at the new position.
  if (_p != 0) {
    _lastChar = _data[_p - 1];
  } else {
    _lastChar = _lastCharBufferStart;
  }
}
// Always throws UnsupportedOperationException: this stream does not report a
// total size.
size_t UnbufferedCharStream::size() {
  const std::string reason("Unbuffered stream cannot know its size");
  throw UnsupportedOperationException(reason);
}
// Returns the stream's `name`, or UNKNOWN_SOURCE_NAME when no name is set
// (i.e. `name` is empty).
std::string UnbufferedCharStream::getSourceName() const {
  if (!name.empty()) {
    return name;
  }
  return UNKNOWN_SOURCE_NAME;
}
// Returns the UTF-8 encoding of the buffered symbols covered by `interval`
// (absolute stream indexes).
//
// Throws IllegalArgumentException when the interval is malformed (a < 0 or
// b < a - 1), when the buffer ends with 0xFFFF and the interval reaches past
// the buffered data, or when the selected symbols cannot be strictly encoded.
// Throws UnsupportedOperationException when any part of the interval lies
// outside the current buffer.
//
// NOTE(review): the end-of-stream test here compares against 0xFFFF only,
// whereas LA() compares against std::char_traits<wchar_t>::eof().
std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
  const bool malformed = interval.a < 0 || interval.b < interval.a - 1;
  if (malformed) {
    throw IllegalArgumentException("invalid interval");
  }

  const size_t bufferStartIndex = getBufferStartIndex();

  const bool endsWithSentinel = !_data.empty() && _data.back() == 0xFFFF;
  if (endsWithSentinel && interval.a + interval.length() > bufferStartIndex + _data.size()) {
    throw IllegalArgumentException("the interval extends past the end of the stream");
  }

  const bool startsBeforeBuffer = interval.a < static_cast<ssize_t>(bufferStartIndex);
  if (startsBeforeBuffer || interval.b >= static_cast<ssize_t>(bufferStartIndex + _data.size())) {
    throw UnsupportedOperationException("interval " + interval.toString() + " outside buffer: " +
                                        std::to_string(bufferStartIndex) + ".." +
                                        std::to_string(bufferStartIndex + _data.size() - 1));
  }

  // Translate the absolute start index into an offset within the buffer.
  const size_t localStart = interval.a - bufferStartIndex;
  const std::u32string_view buffered(_data);
  auto encoded = Utf8::strictEncode(buffered.substr(localStart, interval.length()));
  if (!encoded.has_value()) {
    throw IllegalArgumentException("Unbuffered stream contains invalid Unicode code points");
  }
  return std::move(encoded).value();
}
// Always throws UnsupportedOperationException: this stream cannot be turned
// into a single string.
std::string UnbufferedCharStream::toString() const {
  const std::string reason("Unbuffered stream cannot be materialized to a string");
  throw UnsupportedOperationException(reason);
}
// Returns the absolute stream index of the symbol at buffer offset 0: the
// current absolute index minus the current offset into the buffer.
size_t UnbufferedCharStream::getBufferStartIndex() const {
  const size_t offsetInBuffer = _p;
  return _currentCharIndex - offsetInBuffer;
}
|