1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
|
#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the NumericLiteralParser, CharLiteralParser, and
// StringLiteralParser interfaces.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
#define LLVM_CLANG_LEX_LITERALSUPPORT_H
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
namespace clang {
// Forward declarations of types that this header names only in function
// signatures, references, pointers, and template arguments.
class DiagnosticsEngine;
class Preprocessor;
class Token;
class SourceLocation;
class TargetInfo;
class SourceManager;
class LangOptions;
/// Copy characters from Input to Buf, expanding any UCNs (universal character
/// names) found in Input.
///
/// \param Buf   Destination buffer that receives the resulting characters.
/// \param Input Source characters to copy.
///
/// NOTE(review): only the declaration is visible in this header. Whether Buf
/// is appended to or overwritten, and which encoding is written for an
/// expanded UCN, should be confirmed against the definition.
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
/// NumericLiteralParser - Strict semantic analysis of the spelling of a
/// ppnumber. The literal is classified (integer, floating, fixed-point, or
/// erroneous), its radix is determined, and it can then be converted to a
/// usable value through the Get*Value members.
class NumericLiteralParser {
  const SourceManager &SM;
  const LangOptions &LangOpts;
  DiagnosticsEngine &Diags;

  // Offsets handed back to callers (see getUDSuffixOffset) are measured from
  // ThisTokBegin; ThisTokEnd is the end limit that the Skip* helpers compare
  // against before every read.
  const char *const ThisTokBegin;
  const char *const ThisTokEnd;
  const char *DigitsBegin; // marker
  const char *SuffixBegin; // marker
  const char *s;           // cursor

  unsigned radix;

  bool saw_exponent;
  bool saw_period;
  bool saw_ud_suffix;
  bool saw_fixed_point_suffix;

  SmallString<32> UDSuffixBuf;

public:
  NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
                       const SourceManager &SM, const LangOptions &LangOpts,
                       const TargetInfo &Target, DiagnosticsEngine &Diags);

  bool hadError : 1;
  bool isUnsigned : 1;
  bool isLong : 1;          // Deliberately *not* set for long long.
  bool isLongLong : 1;
  bool isSizeT : 1;         // 1z, 1uz (C++2b)
  bool isHalf : 1;          // 1.0h
  bool isFloat : 1;         // 1.0f
  bool isImaginary : 1;     // 1.0i
  bool isFloat16 : 1;       // 1.0f16
  bool isFloat128 : 1;      // 1.0q
  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.

  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk

  /// True when a fixed-point suffix was seen on a literal that also has a
  /// period or an exponent.
  bool isFixedPointLiteral() const {
    const bool SawPeriodOrExponent = saw_period || saw_exponent;
    return SawPeriodOrExponent && saw_fixed_point_suffix;
  }

  /// True when the literal has neither a period nor an exponent and is not a
  /// fixed-point literal.
  bool isIntegerLiteral() const {
    if (saw_period || saw_exponent)
      return false;
    return !isFixedPointLiteral();
  }

  /// True when the literal has a period or an exponent and is not a
  /// fixed-point literal.
  bool isFloatingLiteral() const {
    if (!saw_period && !saw_exponent)
      return false;
    return !isFixedPointLiteral();
  }

  /// Whether a ud-suffix was recorded for this literal.
  bool hasUDSuffix() const { return saw_ud_suffix; }

  /// The recorded ud-suffix. Asserts that one was seen.
  StringRef getUDSuffix() const {
    assert(saw_ud_suffix);
    return UDSuffixBuf;
  }

  /// Distance from ThisTokBegin to SuffixBegin. Asserts that a ud-suffix was
  /// seen.
  unsigned getUDSuffixOffset() const {
    assert(saw_ud_suffix);
    return SuffixBegin - ThisTokBegin;
  }

  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);

  /// The radix determined for this literal.
  unsigned getRadix() const { return radix; }

  /// GetIntegerValue - Convert this numeric literal value to an APInt that
  /// matches Val's input width. On overflow (the unsigned value read needs
  /// more bits than the APInt holds), Val receives the low bits of the result
  /// and true is returned. Otherwise false is returned.
  bool GetIntegerValue(llvm::APInt &Val);

  /// GetFloatValue - Convert this numeric literal to a floating value, using
  /// the fltSemantics (float, double, etc.) of the APFloat passed in as
  /// Result. Exactness is not reported through a separate out-parameter; only
  /// the returned llvm::APFloat::opStatus is available to the caller.
  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);

  /// GetFixedPointValue - Convert this numeric literal value into a scaled
  /// integer that represents this value. Returns true if an overflow occurred
  /// while calculating the integral part of the scaled integer or while
  /// calculating the digit sequence of the exponent.
  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);

private:
  void ParseNumberStartingWithZero(SourceLocation TokLoc);
  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);

  /// A digit separator is the single-quote character.
  static bool isDigitSeparator(char C) { return C == '\''; }

  /// Determine whether the sequence of characters [Start, End) contains
  /// any real digits (not digit separators). The check is structural: an
  /// empty range, or a range that is exactly one digit separator, has no
  /// digits; every other range is reported as containing digits.
  bool containsDigits(const char *Start, const char *End) {
    if (Start == End)
      return false;
    if (Start + 1 == End)
      return !isDigitSeparator(*Start);
    return true;
  }

  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };

  /// Ensure that we don't have a digit separator here.
  void checkSeparator(SourceLocation TokLoc, const char *Pos,
                      CheckSeparatorKind IsAfterDigits);

  /// SkipHexDigits - Advance over hex digits and digit separators, stopping
  /// at ThisTokEnd. Returns a pointer to the first character that is neither,
  /// or ThisTokEnd.
  const char *SkipHexDigits(const char *ptr) {
    for (; ptr != ThisTokEnd; ++ptr) {
      const char C = *ptr;
      if (!isHexDigit(C) && !isDigitSeparator(C))
        break;
    }
    return ptr;
  }

  /// SkipOctalDigits - Advance over octal digits ('0'..'7') and digit
  /// separators, stopping at ThisTokEnd. Returns a pointer to the first
  /// character that is neither, or ThisTokEnd.
  const char *SkipOctalDigits(const char *ptr) {
    for (; ptr != ThisTokEnd; ++ptr) {
      const char C = *ptr;
      const bool IsOctalDigit = C >= '0' && C <= '7';
      if (!IsOctalDigit && !isDigitSeparator(C))
        break;
    }
    return ptr;
  }

  /// SkipDigits - Advance over decimal digits and digit separators, stopping
  /// at ThisTokEnd. Returns a pointer to the first character that is neither,
  /// or ThisTokEnd.
  const char *SkipDigits(const char *ptr) {
    for (; ptr != ThisTokEnd; ++ptr) {
      const char C = *ptr;
      if (!isDigit(C) && !isDigitSeparator(C))
        break;
    }
    return ptr;
  }

  /// SkipBinaryDigits - Advance over binary digits ('0' or '1') and digit
  /// separators, stopping at ThisTokEnd. Returns a pointer to the first
  /// character that is neither, or ThisTokEnd.
  const char *SkipBinaryDigits(const char *ptr) {
    for (; ptr != ThisTokEnd; ++ptr) {
      const char C = *ptr;
      if (C != '0' && C != '1' && !isDigitSeparator(C))
        break;
    }
    return ptr;
  }
};
/// CharLiteralParser - Interprets a character literal and performs semantic
/// analysis on it. All work happens in the constructor; the accessors below
/// only report what was recorded.
class CharLiteralParser {
  uint64_t Value;
  tok::TokenKind Kind;
  bool IsMultiChar;
  bool HadError;
  SmallString<32> UDSuffixBuf;
  unsigned UDSuffixOffset;

public:
  CharLiteralParser(const char *begin, const char *end,
                    SourceLocation Loc, Preprocessor &PP,
                    tok::TokenKind kind);

  /// Whether an error was recorded for this literal.
  bool hadError() const {
    return HadError;
  }

  // Literal-kind queries: each compares the stored token kind against one
  // specific character-constant token kind.
  bool isAscii() const {
    return Kind == tok::char_constant;
  }
  bool isWide() const {
    return Kind == tok::wide_char_constant;
  }
  bool isUTF8() const {
    return Kind == tok::utf8_char_constant;
  }
  bool isUTF16() const {
    return Kind == tok::utf16_char_constant;
  }
  bool isUTF32() const {
    return Kind == tok::utf32_char_constant;
  }

  /// Whether the literal was recorded as a multi-character constant.
  bool isMultiChar() const {
    return IsMultiChar;
  }

  /// The value recorded for the literal.
  uint64_t getValue() const {
    return Value;
  }

  /// The ud-suffix buffer contents; empty when there is no ud-suffix.
  StringRef getUDSuffix() const {
    return UDSuffixBuf;
  }

  /// Offset recorded for the ud-suffix. Asserts that a ud-suffix is present.
  unsigned getUDSuffixOffset() const {
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
    return UDSuffixOffset;
  }
};
/// StringLiteralParser - Decodes string escape characters, performs wide
/// string analysis, and carries out Translation Phase #6 (concatenation of
/// string literals) (C99 5.1.1.2p1).
class StringLiteralParser {
  const SourceManager &SM;
  const LangOptions &Features;
  const TargetInfo &Target;
  DiagnosticsEngine *Diags;

  unsigned MaxTokenLength;
  unsigned SizeBound;
  unsigned CharByteWidth;
  tok::TokenKind Kind;

  // Output storage. ResultPtr is the write cursor into ResultBuf, so the
  // number of bytes produced so far is ResultPtr - ResultBuf.data().
  SmallString<512> ResultBuf;
  char *ResultPtr; // cursor

  SmallString<32> UDSuffixBuf;
  unsigned UDSuffixToken;
  unsigned UDSuffixOffset;

public:
  StringLiteralParser(ArrayRef<Token> StringToks,
                      Preprocessor &PP);

  /// Construct without a Preprocessor. All counters start at zero, the kind
  /// starts as tok::unknown, the cursor starts at the beginning of the result
  /// buffer, and then init() processes StringToks. \p diags may be null.
  StringLiteralParser(ArrayRef<Token> StringToks,
                      const SourceManager &sm, const LangOptions &features,
                      const TargetInfo &target,
                      DiagnosticsEngine *diags = nullptr)
      : SM(sm), Features(features), Target(target), Diags(diags),
        MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
        ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
    init(StringToks);
  }

  bool hadError;
  bool Pascal;

  /// The bytes produced so far, as a view into the result buffer.
  StringRef GetString() const {
    const char *Begin = ResultBuf.data();
    return StringRef(Begin, GetStringLength());
  }

  /// Number of bytes between the start of the result buffer and the cursor.
  unsigned GetStringLength() const {
    return ResultPtr - ResultBuf.data();
  }

  /// Byte length divided by the width in bytes of one character.
  unsigned GetNumStringChars() const {
    const unsigned NumBytes = GetStringLength();
    return NumBytes / CharByteWidth;
  }

  /// getOffsetOfStringByte - Returns the offset of the specified byte of the
  /// string data represented by Token, advancing over escape sequences in the
  /// string as needed.
  ///
  /// If the Diagnostics pointer is non-null, then this will do semantic
  /// checking of the string literal and emit errors and warnings.
  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;

  // Literal-kind queries: each compares the stored token kind against one
  // specific string-literal token kind.
  bool isAscii() const {
    return Kind == tok::string_literal;
  }
  bool isWide() const {
    return Kind == tok::wide_string_literal;
  }
  bool isUTF8() const {
    return Kind == tok::utf8_string_literal;
  }
  bool isUTF16() const {
    return Kind == tok::utf16_string_literal;
  }
  bool isUTF32() const {
    return Kind == tok::utf32_string_literal;
  }

  /// Reports the Pascal flag.
  bool isPascal() const {
    return Pascal;
  }

  /// The ud-suffix buffer contents; empty when there is no ud-suffix.
  StringRef getUDSuffix() const {
    return UDSuffixBuf;
  }

  /// Get the index of a token containing a ud-suffix. Asserts that a
  /// ud-suffix is present.
  unsigned getUDSuffixToken() const {
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
    return UDSuffixToken;
  }

  /// Get the spelling offset of the first byte of the ud-suffix. Asserts that
  /// a ud-suffix is present.
  unsigned getUDSuffixOffset() const {
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
    return UDSuffixOffset;
  }

  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);

private:
  void init(ArrayRef<Token> StringToks);
  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
                          StringRef Fragment);
  void DiagnoseLexingError(SourceLocation Loc);
};
} // end namespace clang
#endif
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
|