aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2024-02-06 20:01:22 +0300
committerAlexander Smirnov <alex@ydb.tech>2024-02-09 19:18:27 +0300
commitee2b7fbda052aa09b6fdb83b8c6f0305fef3e193 (patch)
tree102765416c3866bde98a82facc7752d329ee0226 /contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp
parent7494ca32d3a5aca00b7ac527b5f127989335102c (diff)
downloadydb-ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193.tar.gz
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp')
-rw-r--r--contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp367
1 files changed, 367 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp b/contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp
new file mode 100644
index 0000000000..a8f40abdf8
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-rc/ResourceScriptToken.cpp
@@ -0,0 +1,367 @@
+//===-- ResourceScriptToken.cpp ---------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This file implements an interface defined in ResourceScriptToken.h.
+// In particular, it defines an .rc script tokenizer.
+//
+//===---------------------------------------------------------------------===//
+
+#include "ResourceScriptToken.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <utility>
+
+using namespace llvm;
+
+using Kind = RCToken::Kind;
+
+// Checks if Representation is a correct description of an RC integer.
+// It should be a 32-bit unsigned integer, either decimal, octal (0[0-7]+),
+// or hexadecimal (0x[0-9a-f]+). It might be followed by a single 'L'
+// character (that is the difference between our representation and
+// StringRef's one). If Representation is correct, 'true' is returned and
+// the return value is put back in Num.
+static bool rcGetAsInteger(StringRef Representation, uint32_t &Num) {
+ size_t Length = Representation.size();
+ if (Length == 0)
+ return false;
+ // Strip the last 'L' if unnecessary.
+ if (std::toupper(Representation.back()) == 'L')
+ Representation = Representation.drop_back(1);
+
+ return !Representation.getAsInteger<uint32_t>(0, Num);
+}
+
+RCToken::RCToken(RCToken::Kind RCTokenKind, StringRef Value)
+ : TokenKind(RCTokenKind), TokenValue(Value) {}
+
+uint32_t RCToken::intValue() const {
+ assert(TokenKind == Kind::Int);
+ // We assume that the token already is a correct integer (checked by
+ // rcGetAsInteger).
+ uint32_t Result;
+ bool IsSuccess = rcGetAsInteger(TokenValue, Result);
+ assert(IsSuccess);
+ (void)IsSuccess; // Silence the compiler warning when -DNDEBUG flag is on.
+ return Result;
+}
+
+bool RCToken::isLongInt() const {
+ return TokenKind == Kind::Int && std::toupper(TokenValue.back()) == 'L';
+}
+
+StringRef RCToken::value() const { return TokenValue; }
+
+Kind RCToken::kind() const { return TokenKind; }
+
+bool RCToken::isBinaryOp() const {
+ switch (TokenKind) {
+ case Kind::Plus:
+ case Kind::Minus:
+ case Kind::Pipe:
+ case Kind::Amp:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static Error getStringError(const Twine &message) {
+ return make_error<StringError>("Error parsing file: " + message,
+ inconvertibleErrorCode());
+}
+
+namespace {
+
+class Tokenizer {
+public:
+ Tokenizer(StringRef Input) : Data(Input), DataLength(Input.size()), Pos(0) {}
+
+ Expected<std::vector<RCToken>> run();
+
+private:
+ // All 'advancing' methods return boolean values; if they're equal to false,
+ // the stream has ended or failed.
+ bool advance(size_t Amount = 1);
+ bool skipWhitespaces();
+
+ // Consumes a token. If any problem occurred, a non-empty Error is returned.
+ Error consumeToken(const Kind TokenKind);
+
+ // Check if tokenizer is about to read FollowingChars.
+ bool willNowRead(StringRef FollowingChars) const;
+
+ // Check if tokenizer can start reading an identifier at current position.
+ // The original tool did non specify the rules to determine what is a correct
+ // identifier. We assume they should follow the C convention:
+ // [a-zA-Z_][a-zA-Z0-9_]*.
+ bool canStartIdentifier() const;
+ // Check if tokenizer can continue reading an identifier.
+ bool canContinueIdentifier() const;
+
+ // Check if tokenizer can start reading an integer.
+ // A correct integer always starts with a 0-9 digit,
+ // can contain characters 0-9A-Fa-f (digits),
+ // Ll (marking the integer is 32-bit), Xx (marking the representation
+ // is hexadecimal). As some kind of separator should come after the
+ // integer, we can consume the integer until a non-alphanumeric
+ // character.
+ bool canStartInt() const;
+ bool canContinueInt() const;
+
+ bool canStartString() const;
+
+ // Check if tokenizer can start reading a single line comment (e.g. a comment
+ // that begins with '//')
+ bool canStartLineComment() const;
+
+ // Check if tokenizer can start or finish reading a block comment (e.g. a
+ // comment that begins with '/*' and ends with '*/')
+ bool canStartBlockComment() const;
+
+ // Throw away all remaining characters on the current line.
+ void skipCurrentLine();
+
+ bool streamEof() const;
+
+ // Classify the token that is about to be read from the current position.
+ Kind classifyCurrentToken() const;
+
+ // Process the Kind::Identifier token - check if it is
+ // an identifier describing a block start or end.
+ void processIdentifier(RCToken &token) const;
+
+ StringRef Data;
+ size_t DataLength, Pos;
+};
+
+void Tokenizer::skipCurrentLine() {
+ Pos = Data.find_first_of("\r\n", Pos);
+ Pos = Data.find_first_not_of("\r\n", Pos);
+
+ if (Pos == StringRef::npos)
+ Pos = DataLength;
+}
+
+Expected<std::vector<RCToken>> Tokenizer::run() {
+ Pos = 0;
+ std::vector<RCToken> Result;
+
+ // Consume an optional UTF-8 Byte Order Mark.
+ if (willNowRead("\xef\xbb\xbf"))
+ advance(3);
+
+ while (!streamEof()) {
+ if (!skipWhitespaces())
+ break;
+
+ Kind TokenKind = classifyCurrentToken();
+ if (TokenKind == Kind::Invalid)
+ return getStringError("Invalid token found at position " + Twine(Pos));
+
+ const size_t TokenStart = Pos;
+ if (Error TokenError = consumeToken(TokenKind))
+ return std::move(TokenError);
+
+ // Comments are just deleted, don't bother saving them.
+ if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
+ continue;
+
+ RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
+ if (TokenKind == Kind::Identifier) {
+ processIdentifier(Token);
+ } else if (TokenKind == Kind::Int) {
+ uint32_t TokenInt;
+ if (!rcGetAsInteger(Token.value(), TokenInt)) {
+ // The integer has incorrect format or cannot be represented in
+ // a 32-bit integer.
+ return getStringError("Integer invalid or too large: " +
+ Token.value().str());
+ }
+ }
+
+ Result.push_back(Token);
+ }
+
+ return Result;
+}
+
+bool Tokenizer::advance(size_t Amount) {
+ Pos += Amount;
+ return !streamEof();
+}
+
+bool Tokenizer::skipWhitespaces() {
+ while (!streamEof() && isSpace(Data[Pos]))
+ advance();
+ return !streamEof();
+}
+
+Error Tokenizer::consumeToken(const Kind TokenKind) {
+ switch (TokenKind) {
+ // One-character token consumption.
+#define TOKEN(Name)
+#define SHORT_TOKEN(Name, Ch) case Kind::Name:
+#include "ResourceScriptTokenList.def"
+ advance();
+ return Error::success();
+
+ case Kind::LineComment:
+ advance(2);
+ skipCurrentLine();
+ return Error::success();
+
+ case Kind::StartComment: {
+ advance(2);
+ auto EndPos = Data.find("*/", Pos);
+ if (EndPos == StringRef::npos)
+ return getStringError(
+ "Unclosed multi-line comment beginning at position " + Twine(Pos));
+ advance(EndPos - Pos);
+ advance(2);
+ return Error::success();
+ }
+ case Kind::Identifier:
+ while (!streamEof() && canContinueIdentifier())
+ advance();
+ return Error::success();
+
+ case Kind::Int:
+ while (!streamEof() && canContinueInt())
+ advance();
+ return Error::success();
+
+ case Kind::String:
+ // Consume the preceding 'L', if there is any.
+ if (std::toupper(Data[Pos]) == 'L')
+ advance();
+ // Consume the double-quote.
+ advance();
+
+ // Consume the characters until the end of the file, line or string.
+ while (true) {
+ if (streamEof()) {
+ return getStringError("Unterminated string literal.");
+ } else if (Data[Pos] == '"') {
+ // Consume the ending double-quote.
+ advance();
+ // However, if another '"' follows this double-quote, the string didn't
+ // end and we just included '"' into the string.
+ if (!willNowRead("\""))
+ return Error::success();
+ } else if (Data[Pos] == '\n') {
+ return getStringError("String literal not terminated in the line.");
+ }
+
+ advance();
+ }
+
+ case Kind::Invalid:
+ assert(false && "Cannot consume an invalid token.");
+ }
+
+ llvm_unreachable("Unknown RCToken::Kind");
+}
+
+bool Tokenizer::willNowRead(StringRef FollowingChars) const {
+ return Data.drop_front(Pos).startswith(FollowingChars);
+}
+
+bool Tokenizer::canStartIdentifier() const {
+ assert(!streamEof());
+
+ const char CurChar = Data[Pos];
+ return std::isalpha(CurChar) || CurChar == '_' || CurChar == '.';
+}
+
+bool Tokenizer::canContinueIdentifier() const {
+ assert(!streamEof());
+ const char CurChar = Data[Pos];
+ return std::isalnum(CurChar) || CurChar == '_' || CurChar == '.' ||
+ CurChar == '/' || CurChar == '\\' || CurChar == '-';
+}
+
+bool Tokenizer::canStartInt() const {
+ assert(!streamEof());
+ return std::isdigit(Data[Pos]);
+}
+
+bool Tokenizer::canStartBlockComment() const {
+ assert(!streamEof());
+ return Data.drop_front(Pos).startswith("/*");
+}
+
+bool Tokenizer::canStartLineComment() const {
+ assert(!streamEof());
+ return Data.drop_front(Pos).startswith("//");
+}
+
+bool Tokenizer::canContinueInt() const {
+ assert(!streamEof());
+ return std::isalnum(Data[Pos]);
+}
+
+bool Tokenizer::canStartString() const {
+ return willNowRead("\"") || willNowRead("L\"") || willNowRead("l\"");
+}
+
+bool Tokenizer::streamEof() const { return Pos == DataLength; }
+
+Kind Tokenizer::classifyCurrentToken() const {
+ if (canStartBlockComment())
+ return Kind::StartComment;
+ if (canStartLineComment())
+ return Kind::LineComment;
+
+ if (canStartInt())
+ return Kind::Int;
+ if (canStartString())
+ return Kind::String;
+ // BEGIN and END are at this point of lexing recognized as identifiers.
+ if (canStartIdentifier())
+ return Kind::Identifier;
+
+ const char CurChar = Data[Pos];
+
+ switch (CurChar) {
+ // One-character token classification.
+#define TOKEN(Name)
+#define SHORT_TOKEN(Name, Ch) \
+ case Ch: \
+ return Kind::Name;
+#include "ResourceScriptTokenList.def"
+
+ default:
+ return Kind::Invalid;
+ }
+}
+
+void Tokenizer::processIdentifier(RCToken &Token) const {
+ assert(Token.kind() == Kind::Identifier);
+ StringRef Name = Token.value();
+
+ if (Name.equals_insensitive("begin"))
+ Token = RCToken(Kind::BlockBegin, Name);
+ else if (Name.equals_insensitive("end"))
+ Token = RCToken(Kind::BlockEnd, Name);
+}
+
+} // anonymous namespace
+
+namespace llvm {
+
+Expected<std::vector<RCToken>> tokenizeRC(StringRef Input) {
+ return Tokenizer(Input).run();
+}
+
+} // namespace llvm