diff options
| author | robot-piglet <[email protected]> | 2023-12-01 16:59:11 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2023-12-01 19:54:31 +0300 |
| commit | 3715aa9254f65ae1058290101351a72a6d3a67d4 (patch) | |
| tree | 9ac5a1cdab42dfc7cd095a06a362e0681cb1482f /contrib/libs/antlr4_cpp_runtime/src/support | |
| parent | b20a8c04fb7e595955ca9d1b943033342b6580cb (diff) | |
Intermediate changes
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/support')
14 files changed, 0 insertions, 1137 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp deleted file mode 100644 index a1ed50d4563..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp +++ /dev/null @@ -1,8 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#include "Any.h" - -using namespace antlrcpp; diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Any.h b/contrib/libs/antlr4_cpp_runtime/src/support/Any.h deleted file mode 100644 index fa5df58946e..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Any.h +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -// A standard C++ class loosely modeled after boost::Any. - -#pragma once - -#include "antlr4-common.h" - -namespace antlrcpp { - - using Any = std::any; - -} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp deleted file mode 100644 index b3c4f94f2fa..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#include "tree/ParseTree.h" -#include "Exceptions.h" - -#include "support/Arrays.h" - -using namespace antlrcpp; - -std::string Arrays::listToString(const std::vector<std::string> &list, const std::string &separator) -{ - std::stringstream ss; - bool firstEntry = true; - - ss << '['; - for (const auto &entry : list) { - ss << entry; - if (firstEntry) { - ss << separator; - firstEntry = false; - } - } - - ss << ']'; - return ss.str(); -} - -template <> -std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree*> &source) { - std::string result = "["; - bool firstEntry = true; - for (auto *value : source) { - result += value->toStringTree(); - if (firstEntry) { - result += ", "; - firstEntry = false; - } - } - return result + "]"; -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h deleted file mode 100644 index 04b852d9860..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include "antlr4-common.h" - -namespace antlrcpp { - - class ANTLR4CPP_PUBLIC Arrays { - public: - - static std::string listToString(const std::vector<std::string> &list, const std::string &separator); - - template <typename T> - static bool equals(const std::vector<T> &a, const std::vector<T> &b) { - if (a.size() != b.size()) - return false; - - for (size_t i = 0; i < a.size(); ++i) - if (!(a[i] == b[i])) - return false; - - return true; - } - - template <typename T> - static bool equals(const std::vector<T *> &a, const std::vector<T *> &b) { - if (a.size() != b.size()) - return false; - - for (size_t i = 0; i < a.size(); ++i) { - if (!a[i] && !b[i]) - continue; - if (!a[i] || !b[i]) - return false; - if (a[i] == b[i]) - continue; - - if (!(*a[i] == *b[i])) - return false; - } - - return true; - } - - template <typename T> - static bool equals(const std::vector<Ref<T>> &a, const std::vector<Ref<T>> &b) { - if (a.size() != b.size()) - return false; - - for (size_t i = 0; i < a.size(); ++i) { - if (!a[i] && !b[i]) - continue; - if (!a[i] || !b[i]) - return false; - if (a[i] == b[i]) - continue; - - if (!(*a[i] == *b[i])) - return false; - } - - return true; - } - - template <typename T> - static bool equals(const std::vector<std::unique_ptr<T>> &a, const std::vector<std::unique_ptr<T>> &b) { - if (a.size() != b.size()) - return false; - - for (size_t i = 0; i < a.size(); ++i) { - if (!a[i] && !b[i]) - continue; - if (!a[i] || !b[i]) - return false; - if (a[i] == b[i]) - continue; - - if (!(*a[i] == *b[i])) - return false; - } - - return true; - } - - template <typename T> - static std::string toString(const std::vector<T> &source) { - std::string result = "["; - bool firstEntry = true; - for (auto &value : source) { - result += value.toString(); - if (firstEntry) { - result += ", "; - firstEntry = false; - } - } - return result + "]"; - } - - template <typename T> - static std::string toString(const std::vector<Ref<T>> &source) { - std::string result = "["; - bool firstEntry = true; - for (auto &value : source) { - result += value->toString(); - if (firstEntry) { - result += ", "; - firstEntry = false; - } - } - return result + "]"; - } - - template <typename T> - static std::string toString(const std::vector<std::unique_ptr<T>> &source) { - std::string result = "["; - bool firstEntry = true; - for (auto &value : source) { - result += value->toString(); - if (firstEntry) { - result += ", "; - firstEntry = false; - } - } - return result + "]"; - } - - template <typename T> - static std::string toString(const std::vector<T *> &source) { - std::string result = "["; - bool firstEntry = true; - for (auto value : source) { - result += value->toString(); - if (firstEntry) { - result += ", "; - firstEntry = false; - } - } - return result + "]"; - } - - }; - - template <> - std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree *> &source); -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h b/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h deleted file mode 100644 index bb30364be08..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include "antlr4-common.h" - -namespace antlrcpp { - - class ANTLR4CPP_PUBLIC BitSet : public std::bitset<2048> { - public: - size_t nextSetBit(size_t pos) const { - for (size_t i = pos; i < size(); i++){ - if (test(i)) { - return i; - } - } - - return INVALID_INDEX; - } - - // Prints a list of every index for which the bitset contains a bit in true. - friend std::wostream& operator << (std::wostream& os, const BitSet& obj) - { - os << "{"; - size_t total = obj.count(); - for (size_t i = 0; i < obj.size(); i++){ - if (obj.test(i)){ - os << i; - --total; - if (total > 1){ - os << ", "; - } - } - } - - os << "}"; - return os; - } - - static std::string subStringRepresentation(const std::vector<BitSet>::iterator &begin, - const std::vector<BitSet>::iterator &end) { - std::string result; - std::vector<BitSet>::iterator vectorIterator; - - for (vectorIterator = begin; vectorIterator != end; vectorIterator++) { - result += vectorIterator->toString(); - } - // Grab the end - result += end->toString(); - - return result; - } - - std::string toString() const { - std::stringstream stream; - stream << "{"; - bool valueAdded = false; - for (size_t i = 0; i < size(); ++i){ - if (test(i)){ - if (valueAdded) { - stream << ", "; - } - stream << i; - valueAdded = true; - } - } - - stream << "}"; - return stream.str(); - } - - }; -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp deleted file mode 100644 index 95321b3dc17..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#include "support/CPPUtils.h" - -namespace antlrcpp { - - std::string join(const std::vector<std::string> &strings, const std::string &separator) { - std::string str; - bool firstItem = true; - for (const std::string &s : strings) { - if (!firstItem) { - str.append(separator); - } - firstItem = false; - str.append(s); - } - return str; - } - - std::map<std::string, size_t> toMap(const std::vector<std::string> &keys) { - std::map<std::string, size_t> result; - for (size_t i = 0; i < keys.size(); ++i) { - result.insert({ keys[i], i }); - } - return result; - } - - std::string escapeWhitespace(std::string str, bool escapeSpaces) { - std::string result; - for (auto c : str) { - switch (c) { - case '\n': - result += "\\n"; - break; - - case '\r': - result += "\\r"; - break; - - case '\t': - result += "\\t"; - break; - - case ' ': - if (escapeSpaces) { - result += "\u00B7"; - break; - } - result += c; - break; - - default: - result += c; - break; - } - } - - return result; - } - - std::string toHexString(const int t) { - std::stringstream stream; - stream << std::uppercase << std::hex << t; - return stream.str(); - } - - std::string arrayToString(const std::vector<std::string> &data) { - std::string answer; - size_t toReserve = 0; - for (const auto &sub : data) { - toReserve += sub.size(); - } - answer.reserve(toReserve); - for (const auto &sub: data) { - answer.append(sub); - } - return answer; - } - - std::string replaceString(const std::string &s, const std::string &from, const std::string &to) { - std::string::size_type p; - std::string ss, res; - - ss = s; - p = ss.find(from); - while (p != std::string::npos) { - if (p > 0) - res.append(ss.substr(0, p)).append(to); - else - res.append(to); - ss = ss.substr(p + from.size()); - p = ss.find(from); - } - res.append(ss); - - return res; - } - - std::vector<std::string> split(const std::string &s, const std::string &sep, int count) { - std::vector<std::string> parts; - std::string ss = s; - - std::string::size_type p; - - if (s.empty()) - return parts; - - if (count == 0) - count= -1; - - p = ss.find(sep); - while (!ss.empty() && p != std::string::npos && (count < 0 || count > 0)) { - parts.push_back(ss.substr(0, p)); - ss = ss.substr(p+sep.size()); - - --count; - p = ss.find(sep); - } - parts.push_back(ss); - - return parts; - } - - //-------------------------------------------------------------------------------------------------- - - // Debugging helper. Adds indentation to all lines in the given string. - std::string indent(const std::string &s, const std::string &indentation, bool includingFirst) { - std::vector<std::string> parts = split(s, "\n", -1); - for (size_t i = 0; i < parts.size(); ++i) { - if (i == 0 && !includingFirst) - continue; - parts[i].insert(0, indentation); - } - - return join(parts, "\n"); - } - - //-------------------------------------------------------------------------------------------------- - - // Recursively get the error from a, possibly nested, exception. -#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 - // No nested exceptions before VS 2015. - template <typename T> - std::exception_ptr get_nested(const T &/*e*/) { - try { - return nullptr; - } - catch (const std::bad_cast &) { - return nullptr; - } - } -#else - template <typename T> - std::exception_ptr get_nested(const T &e) { - try { - auto nested = dynamic_cast<const std::nested_exception&>(e); - return nested.nested_ptr(); - } - catch (const std::bad_cast &) { - return nullptr; - } - } -#endif - - std::string what(std::exception_ptr eptr) { - if (!eptr) { - throw std::bad_exception(); - } - - std::string result; - std::size_t nestCount = 0; - - next: { - try { - std::exception_ptr yeptr; - std::swap(eptr, yeptr); - std::rethrow_exception(yeptr); - } - catch (const std::exception &e) { - result += e.what(); - eptr = get_nested(e); - } - catch (const std::string &e) { - result += e; - } - catch (const char *e) { - result += e; - } - catch (...) { - result += "cannot be determined"; - } - - if (eptr) { - result += " ("; - ++nestCount; - goto next; - } - } - - result += std::string(nestCount, ')'); - return result; - } - -} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h deleted file mode 100644 index 2eb1a36037a..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include "antlr4-common.h" - -namespace antlrcpp { - - ANTLR4CPP_PUBLIC std::string join(const std::vector<std::string> &strings, const std::string &separator); - ANTLR4CPP_PUBLIC std::map<std::string, size_t> toMap(const std::vector<std::string> &keys); - ANTLR4CPP_PUBLIC std::string escapeWhitespace(std::string str, bool escapeSpaces); - ANTLR4CPP_PUBLIC std::string toHexString(const int t); - ANTLR4CPP_PUBLIC std::string arrayToString(const std::vector<std::string> &data); - ANTLR4CPP_PUBLIC std::string replaceString(const std::string &s, const std::string &from, const std::string &to); - ANTLR4CPP_PUBLIC std::vector<std::string> split(const std::string &s, const std::string &sep, int count); - ANTLR4CPP_PUBLIC std::string indent(const std::string &s, const std::string &indentation, bool includingFirst = true); - - // Using RAII + a lambda to implement a "finally" replacement. - template <typename OnEnd> - struct FinalAction { - FinalAction(OnEnd f) : _cleanUp { std::move(f) } {} - FinalAction(FinalAction &&other) : - _cleanUp(std::move(other._cleanUp)), _enabled(other._enabled) { - other._enabled = false; // Don't trigger the lambda after ownership has moved. - } - ~FinalAction() { if (_enabled) _cleanUp(); } - - void disable() { _enabled = false; } - private: - OnEnd _cleanUp; - bool _enabled {true}; - }; - - template <typename OnEnd> - FinalAction<OnEnd> finally(OnEnd f) { - return FinalAction<OnEnd>(std::move(f)); - } - - // Convenience functions to avoid lengthy dynamic_cast() != nullptr checks in many places. - template <typename T1, typename T2> - inline bool is(T2 *obj) { // For pointer types. - return dynamic_cast<typename std::add_const<T1>::type>(obj) != nullptr; - } - - template <typename T1, typename T2> - inline bool is(Ref<T2> const& obj) { // For shared pointers. - return dynamic_cast<T1 *>(obj.get()) != nullptr; - } - - template <typename T> - std::string toString(const T &o) { - std::stringstream ss; - // typeid gives the mangled class name, but that's all what's possible - // in a portable way. - ss << typeid(o).name() << "@" << std::hex << reinterpret_cast<uintptr_t>(&o); - return ss.str(); - } - - // Get the error text from an exception pointer or the current exception. - ANTLR4CPP_PUBLIC std::string what(std::exception_ptr eptr = std::current_exception()); - -} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h b/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h deleted file mode 100644 index 2ded955dcd7..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (c) 2012-2021 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include <cassert> -#include <memory> -#include <type_traits> - -namespace antlrcpp { - - template <typename To, typename From> - To downCast(From* from) { - static_assert(std::is_pointer_v<To>, "Target type not a pointer."); - static_assert(std::is_base_of_v<From, std::remove_pointer_t<To>>, "Target type not derived from source type."); - #if !defined(__GNUC__) || defined(__GXX_RTTI) - assert(from == nullptr || dynamic_cast<To>(from) != nullptr); - #endif - return static_cast<To>(from); - } - - template <typename To, typename From> - To downCast(From& from) { - static_assert(std::is_lvalue_reference_v<To>, "Target type not a lvalue reference."); - static_assert(std::is_base_of_v<From, std::remove_reference_t<To>>, "Target type not derived from source type."); - #if !defined(__GNUC__) || defined(__GXX_RTTI) - assert(dynamic_cast<std::add_pointer_t<std::remove_reference_t<To>>>(std::addressof(from)) != nullptr); - #endif - return static_cast<To>(from); - } - -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h b/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h deleted file mode 100644 index 8e960676cf2..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -namespace antlr4 { - class ANTLRErrorListener; - class ANTLRErrorStrategy; - class ANTLRFileStream; - class ANTLRInputStream; - class BailErrorStrategy; - class BaseErrorListener; - class BufferedTokenStream; - class CharStream; - class CommonToken; - class CommonTokenFactory; - class CommonTokenStream; - class ConsoleErrorListener; - class DefaultErrorStrategy; - class DiagnosticErrorListener; - class EmptyStackException; - class FailedPredicateException; - class IllegalArgumentException; - class IllegalStateException; - class InputMismatchException; - class IntStream; - class InterpreterRuleContext; - class Lexer; - class LexerInterpreter; - class LexerNoViableAltException; - class ListTokenSource; - class NoSuchElementException; - class NoViableAltException; - class NullPointerException; - class ParseCancellationException; - class Parser; - class ParserInterpreter; - class ParserRuleContext; - class ProxyErrorListener; - class RecognitionException; - class Recognizer; - class RuleContext; - class Token; - template<typename Symbol> class TokenFactory; - class TokenSource; - class TokenStream; - class TokenStreamRewriter; - class UnbufferedCharStream; - class UnbufferedTokenStream; - class WritableToken; - - namespace misc { - class InterpreterDataReader; - class Interval; - class IntervalSet; - class MurmurHash; - class Utils; - class Predicate; - } - namespace atn { - class ATN; - class ATNConfig; - class ATNConfigSet; - class ATNDeserializationOptions; - class ATNDeserializer; - class ATNSerializer; - class ATNSimulator; - class ATNState; - enum class ATNType; - class ActionTransition; - class ArrayPredictionContext; - class AtomTransition; - class BasicBlockStartState; - class BasicState; - class BlockEndState; - class BlockStartState; - class DecisionState; - class EpsilonTransition; - class LL1Analyzer; - class LexerAction; - class LexerActionExecutor; - class LexerATNConfig; - class LexerATNSimulator; - class LexerMoreAction; - class LexerPopModeAction; - class LexerSkipAction; - class LookaheadEventInfo; - class LoopEndState; - class NotSetTransition; - class OrderedATNConfigSet; - class ParseInfo; - class ParserATNSimulator; - class PlusBlockStartState; - class PlusLoopbackState; - class PrecedencePredicateTransition; - class PredicateTransition; - class PredictionContext; - enum class PredictionMode; - class PredictionModeClass; - class RangeTransition; - class RuleStartState; - class RuleStopState; - class RuleTransition; - class SemanticContext; - class SetTransition; - class SingletonPredictionContext; - class StarBlockStartState; - class StarLoopEntryState; - class StarLoopbackState; - class TokensStartState; - class Transition; - class WildcardTransition; - } - namespace dfa { - class DFA; - class DFASerializer; - class DFAState; - class LexerDFASerializer; - class Vocabulary; - } - namespace tree { - class AbstractParseTreeVisitor; - class ErrorNode; - class ErrorNodeImpl; - class ParseTree; - class ParseTreeListener; - template<typename T> class ParseTreeProperty; - class ParseTreeVisitor; - class ParseTreeWalker; - class SyntaxTree; - class TerminalNode; - class TerminalNodeImpl; - class Tree; - class Trees; - - namespace pattern { - class Chunk; - class ParseTreeMatch; - class ParseTreePattern; - class ParseTreePatternMatcher; - class RuleTagToken; - class TagChunk; - class TextChunk; - class TokenTagToken; - } - - namespace xpath { - class XPath; - class XPathElement; - class XPathLexerErrorListener; - class XPathRuleAnywhereElement; - class XPathRuleElement; - class XPathTokenAnywhereElement; - class XPathTokenElement; - class XPathWildcardAnywhereElement; - class XPathWildcardElement; - } - } -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp deleted file mode 100644 index 9ee274c8de4..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#include "support/StringUtils.h" - -namespace antlrcpp { - - std::string escapeWhitespace(std::string_view in) { - std::string out; - escapeWhitespace(out, in); - out.shrink_to_fit(); - return out; - } - - std::string& escapeWhitespace(std::string& out, std::string_view in) { - out.reserve(in.size()); // Best case, no escaping. - for (const auto &c : in) { - switch (c) { - case '\t': - out.append("\\t"); - break; - case '\r': - out.append("\\r"); - break; - case '\n': - out.append("\\n"); - break; - default: - out.push_back(c); - break; - } - } - return out; - } - -} // namespace antrlcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h deleted file mode 100644 index aee0d46d6e7..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include "antlr4-common.h" - -namespace antlrcpp { - - ANTLR4CPP_PUBLIC std::string escapeWhitespace(std::string_view in); - - ANTLR4CPP_PUBLIC std::string& escapeWhitespace(std::string& out, std::string_view in); - -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h b/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h deleted file mode 100644 index f0f84375add..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2021 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include "antlr4-common.h" - -namespace antlrcpp { - - class ANTLR4CPP_PUBLIC Unicode final { - public: - static constexpr char32_t REPLACEMENT_CHARACTER = 0xfffd; - - static constexpr bool isValid(char32_t codePoint) { - return codePoint < 0xd800 || (codePoint > 0xdfff && codePoint <= 0x10ffff); - } - - private: - Unicode() = delete; - Unicode(const Unicode&) = delete; - Unicode(Unicode&&) = delete; - Unicode& operator=(const Unicode&) = delete; - Unicode& operator=(Unicode&&) = delete; - }; - -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp deleted file mode 100644 index 294e9f1b215..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2021 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#include <cassert> -#include <cstdint> - -#include "support/Utf8.h" -#include "support/Unicode.h" - -// The below implementation is based off of https://github.com/google/cel-cpp/internal/utf8.cc, -// which is itself based off of https://go.googlesource.com/go/+/refs/heads/master/src/unicode/utf8/utf8.go. -// If for some reason you feel the need to copy this implementation, please retain a comment -// referencing the two source files and giving credit, as well as maintaining any and all -// obligations required by the BSD 3-clause license that governs this file. - -namespace antlrcpp { - -namespace { - -#undef SELF - constexpr uint8_t SELF = 0x80; - -#undef LOW - constexpr uint8_t LOW = 0x80; -#undef HIGH - constexpr uint8_t HIGH = 0xbf; - -#undef MASKX - constexpr uint8_t MASKX = 0x3f; -#undef MASK2 - constexpr uint8_t MASK2 = 0x1f; -#undef MASK3 - constexpr uint8_t MASK3 = 0xf; -#undef MASK4 - constexpr uint8_t MASK4 = 0x7; - -#undef TX - constexpr uint8_t TX = 0x80; -#undef T2 - constexpr uint8_t T2 = 0xc0; -#undef T3 - constexpr uint8_t T3 = 0xe0; -#undef T4 - constexpr uint8_t T4 = 0xf0; - -#undef XX - constexpr uint8_t XX = 0xf1; -#undef AS - constexpr uint8_t AS = 0xf0; -#undef S1 - constexpr uint8_t S1 = 0x02; -#undef S2 - constexpr uint8_t S2 = 0x13; -#undef S3 - constexpr uint8_t S3 = 0x03; -#undef S4 - constexpr uint8_t S4 = 0x23; -#undef S5 - constexpr uint8_t S5 = 0x34; -#undef S6 - constexpr uint8_t S6 = 0x04; -#undef S7 - constexpr uint8_t S7 = 0x44; - - // NOLINTBEGIN - // clang-format off -#undef LEADING - constexpr uint8_t LEADING[256] = { - // 1 2 3 4 5 6 7 8 9 A B C D E F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x00-0x0F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x10-0x1F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x20-0x2F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x30-0x3F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x40-0x4F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x50-0x5F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x60-0x6F - AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x70-0x7F - // 1 2 3 4 5 6 7 8 9 A B C D E F - XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x80-0x8F - XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x90-0x9F - XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xA0-0xAF - XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xB0-0xBF - XX, XX, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xC0-0xCF - S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xD0-0xDF - S2, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S4, S3, S3, // 0xE0-0xEF - S5, S6, S6, S6, S7, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xF0-0xFF - }; - // clang-format on - // NOLINTEND - -#undef ACCEPT - constexpr std::pair<uint8_t, uint8_t> ACCEPT[16] = { - {LOW, HIGH}, {0xa0, HIGH}, {LOW, 0x9f}, {0x90, HIGH}, - {LOW, 0x8f}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, - {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, - {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, - }; - -} // namespace - - std::pair<char32_t, size_t> Utf8::decode(std::string_view input) { - assert(!input.empty()); - const auto b = static_cast<uint8_t>(input.front()); - input.remove_prefix(1); - if (b < SELF) { - return {static_cast<char32_t>(b), 1}; - } - const auto leading = LEADING[b]; - if (leading == XX) { - return {Unicode::REPLACEMENT_CHARACTER, 1}; - } - auto size = static_cast<size_t>(leading & 7) - 1; - if (size > input.size()) { - return {Unicode::REPLACEMENT_CHARACTER, 1}; - } - const auto& accept = ACCEPT[leading >> 4]; - const auto b1 = static_cast<uint8_t>(input.front()); - input.remove_prefix(1); - if (b1 < accept.first || b1 > accept.second) { - return {Unicode::REPLACEMENT_CHARACTER, 1}; - } - if (size <= 1) { - return {(static_cast<char32_t>(b & MASK2) << 6) | - static_cast<char32_t>(b1 & MASKX), - 2}; - } - const auto b2 = static_cast<uint8_t>(input.front()); - input.remove_prefix(1); - if (b2 < LOW || b2 > HIGH) { - return {Unicode::REPLACEMENT_CHARACTER, 1}; - } - if (size <= 2) { - return {(static_cast<char32_t>(b & MASK3) << 12) | - (static_cast<char32_t>(b1 & MASKX) << 6) | - static_cast<char32_t>(b2 & MASKX), - 3}; - } - const auto b3 = static_cast<uint8_t>(input.front()); - input.remove_prefix(1); - if (b3 < LOW || b3 > HIGH) { - return {Unicode::REPLACEMENT_CHARACTER, 1}; - } - return {(static_cast<char32_t>(b & MASK4) << 18) | - (static_cast<char32_t>(b1 & MASKX) << 12) | - (static_cast<char32_t>(b2 & MASKX) << 6) | - static_cast<char32_t>(b3 & MASKX), - 4}; - } - - std::optional<std::u32string> Utf8::strictDecode(std::string_view input) { - std::u32string output; - char32_t codePoint; - size_t codeUnits; - output.reserve(input.size()); // Worst case is each byte is a single Unicode code point. - for (size_t index = 0; index < input.size(); index += codeUnits) { - std::tie(codePoint, codeUnits) = Utf8::decode(input.substr(index)); - if (codePoint == Unicode::REPLACEMENT_CHARACTER && codeUnits == 1) { - // Condition is only met when an illegal byte sequence is encountered. See Utf8::decode. - return std::nullopt; - } - output.push_back(codePoint); - } - output.shrink_to_fit(); - return output; - } - - std::u32string Utf8::lenientDecode(std::string_view input) { - std::u32string output; - char32_t codePoint; - size_t codeUnits; - output.reserve(input.size()); // Worst case is each byte is a single Unicode code point. - for (size_t index = 0; index < input.size(); index += codeUnits) { - std::tie(codePoint, codeUnits) = Utf8::decode(input.substr(index)); - output.push_back(codePoint); - } - output.shrink_to_fit(); - return output; - } - - std::string& Utf8::encode(std::string* buffer, char32_t codePoint) { - assert(buffer != nullptr); - if (!Unicode::isValid(codePoint)) { - codePoint = Unicode::REPLACEMENT_CHARACTER; - } - if (codePoint <= 0x7f) { - buffer->push_back(static_cast<char>(static_cast<uint8_t>(codePoint))); - } else if (codePoint <= 0x7ff) { - buffer->push_back( - static_cast<char>(T2 | static_cast<uint8_t>(codePoint >> 6))); - buffer->push_back( - static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); - } else if (codePoint <= 0xffff) { - buffer->push_back( - static_cast<char>(T3 | static_cast<uint8_t>(codePoint >> 12))); - buffer->push_back(static_cast<char>( - TX | (static_cast<uint8_t>(codePoint >> 6) & MASKX))); - buffer->push_back( - static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); - } else { - buffer->push_back( - static_cast<char>(T4 | static_cast<uint8_t>(codePoint >> 18))); - buffer->push_back(static_cast<char>( - TX | (static_cast<uint8_t>(codePoint >> 12) & MASKX))); - buffer->push_back(static_cast<char>( - TX | (static_cast<uint8_t>(codePoint >> 6) & MASKX))); - buffer->push_back( - static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); - } - return *buffer; - } - - std::optional<std::string> Utf8::strictEncode(std::u32string_view input) { - std::string output; - output.reserve(input.size() * 4); // Worst case is each Unicode code point encodes to 4 bytes. - for (size_t index = 0; index < input.size(); index++) { - char32_t codePoint = input[index]; - if (!Unicode::isValid(codePoint)) { - return std::nullopt; - } - Utf8::encode(&output, codePoint); - } - output.shrink_to_fit(); - return output; - } - - std::string Utf8::lenientEncode(std::u32string_view input) { - std::string output; - output.reserve(input.size() * 4); // Worst case is each Unicode code point encodes to 4 bytes. - for (size_t index = 0; index < input.size(); index++) { - char32_t codePoint = input[index]; - if (!Unicode::isValid(codePoint)) { - codePoint = Unicode::REPLACEMENT_CHARACTER; - } - Utf8::encode(&output, codePoint); - } - output.shrink_to_fit(); - return output; - } - -} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h deleted file mode 100644 index e4828441cdc..00000000000 --- a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2021 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -#pragma once - -#include <optional> -#include <string> -#include <string_view> -#include <tuple> - -#include "antlr4-common.h" - -namespace antlrcpp { - - class ANTLR4CPP_PUBLIC Utf8 final { - public: - // Decodes the next code point, returning the decoded code point and the number - // of code units (a.k.a. bytes) consumed. In the event that an invalid code unit - // sequence is returned the replacement character, U+FFFD, is returned with a - // code unit count of 1. As U+FFFD requires 3 code units when encoded, this can - // be used to differentiate valid input from malformed input. - static std::pair<char32_t, size_t> decode(std::string_view input); - - // Decodes the given UTF-8 encoded input into a string of code points. - static std::optional<std::u32string> strictDecode(std::string_view input); - - // Decodes the given UTF-8 encoded input into a string of code points. Unlike strictDecode(), - // each byte in an illegal byte sequence is replaced with the Unicode replacement character, - // U+FFFD. - static std::u32string lenientDecode(std::string_view input); - - // Encodes the given code point and appends it to the buffer. If the code point - // is an unpaired surrogate or outside of the valid Unicode range it is replaced - // with the replacement character, U+FFFD. - static std::string& encode(std::string *buffer, char32_t codePoint); - - // Encodes the given Unicode code point string as UTF-8. - static std::optional<std::string> strictEncode(std::u32string_view input); - - // Encodes the given Unicode code point string as UTF-8. Unlike strictEncode(), - // each invalid Unicode code point is replaced with the Unicode replacement character, U+FFFD. - static std::string lenientEncode(std::u32string_view input); - - private: - Utf8() = delete; - Utf8(const Utf8&) = delete; - Utf8(Utf8&&) = delete; - Utf8& operator=(const Utf8&) = delete; - Utf8& operator=(Utf8&&) = delete; - }; - -} |
