diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300 |
commit | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch) | |
tree | 291d72dbd7e9865399f668c84d11ed86fb190bbf /contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h | |
parent | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff) | |
download | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz |
YQ Connector:Use docker-compose in integrational tests
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h')
-rw-r--r-- | contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h new file mode 100644 index 0000000000..e4828441cd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <optional> +#include <string> +#include <string_view> +#include <tuple> + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC Utf8 final { + public: + // Decodes the next code point, returning the decoded code point and the number + // of code units (a.k.a. bytes) consumed. In the event that an invalid code unit + // sequence is encountered, the replacement character, U+FFFD, is returned with a + // code unit count of 1. As U+FFFD requires 3 code units when encoded, this can + // be used to differentiate valid input from malformed input. + static std::pair<char32_t, size_t> decode(std::string_view input); + + // Decodes the given UTF-8 encoded input into a string of code points. NOTE(review): the std::optional return presumably yields std::nullopt when the input is not valid UTF-8 -- confirm against the implementation. + static std::optional<std::u32string> strictDecode(std::string_view input); + + // Decodes the given UTF-8 encoded input into a string of code points. Unlike strictDecode(), + // each byte in an illegal byte sequence is replaced with the Unicode replacement character, + // U+FFFD. + static std::u32string lenientDecode(std::string_view input); + + // Encodes the given code point and appends it to the buffer. If the code point + // is an unpaired surrogate or outside of the valid Unicode range it is replaced + // with the replacement character, U+FFFD. NOTE(review): the returned reference presumably refers to *buffer -- confirm against the implementation. + static std::string& encode(std::string *buffer, char32_t codePoint); + + // Encodes the given Unicode code point string as UTF-8. NOTE(review): the std::optional return presumably yields std::nullopt when the input contains an invalid code point -- confirm against the implementation.
+ static std::optional<std::string> strictEncode(std::u32string_view input); + + // Encodes the given Unicode code point string as UTF-8. Unlike strictEncode(), + // each invalid Unicode code point is replaced with the Unicode replacement character, U+FFFD. + static std::string lenientEncode(std::u32string_view input); + + private: + Utf8() = delete; + Utf8(const Utf8&) = delete; + Utf8(Utf8&&) = delete; + Utf8& operator=(const Utf8&) = delete; + Utf8& operator=(Utf8&&) = delete; + }; + +} |