diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/yaml-cpp/src/emitterutils.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/yaml-cpp/src/emitterutils.cpp')
-rw-r--r-- | contrib/libs/yaml-cpp/src/emitterutils.cpp | 483 |
1 files changed, 483 insertions, 0 deletions
diff --git a/contrib/libs/yaml-cpp/src/emitterutils.cpp b/contrib/libs/yaml-cpp/src/emitterutils.cpp new file mode 100644 index 0000000000..147738ad8a --- /dev/null +++ b/contrib/libs/yaml-cpp/src/emitterutils.cpp @@ -0,0 +1,483 @@ +#include <iomanip> +#include <sstream> + +#include "emitterutils.h" +#include "exp.h" +#include "indentation.h" +#include "regex_yaml.h" +#include "regeximpl.h" +#include "stringsource.h" +#include "yaml-cpp/binary.h" // IWYU pragma: keep +#include "yaml-cpp/ostream_wrapper.h" +#include "yaml-cpp/null.h" + +namespace YAML { +namespace Utils { +namespace { +enum { REPLACEMENT_CHARACTER = 0xFFFD }; + +bool IsAnchorChar(int ch) { // test for ns-anchor-char + switch (ch) { + case ',': + case '[': + case ']': + case '{': + case '}': // c-flow-indicator + case ' ': + case '\t': // s-white + case 0xFEFF: // c-byte-order-mark + case 0xA: + case 0xD: // b-char + return false; + case 0x85: + return true; + } + + if (ch < 0x20) { + return false; + } + + if (ch < 0x7E) { + return true; + } + + if (ch < 0xA0) { + return false; + } + if (ch >= 0xD800 && ch <= 0xDFFF) { + return false; + } + if ((ch & 0xFFFE) == 0xFFFE) { + return false; + } + if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) { + return false; + } + if (ch > 0x10FFFF) { + return false; + } + + return true; +} + +int Utf8BytesIndicated(char ch) { + int byteVal = static_cast<unsigned char>(ch); + switch (byteVal >> 4) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + return 1; + case 12: + case 13: + return 2; + case 14: + return 3; + case 15: + return 4; + default: + return -1; + } +} + +bool IsTrailingByte(char ch) { return (ch & 0xC0) == 0x80; } + +bool GetNextCodePointAndAdvance(int& codePoint, + std::string::const_iterator& first, + std::string::const_iterator last) { + if (first == last) + return false; + + int nBytes = Utf8BytesIndicated(*first); + if (nBytes < 1) { + // Bad lead byte + ++first; + codePoint = REPLACEMENT_CHARACTER; + return true; + } + + if (nBytes == 1) { + codePoint = *first++; + return true; + } + + // Gather bits from trailing bytes + codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes)); + ++first; + --nBytes; + for (; nBytes > 0; ++first, --nBytes) { + if ((first == last) || !IsTrailingByte(*first)) { + codePoint = REPLACEMENT_CHARACTER; + break; + } + codePoint <<= 6; + codePoint |= *first & 0x3F; + } + + // Check for illegal code points + if (codePoint > 0x10FFFF) + codePoint = REPLACEMENT_CHARACTER; + else if (codePoint >= 0xD800 && codePoint <= 0xDFFF) + codePoint = REPLACEMENT_CHARACTER; + else if ((codePoint & 0xFFFE) == 0xFFFE) + codePoint = REPLACEMENT_CHARACTER; + else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF) + codePoint = REPLACEMENT_CHARACTER; + return true; +} + +void WriteCodePoint(ostream_wrapper& out, int codePoint) { + if (codePoint < 0 || codePoint > 0x10FFFF) { + codePoint = REPLACEMENT_CHARACTER; + } + if (codePoint < 0x7F) { + out << static_cast<char>(codePoint); + } else if (codePoint < 0x7FF) { + out << static_cast<char>(0xC0 | (codePoint >> 6)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } else if (codePoint < 0xFFFF) { + out << static_cast<char>(0xE0 | (codePoint >> 12)) + << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } else { + out << static_cast<char>(0xF0 | (codePoint >> 18)) + << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)) + << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } +} + +bool IsValidPlainScalar(const std::string& str, FlowType::value flowType, + bool allowOnlyAscii) { + // check against null + if (IsNullString(str)) { + return false; + } + + // check the start + const RegEx& start = (flowType == FlowType::Flow ? Exp::PlainScalarInFlow() + : Exp::PlainScalar()); + if (!start.Matches(str)) { + return false; + } + + // and check the end for plain whitespace (which can't be faithfully kept in a + // plain scalar) + if (!str.empty() && *str.rbegin() == ' ') { + return false; + } + + // then check until something is disallowed + static const RegEx& disallowed_flow = + Exp::EndScalarInFlow() || (Exp::BlankOrBreak() + Exp::Comment()) || + Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() || + Exp::Tab(); + static const RegEx& disallowed_block = + Exp::EndScalar() || (Exp::BlankOrBreak() + Exp::Comment()) || + Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() || + Exp::Tab(); + const RegEx& disallowed = + flowType == FlowType::Flow ? disallowed_flow : disallowed_block; + + StringCharSource buffer(str.c_str(), str.size()); + while (buffer) { + if (disallowed.Matches(buffer)) { + return false; + } + if (allowOnlyAscii && (0x80 <= static_cast<unsigned char>(buffer[0]))) { + return false; + } + ++buffer; + } + + return true; +} + +bool IsValidSingleQuotedScalar(const std::string& str, bool escapeNonAscii) { + // TODO: check for non-printable characters? + for (std::size_t i = 0; i < str.size(); i++) { + if (escapeNonAscii && (0x80 <= static_cast<unsigned char>(str[i]))) { + return false; + } + if (str[i] == '\n') { + return false; + } + } + return true; +} + +bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType, + bool escapeNonAscii) { + if (flowType == FlowType::Flow) { + return false; + } + + // TODO: check for non-printable characters? + for (std::size_t i = 0; i < str.size(); i++) { + if (escapeNonAscii && (0x80 <= static_cast<unsigned char>(str[i]))) { + return false; + } + } + return true; +} + +void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) { + static const char hexDigits[] = "0123456789abcdef"; + + out << "\\"; + int digits = 8; + if (codePoint < 0xFF) { + out << "x"; + digits = 2; + } else if (codePoint < 0xFFFF) { + out << "u"; + digits = 4; + } else { + out << "U"; + digits = 8; + } + + // Write digits into the escape sequence + for (; digits > 0; --digits) + out << hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF]; +} + +bool WriteAliasName(ostream_wrapper& out, const std::string& str) { + int codePoint; + for (std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end());) { + if (!IsAnchorChar(codePoint)) { + return false; + } + + WriteCodePoint(out, codePoint); + } + return true; +} +} + +StringFormat::value ComputeStringFormat(const std::string& str, + EMITTER_MANIP strFormat, + FlowType::value flowType, + bool escapeNonAscii) { + switch (strFormat) { + case Auto: + if (IsValidPlainScalar(str, flowType, escapeNonAscii)) { + return StringFormat::Plain; + } + return StringFormat::DoubleQuoted; + case SingleQuoted: + if (IsValidSingleQuotedScalar(str, escapeNonAscii)) { + return StringFormat::SingleQuoted; + } + return StringFormat::DoubleQuoted; + case DoubleQuoted: + return StringFormat::DoubleQuoted; + case Literal: + if (IsValidLiteralScalar(str, flowType, escapeNonAscii)) { + return StringFormat::Literal; + } + return StringFormat::DoubleQuoted; + default: + break; + } + + return StringFormat::DoubleQuoted; +} + +bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) { + out << "'"; + int codePoint; + for (std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end());) { + if (codePoint == '\n') { + return false; // We can't handle a new line and the attendant indentation + // yet + } + + if (codePoint == '\'') { + out << "''"; + } else { + WriteCodePoint(out, codePoint); + } + } + out << "'"; + return true; +} + +bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str, + bool escapeNonAscii) { + out << "\""; + int codePoint; + for (std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end());) { + switch (codePoint) { + case '\"': + out << "\\\""; + break; + case '\\': + out << "\\\\"; + break; + case '\n': + out << "\\n"; + break; + case '\t': + out << "\\t"; + break; + case '\r': + out << "\\r"; + break; + case '\b': + out << "\\b"; + break; + default: + if (codePoint < 0x20 || + (codePoint >= 0x80 && + codePoint <= 0xA0)) { // Control characters and non-breaking space + WriteDoubleQuoteEscapeSequence(out, codePoint); + } else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be + // escaped (YAML 1.2, sec. 5.2) + WriteDoubleQuoteEscapeSequence(out, codePoint); + } else if (escapeNonAscii && codePoint > 0x7E) { + WriteDoubleQuoteEscapeSequence(out, codePoint); + } else { + WriteCodePoint(out, codePoint); + } + } + } + out << "\""; + return true; +} + +bool WriteLiteralString(ostream_wrapper& out, const std::string& str, + std::size_t indent) { + out << "|\n"; + out << IndentTo(indent); + int codePoint; + for (std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end());) { + if (codePoint == '\n') { + out << "\n" << IndentTo(indent); + } else { + WriteCodePoint(out, codePoint); + } + } + return true; +} + +bool WriteChar(ostream_wrapper& out, char ch) { + if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) { + out << ch; + } else if (ch == '\"') { + out << "\"\\\"\""; + } else if (ch == '\t') { + out << "\"\\t\""; + } else if (ch == '\n') { + out << "\"\\n\""; + } else if (ch == '\b') { + out << "\"\\b\""; + } else if (ch == '\\') { + out << "\"\\\\\""; + } else if ((0x20 <= ch && ch <= 0x7e) || ch == ' ') { + out << "\"" << ch << "\""; + } else { + out << "\""; + WriteDoubleQuoteEscapeSequence(out, ch); + out << "\""; + } + return true; +} + +bool WriteComment(ostream_wrapper& out, const std::string& str, + std::size_t postCommentIndent) { + const std::size_t curIndent = out.col(); + out << "#" << Indentation(postCommentIndent); + out.set_comment(); + int codePoint; + for (std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end());) { + if (codePoint == '\n') { + out << "\n" << IndentTo(curIndent) << "#" + << Indentation(postCommentIndent); + out.set_comment(); + } else { + WriteCodePoint(out, codePoint); + } + } + return true; +} + +bool WriteAlias(ostream_wrapper& out, const std::string& str) { + out << "*"; + return WriteAliasName(out, str); +} + +bool WriteAnchor(ostream_wrapper& out, const std::string& str) { + out << "&"; + return WriteAliasName(out, str); +} + +bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim) { + out << (verbatim ? "!<" : "!"); + StringCharSource buffer(str.c_str(), str.size()); + const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag(); + while (buffer) { + int n = reValid.Match(buffer); + if (n <= 0) { + return false; + } + + while (--n >= 0) { + out << buffer[0]; + ++buffer; + } + } + if (verbatim) { + out << ">"; + } + return true; +} + +bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix, + const std::string& tag) { + out << "!"; + StringCharSource prefixBuffer(prefix.c_str(), prefix.size()); + while (prefixBuffer) { + int n = Exp::URI().Match(prefixBuffer); + if (n <= 0) { + return false; + } + + while (--n >= 0) { + out << prefixBuffer[0]; + ++prefixBuffer; + } + } + + out << "!"; + StringCharSource tagBuffer(tag.c_str(), tag.size()); + while (tagBuffer) { + int n = Exp::Tag().Match(tagBuffer); + if (n <= 0) { + return false; + } + + while (--n >= 0) { + out << tagBuffer[0]; + ++tagBuffer; + } + } + return true; +} + +bool WriteBinary(ostream_wrapper& out, const Binary& binary) { + WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), + false); + return true; +} +} +} |