diff options
author | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
---|---|---|
committer | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
commit | 22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch) | |
tree | bffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/string_utils | |
parent | 332b99e2173f0425444abb759eebcb2fafaa9209 (diff) | |
download | ydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz |
validate canons without yatest_common
Diffstat (limited to 'library/cpp/string_utils')
-rw-r--r-- | library/cpp/string_utils/secret_string/secret_string.cpp | 68 | ||||
-rw-r--r-- | library/cpp/string_utils/secret_string/secret_string.h | 74 | ||||
-rw-r--r-- | library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp | 147 | ||||
-rw-r--r-- | library/cpp/string_utils/tskv_format/builder.cpp | 1 | ||||
-rw-r--r-- | library/cpp/string_utils/tskv_format/builder.h | 67 | ||||
-rw-r--r-- | library/cpp/string_utils/tskv_format/escape.cpp | 112 | ||||
-rw-r--r-- | library/cpp/string_utils/tskv_format/escape.h | 10 | ||||
-rw-r--r-- | library/cpp/string_utils/tskv_format/tskv_map.cpp | 60 | ||||
-rw-r--r-- | library/cpp/string_utils/tskv_format/tskv_map.h | 62 |
9 files changed, 601 insertions, 0 deletions
diff --git a/library/cpp/string_utils/secret_string/secret_string.cpp b/library/cpp/string_utils/secret_string/secret_string.cpp new file mode 100644 index 0000000000..3b68d3cd27 --- /dev/null +++ b/library/cpp/string_utils/secret_string/secret_string.cpp @@ -0,0 +1,68 @@ +#include "secret_string.h" + +#include <util/system/madvise.h> + +namespace NSecretString { + TSecretString::TSecretString(TStringBuf value) { + Init(value); + } + + TSecretString::~TSecretString() { + try { + Clear(); + } catch (...) { + } + } + + TSecretString& TSecretString::operator=(const TSecretString& o) { + if (&o == this) { + return *this; + } + + Init(o.Value_); + + return *this; + } + + /** + * It is not honest "move". Actually it is copy-assignment with cleaning of other instance. + * This way allowes to avoid side effects of string optimizations: + * Copy-On-Write or Short-String-Optimization + */ + TSecretString& TSecretString::operator=(TSecretString&& o) { + if (&o == this) { + return *this; + } + + Init(o.Value_); + o.Clear(); + + return *this; + } + + TSecretString& TSecretString::operator=(const TStringBuf o) { + Init(o); + + return *this; + } + + void TSecretString::Init(TStringBuf value) { + Clear(); + if (value.empty()) { + return; + } + + Value_ = value; + MadviseExcludeFromCoreDump(Value_); + } + + void TSecretString::Clear() { + if (Value_.empty()) { + return; + } + + SecureZero((void*)Value_.data(), Value_.size()); + MadviseIncludeIntoCoreDump(Value_); + Value_.clear(); + } +} diff --git a/library/cpp/string_utils/secret_string/secret_string.h b/library/cpp/string_utils/secret_string/secret_string.h new file mode 100644 index 0000000000..fdb9f6a85c --- /dev/null +++ b/library/cpp/string_utils/secret_string/secret_string.h @@ -0,0 +1,74 @@ +#pragma once + +#include <library/cpp/string_utils/ztstrbuf/ztstrbuf.h> + +#include <util/generic/string.h> + +namespace NSecretString { + /** + * TSecretString allowes to store some long lived secrets in "secure" storage in memory. + * Common usage: + * 1) read secret value from disk/env/etc + * 2) put it into TSecretString + * 3) destory secret copy from 1) + * + * Useful scenerios for TSecretString: + * - in memory only tasks: using key to create crypto signature; + * - rare network cases: db password on connection or OAuth token in background tasks. + * These cases disclosure the secret + * because of sending it over network with some I/O frameworks. + * Usually such frameworks copy input params to provide network protocol: gRPC, for example. + * + * Supported features: + * 1. Exclude secret from core dump. + * madvise(MADV_DONTDUMP) in ctor excludes full memory page from core dump. + * madvise(MADV_DODUMP) in dtor reverts previous action. + * 2. Zero memory before free. + * + * Code dump looks like this: +(gdb) print s +$1 = (const TSecretString &) @0x7fff23c4c560: { + Value_ = {<TStringBase<TBasicString<char, std::__y1::char_traits<char> >, char, std::__y1::char_traits<char> >> = { + static npos = <optimized out>}, Data_ = 0x107c001d8 <error: Cannot access memory at address 0x107c001d8>}} + */ + + class TSecretString { + public: + TSecretString() = default; + TSecretString(TStringBuf value); + ~TSecretString(); + + TSecretString(const TSecretString& o) + : TSecretString(o.Value()) + { + } + + TSecretString(TSecretString&& o) + : TSecretString(o.Value()) + { + o.Clear(); + } + + TSecretString& operator=(const TSecretString& o); + TSecretString& operator=(TSecretString&& o); + + TSecretString& operator=(const TStringBuf o); + + operator TZtStringBuf() const { + return Value(); + } + + // Provides zero terminated string + TZtStringBuf Value() const { + return TZtStringBuf(Value_); + } + + private: + // TStringBuf breaks Copy-On-Write to provide correct copy-ctor and copy-assignment + void Init(TStringBuf value); + void Clear(); + + private: + TString Value_; + }; +} diff --git a/library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp b/library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp new file mode 100644 index 0000000000..681b75368f --- /dev/null +++ b/library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp @@ -0,0 +1,147 @@ +#include <library/cpp/string_utils/secret_string/secret_string.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NSecretString; + +Y_UNIT_TEST_SUITE(SecretTest) { + Y_UNIT_TEST(Common) { + TSecretString s; + UNIT_ASSERT_VALUES_EQUAL("", s.Value()); + UNIT_ASSERT_VALUES_EQUAL("", (TStringBuf)s); + + TSecretString s2("qwerty"); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", (TStringBuf)s2); + } + + Y_UNIT_TEST(CopyCtor1) { + TSecretString s1("qwerty"); + + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + + { + TSecretString s2(s1); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + } + + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + } + + Y_UNIT_TEST(CopyCtor2) { + auto s1 = MakeHolder<TSecretString>("qwerty"); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1->Value()); + + TSecretString s2(*s1); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1->Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + + s1.Reset(); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + } + + Y_UNIT_TEST(MoveCtor1) { + TSecretString s1("qwerty"); + + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + + { + TSecretString s2(std::move(s1)); + UNIT_ASSERT_VALUES_EQUAL("", s1.Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + } + + UNIT_ASSERT_VALUES_EQUAL("", s1.Value()); + } + + Y_UNIT_TEST(MoveCtor2) { + auto s1 = MakeHolder<TSecretString>("qwerty"); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1->Value()); + + TSecretString s2(std::move(*s1)); + UNIT_ASSERT_VALUES_EQUAL("", s1->Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + + s1.Reset(); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + } + + Y_UNIT_TEST(CopyAssignment1) { + TSecretString s1("qwerty"); + + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + + { + TSecretString s2; + UNIT_ASSERT_VALUES_EQUAL("", s2.Value()); + + s2 = s1; + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + } + + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + } + + Y_UNIT_TEST(CopyAssignment2) { + auto s1 = MakeHolder<TSecretString>("qwerty"); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1->Value()); + + TSecretString s2; + UNIT_ASSERT_VALUES_EQUAL("", s2.Value()); + + s2 = *s1; + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1->Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + + s1.Reset(); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + + TSecretString s3; + s2 = s3; + UNIT_ASSERT_VALUES_EQUAL("", s2.Value()); + } + + Y_UNIT_TEST(MoveAssignment1) { + TSecretString s1("qwerty"); + + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1.Value()); + + { + TSecretString s2; + UNIT_ASSERT_VALUES_EQUAL("", s2.Value()); + + s2 = std::move(s1); + UNIT_ASSERT_VALUES_EQUAL("", s1.Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + } + + UNIT_ASSERT_VALUES_EQUAL("", s1.Value()); + } + + Y_UNIT_TEST(MoveAssignment2) { + auto s1 = MakeHolder<TSecretString>("qwerty"); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s1->Value()); + + TSecretString s2; + UNIT_ASSERT_VALUES_EQUAL("", s2.Value()); + + s2 = std::move(*s1); + UNIT_ASSERT_VALUES_EQUAL("", s1->Value()); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + + s1.Reset(); + UNIT_ASSERT_VALUES_EQUAL("qwerty", s2.Value()); + + TSecretString s3; + s2 = std::move(s3); + UNIT_ASSERT_VALUES_EQUAL("", s2.Value()); + } + + Y_UNIT_TEST(ZeroTerminated) { + TSecretString s("qwerty"); + + UNIT_ASSERT_VALUES_EQUAL(s.Value().size(), strlen(s.Value().data())); + } +} diff --git a/library/cpp/string_utils/tskv_format/builder.cpp b/library/cpp/string_utils/tskv_format/builder.cpp new file mode 100644 index 0000000000..ede9074022 --- /dev/null +++ b/library/cpp/string_utils/tskv_format/builder.cpp @@ -0,0 +1 @@ +#include "builder.h" diff --git a/library/cpp/string_utils/tskv_format/builder.h b/library/cpp/string_utils/tskv_format/builder.h new file mode 100644 index 0000000000..40689ddc85 --- /dev/null +++ b/library/cpp/string_utils/tskv_format/builder.h @@ -0,0 +1,67 @@ +#pragma once + +#include "escape.h" + +#include <util/stream/str.h> + +namespace NTskvFormat { + class TLogBuilder { + private: + TStringStream Out; + + public: + TLogBuilder() = default; + + TLogBuilder(TStringBuf logType, ui32 unixtime) { + Begin(logType, unixtime); + } + + TLogBuilder(TStringBuf logType) { + Begin(logType); + } + + TLogBuilder& Add(TStringBuf fieldName, TStringBuf fieldValue) { + if (!Out.Empty()) { + Out << '\t'; + } + Escape(fieldName, Out.Str()); + Out << '='; + Escape(fieldValue, Out.Str()); + + return *this; + } + + TLogBuilder& AddUnescaped(TStringBuf fieldName, TStringBuf fieldValue) { + if (!Out.Empty()) { + Out << '\t'; + } + Out << fieldName << '=' << fieldValue; + return *this; + } + + TLogBuilder& Begin(TStringBuf logType, ui32 unixtime) { + Out << "tskv\ttskv_format=" << logType << "\tunixtime=" << unixtime; + return *this; + } + + TLogBuilder& Begin(TStringBuf logType) { + Out << "tskv\ttskv_format=" << logType; + return *this; + } + + TLogBuilder& End() { + Out << '\n'; + return *this; + } + + TLogBuilder& Clear() { + Out.Clear(); + return *this; + } + + TString& Str() { + return Out.Str(); + } + }; + +} diff --git a/library/cpp/string_utils/tskv_format/escape.cpp b/library/cpp/string_utils/tskv_format/escape.cpp new file mode 100644 index 0000000000..3dc78bec8c --- /dev/null +++ b/library/cpp/string_utils/tskv_format/escape.cpp @@ -0,0 +1,112 @@ +#include <util/generic/yexception.h> +#include "escape.h" + +namespace NTskvFormat { + namespace { + const TStringBuf ESCAPE_CHARS("\t\n\r\\\0=\"", 7); + + TString& EscapeImpl(const char* src, size_t len, TString& dst) { + TStringBuf srcStr(src, len); + size_t noEscapeStart = 0; + + while (noEscapeStart < len) { + size_t noEscapeEnd = srcStr.find_first_of(ESCAPE_CHARS, noEscapeStart); + + if (noEscapeEnd == TStringBuf::npos) { + dst.append(src + noEscapeStart, len - noEscapeStart); + break; + } + + dst.append(src + noEscapeStart, noEscapeEnd - noEscapeStart); + + switch (src[noEscapeEnd]) { + case '\t': + dst.append(TStringBuf("\\t")); + break; + case '\n': + dst.append(TStringBuf("\\n")); + break; + case '\r': + dst.append(TStringBuf("\\r")); + break; + case '\0': + dst.append(TStringBuf("\\0")); + break; + case '\\': + dst.append(TStringBuf("\\\\")); + break; + case '=': + dst.append(TStringBuf("\\=")); + break; + case '"': + dst.append(TStringBuf("\\\"")); + break; + } + + noEscapeStart = noEscapeEnd + 1; + } + + return dst; + } + + TString& UnescapeImpl(const char* src, const size_t len, TString& dst) { + TStringBuf srcStr(src, len); + size_t noEscapeStart = 0; + + while (noEscapeStart < len) { + size_t noEscapeEnd = srcStr.find('\\', noEscapeStart); + + if (noEscapeEnd == TStringBuf::npos) { + dst.append(src + noEscapeStart, len - noEscapeStart); + break; + } + + dst.append(src + noEscapeStart, noEscapeEnd - noEscapeStart); + + if (noEscapeEnd + 1 >= len) { + throw yexception() << "expected (t|n|r|0|\\|=|\"|) after \\. Got end of line."; + } + + switch (src[noEscapeEnd + 1]) { + case 't': + dst.append('\t'); + break; + case 'n': + dst.append('\n'); + break; + case 'r': + dst.append('\r'); + break; + case '0': + dst.append('\0'); + break; + case '\\': + dst.append('\\'); + break; + case '=': + dst.append('='); + break; + case '"': + dst.append('"'); + break; + default: + throw yexception() << "unexpected symbol '" << src[noEscapeEnd + 1] << "' after \\"; + } + + noEscapeStart = noEscapeEnd + 2; + } + + return dst; + } + + } + + TString& Escape(const TStringBuf& src, TString& dst) { + return EscapeImpl(src.data(), src.size(), dst); + } + + TString& Unescape(const TStringBuf& src, TString& dst) { + return UnescapeImpl(src.data(), src.size(), dst); + } + +} diff --git a/library/cpp/string_utils/tskv_format/escape.h b/library/cpp/string_utils/tskv_format/escape.h new file mode 100644 index 0000000000..2e3dd02c98 --- /dev/null +++ b/library/cpp/string_utils/tskv_format/escape.h @@ -0,0 +1,10 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/generic/string.h> + +namespace NTskvFormat { + TString& Escape(const TStringBuf& src, TString& dst); + TString& Unescape(const TStringBuf& src, TString& dst); + +} diff --git a/library/cpp/string_utils/tskv_format/tskv_map.cpp b/library/cpp/string_utils/tskv_format/tskv_map.cpp new file mode 100644 index 0000000000..99e5f19731 --- /dev/null +++ b/library/cpp/string_utils/tskv_format/tskv_map.cpp @@ -0,0 +1,60 @@ +#include "tskv_map.h" + +namespace { + void Split(const TStringBuf& kv, TStringBuf& key, TStringBuf& value, bool& keyHasEscapes) { + size_t delimiter = 0; + keyHasEscapes = false; + for (delimiter = 0; delimiter < kv.size() && kv[delimiter] != '='; ++delimiter) { + if (kv[delimiter] == '\\') { + ++delimiter; + keyHasEscapes = true; + } + } + + if (delimiter < kv.size()) { + key = kv.Head(delimiter); + value = kv.Tail(delimiter + 1); + } else { + throw yexception() << "Incorrect tskv format"; + } + } + + TStringBuf DeserializeTokenToBuffer(const TStringBuf& token, TString& buffer) { + size_t tokenStart = buffer.size(); + NTskvFormat::Unescape(token, buffer); + return TStringBuf(buffer).Tail(tokenStart); + } + + void DeserializeTokenToString(const TStringBuf& token, TString& result, bool unescape) { + if (unescape) { + result.clear(); + NTskvFormat::Unescape(token, result); + } else { + result = token; + } + + } +} + +void NTskvFormat::NDetail::DeserializeKvToStringBufs(const TStringBuf& kv, TStringBuf& key, TStringBuf& value, TString& buffer, bool unescape) { + bool keyHasEscapes = false; + Split(kv, key, value, keyHasEscapes); + if (unescape) { + if (keyHasEscapes) { + key = DeserializeTokenToBuffer(key, buffer); + } + if (value.Contains('\\')) { + value = DeserializeTokenToBuffer(value, buffer); + } + } +} + +void NTskvFormat::NDetail::DeserializeKvToStrings(const TStringBuf& kv, TString& key, TString& value, bool unescape) { + TStringBuf keyBuf, valueBuf; + bool keyHasEscapes = false; + Split(kv, keyBuf, valueBuf, keyHasEscapes); + + Y_UNUSED(keyHasEscapes); + DeserializeTokenToString(keyBuf, key, unescape); + DeserializeTokenToString(valueBuf, value, unescape); +} diff --git a/library/cpp/string_utils/tskv_format/tskv_map.h b/library/cpp/string_utils/tskv_format/tskv_map.h new file mode 100644 index 0000000000..4f4978fcf5 --- /dev/null +++ b/library/cpp/string_utils/tskv_format/tskv_map.h @@ -0,0 +1,62 @@ +#pragma once + +#include "escape.h" +#include <util/string/cast.h> +#include <util/string/split.h> + +namespace NTskvFormat { + namespace NDetail { + void DeserializeKvToStringBufs(const TStringBuf& kv, TStringBuf& key, TStringBuf& value, TString& buffer, bool unescape); + void DeserializeKvToStrings(const TStringBuf& kv, TString& key, TString& value, bool unescape); + } + + template <typename T> + TString& SerializeMap(const T& data, TString& result) { + result.clear(); + for (const auto& kv : data) { + if (result.size() > 0) { + result.push_back('\t'); + } + Escape(ToString(kv.first), result); + result.push_back('='); + Escape(ToString(kv.second), result); + } + return result; + } + + /** + * Deserializing to TStringBuf is faster, just remember that `data' + * must not be invalidated while `result' is still in use. + */ + template <typename T> + void DeserializeMap(const TStringBuf& data, T& result, TString& buffer, bool unescape = true) { + result.clear(); + buffer.clear(); + buffer.reserve(data.size()); + TStringBuf key, value; + + StringSplitter(data.begin(), data.end()).Split('\t').Consume([&](const TStringBuf kv){ + NDetail::DeserializeKvToStringBufs(kv, key, value, buffer, unescape); + result[key] = value; + }); + + Y_ASSERT(buffer.size() <= data.size()); + } + + template <typename T> + void DeserializeMap(const TStringBuf& data, T& result, bool unescape = true) { + if constexpr(std::is_same<typename T::key_type, TStringBuf>::value || + std::is_same<typename T::mapped_type, TStringBuf>::value) + { + DeserializeMap(data, result, result.DeserializeBuffer, unescape); // we can't unescape values w/o buffer + return; + } + result.clear(); + TString key, value; + + StringSplitter(data.begin(), data.end()).Split('\t').Consume([&](const TStringBuf kv){ + NDetail::DeserializeKvToStrings(kv, key, value, unescape); + result[key] = value; + }); + } +} |