aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils
diff options
context:
space:
mode:
authorqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
committerqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
commit22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch)
treebffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/string_utils
parent332b99e2173f0425444abb759eebcb2fafaa9209 (diff)
downloadydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz
validate canons without yatest_common
Diffstat (limited to 'library/cpp/string_utils')
-rw-r--r--library/cpp/string_utils/secret_string/secret_string.cpp68
-rw-r--r--library/cpp/string_utils/secret_string/secret_string.h74
-rw-r--r--library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp147
-rw-r--r--library/cpp/string_utils/tskv_format/builder.cpp1
-rw-r--r--library/cpp/string_utils/tskv_format/builder.h67
-rw-r--r--library/cpp/string_utils/tskv_format/escape.cpp112
-rw-r--r--library/cpp/string_utils/tskv_format/escape.h10
-rw-r--r--library/cpp/string_utils/tskv_format/tskv_map.cpp60
-rw-r--r--library/cpp/string_utils/tskv_format/tskv_map.h62
9 files changed, 601 insertions, 0 deletions
diff --git a/library/cpp/string_utils/secret_string/secret_string.cpp b/library/cpp/string_utils/secret_string/secret_string.cpp
new file mode 100644
index 0000000000..3b68d3cd27
--- /dev/null
+++ b/library/cpp/string_utils/secret_string/secret_string.cpp
@@ -0,0 +1,68 @@
+#include "secret_string.h"
+
+#include <util/system/madvise.h>
+
+namespace NSecretString {
+    // Builds a secret from `value`; all set-up work is delegated to Init().
+    TSecretString::TSecretString(TStringBuf value) {
+        this->Init(value);
+    }
+
+    // Wipes the stored value on destruction. Anything thrown by Clear() is
+    // swallowed so that no exception can leave the destructor.
+    TSecretString::~TSecretString() {
+        try {
+            this->Clear();
+        } catch (...) {
+            // intentionally ignored
+        }
+    }
+
+    // Copy assignment: re-initialises this object from the value held by `o`.
+    // Self-assignment is a no-op.
+    TSecretString& TSecretString::operator=(const TSecretString& o) {
+        if (this != &o) {
+            Init(o.Value_);
+        }
+
+        return *this;
+    }
+
+    /**
+     * This is not a genuine "move": it is a copy assignment followed by wiping the source.
+     * Doing it this way allows avoiding the side effects of string optimizations
+     * (Copy-On-Write or Short-String-Optimization).
+     * Self-assignment is a no-op.
+     */
+    TSecretString& TSecretString::operator=(TSecretString&& o) {
+        if (this != &o) {
+            Init(o.Value_);
+            o.Clear();
+        }
+
+        return *this;
+    }
+
+    /**
+     * Replaces the stored secret with a copy of `o` and returns *this.
+     *
+     * `o` may be a view into this object's own buffer (for example `s = s.Value()`).
+     * Init() wipes the current buffer before copying from its argument, so such a
+     * view is first copied into a temporary TSecretString and the new value is
+     * taken from that temporary; the temporary wipes itself on destruction.
+     */
+    TSecretString& TSecretString::operator=(const TStringBuf o) {
+        const TString& current = Value_;
+
+        // Pointers into the same buffer compare reliably. For unrelated buffers a
+        // spurious "true" only costs one extra copy, never correctness.
+        const bool viewsOwnBuffer = !current.empty() &&
+                                    current.data() <= o.data() &&
+                                    o.data() < current.data() + current.size();
+
+        if (viewsOwnBuffer) {
+            TSecretString detached(o);
+            Init(detached.Value_);
+        } else {
+            Init(o);
+        }
+
+        return *this;
+    }
+
+    // Drops whatever is currently stored (via Clear()) and, when `value` is not
+    // empty, stores a copy of it and passes the new string to
+    // MadviseExcludeFromCoreDump().
+    void TSecretString::Init(TStringBuf value) {
+        Clear();
+
+        if (!value.empty()) {
+            Value_ = value;
+            MadviseExcludeFromCoreDump(Value_);
+        }
+    }
+
+    // Overwrites the stored bytes with zeros, passes the string to
+    // MadviseIncludeIntoCoreDump() and then empties it. Does nothing when
+    // there is no stored value.
+    void TSecretString::Clear() {
+        if (!Value_.empty()) {
+            // The bytes are zeroed in place, hence the cast away from const.
+            void* const bytes = static_cast<void*>(const_cast<char*>(Value_.data()));
+            SecureZero(bytes, Value_.size());
+            MadviseIncludeIntoCoreDump(Value_);
+            Value_.clear();
+        }
+    }
+}
diff --git a/library/cpp/string_utils/secret_string/secret_string.h b/library/cpp/string_utils/secret_string/secret_string.h
new file mode 100644
index 0000000000..fdb9f6a85c
--- /dev/null
+++ b/library/cpp/string_utils/secret_string/secret_string.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <library/cpp/string_utils/ztstrbuf/ztstrbuf.h>
+
+#include <util/generic/string.h>
+
+namespace NSecretString {
+    /**
+     * TSecretString allows storing long-lived secrets in "secure" in-memory storage.
+     * Common usage:
+     *  1) read the secret value from disk/env/etc
+     *  2) put it into a TSecretString
+     *  3) destroy the copy of the secret obtained in 1)
+     *
+     * Useful scenarios for TSecretString:
+     *  - in-memory-only tasks: using a key to create a crypto signature;
+     *  - rare network cases: a db password on connection or an OAuth token in background tasks.
+     *    These cases disclose the secret
+     *     because it is sent over the network by some I/O framework.
+     *    Such frameworks usually copy their input params to implement the network protocol: gRPC, for example.
+     *
+     * Supported features:
+     *  1. Exclude the secret from core dumps.
+     *     madvise(MADV_DONTDUMP) in ctor excludes the full memory page from core dump.
+     *     madvise(MADV_DODUMP) in dtor reverts the previous action.
+     *  2. Zero the memory before it is freed.
+     *
+     * A core dump looks like this:
+(gdb) print s
+$1 = (const TSecretString &) @0x7fff23c4c560: {
+ Value_ = {<TStringBase<TBasicString<char, std::__y1::char_traits<char> >, char, std::__y1::char_traits<char> >> = {
+ static npos = <optimized out>}, Data_ = 0x107c001d8 <error: Cannot access memory at address 0x107c001d8>}}
+     */
+
+    class TSecretString {
+    public:
+        // --- construction / destruction ---
+        TSecretString() = default;
+        TSecretString(TStringBuf value);
+
+        // Copy construction: stores its own copy of the value held by `o`.
+        TSecretString(const TSecretString& o)
+            : TSecretString(o.Value())
+        {
+        }
+
+        // "Move" construction: copies the value held by `o`, then wipes `o`.
+        TSecretString(TSecretString&& o)
+            : TSecretString(o.Value())
+        {
+            o.Clear();
+        }
+
+        ~TSecretString();
+
+        // --- assignment ---
+        TSecretString& operator=(const TSecretString& o);
+        TSecretString& operator=(TSecretString&& o);
+        TSecretString& operator=(const TStringBuf o);
+
+        // --- access ---
+        // Provides zero terminated string
+        TZtStringBuf Value() const {
+            return TZtStringBuf(Value_);
+        }
+
+        // Implicit conversion; yields the same view as Value().
+        operator TZtStringBuf() const {
+            return Value();
+        }
+
+    private:
+        // TStringBuf breaks Copy-On-Write to provide correct copy-ctor and copy-assignment
+        void Init(TStringBuf value);
+        void Clear();
+
+        TString Value_;
+    };
+}
diff --git a/library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp b/library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp
new file mode 100644
index 0000000000..681b75368f
--- /dev/null
+++ b/library/cpp/string_utils/secret_string/ut/secret_string_ut.cpp
@@ -0,0 +1,147 @@
+#include <library/cpp/string_utils/secret_string/secret_string.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NSecretString;
+
+Y_UNIT_TEST_SUITE(SecretTest) {
+    // Default-constructed and value-constructed secrets expose the expected text.
+    Y_UNIT_TEST(Common) {
+        TSecretString blank;
+        UNIT_ASSERT_VALUES_EQUAL("", blank.Value());
+        UNIT_ASSERT_VALUES_EQUAL("", static_cast<TStringBuf>(blank));
+
+        TSecretString filled("qwerty");
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", filled.Value());
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", static_cast<TStringBuf>(filled));
+    }
+
+    // Destroying a copy must leave the source intact.
+    Y_UNIT_TEST(CopyCtor1) {
+        TSecretString source("qwerty");
+
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+
+        {
+            TSecretString duplicate(source);
+            UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+            UNIT_ASSERT_VALUES_EQUAL("qwerty", duplicate.Value());
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+    }
+
+    // Destroying the source must leave the copy intact.
+    Y_UNIT_TEST(CopyCtor2) {
+        auto source = MakeHolder<TSecretString>("qwerty");
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source->Value());
+
+        TSecretString duplicate(*source);
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source->Value());
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", duplicate.Value());
+
+        source.Reset();
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", duplicate.Value());
+    }
+
+    // Move construction empties the source; the source stays empty afterwards.
+    Y_UNIT_TEST(MoveCtor1) {
+        TSecretString source("qwerty");
+
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+
+        {
+            TSecretString target(std::move(source));
+            UNIT_ASSERT_VALUES_EQUAL("", source.Value());
+            UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL("", source.Value());
+    }
+
+    // The move target survives destruction of the moved-from object.
+    Y_UNIT_TEST(MoveCtor2) {
+        auto source = MakeHolder<TSecretString>("qwerty");
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source->Value());
+
+        TSecretString target(std::move(*source));
+        UNIT_ASSERT_VALUES_EQUAL("", source->Value());
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+
+        source.Reset();
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+    }
+
+    // Copy assignment into an empty secret; destroying the target keeps the source.
+    Y_UNIT_TEST(CopyAssignment1) {
+        TSecretString source("qwerty");
+
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+
+        {
+            TSecretString target;
+            UNIT_ASSERT_VALUES_EQUAL("", target.Value());
+
+            target = source;
+            UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+            UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+    }
+
+    // Copy assignment: target outlives the source, and assigning an empty secret clears it.
+    Y_UNIT_TEST(CopyAssignment2) {
+        auto source = MakeHolder<TSecretString>("qwerty");
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source->Value());
+
+        TSecretString target;
+        UNIT_ASSERT_VALUES_EQUAL("", target.Value());
+
+        target = *source;
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source->Value());
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+
+        source.Reset();
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+
+        TSecretString blank;
+        target = blank;
+        UNIT_ASSERT_VALUES_EQUAL("", target.Value());
+    }
+
+    // Move assignment empties the source; the source stays empty afterwards.
+    Y_UNIT_TEST(MoveAssignment1) {
+        TSecretString source("qwerty");
+
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source.Value());
+
+        {
+            TSecretString target;
+            UNIT_ASSERT_VALUES_EQUAL("", target.Value());
+
+            target = std::move(source);
+            UNIT_ASSERT_VALUES_EQUAL("", source.Value());
+            UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL("", source.Value());
+    }
+
+    // Move assignment: target outlives the source, and moving in an empty secret clears it.
+    Y_UNIT_TEST(MoveAssignment2) {
+        auto source = MakeHolder<TSecretString>("qwerty");
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", source->Value());
+
+        TSecretString target;
+        UNIT_ASSERT_VALUES_EQUAL("", target.Value());
+
+        target = std::move(*source);
+        UNIT_ASSERT_VALUES_EQUAL("", source->Value());
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+
+        source.Reset();
+        UNIT_ASSERT_VALUES_EQUAL("qwerty", target.Value());
+
+        TSecretString blank;
+        target = std::move(blank);
+        UNIT_ASSERT_VALUES_EQUAL("", target.Value());
+    }
+
+    // The exposed view is NUL-terminated: strlen() over its data matches size().
+    Y_UNIT_TEST(ZeroTerminated) {
+        TSecretString secret("qwerty");
+
+        UNIT_ASSERT_VALUES_EQUAL(secret.Value().size(), strlen(secret.Value().data()));
+    }
+}
diff --git a/library/cpp/string_utils/tskv_format/builder.cpp b/library/cpp/string_utils/tskv_format/builder.cpp
new file mode 100644
index 0000000000..ede9074022
--- /dev/null
+++ b/library/cpp/string_utils/tskv_format/builder.cpp
@@ -0,0 +1 @@
+#include "builder.h"
diff --git a/library/cpp/string_utils/tskv_format/builder.h b/library/cpp/string_utils/tskv_format/builder.h
new file mode 100644
index 0000000000..40689ddc85
--- /dev/null
+++ b/library/cpp/string_utils/tskv_format/builder.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include "escape.h"
+
+#include <util/stream/str.h>
+
+namespace NTskvFormat {
+    /**
+     * Incrementally builds a TSKV log line in an internal string stream.
+     * Every mutating method returns *this, so calls can be chained.
+     */
+    class TLogBuilder {
+    public:
+        TLogBuilder() = default;
+
+        // Starts a record with the given format name and unixtime field.
+        TLogBuilder(TStringBuf logType, ui32 unixtime) {
+            Begin(logType, unixtime);
+        }
+
+        // Starts a record with the given format name only.
+        TLogBuilder(TStringBuf logType) {
+            Begin(logType);
+        }
+
+        // Writes the "tskv\ttskv_format=<logType>" header; logType is written as is.
+        TLogBuilder& Begin(TStringBuf logType) {
+            Out << "tskv\ttskv_format=" << logType;
+            return *this;
+        }
+
+        // Same header as above, followed by "\tunixtime=<unixtime>".
+        TLogBuilder& Begin(TStringBuf logType, ui32 unixtime) {
+            Begin(logType);
+            Out << "\tunixtime=" << unixtime;
+            return *this;
+        }
+
+        // Appends "name=value" with both parts escaped; a tab separator is
+        // written first unless the output is still empty.
+        TLogBuilder& Add(TStringBuf fieldName, TStringBuf fieldValue) {
+            AppendSeparator();
+            Escape(fieldName, Out.Str());
+            Out << '=';
+            Escape(fieldValue, Out.Str());
+
+            return *this;
+        }
+
+        // Appends "name=value" verbatim (no escaping), with the same separator rule as Add().
+        TLogBuilder& AddUnescaped(TStringBuf fieldName, TStringBuf fieldValue) {
+            AppendSeparator();
+            Out << fieldName << '=' << fieldValue;
+            return *this;
+        }
+
+        // Terminates the record with a newline.
+        TLogBuilder& End() {
+            Out << '\n';
+            return *this;
+        }
+
+        // Discards everything written so far.
+        TLogBuilder& Clear() {
+            Out.Clear();
+            return *this;
+        }
+
+        // Gives access to the accumulated text.
+        TString& Str() {
+            return Out.Str();
+        }
+
+    private:
+        // Writes the field separator unless nothing has been written yet.
+        void AppendSeparator() {
+            if (!Out.Empty()) {
+                Out << '\t';
+            }
+        }
+
+    private:
+        TStringStream Out;
+    };
+
+}
diff --git a/library/cpp/string_utils/tskv_format/escape.cpp b/library/cpp/string_utils/tskv_format/escape.cpp
new file mode 100644
index 0000000000..3dc78bec8c
--- /dev/null
+++ b/library/cpp/string_utils/tskv_format/escape.cpp
@@ -0,0 +1,112 @@
+#include <util/generic/yexception.h>
+#include "escape.h"
+
+namespace NTskvFormat {
+ namespace {
+        // Characters that must be escaped; the explicit length keeps the embedded '\0'.
+        const TStringBuf ESCAPE_CHARS("\t\n\r\\\0=\"", 7);
+
+        // Appends `len` bytes starting at `src` to `dst`, replacing every character
+        // listed in ESCAPE_CHARS with its two-character backslash sequence.
+        // Returns `dst`.
+        TString& EscapeImpl(const char* src, size_t len, TString& dst) {
+            const TStringBuf input(src, len);
+            size_t pos = 0;
+
+            while (pos < len) {
+                const size_t special = input.find_first_of(ESCAPE_CHARS, pos);
+
+                if (special == TStringBuf::npos) {
+                    // Nothing left to escape: copy the tail and stop.
+                    dst.append(src + pos, len - pos);
+                    break;
+                }
+
+                // Copy the plain run preceding the special character.
+                dst.append(src + pos, special - pos);
+
+                const char ch = src[special];
+                if (ch == '\t') {
+                    dst.append(TStringBuf("\\t"));
+                } else if (ch == '\n') {
+                    dst.append(TStringBuf("\\n"));
+                } else if (ch == '\r') {
+                    dst.append(TStringBuf("\\r"));
+                } else if (ch == '\0') {
+                    dst.append(TStringBuf("\\0"));
+                } else if (ch == '\\') {
+                    dst.append(TStringBuf("\\\\"));
+                } else if (ch == '=') {
+                    dst.append(TStringBuf("\\="));
+                } else if (ch == '"') {
+                    dst.append(TStringBuf("\\\""));
+                }
+
+                pos = special + 1;
+            }
+
+            return dst;
+        }
+
+        // Appends `len` bytes starting at `src` to `dst`, turning each backslash
+        // sequence back into the character it stands for. Returns `dst`.
+        // Throws yexception when a backslash is the last character or is followed
+        // by an unknown symbol; text decoded before the error stays in `dst`.
+        TString& UnescapeImpl(const char* src, const size_t len, TString& dst) {
+            const TStringBuf input(src, len);
+            size_t pos = 0;
+
+            while (pos < len) {
+                const size_t slash = input.find('\\', pos);
+
+                if (slash == TStringBuf::npos) {
+                    // No more escapes: copy the tail and stop.
+                    dst.append(src + pos, len - pos);
+                    break;
+                }
+
+                // Copy the plain run preceding the backslash.
+                dst.append(src + pos, slash - pos);
+
+                const size_t codePos = slash + 1;
+                if (codePos >= len) {
+                    throw yexception() << "expected (t|n|r|0|\\|=|\"|) after \\. Got end of line.";
+                }
+
+                char decoded;
+                switch (src[codePos]) {
+                    case 't':
+                        decoded = '\t';
+                        break;
+                    case 'n':
+                        decoded = '\n';
+                        break;
+                    case 'r':
+                        decoded = '\r';
+                        break;
+                    case '0':
+                        decoded = '\0';
+                        break;
+                    case '\\':
+                        decoded = '\\';
+                        break;
+                    case '=':
+                        decoded = '=';
+                        break;
+                    case '"':
+                        decoded = '"';
+                        break;
+                    default:
+                        throw yexception() << "unexpected symbol '" << src[codePos] << "' after \\";
+                }
+                dst.append(decoded);
+
+                // Skip both the backslash and the escape code.
+                pos = slash + 2;
+            }
+
+            return dst;
+        }
+
+ }
+
+    // Appends the escaped form of `src` to `dst` and returns `dst`.
+    TString& Escape(const TStringBuf& src, TString& dst) {
+        const char* const begin = src.data();
+        const size_t length = src.size();
+        return EscapeImpl(begin, length, dst);
+    }
+
+    // Appends the unescaped form of `src` to `dst` and returns `dst`.
+    TString& Unescape(const TStringBuf& src, TString& dst) {
+        const char* const begin = src.data();
+        const size_t length = src.size();
+        return UnescapeImpl(begin, length, dst);
+    }
+
+}
diff --git a/library/cpp/string_utils/tskv_format/escape.h b/library/cpp/string_utils/tskv_format/escape.h
new file mode 100644
index 0000000000..2e3dd02c98
--- /dev/null
+++ b/library/cpp/string_utils/tskv_format/escape.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+namespace NTskvFormat {
+ // Appends `src` to `dst`, replacing tab, newline, carriage return, backslash,
+ // NUL, '=' and '"' with two-character backslash sequences. Returns `dst`.
+ TString& Escape(const TStringBuf& src, TString& dst);
+ // Appends `src` to `dst` with backslash sequences decoded. Returns `dst`.
+ // Throws yexception on a trailing backslash or an unknown escape code.
+ TString& Unescape(const TStringBuf& src, TString& dst);
+
+}
diff --git a/library/cpp/string_utils/tskv_format/tskv_map.cpp b/library/cpp/string_utils/tskv_format/tskv_map.cpp
new file mode 100644
index 0000000000..99e5f19731
--- /dev/null
+++ b/library/cpp/string_utils/tskv_format/tskv_map.cpp
@@ -0,0 +1,60 @@
+#include "tskv_map.h"
+
+namespace {
+    // Splits `kv` at the first '=' that is not preceded by a backslash:
+    // `key` receives the part before it, `value` the part after it.
+    // `keyHasEscapes` is set to true when a backslash was seen while scanning the key.
+    // Throws yexception when no such '=' exists.
+    void Split(const TStringBuf& kv, TStringBuf& key, TStringBuf& value, bool& keyHasEscapes) {
+        keyHasEscapes = false;
+
+        size_t pos = 0;
+        while (pos < kv.size() && kv[pos] != '=') {
+            if (kv[pos] == '\\') {
+                // Skip the escaped character so that "\=" is not taken as the delimiter.
+                ++pos;
+                keyHasEscapes = true;
+            }
+            ++pos;
+        }
+
+        if (pos >= kv.size()) {
+            throw yexception() << "Incorrect tskv format";
+        }
+
+        key = kv.Head(pos);
+        value = kv.Tail(pos + 1);
+    }
+
+    // Unescapes `token` onto the end of `buffer` and returns a view of just the
+    // newly appended part. The view points into `buffer`.
+    TStringBuf DeserializeTokenToBuffer(const TStringBuf& token, TString& buffer) {
+        const size_t appendedFrom = buffer.size();
+        NTskvFormat::Unescape(token, buffer);
+
+        const TStringBuf whole(buffer);
+        return whole.Tail(appendedFrom);
+    }
+
+    // Stores `token` into `result`: verbatim when `unescape` is false,
+    // otherwise with backslash sequences decoded.
+    void DeserializeTokenToString(const TStringBuf& token, TString& result, bool unescape) {
+        if (!unescape) {
+            result = token;
+            return;
+        }
+
+        result.clear();
+        NTskvFormat::Unescape(token, result);
+    }
+}
+
+// Splits `kv` into `key` and `value`. With `unescape` set, a part that contains
+// escapes is decoded onto the end of `buffer` and the corresponding output then
+// views `buffer`; a part without escapes keeps viewing `kv`.
+// NOTE(review): the key view is taken before the value is appended to `buffer`,
+// so `buffer` presumably must already have enough capacity — confirm with callers.
+void NTskvFormat::NDetail::DeserializeKvToStringBufs(const TStringBuf& kv, TStringBuf& key, TStringBuf& value, TString& buffer, bool unescape) {
+    bool keyHasEscapes = false;
+    Split(kv, key, value, keyHasEscapes);
+
+    if (!unescape) {
+        return;
+    }
+
+    if (keyHasEscapes) {
+        key = DeserializeTokenToBuffer(key, buffer);
+    }
+    if (value.Contains('\\')) {
+        value = DeserializeTokenToBuffer(value, buffer);
+    }
+}
+
+// Splits `kv` and stores the two parts into the owning strings `key` and
+// `value`, decoding escapes when `unescape` is set.
+void NTskvFormat::NDetail::DeserializeKvToStrings(const TStringBuf& kv, TString& key, TString& value, bool unescape) {
+    TStringBuf rawKey;
+    TStringBuf rawValue;
+    bool keyHasEscapes = false;
+    Split(kv, rawKey, rawValue, keyHasEscapes);
+
+    // Both parts go through the same path, so the flag is not needed here.
+    Y_UNUSED(keyHasEscapes);
+
+    DeserializeTokenToString(rawKey, key, unescape);
+    DeserializeTokenToString(rawValue, value, unescape);
+}
diff --git a/library/cpp/string_utils/tskv_format/tskv_map.h b/library/cpp/string_utils/tskv_format/tskv_map.h
new file mode 100644
index 0000000000..4f4978fcf5
--- /dev/null
+++ b/library/cpp/string_utils/tskv_format/tskv_map.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "escape.h"
+#include <util/string/cast.h>
+#include <util/string/split.h>
+
+namespace NTskvFormat {
+ namespace NDetail {
+ // Splits one "key=value" token into string views; decoded parts are appended to `buffer`.
+ void DeserializeKvToStringBufs(const TStringBuf& kv, TStringBuf& key, TStringBuf& value, TString& buffer, bool unescape);
+ // Splits one "key=value" token into owning strings.
+ void DeserializeKvToStrings(const TStringBuf& kv, TString& key, TString& value, bool unescape);
+ }
+
+    // Overwrites `result` with the entries of `data` written as escaped
+    // "key=value" pairs joined by tabs. Returns `result`.
+    template <typename T>
+    TString& SerializeMap(const T& data, TString& result) {
+        result.clear();
+
+        for (const auto& entry : data) {
+            // Separator goes before every pair except the first one.
+            if (!result.empty()) {
+                result.push_back('\t');
+            }
+
+            Escape(ToString(entry.first), result);
+            result.push_back('=');
+            Escape(ToString(entry.second), result);
+        }
+
+        return result;
+    }
+
+    /**
+     * Deserializing to TStringBuf is faster, but the stored views point either
+     * into `data' or into `buffer': neither may be invalidated while `result'
+     * is still in use.
+     */
+    template <typename T>
+    void DeserializeMap(const TStringBuf& data, T& result, TString& buffer, bool unescape = true) {
+        result.clear();
+        buffer.clear();
+        // Reserve the full input length up front; the assertion below checks
+        // that the decoded text stayed within it.
+        buffer.reserve(data.size());
+
+        StringSplitter(data.begin(), data.end()).Split('\t').Consume([&](const TStringBuf kv){
+            TStringBuf key;
+            TStringBuf value;
+            NDetail::DeserializeKvToStringBufs(kv, key, value, buffer, unescape);
+            result[key] = value;
+        });
+
+        Y_ASSERT(buffer.size() <= data.size());
+    }
+
+    // Parses tab-separated "key=value" pairs from `data` into `result`.
+    // When the map stores TStringBuf keys or values, decoding needs backing
+    // storage, so the call is forwarded to the buffer-taking overload using
+    // `result.DeserializeBuffer`. Otherwise keys and values are stored as strings.
+    template <typename T>
+    void DeserializeMap(const TStringBuf& data, T& result, bool unescape = true) {
+        constexpr bool storesViews = std::is_same<typename T::key_type, TStringBuf>::value ||
+                                     std::is_same<typename T::mapped_type, TStringBuf>::value;
+
+        if constexpr(storesViews)
+        {
+            DeserializeMap(data, result, result.DeserializeBuffer, unescape); // we can't unescape values w/o buffer
+            return;
+        }
+
+        result.clear();
+
+        TString key;
+        TString value;
+        StringSplitter(data.begin(), data.end()).Split('\t').Consume([&](const TStringBuf kv){
+            NDetail::DeserializeKvToStrings(kv, key, value, unescape);
+            result[key] = value;
+        });
+    }
+}