aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/minikql/mkql_string_util.cpp
diff options
context:
space:
mode:
authorvvvv <vvvv@yandex-team.com>2024-11-07 04:19:26 +0300
committervvvv <vvvv@yandex-team.com>2024-11-07 04:29:50 +0300
commit2661be00f3bc47590fda9218bf0386d6355c8c88 (patch)
tree3d316c07519191283d31c5f537efc6aabb42a2f0 /yql/essentials/minikql/mkql_string_util.cpp
parentcf2a23963ac10add28c50cc114fbf48953eca5aa (diff)
downloadydb-2661be00f3bc47590fda9218bf0386d6355c8c88.tar.gz
Moved yql/minikql YQL-19206
init [nodiff:caesar] commit_hash:d1182ef7d430ccf7e4d37ed933c7126d7bd5d6e4
Diffstat (limited to 'yql/essentials/minikql/mkql_string_util.cpp')
-rw-r--r--yql/essentials/minikql/mkql_string_util.cpp169
1 files changed, 169 insertions, 0 deletions
diff --git a/yql/essentials/minikql/mkql_string_util.cpp b/yql/essentials/minikql/mkql_string_util.cpp
new file mode 100644
index 0000000000..78adacc231
--- /dev/null
+++ b/yql/essentials/minikql/mkql_string_util.cpp
@@ -0,0 +1,169 @@
+#include "mkql_string_util.h"
+
+namespace NKikimr {
+namespace NMiniKQL {
+
+namespace {
+
+ui32 CheckedSum(ui32 one, ui32 two) {
+ if (ui64(one) + ui64(two) > ui64(std::numeric_limits<ui32>::max()))
+ ythrow yexception() << "Impossible to concat too large strings " << one << " and " << two << " bytes!";
+ return one + two;
+}
+
+}
+
+NUdf::TUnboxedValuePod AppendString(const NUdf::TUnboxedValuePod value, const NUdf::TStringRef ref)
+{
+ if (!ref.Size())
+ return value;
+
+ const auto& valueRef = value.AsStringRef();
+ if (!valueRef.Size())
+ return MakeString(ref);
+
+ const auto newSize = CheckedSum(valueRef.Size(), ref.Size());
+ if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
+ auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
+ const auto buf = result.AsStringRef().Data();
+ std::memcpy(buf, valueRef.Data(), valueRef.Size());
+ std::memcpy(buf + valueRef.Size(), ref.Data(), ref.Size());
+ return result;
+ } else {
+ if (value.IsString()) {
+ auto str = value.AsStringValue();
+ const ui32 offset = ref.Data() - str.Data();
+ if (str.Size() == valueRef.Size() + offset) {
+ if (str.TryExpandOn(ref.Size())) {
+ std::memcpy(str.Data() + offset + valueRef.Size(), ref.Data(), ref.Size());
+ return NUdf::TUnboxedValuePod(std::move(str), newSize, offset);
+ }
+ }
+ }
+
+ auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2);
+ NUdf::TStringValue str(data);
+ data->UnRef();
+ std::memcpy(str.Data(), valueRef.Data(), valueRef.Size());
+ std::memcpy(str.Data() + valueRef.Size(), ref.Data(), ref.Size());
+ return NUdf::TUnboxedValuePod(std::move(str));
+ }
+}
+
+NUdf::TUnboxedValuePod PrependString(const NUdf::TStringRef ref, const NUdf::TUnboxedValuePod value)
+{
+ if (!ref.Size())
+ return value;
+
+ const auto& valueRef = value.AsStringRef();
+ if (!valueRef.Size())
+ return MakeString(ref);
+
+ const auto newSize = CheckedSum(valueRef.Size(), ref.Size());
+ if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
+ auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
+ const auto buf = result.AsStringRef().Data();
+ std::memcpy(buf, ref.Data(), ref.Size());
+ std::memcpy(buf + ref.Size(), valueRef.Data(), valueRef.Size());
+ return result;
+ } else {
+ auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2);
+ NUdf::TStringValue str(data);
+ data->UnRef();
+ std::memcpy(str.Data(), ref.Data(), ref.Size());
+ std::memcpy(str.Data() + ref.Size(), valueRef.Data(), valueRef.Size());
+ value.DeleteUnreferenced();
+ return NUdf::TUnboxedValuePod(std::move(str));
+ }
+}
+
+NUdf::TUnboxedValuePod ConcatStrings(const NUdf::TUnboxedValuePod first, const NUdf::TUnboxedValuePod second)
+{
+ const auto& leftRef = first.AsStringRef();
+ if (!leftRef.Size())
+ return second;
+
+ const auto& rightRef = second.AsStringRef();
+ if (!rightRef.Size())
+ return first;
+
+ const auto newSize = CheckedSum(leftRef.Size(), rightRef.Size());
+ if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
+ auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
+ const auto buf = result.AsStringRef().Data();
+ std::memcpy(buf, leftRef.Data(), leftRef.Size());
+ std::memcpy(buf + leftRef.Size(), rightRef.Data(), rightRef.Size());
+ return result;
+ } else {
+ if (first.IsString()) {
+ auto str = first.AsStringValue();
+ const ui32 offset = leftRef.Data() - str.Data();
+ if (str.Size() == leftRef.Size() + offset) {
+ if (str.TryExpandOn(rightRef.Size())) {
+ std::memcpy(str.Data() + offset + leftRef.Size(), rightRef.Data(), rightRef.Size());
+ second.DeleteUnreferenced();
+ return NUdf::TUnboxedValuePod(std::move(str), newSize, offset);
+ }
+ }
+ }
+
+ auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2);
+ NUdf::TStringValue str(data);
+ data->UnRef();
+ std::memcpy(str.Data(), leftRef.Data(), leftRef.Size());
+ std::memcpy(str.Data() + leftRef.Size(), rightRef.Data(), rightRef.Size());
+ second.DeleteUnreferenced();
+ return NUdf::TUnboxedValuePod(std::move(str));
+ }
+}
+
+NUdf::TUnboxedValuePod SubString(const NUdf::TUnboxedValuePod value, ui32 offset, ui32 size)
+{
+ const auto& ref = value.AsStringRef();
+ if (size == 0U || ref.Size() <= offset) {
+ value.DeleteUnreferenced();
+ return NUdf::TUnboxedValuePod::Zero();
+ }
+
+ if (offset == 0U && ref.Size() <= size)
+ return value;
+
+ if (const auto newSize = std::min(ref.Size() - offset, size); newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
+ auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
+ std::memcpy(result.AsStringRef().Data(), ref.Data() + offset, newSize);
+ value.DeleteUnreferenced();
+ return result;
+ } else {
+ auto old = value.AsStringValue();
+ if (const auto newOffset = ui32(ref.Data() - old.Data()) + offset; NUdf::TUnboxedValuePod::OffsetLimit > newOffset)
+ return NUdf::TUnboxedValuePod(std::move(old), newSize, newOffset);
+
+ auto data = NUdf::TStringValue::AllocateData(newSize, newSize + (newSize >> 1U));
+ NUdf::TStringValue str(data);
+ data->UnRef();
+ std::memcpy(str.Data(), ref.Data() + offset, newSize);
+ return NUdf::TUnboxedValuePod(std::move(str));
+ }
+}
+
+NUdf::TUnboxedValuePod MakeString(const NUdf::TStringRef ref)
+{
+ if (ref.Size() <= NUdf::TUnboxedValuePod::InternalBufferSize)
+ return NUdf::TUnboxedValuePod::Embedded(ref);
+
+ NUdf::TStringValue str(ref.Size());
+ std::memcpy(str.Data(), ref.Data(), ref.Size());
+ return NUdf::TUnboxedValuePod(std::move(str));
+}
+
+NUdf::TUnboxedValuePod MakeStringNotFilled(ui32 size, ui32 pad)
+{
+ const auto fullSize = size + pad;
+ if (fullSize <= NUdf::TUnboxedValuePod::InternalBufferSize)
+ return NUdf::TUnboxedValuePod::Embedded(size);
+
+ return NUdf::TUnboxedValuePod(NUdf::TStringValue(fullSize), size);
+}
+
+}
+}