diff options
author | vvvv <vvvv@yandex-team.com> | 2024-11-07 04:19:26 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.com> | 2024-11-07 04:29:50 +0300 |
commit | 2661be00f3bc47590fda9218bf0386d6355c8c88 (patch) | |
tree | 3d316c07519191283d31c5f537efc6aabb42a2f0 /yql/essentials/minikql/mkql_string_util.cpp | |
parent | cf2a23963ac10add28c50cc114fbf48953eca5aa (diff) | |
download | ydb-2661be00f3bc47590fda9218bf0386d6355c8c88.tar.gz |
Moved yql/minikql YQL-19206
init
[nodiff:caesar]
commit_hash:d1182ef7d430ccf7e4d37ed933c7126d7bd5d6e4
Diffstat (limited to 'yql/essentials/minikql/mkql_string_util.cpp')
-rw-r--r-- | yql/essentials/minikql/mkql_string_util.cpp | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/yql/essentials/minikql/mkql_string_util.cpp b/yql/essentials/minikql/mkql_string_util.cpp new file mode 100644 index 0000000000..78adacc231 --- /dev/null +++ b/yql/essentials/minikql/mkql_string_util.cpp @@ -0,0 +1,169 @@ +#include "mkql_string_util.h" + +namespace NKikimr { +namespace NMiniKQL { + +namespace { + +ui32 CheckedSum(ui32 one, ui32 two) { + if (ui64(one) + ui64(two) > ui64(std::numeric_limits<ui32>::max())) + ythrow yexception() << "Impossible to concat too large strings " << one << " and " << two << " bytes!"; + return one + two; +} + +} + +NUdf::TUnboxedValuePod AppendString(const NUdf::TUnboxedValuePod value, const NUdf::TStringRef ref) +{ + if (!ref.Size()) + return value; + + const auto& valueRef = value.AsStringRef(); + if (!valueRef.Size()) + return MakeString(ref); + + const auto newSize = CheckedSum(valueRef.Size(), ref.Size()); + if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) { + auto result = NUdf::TUnboxedValuePod::Embedded(newSize); + const auto buf = result.AsStringRef().Data(); + std::memcpy(buf, valueRef.Data(), valueRef.Size()); + std::memcpy(buf + valueRef.Size(), ref.Data(), ref.Size()); + return result; + } else { + if (value.IsString()) { + auto str = value.AsStringValue(); + const ui32 offset = ref.Data() - str.Data(); + if (str.Size() == valueRef.Size() + offset) { + if (str.TryExpandOn(ref.Size())) { + std::memcpy(str.Data() + offset + valueRef.Size(), ref.Data(), ref.Size()); + return NUdf::TUnboxedValuePod(std::move(str), newSize, offset); + } + } + } + + auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2); + NUdf::TStringValue str(data); + data->UnRef(); + std::memcpy(str.Data(), valueRef.Data(), valueRef.Size()); + std::memcpy(str.Data() + valueRef.Size(), ref.Data(), ref.Size()); + return NUdf::TUnboxedValuePod(std::move(str)); + } +} + +NUdf::TUnboxedValuePod PrependString(const NUdf::TStringRef ref, const NUdf::TUnboxedValuePod value) +{ + if (!ref.Size()) + return value; + + const auto& valueRef = value.AsStringRef(); + if (!valueRef.Size()) + return MakeString(ref); + + const auto newSize = CheckedSum(valueRef.Size(), ref.Size()); + if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) { + auto result = NUdf::TUnboxedValuePod::Embedded(newSize); + const auto buf = result.AsStringRef().Data(); + std::memcpy(buf, ref.Data(), ref.Size()); + std::memcpy(buf + ref.Size(), valueRef.Data(), valueRef.Size()); + return result; + } else { + auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2); + NUdf::TStringValue str(data); + data->UnRef(); + std::memcpy(str.Data(), ref.Data(), ref.Size()); + std::memcpy(str.Data() + ref.Size(), valueRef.Data(), valueRef.Size()); + value.DeleteUnreferenced(); + return NUdf::TUnboxedValuePod(std::move(str)); + } +} + +NUdf::TUnboxedValuePod ConcatStrings(const NUdf::TUnboxedValuePod first, const NUdf::TUnboxedValuePod second) +{ + const auto& leftRef = first.AsStringRef(); + if (!leftRef.Size()) + return second; + + const auto& rightRef = second.AsStringRef(); + if (!rightRef.Size()) + return first; + + const auto newSize = CheckedSum(leftRef.Size(), rightRef.Size()); + if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) { + auto result = NUdf::TUnboxedValuePod::Embedded(newSize); + const auto buf = result.AsStringRef().Data(); + std::memcpy(buf, leftRef.Data(), leftRef.Size()); + std::memcpy(buf + leftRef.Size(), rightRef.Data(), rightRef.Size()); + return result; + } else { + if (first.IsString()) { + auto str = first.AsStringValue(); + const ui32 offset = leftRef.Data() - str.Data(); + if (str.Size() == leftRef.Size() + offset) { + if (str.TryExpandOn(rightRef.Size())) { + std::memcpy(str.Data() + offset + leftRef.Size(), rightRef.Data(), rightRef.Size()); + second.DeleteUnreferenced(); + return NUdf::TUnboxedValuePod(std::move(str), newSize, offset); + } + } + } + + auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2); + NUdf::TStringValue str(data); + data->UnRef(); + std::memcpy(str.Data(), leftRef.Data(), leftRef.Size()); + std::memcpy(str.Data() + leftRef.Size(), rightRef.Data(), rightRef.Size()); + second.DeleteUnreferenced(); + return NUdf::TUnboxedValuePod(std::move(str)); + } +} + +NUdf::TUnboxedValuePod SubString(const NUdf::TUnboxedValuePod value, ui32 offset, ui32 size) +{ + const auto& ref = value.AsStringRef(); + if (size == 0U || ref.Size() <= offset) { + value.DeleteUnreferenced(); + return NUdf::TUnboxedValuePod::Zero(); + } + + if (offset == 0U && ref.Size() <= size) + return value; + + if (const auto newSize = std::min(ref.Size() - offset, size); newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) { + auto result = NUdf::TUnboxedValuePod::Embedded(newSize); + std::memcpy(result.AsStringRef().Data(), ref.Data() + offset, newSize); + value.DeleteUnreferenced(); + return result; + } else { + auto old = value.AsStringValue(); + if (const auto newOffset = ui32(ref.Data() - old.Data()) + offset; NUdf::TUnboxedValuePod::OffsetLimit > newOffset) + return NUdf::TUnboxedValuePod(std::move(old), newSize, newOffset); + + auto data = NUdf::TStringValue::AllocateData(newSize, newSize + (newSize >> 1U)); + NUdf::TStringValue str(data); + data->UnRef(); + std::memcpy(str.Data(), ref.Data() + offset, newSize); + return NUdf::TUnboxedValuePod(std::move(str)); + } +} + +NUdf::TUnboxedValuePod MakeString(const NUdf::TStringRef ref) +{ + if (ref.Size() <= NUdf::TUnboxedValuePod::InternalBufferSize) + return NUdf::TUnboxedValuePod::Embedded(ref); + + NUdf::TStringValue str(ref.Size()); + std::memcpy(str.Data(), ref.Data(), ref.Size()); + return NUdf::TUnboxedValuePod(std::move(str)); +} + +NUdf::TUnboxedValuePod MakeStringNotFilled(ui32 size, ui32 pad) +{ + const auto fullSize = size + pad; + if (fullSize <= NUdf::TUnboxedValuePod::InternalBufferSize) + return NUdf::TUnboxedValuePod::Embedded(size); + + return NUdf::TUnboxedValuePod(NUdf::TStringValue(fullSize), size); +} + +} +} |