diff options
author | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2024-11-14 17:24:46 +0300 |
---|---|---|
committer | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2024-11-14 17:24:46 +0300 |
commit | ce6c4c6e7787232094224395b84e7756f1f882e9 (patch) | |
tree | aaca1af71a72dbe25e96f3fcbb38afc9f95e2d44 | |
parent | f438674be1e4459d82411b76d7112fa14fb71b67 (diff) | |
parent | caec531a3fa9fa1aa41cedffbe8e3e9022f28278 (diff) | |
download | ydb-ce6c4c6e7787232094224395b84e7756f1f882e9.tar.gz |
Merge branch 'rightlib' into mergelibs-yurchuk-manual
-rw-r--r-- | build/mapping.conf.json | 6 | ||||
-rw-r--r-- | library/python/testing/yatest_common/yatest/common/canonical.py | 7 | ||||
-rw-r--r-- | yql/essentials/core/yql_expr_optimize.cpp | 12 | ||||
-rw-r--r-- | yt/yt/library/decimal/decimal.cpp | 20 | ||||
-rw-r--r-- | yt/yt/library/decimal/decimal.h | 3 | ||||
-rw-r--r-- | yt/yt/library/formats/arrow_parser.cpp | 77 |
6 files changed, 95 insertions, 30 deletions
diff --git a/build/mapping.conf.json b/build/mapping.conf.json index d47037d119..9b007796f9 100644 --- a/build/mapping.conf.json +++ b/build/mapping.conf.json @@ -901,12 +901,15 @@ "7324461836": "https://devtools-registry.s3.yandex.net/7324461836", "7193803465": "https://devtools-registry.s3.yandex.net/7193803465", "7324464594": "https://devtools-registry.s3.yandex.net/7324464594", + "7458707245": "https://devtools-registry.s3.yandex.net/7458707245", "7414146467": "https://devtools-registry.s3.yandex.net/7414146467", "7442753753": "https://devtools-registry.s3.yandex.net/7442753753", "7406675906": "https://devtools-registry.s3.yandex.net/7406675906", "7442782962": "https://devtools-registry.s3.yandex.net/7442782962", "7406663741": "https://devtools-registry.s3.yandex.net/7406663741", + "7458616985": "https://devtools-registry.s3.yandex.net/7458616985", "7406665335": "https://devtools-registry.s3.yandex.net/7406665335", + "7458630270": "https://devtools-registry.s3.yandex.net/7458630270", "7193800506": "https://devtools-registry.s3.yandex.net/7193800506", "7324461714": "https://devtools-registry.s3.yandex.net/7324461714", "7193813071": "https://devtools-registry.s3.yandex.net/7193813071", @@ -1929,12 +1932,15 @@ "7324461836": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/0541e185-8261-4b07-9149-257f03a9c8ae/yfm-docs.tar", "7193803465": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/17df2ad2-24bc-49e8-8909-b58685dac393/yfm-docs.tar", "7324464594": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/32cc8c74-decd-44a8-bc8c-f8f0d7edfffe/yfm-docs.tar", + "7458707245": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/4e7df89f-3762-4eba-ba73-562ccaeae548/yfm-docs.tar", "7414146467": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/66167d72-07fa-444c-8493-dea0a39d034e/yfm-docs.tar", "7442753753": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/68e622e9-832b-4a30-81c5-a38c80bb0776/yfm-docs.tar", "7406675906": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/6bba4fa0-ac5e-4a8a-89df-ce9e09573567/yfm-docs.tar", "7442782962": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/6c14a83e-0f36-4397-92ae-c7f2fc1c69ce/yfm-docs.tar", "7406663741": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/6f3d31a2-e730-48d5-92b4-024148b5768a/yfm-docs.tar", + "7458616985": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/85261fb1-0e04-4e8d-b1d5-7042725f1345/yfm-docs.tar", "7406665335": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/895e2cbd-d4a8-4a5c-9cf5-5666e796f17a/yfm-docs.tar", + "7458630270": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/8bb29ea7-2952-4e33-b716-e6d188b5ded9/yfm-docs.tar", "7193800506": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/9be8ed55-d7f8-4029-a7fd-fbfa072b896f/yfm-docs.tar", "7324461714": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/b3543418-58d4-4e1c-b2be-43b55b035e91/yfm-docs.tar", "7193813071": "none-none-none-service_resources/TASKLET_EXECUTABLE/backup/b6531a79-b803-4672-a9e9-f9f348009f5f/yfm-docs.tar", diff --git a/library/python/testing/yatest_common/yatest/common/canonical.py b/library/python/testing/yatest_common/yatest/common/canonical.py index 8408e0f68f..57467b75d3 100644 --- a/library/python/testing/yatest_common/yatest/common/canonical.py +++ b/library/python/testing/yatest_common/yatest/common/canonical.py @@ -14,10 +14,11 @@ yatest_logger = logging.getLogger("ya.test") def _copy(src, dst, universal_lines=False): if universal_lines: - with open(dst, "wb") as f: + with open(dst, "wb") as f_dst: mode = "rbU" if six.PY2 else "rb" - for line in open(src, mode): - f.write(line) + with open(src, mode) as f_src: + for line in f_src: + f_dst.write(line) return shutil.copy(src, dst) diff --git a/yql/essentials/core/yql_expr_optimize.cpp b/yql/essentials/core/yql_expr_optimize.cpp index 28dcd18c12..91608eed80 100644 --- a/yql/essentials/core/yql_expr_optimize.cpp +++ b/yql/essentials/core/yql_expr_optimize.cpp @@ -4,6 +4,7 @@ #include "yql_expr_type_annotation.h" #include <yql/essentials/utils/log/log.h> +#include <yql/essentials/utils/log/profile.h> namespace NYql { @@ -343,7 +344,7 @@ namespace { } } - void VisitExprLambdasLastInternal(const TExprNode::TPtr& node, + void VisitExprLambdasLastInternal(const TExprNode::TPtr& node, const TExprVisitPtrFunc& preLambdaFunc, const TExprVisitPtrFunc& postLambdaFunc, TNodeSet& visitedNodes) @@ -357,9 +358,9 @@ namespace { VisitExprLambdasLastInternal(child, preLambdaFunc, postLambdaFunc, visitedNodes); } } - + preLambdaFunc(node); - + for (auto child : node->Children()) { if (child->IsLambda()) { VisitExprLambdasLastInternal(child, preLambdaFunc, postLambdaFunc, visitedNodes); @@ -536,7 +537,7 @@ IGraphTransformer::TStatus ExpandApply(const TExprNode::TPtr& input, TExprNode:: if (ctx.Step.IsDone(TExprStep::ExpandApplyForLambdas)) return IGraphTransformer::TStatus::Ok; - YQL_CLOG(DEBUG, Core) << "Start ExpandApply"; + YQL_PROFILE_SCOPE(DEBUG, "ExpandApply"); TOptimizeExprSettings settings(nullptr); auto ret = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, bool& changed, TExprContext& ctx) -> TExprNode::TPtr { if (node->Content() == "WithOptionalArgs") { @@ -858,7 +859,6 @@ IGraphTransformer::TStatus ExpandApply(const TExprNode::TPtr& input, TExprNode:: ctx.Step.Done(TExprStep::ExpandApplyForLambdas); } - YQL_CLOG(DEBUG, Core) << "Finish ExpandApply"; return ret; } @@ -910,7 +910,7 @@ void VisitExpr(const TExprNode& root, const TExprVisitRefFunc& preFunc, const TE void VisitExpr(const TExprNode::TPtr& root, const TExprVisitPtrFunc& func, TNodeSet& visitedNodes) { VisitExprInternal(root, func, {}, visitedNodes); } - + void VisitExprLambdasLast(const TExprNode::TPtr& root, const TExprVisitPtrFunc& preLambdaFunc, const TExprVisitPtrFunc& postLambdaFunc) { TNodeSet visitedNodes; diff --git a/yt/yt/library/decimal/decimal.cpp b/yt/yt/library/decimal/decimal.cpp index 3df4b44028..9a61ee585a 100644 --- a/yt/yt/library/decimal/decimal.cpp +++ b/yt/yt/library/decimal/decimal.cpp @@ -891,10 +891,28 @@ TStringBuf TDecimal::WriteBinary256(int precision, TValue256 value, char* buffer CheckDecimalIntBits<TValue256>(precision); YT_VERIFY(bufferLength >= resultLength); - DecimalIntegerToBinaryUnchecked(std::move(value), buffer); + DecimalIntegerToBinaryUnchecked(value, buffer); return TStringBuf{buffer, sizeof(TValue256)}; } +TStringBuf TDecimal::WriteBinary256Variadic(int precision, TValue256 value, char* buffer, size_t bufferLength) +{ + const size_t resultLength = GetValueBinarySize(precision); + switch (resultLength) { + case 4: + return WriteBinary32(precision, *reinterpret_cast<i32*>(value.Parts.data()), buffer, bufferLength); + case 8: + return WriteBinary64(precision, *reinterpret_cast<i64*>(value.Parts.data()), buffer, bufferLength); + case 16: + return WriteBinary128(precision, *reinterpret_cast<TValue128*>(value.Parts.data()), buffer, bufferLength); + case 32: + return WriteBinary256(precision, value, buffer, bufferLength); + default: + THROW_ERROR_EXCEPTION("Invalid precision %v", precision); + } +} + + template <typename T> Y_FORCE_INLINE void CheckBufferLength(int precision, size_t bufferLength) { diff --git a/yt/yt/library/decimal/decimal.h b/yt/yt/library/decimal/decimal.h index 27375d3904..1d28efe2ff 100644 --- a/yt/yt/library/decimal/decimal.h +++ b/yt/yt/library/decimal/decimal.h @@ -24,6 +24,7 @@ public: }; static_assert(sizeof(TValue128) == 2 * sizeof(ui64)); + //! Lower-endian representation of 256-bit decimal value. struct TValue256 { std::array<ui32, 8> Parts; @@ -64,6 +65,8 @@ public: // Writes either 32-bit, 64-bit or 128-bit binary value depending on precision, provided a TValue128. static TStringBuf WriteBinary128Variadic(int precision, TValue128 value, char* buffer, size_t bufferLength); + // Writes either 32-bit, 64-bit, 128-bit or 256-bit binary value depending on precision, provided a TValue256. + static TStringBuf WriteBinary256Variadic(int precision, TValue256 value, char* buffer, size_t bufferLength); static i32 ParseBinary32(int precision, TStringBuf buffer); static i64 ParseBinary64(int precision, TStringBuf buffer); diff --git a/yt/yt/library/formats/arrow_parser.cpp b/yt/yt/library/formats/arrow_parser.cpp index fb3846c1a6..e248240fd6 100644 --- a/yt/yt/library/formats/arrow_parser.cpp +++ b/yt/yt/library/formats/arrow_parser.cpp @@ -40,6 +40,28 @@ void ThrowOnError(const arrow::Status& status) } } +template <class TUnderlyingValueType> +TStringBuf SerializeDecimalBinary(const TStringBuf& value, int precision, char* buffer, size_t bufferLength) +{ + // NB: Arrow wire representation of Decimal128 is little-endian and (obviously) 128 bit, + // while YT in-memory representation of Decimal is big-endian, variadic-length of either 32 bit, 64 bit or 128 bit, + // and MSB-flipped to ensure lexical sorting order. + // Representation of Decimal256 is similar, but the upper limit for a length is 256 bit. + TUnderlyingValueType decimalValue; + YT_VERIFY(value.size() == sizeof(decimalValue)); + std::memcpy(&decimalValue, value.data(), value.size()); + + TStringBuf decimalBinary; + if constexpr (std::is_same_v<TUnderlyingValueType, TDecimal::TValue128>) { + decimalBinary = TDecimal::WriteBinary128Variadic(precision, decimalValue, buffer, bufferLength); + } else if constexpr (std::is_same_v<TUnderlyingValueType, TDecimal::TValue256>) { + decimalBinary = TDecimal::WriteBinary256Variadic(precision, decimalValue, buffer, bufferLength); + } else { + static_assert(std::is_same_v<TUnderlyingValueType, TDecimal::TValue256>, "Unexpected decimal type"); + } + return decimalBinary; +} + //////////////////////////////////////////////////////////////////////////////// class TArraySimpleVisitor @@ -291,28 +313,12 @@ private: } template <class TUnderlyingValueType> - TUnversionedValue MakeDecimalBinaryValue(const TStringBuf& value, i64 columnId, int precision) + TUnversionedValue MakeDecimalBinaryValue(const TStringBuf& arrowValue, i64 columnId, int precision) { - // NB: Arrow wire representation of Decimal128 is little-endian and (obviously) 128 bit, - // while YT in-memory representation of Decimal is big-endian, variadic-length of either 32 bit, 64 bit or 128 bit, - // and MSB-flipped to ensure lexical sorting order. - // Representation of Decimal256 is similar, but only 256 bits. - TUnderlyingValueType decimalValue; - YT_VERIFY(value.size() == sizeof(decimalValue)); - std::memcpy(&decimalValue, value.data(), value.size()); - - const auto maxByteCount = sizeof(decimalValue); + const auto maxByteCount = sizeof(TUnderlyingValueType); char* buffer = BufferForStringLikeValues_->Preallocate(maxByteCount); - TStringBuf decimalBinary; - if constexpr (std::is_same_v<TUnderlyingValueType, TDecimal::TValue128>) { - decimalBinary = TDecimal::WriteBinary128Variadic(precision, decimalValue, buffer, maxByteCount); - } else if constexpr (std::is_same_v<TUnderlyingValueType, TDecimal::TValue256>) { - decimalBinary = TDecimal::WriteBinary256(precision, decimalValue, buffer, maxByteCount); - } else { - static_assert(std::is_same_v<TUnderlyingValueType, TDecimal::TValue256>, "Unexpected decimal type"); - } + auto decimalBinary = SerializeDecimalBinary<TUnderlyingValueType>(arrowValue, precision, buffer, maxByteCount); BufferForStringLikeValues_->Advance(decimalBinary.size()); - return MakeUnversionedStringValue(decimalBinary, columnId); } }; @@ -456,6 +462,20 @@ public: return ParseStruct(); } + arrow::Status Visit(const arrow::Decimal128Type& type) override + { + return ParseStringLikeArray<arrow::Decimal128Array>([&] (const TStringBuf& value) { + WriteDecimalBinary<TDecimal::TValue128>(value, type.precision()); + }); + } + + arrow::Status Visit(const arrow::Decimal256Type& type) override + { + return ParseStringLikeArray<arrow::Decimal256Array>([&] (const TStringBuf& value) { + WriteDecimalBinary<TDecimal::TValue256>(value, type.precision()); + }); + } + private: const int RowIndex_; @@ -506,12 +526,20 @@ private: template <typename ArrayType> arrow::Status ParseStringLikeArray() { + return ParseStringLikeArray<ArrayType>([&] (const TStringBuf& value) { + Writer_->WriteBinaryString(value); + }); + } + + template <typename ArrayType> + arrow::Status ParseStringLikeArray(auto writeStringValue) + { auto array = std::static_pointer_cast<ArrayType>(Array_); if (array->IsNull(RowIndex_)) { Writer_->WriteEntity(); } else { auto element = array->GetView(RowIndex_); - Writer_->WriteBinaryString(TStringBuf(element.data(), element.size())); + writeStringValue(TStringBuf(element.data(), element.size())); } return arrow::Status::OK(); } @@ -610,6 +638,15 @@ private: } return arrow::Status::OK(); } + + template <class TUnderlyingType> + void WriteDecimalBinary(TStringBuf arrowValue, int precision) + { + const auto maxByteCount = sizeof(TUnderlyingType); + char buffer[maxByteCount]; + auto decimalBinary = SerializeDecimalBinary<TUnderlyingType>(arrowValue, precision, buffer, maxByteCount); + Writer_->WriteBinaryString(decimalBinary); + } }; //////////////////////////////////////////////////////////////////////////////// |