diff options
| author | atarasov5 <[email protected]> | 2026-01-19 11:04:24 +0300 |
|---|---|---|
| committer | atarasov5 <[email protected]> | 2026-01-19 11:45:10 +0300 |
| commit | 15a5dcdc957b7700cfb15e3f0b4fd5e5e94de9a7 (patch) | |
| tree | f4306a04f7e31698ef6a0577b7a4bc7e733eb362 | |
| parent | ce297ea2facfd2c98298486d4c3535bc00d05e66 (diff) | |
YQL-13448: Introduce window RANGE logic
Introduce window RANGE logic.
Зеркальный PR с включением фичи путём инлайнинга в коде: <https://nda.ya.ru/t/p2qAEoNq7SNwUJ>
Зеркальный PR, где я провожу perf-измерения: <https://nda.ya.ru/t/7UdlI38n7SNwUL> — в нём я описал результаты.
commit_hash:2626d7d6b77f1ccb31e395d974a2beaa60f27a97
165 files changed, 10850 insertions, 545 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index 79448b1fdd9..89989eddc4f 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -3102,8 +3102,6 @@ TExprNode::TPtr DoNormalizeFrames(const TExprNode::TPtr& frames, TExprContext& c TWindowFrameSettings frameSettings = TWindowFrameSettings::Parse(*winOn, ctx); if (frameSettings.GetFrameType() == EFrameType::FrameByRange) { - YQL_ENSURE(IsUnbounded(frameSettings.GetFirst())); - YQL_ENSURE(IsCurrentRow(frameSettings.GetLast())); continue; } diff --git a/yql/essentials/core/sql_types/sort_order.h b/yql/essentials/core/sql_types/sort_order.h new file mode 100644 index 00000000000..d0c0c87b151 --- /dev/null +++ b/yql/essentials/core/sql_types/sort_order.h @@ -0,0 +1,44 @@ +#pragma once + +#include <util/stream/output.h> +#include <util/string/cast.h> + +namespace NYql { + +enum class ESortOrder { + Asc, + Desc, + Unimportant, +}; + +inline IOutputStream& operator<<(IOutputStream& out, ESortOrder order) { + switch (order) { + case ESortOrder::Asc: + return out << "Asc"; + case ESortOrder::Desc: + return out << "Desc"; + case ESortOrder::Unimportant: + return out << "Unimportant"; + } +} + +inline bool TryFromString(TStringBuf str, ESortOrder& order) { + if (str == "Asc") { + order = ESortOrder::Asc; + return true; + } else if (str == "Desc") { + order = ESortOrder::Desc; + return true; + } else if (str == "Unimportant") { + order = ESortOrder::Unimportant; + return true; + } + return false; +} + +} // namespace NYql + +template <> +inline bool TryFromStringImpl<NYql::ESortOrder>(const char* data, size_t len, NYql::ESortOrder& result) { + return NYql::TryFromString(TStringBuf(data, len), result); +} diff --git a/yql/essentials/core/sql_types/ut/ya.make b/yql/essentials/core/sql_types/ut/ya.make index 363e4dc4a44..f56a42f835b 100644 --- a/yql/essentials/core/sql_types/ut/ya.make +++ 
b/yql/essentials/core/sql_types/ut/ya.make @@ -3,6 +3,8 @@ UNITTEST_FOR(yql/essentials/core/sql_types) SRCS( match_recognize_ut.cpp normalize_name_ut.cpp + window_frame_bounds_ut.cpp + window_number_and_direction_ut.cpp ) PEERDIR( diff --git a/yql/essentials/core/sql_types/window_direction.h b/yql/essentials/core/sql_types/window_direction.h new file mode 100644 index 00000000000..88b2b81c5cd --- /dev/null +++ b/yql/essentials/core/sql_types/window_direction.h @@ -0,0 +1,52 @@ +#pragma once + +#include <util/stream/output.h> +#include <util/string/cast.h> + +namespace NYql::NWindow { + +enum class EDirection { + Preceding, + Following, +}; + +constexpr EDirection InvertDirection(EDirection direction) { + return direction == EDirection::Preceding ? EDirection::Following : EDirection::Preceding; +} + +inline IOutputStream& operator<<(IOutputStream& out, EDirection direction) { + switch (direction) { + case EDirection::Preceding: + return out << "Preceding"; + case EDirection::Following: + return out << "Following"; + } +} + +inline TString DirectionToString(EDirection direction) { + switch (direction) { + case EDirection::Preceding: + return "Preceding"; + case EDirection::Following: + return "Following"; + } +} + +inline bool TryFromString(const TStringBuf& str, EDirection& direction) { + if (str == "Preceding") { + direction = EDirection::Preceding; + return true; + } else if (str == "Following") { + direction = EDirection::Following; + return true; + } + return false; +} + +} // namespace NYql::NWindow + +// Specialization for TryFromString support +template <> +inline bool TryFromStringImpl<NYql::NWindow::EDirection>(const char* data, size_t len, NYql::NWindow::EDirection& result) { + return NYql::NWindow::TryFromString(TStringBuf(data, len), result); +} diff --git a/yql/essentials/core/sql_types/window_frame_bounds.h b/yql/essentials/core/sql_types/window_frame_bounds.h new file mode 100644 index 00000000000..c86aa52a6e1 --- /dev/null +++ 
b/yql/essentials/core/sql_types/window_frame_bounds.h @@ -0,0 +1,230 @@ +#pragma once + +#include "window_number_and_direction.h" + +#include <util/generic/vector.h> +#include <util/generic/hash.h> + +namespace NYql::NWindow { + +template <typename T> +using TInputRange = TNumberAndDirection<T>; + +using TInputRow = TNumberAndDirection<ui64>; + +template <typename T> +class TWindowFrame { +public: + using TBoundType = T; + + TWindowFrame(T min, T max) + : Min_(std::move(min)) + , Max_(std::move(max)) + { + } + + const T& Min() const { + return Min_; + } + + const T& Max() const { + return Max_; + } + + T& Min() { + return Min_; + } + + T& Max() { + return Max_; + } + + bool operator==(const TWindowFrame&) const = default; + +private: + T Min_; + T Max_; +}; + +// Hash function for TNumberAndDirection. +template <typename T> +struct TNumberAndDirectionHash { + size_t operator()(const TNumberAndDirection<T>& value) const { + size_t hash = 0; + hash = CombineHashes(hash, THash<int>{}(static_cast<int>(value.GetDirection()))); + hash = CombineHashes(hash, THash<bool>{}(value.IsInf())); + if (!value.IsInf()) { + hash = CombineHashes(hash, THash<T>{}(value.GetUnderlyingValue())); + } + return hash; + } +}; + +// Hash function for TWindowFrame. 
+template <typename T> +struct TWindowFrameHash { + size_t operator()(const TWindowFrame<T>& frame) const { + TNumberAndDirectionHash<typename T::TNumberType> boundHash; + size_t hash = boundHash(frame.Min()); + hash = CombineHashes(hash, boundHash(frame.Max())); + return hash; + } +}; + +template <typename T> +using TInputRangeWindowFrame = TWindowFrame<TInputRange<T>>; + +using TInputRowWindowFrame = TWindowFrame<TInputRow>; + +using TRow = i64; + +class TRowWindowFrame: public TWindowFrame<TRow> { + using Base = TWindowFrame<TRow>; + +public: + using Base::operator=; + using Base::Base; + + TRow Size() const { + if (Max() >= Min()) { + return Max() - Min(); + } else { + return 0; + } + } + + bool Empty() const { + return Size() == 0; + } + static TRowWindowFrame CreateEmpty() { + return TRowWindowFrame(0, 0); + } +}; + +// Container for window frame bounds specifications. +// Stores multiple window frame intervals and delta specifications that define +// how window frames should be calculated for aggregation operations. +// +// Supports both range-based (value-based) and row-based (position-based) window frames. +// Each bound can be either a finite value or unbounded (infinity), allowing specifications like: +// - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +// - RANGE BETWEEN 10 PRECEDING AND 5 FOLLOWING +// - ROWS BETWEEN 2 PRECEDING AND UNBOUNDED FOLLOWING +// +// Unbounded values are represented using TNumberAndDirection with TUnbounded type, +// indicating infinite extent in the specified direction (Left for PRECEDING, Right for FOLLOWING). +template <typename TRangeElement> +class TCoreWinFrameCollectorBounds { +public: + // Handle that identifies a window frame specification. 
+ class THandle { + public: + THandle(size_t index, bool isRange, bool isIncremental) + : Index_(index) + , IsRange_(isRange) + , IsIncremental_(isIncremental) + { + } + + size_t Index() { + return Index_; + } + + bool IsRange() { + return IsRange_; + } + + bool IsIncremental() { + return IsIncremental_; + } + + private: + const size_t Index_; + const bool IsRange_; + const bool IsIncremental_; + }; + + TCoreWinFrameCollectorBounds(bool dedup) + : Dedup_(dedup) + { + } + + THandle AddRange(const TInputRangeWindowFrame<TRangeElement>& range) { + auto it = RangeIntervalsCache_.find(range); + if (Dedup_ && it != RangeIntervalsCache_.end()) { + return it->second; + } + RangeIntervals_.push_back(range); + THandle handle(RangeIntervals_.size() - 1, /*isRange=*/true, /*isIncremental=*/false); + RangeIntervalsCache_.emplace(range, handle); + return handle; + } + + THandle AddRow(const TInputRowWindowFrame& row) { + auto it = RowIntervalsCache_.find(row); + if (Dedup_ && it != RowIntervalsCache_.end()) { + return it->second; + } + RowIntervals_.push_back(row); + THandle handle(RowIntervals_.size() - 1, /*isRange=*/false, /*isIncremental=*/false); + RowIntervalsCache_.emplace(row, handle); + return handle; + } + + THandle AddRangeIncremental(const TInputRange<TRangeElement>& delta) { + auto it = RangeIncrementalsCache_.find(delta); + if (Dedup_ && it != RangeIncrementalsCache_.end()) { + return it->second; + } + RangeIncrementals_.push_back(delta); + THandle handle(RangeIncrementals_.size() - 1, /*isRange=*/true, /*isIncremental=*/true); + RangeIncrementalsCache_.emplace(delta, handle); + return handle; + } + + THandle AddRowIncremental(const TInputRow& delta) { + auto it = RowIncrementalsCache_.find(delta); + if (Dedup_ && it != RowIncrementalsCache_.end()) { + return it->second; + } + RowIncrementals_.push_back(delta); + THandle handle(RowIncrementals_.size() - 1, /*isRange=*/false, /*isIncremental=*/true); + RowIncrementalsCache_.emplace(delta, handle); + return handle; + 
} + + const TVector<TInputRangeWindowFrame<TRangeElement>>& RangeIntervals() const { + return RangeIntervals_; + } + + const TVector<TInputRowWindowFrame>& RowIntervals() const { + return RowIntervals_; + } + + const TVector<TInputRange<TRangeElement>>& RangeIncrementals() const { + return RangeIncrementals_; + } + + const TVector<TInputRow>& RowIncrementals() const { + return RowIncrementals_; + } + + bool Empty() const { + return RangeIntervals_.empty() && RowIntervals_.empty() && RangeIncrementals_.empty() && RowIncrementals_.empty(); + } + +private: + TVector<TInputRangeWindowFrame<TRangeElement>> RangeIntervals_; + TVector<TInputRowWindowFrame> RowIntervals_; + TVector<TInputRange<TRangeElement>> RangeIncrementals_; + TVector<TInputRow> RowIncrementals_; + + // Caches for deduplication. + bool Dedup_; + THashMap<TInputRangeWindowFrame<TRangeElement>, THandle, TWindowFrameHash<TInputRange<TRangeElement>>> RangeIntervalsCache_; + THashMap<TInputRowWindowFrame, THandle, TWindowFrameHash<TInputRow>> RowIntervalsCache_; + THashMap<TInputRange<TRangeElement>, THandle, TNumberAndDirectionHash<TRangeElement>> RangeIncrementalsCache_; + THashMap<TInputRow, THandle, TNumberAndDirectionHash<TInputRow::TNumberType>> RowIncrementalsCache_; +}; + +} // namespace NYql::NWindow diff --git a/yql/essentials/core/sql_types/window_frame_bounds_ut.cpp b/yql/essentials/core/sql_types/window_frame_bounds_ut.cpp new file mode 100644 index 00000000000..5e70b105e98 --- /dev/null +++ b/yql/essentials/core/sql_types/window_frame_bounds_ut.cpp @@ -0,0 +1,100 @@ +#include "window_frame_bounds.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <util/string/cast.h> +#include <util/random/random.h> +#include <util/string/printf.h> + +using namespace NYql::NWindow; + +Y_UNIT_TEST_SUITE(CoreWinFrameCollectorBoundsTest) { + +Y_UNIT_TEST(DifferentEntries_ReturnDifferentHandles) { + TCoreWinFrameCollectorBounds<i64> bounds(/*dedup=*/true); + + // AddRange + auto rangeHandle1 = 
bounds.AddRange(TInputRangeWindowFrame<i64>(TInputRange<i64>(10, EDirection::Preceding), TInputRange<i64>(5, EDirection::Following))); + auto rangeHandle2 = bounds.AddRange(TInputRangeWindowFrame<i64>(TInputRange<i64>(20, EDirection::Preceding), TInputRange<i64>(10, EDirection::Following))); + UNIT_ASSERT_UNEQUAL(rangeHandle1.Index(), rangeHandle2.Index()); + + // AddRow + auto rowHandle1 = bounds.AddRow(TInputRowWindowFrame(TInputRow(10, EDirection::Preceding), TInputRow(5, EDirection::Following))); + auto rowHandle2 = bounds.AddRow(TInputRowWindowFrame(TInputRow(20, EDirection::Preceding), TInputRow(10, EDirection::Following))); + UNIT_ASSERT_UNEQUAL(rowHandle1.Index(), rowHandle2.Index()); + + // AddRangeIncremental + auto rangeIncHandle1 = bounds.AddRangeIncremental(TInputRange<i64>(10, EDirection::Preceding)); + auto rangeIncHandle2 = bounds.AddRangeIncremental(TInputRange<i64>(20, EDirection::Following)); + UNIT_ASSERT_UNEQUAL(rangeIncHandle1.Index(), rangeIncHandle2.Index()); + + // AddRowIncremental + auto rowIncHandle1 = bounds.AddRowIncremental(TInputRow(10, EDirection::Preceding)); + auto rowIncHandle2 = bounds.AddRowIncremental(TInputRow(20, EDirection::Following)); + UNIT_ASSERT_UNEQUAL(rowIncHandle1.Index(), rowIncHandle2.Index()); +} + +Y_UNIT_TEST(DuplicateEntries_ReturnCachedHandles) { + TCoreWinFrameCollectorBounds<i64> bounds(/*dedup=*/true); + + // AddRange + auto rangeFrame = TInputRangeWindowFrame<i64>(TInputRange<i64>(10, EDirection::Preceding), TInputRange<i64>(5, EDirection::Following)); + auto rangeHandle1 = bounds.AddRange(rangeFrame); + auto rangeHandle2 = bounds.AddRange(rangeFrame); + UNIT_ASSERT_EQUAL(rangeHandle1.Index(), rangeHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RangeIntervals().size(), 1); + + // AddRow + auto rowFrame = TInputRowWindowFrame(TInputRow(10, EDirection::Preceding), TInputRow(5, EDirection::Following)); + auto rowHandle1 = bounds.AddRow(rowFrame); + auto rowHandle2 = bounds.AddRow(rowFrame); + 
UNIT_ASSERT_EQUAL(rowHandle1.Index(), rowHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RowIntervals().size(), 1); + + // AddRangeIncremental + auto rangeDelta = TInputRange<i64>(10, EDirection::Preceding); + auto rangeIncHandle1 = bounds.AddRangeIncremental(rangeDelta); + auto rangeIncHandle2 = bounds.AddRangeIncremental(rangeDelta); + UNIT_ASSERT_EQUAL(rangeIncHandle1.Index(), rangeIncHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RangeIncrementals().size(), 1); + + // AddRowIncremental + auto rowDelta = TInputRow(10, EDirection::Preceding); + auto rowIncHandle1 = bounds.AddRowIncremental(rowDelta); + auto rowIncHandle2 = bounds.AddRowIncremental(rowDelta); + UNIT_ASSERT_EQUAL(rowIncHandle1.Index(), rowIncHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RowIncrementals().size(), 1); +} + +Y_UNIT_TEST(DuplicateEntries_NoDedupFlag_NoCaching) { + TCoreWinFrameCollectorBounds<i64> bounds(/*dedup=*/false); + + // AddRange - duplicates should create new entries + auto rangeFrame = TInputRangeWindowFrame<i64>(TInputRange<i64>(10, EDirection::Preceding), TInputRange<i64>(5, EDirection::Following)); + auto rangeHandle1 = bounds.AddRange(rangeFrame); + auto rangeHandle2 = bounds.AddRange(rangeFrame); + UNIT_ASSERT_UNEQUAL(rangeHandle1.Index(), rangeHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RangeIntervals().size(), 2); + + // AddRow - duplicates should create new entries + auto rowFrame = TInputRowWindowFrame(TInputRow(10, EDirection::Preceding), TInputRow(5, EDirection::Following)); + auto rowHandle1 = bounds.AddRow(rowFrame); + auto rowHandle2 = bounds.AddRow(rowFrame); + UNIT_ASSERT_UNEQUAL(rowHandle1.Index(), rowHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RowIntervals().size(), 2); + + // AddRangeIncremental - duplicates should create new entries + auto rangeDelta = TInputRange<i64>(10, EDirection::Preceding); + auto rangeIncHandle1 = bounds.AddRangeIncremental(rangeDelta); + auto rangeIncHandle2 = bounds.AddRangeIncremental(rangeDelta); + 
UNIT_ASSERT_UNEQUAL(rangeIncHandle1.Index(), rangeIncHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RangeIncrementals().size(), 2); + + // AddRowIncremental - duplicates should create new entries + auto rowDelta = TInputRow(10, EDirection::Preceding); + auto rowIncHandle1 = bounds.AddRowIncremental(rowDelta); + auto rowIncHandle2 = bounds.AddRowIncremental(rowDelta); + UNIT_ASSERT_UNEQUAL(rowIncHandle1.Index(), rowIncHandle2.Index()); + UNIT_ASSERT_EQUAL(bounds.RowIncrementals().size(), 2); +} + +} // Y_UNIT_TEST_SUITE(CoreWinFrameCollectorBoundsTest) diff --git a/yql/essentials/core/sql_types/window_frames_collector_params.h b/yql/essentials/core/sql_types/window_frames_collector_params.h new file mode 100644 index 00000000000..6aa52e3033a --- /dev/null +++ b/yql/essentials/core/sql_types/window_frames_collector_params.h @@ -0,0 +1,39 @@ +#pragma once + +#include "window_frame_bounds.h" + +#include <yql/essentials/core/sql_types/sort_order.h> + +namespace NYql::NWindow { + +template <typename TRangeType> +class TCoreWinFramesCollectorParams { +public: + TCoreWinFramesCollectorParams(TCoreWinFrameCollectorBounds<TRangeType> bounds, ESortOrder sortOrder, const TString& sortColumnName) + : Bounds_(std::move(bounds)) + , SortOrder_(sortOrder) + , SortColumnName_(sortColumnName) + { + } + + const TCoreWinFrameCollectorBounds<TRangeType>& GetBounds() const { + return Bounds_; + } + + ESortOrder GetSortOrder() const { + return SortOrder_; + } + + TStringBuf GetSortColumnName() const { + return SortColumnName_; + } + +private: + TCoreWinFrameCollectorBounds<TRangeType> Bounds_; + ESortOrder SortOrder_; + TString SortColumnName_; +}; + +using TStringCoreWinFramesCollectorParams = TCoreWinFramesCollectorParams<TString>; + +} // namespace NYql::NWindow diff --git a/yql/essentials/core/sql_types/window_number_and_direction.h b/yql/essentials/core/sql_types/window_number_and_direction.h new file mode 100644 index 00000000000..4ca9f689cf2 --- /dev/null +++ 
b/yql/essentials/core/sql_types/window_number_and_direction.h @@ -0,0 +1,145 @@ +#pragma once + +#include <yql/essentials/public/decimal/yql_decimal.h> +#include <yql/essentials/core/sql_types/window_direction.h> +#include <yql/essentials/utils/yql_panic.h> + +#include <util/system/types.h> +#include <util/system/yassert.h> + +#include <variant> +#include <compare> +#include <cmath> + +namespace NYql::NWindow { + +template <typename T> +class TNumberAndDirection { +public: + static inline constexpr bool IsArithmetic = std::is_arithmetic_v<T>; + + struct TUnbounded { + bool operator==(const TUnbounded&) const = default; + }; + + using TValueType = std::variant<T, TUnbounded>; + using TNumberType = T; + + TNumberAndDirection(TValueType value, EDirection direction) + : Value_(value) + , Direction_(direction) + { + YQL_ENSURE(IsInf() || GetUnderlyingValue() >= 0, "Only positive values are allowed."); + if constexpr (std::is_floating_point_v<TNumberType>) { + if (!IsInf()) { + Y_ABORT_UNLESS(!std::isnan(GetUnderlyingValue()), "Nan is not allowed to be a directioned value."); + } + } + if constexpr (IsArithmetic) { + // Normalize zero value to prevent two possible interpretations. 
+ if (!IsInf() && GetUnderlyingValue() == 0) { + Direction_ = EDirection::Following; + } + } + } + + TNumberAndDirection(TNumberType value, EDirection direction) + : TNumberAndDirection(TValueType(value), direction) + { + } + + static TNumberAndDirection<T> Inf(EDirection direction) { + return TNumberAndDirection<T>(TUnbounded{}, direction); + } + + static TNumberAndDirection<T> Zero() + requires IsArithmetic + { + return TNumberAndDirection<T>(0, EDirection::Following); + } + + static TNumberAndDirection<T> Zero() + requires std::same_as<T, TString> + { + return TNumberAndDirection<T>("0", EDirection::Following); + } + + const T& GetUnderlyingValue() const { + return std::get<T>(Value_); + } + + bool IsInf() const { + return std::holds_alternative<TUnbounded>(Value_); + } + + EDirection GetDirection() const { + return Direction_; + } + + std::strong_ordering operator<=>(const TNumberAndDirection& other) const + requires(IsArithmetic) + { + if (Direction_ == EDirection::Preceding && other.Direction_ == EDirection::Following) { + return std::strong_ordering::less; + } + if (Direction_ == EDirection::Following && other.Direction_ == EDirection::Preceding) { + return std::strong_ordering::greater; + } + if (Direction_ == EDirection::Preceding) { + if (other.IsInf()) { + if (IsInf()) { + return std::strong_ordering::equivalent; + } else { + return std::strong_ordering::greater; + } + } else { + if (IsInf()) { + return std::strong_ordering::less; + } else { + return ToStrong(other.GetUnderlyingValue() <=> GetUnderlyingValue()); + } + } + + } else { + if (other.IsInf()) { + if (IsInf()) { + return std::strong_ordering::equivalent; + } else { + return std::strong_ordering::less; + } + } else { + if (IsInf()) { + return std::strong_ordering::greater; + } else { + return ToStrong(GetUnderlyingValue() <=> other.GetUnderlyingValue()); + } + } + } + } + + bool operator==(const TNumberAndDirection& other) const = default; + bool operator!=(const TNumberAndDirection& other) 
const = default; + +private: + static std::strong_ordering ToStrong(std::partial_ordering po) { + if (po < 0) { + return std::strong_ordering::less; + } + if (po > 0) { + return std::strong_ordering::greater; + } + if (po == 0) { + return std::strong_ordering::equal; + } + ythrow yexception() << "unexpected unordered"; + } + + static std::strong_ordering ToStrong(std::strong_ordering po) { + return po; + } + + TValueType Value_; + EDirection Direction_; +}; + +} // namespace NYql::NWindow diff --git a/yql/essentials/core/sql_types/window_number_and_direction_ut.cpp b/yql/essentials/core/sql_types/window_number_and_direction_ut.cpp new file mode 100644 index 00000000000..95110767394 --- /dev/null +++ b/yql/essentials/core/sql_types/window_number_and_direction_ut.cpp @@ -0,0 +1,427 @@ +#include "window_number_and_direction.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NYql::NWindow; + +Y_UNIT_TEST_SUITE(TNumberAndDirectionTest) { + +Y_UNIT_TEST(Comparison_LeftInf_vs_LeftInf) { + TNumberAndDirection<i64> a(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + TNumberAndDirection<i64> b(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} + +Y_UNIT_TEST(Comparison_LeftInf_vs_LeftSmallNum) { + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + UNIT_ASSERT(leftInf < leftSmall); + UNIT_ASSERT(leftInf <= leftSmall); + UNIT_ASSERT(!(leftInf == leftSmall)); + UNIT_ASSERT(leftInf != leftSmall); + UNIT_ASSERT(!(leftInf > leftSmall)); + UNIT_ASSERT(!(leftInf >= leftSmall)); +} + +Y_UNIT_TEST(Comparison_LeftInf_vs_LeftLargeNum) { + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + TNumberAndDirection<i64> leftLarge(200, 
EDirection::Preceding); + UNIT_ASSERT(leftInf < leftLarge); + UNIT_ASSERT(leftInf <= leftLarge); + UNIT_ASSERT(!(leftInf == leftLarge)); + UNIT_ASSERT(leftInf != leftLarge); + UNIT_ASSERT(!(leftInf > leftLarge)); + UNIT_ASSERT(!(leftInf >= leftLarge)); +} + +Y_UNIT_TEST(Comparison_LeftInf_vs_RightInf) { + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + UNIT_ASSERT(leftInf < rightInf); + UNIT_ASSERT(leftInf <= rightInf); + UNIT_ASSERT(!(leftInf == rightInf)); + UNIT_ASSERT(leftInf != rightInf); + UNIT_ASSERT(!(leftInf > rightInf)); + UNIT_ASSERT(!(leftInf >= rightInf)); +} + +Y_UNIT_TEST(Comparison_LeftInf_vs_RightSmallNum) { + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + UNIT_ASSERT(leftInf < rightSmall); + UNIT_ASSERT(leftInf <= rightSmall); + UNIT_ASSERT(!(leftInf == rightSmall)); + UNIT_ASSERT(leftInf != rightSmall); + UNIT_ASSERT(!(leftInf > rightSmall)); + UNIT_ASSERT(!(leftInf >= rightSmall)); +} + +Y_UNIT_TEST(Comparison_LeftInf_vs_RightLargeNum) { + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + UNIT_ASSERT(leftInf < rightLarge); + UNIT_ASSERT(leftInf <= rightLarge); + UNIT_ASSERT(!(leftInf == rightLarge)); + UNIT_ASSERT(leftInf != rightLarge); + UNIT_ASSERT(!(leftInf > rightLarge)); + UNIT_ASSERT(!(leftInf >= rightLarge)); +} + +Y_UNIT_TEST(Comparison_LeftSmallNum_vs_LeftInf) { + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + UNIT_ASSERT(!(leftSmall < leftInf)); + UNIT_ASSERT(!(leftSmall <= leftInf)); + UNIT_ASSERT(!(leftSmall == leftInf)); + 
UNIT_ASSERT(leftSmall != leftInf); + UNIT_ASSERT(leftSmall > leftInf); + UNIT_ASSERT(leftSmall >= leftInf); +} + +Y_UNIT_TEST(Comparison_LeftSmallNum_vs_LeftSmallNum_Equal) { + TNumberAndDirection<i64> a(100, EDirection::Preceding); + TNumberAndDirection<i64> b(100, EDirection::Preceding); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} + +Y_UNIT_TEST(Comparison_LeftSmallNum_vs_LeftLargeNum) { + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + // For Left: larger value is "less" (200 < 100) + UNIT_ASSERT(!(leftSmall < leftLarge)); + UNIT_ASSERT(!(leftSmall <= leftLarge)); + UNIT_ASSERT(!(leftSmall == leftLarge)); + UNIT_ASSERT(leftSmall != leftLarge); + UNIT_ASSERT(leftSmall > leftLarge); + UNIT_ASSERT(leftSmall >= leftLarge); +} + +Y_UNIT_TEST(Comparison_LeftSmallNum_vs_RightInf) { + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + UNIT_ASSERT(leftSmall < rightInf); + UNIT_ASSERT(leftSmall <= rightInf); + UNIT_ASSERT(!(leftSmall == rightInf)); + UNIT_ASSERT(leftSmall != rightInf); + UNIT_ASSERT(!(leftSmall > rightInf)); + UNIT_ASSERT(!(leftSmall >= rightInf)); +} + +Y_UNIT_TEST(Comparison_LeftSmallNum_vs_RightSmallNum) { + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + // Left always < Right + UNIT_ASSERT(leftSmall < rightSmall); + UNIT_ASSERT(leftSmall <= rightSmall); + UNIT_ASSERT(!(leftSmall == rightSmall)); + UNIT_ASSERT(leftSmall != rightSmall); + UNIT_ASSERT(!(leftSmall > rightSmall)); + UNIT_ASSERT(!(leftSmall >= rightSmall)); +} + +Y_UNIT_TEST(Comparison_LeftSmallNum_vs_RightLargeNum) { + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + TNumberAndDirection<i64> 
rightLarge(200, EDirection::Following); + // Left always < Right + UNIT_ASSERT(leftSmall < rightLarge); + UNIT_ASSERT(leftSmall <= rightLarge); + UNIT_ASSERT(!(leftSmall == rightLarge)); + UNIT_ASSERT(leftSmall != rightLarge); + UNIT_ASSERT(!(leftSmall > rightLarge)); + UNIT_ASSERT(!(leftSmall >= rightLarge)); +} + +Y_UNIT_TEST(Comparison_LeftLargeNum_vs_LeftInf) { + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + UNIT_ASSERT(!(leftLarge < leftInf)); + UNIT_ASSERT(!(leftLarge <= leftInf)); + UNIT_ASSERT(!(leftLarge == leftInf)); + UNIT_ASSERT(leftLarge != leftInf); + UNIT_ASSERT(leftLarge > leftInf); + UNIT_ASSERT(leftLarge >= leftInf); +} + +Y_UNIT_TEST(Comparison_LeftLargeNum_vs_LeftSmallNum) { + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + // For Left: larger value is "less" (200 < 100) + UNIT_ASSERT(leftLarge < leftSmall); + UNIT_ASSERT(leftLarge <= leftSmall); + UNIT_ASSERT(!(leftLarge == leftSmall)); + UNIT_ASSERT(leftLarge != leftSmall); + UNIT_ASSERT(!(leftLarge > leftSmall)); + UNIT_ASSERT(!(leftLarge >= leftSmall)); +} + +Y_UNIT_TEST(Comparison_LeftLargeNum_vs_LeftLargeNum_Equal) { + TNumberAndDirection<i64> a(200, EDirection::Preceding); + TNumberAndDirection<i64> b(200, EDirection::Preceding); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} + +Y_UNIT_TEST(Comparison_LeftLargeNum_vs_RightInf) { + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + UNIT_ASSERT(leftLarge < rightInf); + UNIT_ASSERT(leftLarge <= rightInf); + UNIT_ASSERT(!(leftLarge == rightInf)); + UNIT_ASSERT(leftLarge != rightInf); + UNIT_ASSERT(!(leftLarge > rightInf)); + 
UNIT_ASSERT(!(leftLarge >= rightInf)); +} + +Y_UNIT_TEST(Comparison_LeftLargeNum_vs_RightSmallNum) { + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + // Left always < Right + UNIT_ASSERT(leftLarge < rightSmall); + UNIT_ASSERT(leftLarge <= rightSmall); + UNIT_ASSERT(!(leftLarge == rightSmall)); + UNIT_ASSERT(leftLarge != rightSmall); + UNIT_ASSERT(!(leftLarge > rightSmall)); + UNIT_ASSERT(!(leftLarge >= rightSmall)); +} + +Y_UNIT_TEST(Comparison_LeftLargeNum_vs_RightLargeNum) { + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + // Left always < Right + UNIT_ASSERT(leftLarge < rightLarge); + UNIT_ASSERT(leftLarge <= rightLarge); + UNIT_ASSERT(!(leftLarge == rightLarge)); + UNIT_ASSERT(leftLarge != rightLarge); + UNIT_ASSERT(!(leftLarge > rightLarge)); + UNIT_ASSERT(!(leftLarge >= rightLarge)); +} + +Y_UNIT_TEST(Comparison_RightInf_vs_LeftInf) { + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + UNIT_ASSERT(!(rightInf < leftInf)); + UNIT_ASSERT(!(rightInf <= leftInf)); + UNIT_ASSERT(!(rightInf == leftInf)); + UNIT_ASSERT(rightInf != leftInf); + UNIT_ASSERT(rightInf > leftInf); + UNIT_ASSERT(rightInf >= leftInf); +} + +Y_UNIT_TEST(Comparison_RightInf_vs_LeftSmallNum) { + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + UNIT_ASSERT(!(rightInf < leftSmall)); + UNIT_ASSERT(!(rightInf <= leftSmall)); + UNIT_ASSERT(!(rightInf == leftSmall)); + UNIT_ASSERT(rightInf != leftSmall); + UNIT_ASSERT(rightInf > leftSmall); + UNIT_ASSERT(rightInf >= leftSmall); +} + +Y_UNIT_TEST(Comparison_RightInf_vs_LeftLargeNum) { + TNumberAndDirection<i64> 
rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + UNIT_ASSERT(!(rightInf < leftLarge)); + UNIT_ASSERT(!(rightInf <= leftLarge)); + UNIT_ASSERT(!(rightInf == leftLarge)); + UNIT_ASSERT(rightInf != leftLarge); + UNIT_ASSERT(rightInf > leftLarge); + UNIT_ASSERT(rightInf >= leftLarge); +} + +Y_UNIT_TEST(Comparison_RightInf_vs_RightInf) { + TNumberAndDirection<i64> a(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + TNumberAndDirection<i64> b(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} + +Y_UNIT_TEST(Comparison_RightInf_vs_RightSmallNum) { + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + UNIT_ASSERT(!(rightInf < rightSmall)); + UNIT_ASSERT(!(rightInf <= rightSmall)); + UNIT_ASSERT(!(rightInf == rightSmall)); + UNIT_ASSERT(rightInf != rightSmall); + UNIT_ASSERT(rightInf > rightSmall); + UNIT_ASSERT(rightInf >= rightSmall); +} + +Y_UNIT_TEST(Comparison_RightInf_vs_RightLargeNum) { + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + UNIT_ASSERT(!(rightInf < rightLarge)); + UNIT_ASSERT(!(rightInf <= rightLarge)); + UNIT_ASSERT(!(rightInf == rightLarge)); + UNIT_ASSERT(rightInf != rightLarge); + UNIT_ASSERT(rightInf > rightLarge); + UNIT_ASSERT(rightInf >= rightLarge); +} + +Y_UNIT_TEST(Comparison_RightSmallNum_vs_LeftInf) { + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + UNIT_ASSERT(!(rightSmall < leftInf)); + UNIT_ASSERT(!(rightSmall <= leftInf)); + 
UNIT_ASSERT(!(rightSmall == leftInf)); + UNIT_ASSERT(rightSmall != leftInf); + UNIT_ASSERT(rightSmall > leftInf); + UNIT_ASSERT(rightSmall >= leftInf); +} + +Y_UNIT_TEST(Comparison_RightSmallNum_vs_LeftSmallNum) { + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + // Right always > Left + UNIT_ASSERT(!(rightSmall < leftSmall)); + UNIT_ASSERT(!(rightSmall <= leftSmall)); + UNIT_ASSERT(!(rightSmall == leftSmall)); + UNIT_ASSERT(rightSmall != leftSmall); + UNIT_ASSERT(rightSmall > leftSmall); + UNIT_ASSERT(rightSmall >= leftSmall); +} + +Y_UNIT_TEST(Comparison_RightSmallNum_vs_LeftLargeNum) { + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + // Right always > Left + UNIT_ASSERT(!(rightSmall < leftLarge)); + UNIT_ASSERT(!(rightSmall <= leftLarge)); + UNIT_ASSERT(!(rightSmall == leftLarge)); + UNIT_ASSERT(rightSmall != leftLarge); + UNIT_ASSERT(rightSmall > leftLarge); + UNIT_ASSERT(rightSmall >= leftLarge); +} + +Y_UNIT_TEST(Comparison_RightSmallNum_vs_RightInf) { + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + UNIT_ASSERT(rightSmall < rightInf); + UNIT_ASSERT(rightSmall <= rightInf); + UNIT_ASSERT(!(rightSmall == rightInf)); + UNIT_ASSERT(rightSmall != rightInf); + UNIT_ASSERT(!(rightSmall > rightInf)); + UNIT_ASSERT(!(rightSmall >= rightInf)); +} + +Y_UNIT_TEST(Comparison_RightSmallNum_vs_RightSmallNum_Equal) { + TNumberAndDirection<i64> a(100, EDirection::Following); + TNumberAndDirection<i64> b(100, EDirection::Following); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} + +Y_UNIT_TEST(Comparison_RightSmallNum_vs_RightLargeNum) { + TNumberAndDirection<i64> rightSmall(100, 
EDirection::Following); + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + // For Right: 100 < 200 + UNIT_ASSERT(rightSmall < rightLarge); + UNIT_ASSERT(rightSmall <= rightLarge); + UNIT_ASSERT(!(rightSmall == rightLarge)); + UNIT_ASSERT(rightSmall != rightLarge); + UNIT_ASSERT(!(rightSmall > rightLarge)); + UNIT_ASSERT(!(rightSmall >= rightLarge)); +} + +Y_UNIT_TEST(Comparison_RightLargeNum_vs_LeftInf) { + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + TNumberAndDirection<i64> leftInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Preceding); + UNIT_ASSERT(!(rightLarge < leftInf)); + UNIT_ASSERT(!(rightLarge <= leftInf)); + UNIT_ASSERT(!(rightLarge == leftInf)); + UNIT_ASSERT(rightLarge != leftInf); + UNIT_ASSERT(rightLarge > leftInf); + UNIT_ASSERT(rightLarge >= leftInf); +} + +Y_UNIT_TEST(Comparison_RightLargeNum_vs_LeftSmallNum) { + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + TNumberAndDirection<i64> leftSmall(100, EDirection::Preceding); + // Right always > Left + UNIT_ASSERT(!(rightLarge < leftSmall)); + UNIT_ASSERT(!(rightLarge <= leftSmall)); + UNIT_ASSERT(!(rightLarge == leftSmall)); + UNIT_ASSERT(rightLarge != leftSmall); + UNIT_ASSERT(rightLarge > leftSmall); + UNIT_ASSERT(rightLarge >= leftSmall); +} + +Y_UNIT_TEST(Comparison_RightLargeNum_vs_LeftLargeNum) { + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + TNumberAndDirection<i64> leftLarge(200, EDirection::Preceding); + // Right always > Left + UNIT_ASSERT(!(rightLarge < leftLarge)); + UNIT_ASSERT(!(rightLarge <= leftLarge)); + UNIT_ASSERT(!(rightLarge == leftLarge)); + UNIT_ASSERT(rightLarge != leftLarge); + UNIT_ASSERT(rightLarge > leftLarge); + UNIT_ASSERT(rightLarge >= leftLarge); +} + +Y_UNIT_TEST(Comparison_RightLargeNum_vs_RightInf) { + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + TNumberAndDirection<i64> rightInf(TNumberAndDirection<i64>::TUnbounded{}, EDirection::Following); + 
UNIT_ASSERT(rightLarge < rightInf); + UNIT_ASSERT(rightLarge <= rightInf); + UNIT_ASSERT(!(rightLarge == rightInf)); + UNIT_ASSERT(rightLarge != rightInf); + UNIT_ASSERT(!(rightLarge > rightInf)); + UNIT_ASSERT(!(rightLarge >= rightInf)); +} + +Y_UNIT_TEST(Comparison_RightLargeNum_vs_RightSmallNum) { + TNumberAndDirection<i64> rightLarge(200, EDirection::Following); + TNumberAndDirection<i64> rightSmall(100, EDirection::Following); + // For Right: 200 > 100 + UNIT_ASSERT(!(rightLarge < rightSmall)); + UNIT_ASSERT(!(rightLarge <= rightSmall)); + UNIT_ASSERT(!(rightLarge == rightSmall)); + UNIT_ASSERT(rightLarge != rightSmall); + UNIT_ASSERT(rightLarge > rightSmall); + UNIT_ASSERT(rightLarge >= rightSmall); +} + +Y_UNIT_TEST(Comparison_RightLargeNum_vs_RightLargeNum_Equal) { + TNumberAndDirection<i64> a(200, EDirection::Following); + TNumberAndDirection<i64> b(200, EDirection::Following); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} + +Y_UNIT_TEST(Comparison_DifferentZeroes_Equal) { + TNumberAndDirection<i64> a(0, EDirection::Preceding); + TNumberAndDirection<i64> b(0, EDirection::Following); + UNIT_ASSERT(!(a < b)); + UNIT_ASSERT(a <= b); + UNIT_ASSERT(a == b); + UNIT_ASSERT(!(a != b)); + UNIT_ASSERT(!(a > b)); + UNIT_ASSERT(a >= b); +} +} // Y_UNIT_TEST_SUITE(TNumberAndDirectionTest) diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index c026258496e..6c028836c28 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -20,6 +20,7 @@ #include <yql/essentials/core/issue/protos/issue_id.pb.h> #include <yql/essentials/core/issue/yql_issue.h> #include <yql/essentials/core/expr_nodes_gen/yql_expr_nodes_gen.h> +#include <yql/essentials/core/yql_window_features.h> #include <yql/essentials/minikql/dom/json.h> #include <yql/essentials/minikql/dom/yson.h> 
#include <yql/essentials/utils/log/log.h> @@ -11426,7 +11427,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Ok; } - bool EnsureQueueResource(const TExprNode* resourceArg, const TTypeAnnotationNode*& elementType, TExtContext& ctx) { + bool EnsureQueueResource(const TExprNode* resourceArg, const TTypeAnnotationNode*& elementType, TContext& ctx) { if (!EnsureResourceType(*resourceArg, ctx.Expr)) { return false; } @@ -11451,8 +11452,99 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return true; } + IGraphTransformer::TStatus WinFramesCollectorWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 3, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + auto streamArg = input->Child(0); + auto resourceArg = input->Child(1); + + if (!EnsureStreamType(*streamArg, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + const TTypeAnnotationNode* expectedValueType; + if (!EnsureQueueResource(resourceArg, expectedValueType, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + const TTypeAnnotationNode* streamType = streamArg->GetTypeAnn(); + const TTypeAnnotationNode* itemType = streamType->Cast<TStreamExprType>()->GetItemType(); + if (!IsSameAnnotation(*itemType, *expectedValueType)) { + ctx.Expr.AddError(TIssue(input->Pos(ctx.Expr), TStringBuilder() << "mismatch of stream and queue types: " + << *itemType << " != " << *expectedValueType)); + return IGraphTransformer::TStatus::Error; + } + + auto boundsSettings = input->Child(2); + if (!boundsSettings->IsCallable("AsStruct")) { + ctx.Expr.AddError(TIssue(boundsSettings->Pos(ctx.Expr), TStringBuilder() << "bounds settings must be Struct literal")); + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(streamType); + return IGraphTransformer::TStatus::Ok; + } + + IGraphTransformer::TStatus WinFrameWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& 
ctx) { + auto status = EnsureDependsOnTailAndRewrite(input, output, ctx.Expr, ctx.Types, 5); + if (status != IGraphTransformer::TStatus::Ok) { + return status; + } + + auto resourceArg = input->Child(0); + auto handle = input->Child(1); + auto isIncremental = input->Child(2); + auto isRange = input->Child(3); + auto isSingleElement = input->Child(4); + + const TTypeAnnotationNode* expectedValueType; + if (!EnsureQueueResource(resourceArg, expectedValueType, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + if (!NNodes::TMaybeNode<NNodes::TCoUint64>(handle)) { + ctx.Expr.AddError(TIssue(input->Pos(ctx.Expr), + TStringBuilder() << "Expecting literal value for handle argument")); + return IGraphTransformer::TStatus::Error; + } + + if (!NNodes::TMaybeNode<NNodes::TCoBool>(isIncremental)) { + ctx.Expr.AddError(TIssue(input->Pos(ctx.Expr), + TStringBuilder() << "Expecting literal value for isIncremental argument")); + return IGraphTransformer::TStatus::Error; + } + + if (!NNodes::TMaybeNode<NNodes::TCoBool>(isRange)) { + ctx.Expr.AddError(TIssue(input->Pos(ctx.Expr), + TStringBuilder() << "Expecting literal value for isRange argument")); + return IGraphTransformer::TStatus::Error; + } + + auto maybeBoolLiteral = NNodes::TMaybeNode<NNodes::TCoBool>(isSingleElement); + if (!maybeBoolLiteral) { + ctx.Expr.AddError(TIssue(input->Pos(ctx.Expr), + TStringBuilder() << "Expecting literal value for isSingleElement argument")); + return IGraphTransformer::TStatus::Error; + } + + bool isSingleElementValue = FromString<bool>(maybeBoolLiteral.Cast().Literal().Value()); + if (!isSingleElementValue) { + input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>((expectedValueType))); + } else { + input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(expectedValueType)); + } + return IGraphTransformer::TStatus::Ok; + } + IGraphTransformer::TStatus QueuePushWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { Y_UNUSED(output); + if 
(IsWindowNewPipelineEnabled(ctx.Types)) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Queue API is deprecated for new window pipeline")); + return IGraphTransformer::TStatus::Error; + } if (!EnsureArgsCount(*input, 2, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -11474,8 +11566,13 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Ok; } - IGraphTransformer::TStatus QueuePopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + IGraphTransformer::TStatus QueuePopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { Y_UNUSED(output); + if (IsWindowNewPipelineEnabled(ctx.Types)) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Queue API is deprecated for new window pipeline")); + return IGraphTransformer::TStatus::Error; + } if (!EnsureArgsCount(*input, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -11490,6 +11587,11 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } IGraphTransformer::TStatus QueuePeekWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { + if (IsWindowNewPipelineEnabled(ctx.Types)) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Queue API is deprecated for new window pipeline")); + return IGraphTransformer::TStatus::Error; + } auto status = EnsureDependsOnTailAndRewrite(input, output, ctx.Expr, ctx.Types, 2); if (status != IGraphTransformer::TStatus::Ok) { return status; @@ -11513,6 +11615,11 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } IGraphTransformer::TStatus QueueRangeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { + if (IsWindowNewPipelineEnabled(ctx.Types)) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Queue API is deprecated for new window pipeline")); + return IGraphTransformer::TStatus::Error; + 
} auto status = EnsureDependsOnTailAndRewrite(input, output, ctx.Expr, ctx.Types, 3); if (status != IGraphTransformer::TStatus::Ok) { return status; @@ -11540,6 +11647,11 @@ template <NKikimr::NUdf::EDataSlot DataSlot> IGraphTransformer::TStatus PreserveStreamWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { Y_UNUSED(output); + if (IsWindowNewPipelineEnabled(ctx.Types)) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "PreserveStream API is deprecated for new window pipeline.")); + return IGraphTransformer::TStatus::Error; + } if (!EnsureArgsCount(*input, 3, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -13945,9 +14057,9 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["AggApplyManyState"] = &AggApplyWrapper; Functions["AggBlockApply"] = &AggBlockApplyWrapper; Functions["AggBlockApplyState"] = &AggBlockApplyWrapper; - Functions["WinOnRows"] = &WinOnWrapper; - Functions["WinOnGroups"] = &WinOnWrapper; - Functions["WinOnRange"] = &WinOnWrapper; + ExtFunctions["WinOnRows"] = &WinOnWrapper; + ExtFunctions["WinOnGroups"] = &WinOnWrapper; + ExtFunctions["WinOnRange"] = &WinOnWrapper; ExtFunctions["WindowTraits"] = &WindowTraitsWrapper; Functions["ToWindowTraits"] = &ToWindowTraitsWrapper; Functions["CalcOverWindow"] = &CalcOverWindowWrapper; @@ -14043,7 +14155,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["AggrCountInit"] = &AggrCountInitWrapper; Functions["AggrCountUpdate"] = &AggrCountUpdateWrapper; ExtFunctions["QueueCreate"] = &QueueCreateWrapper; - Functions["QueuePop"] = &QueuePopWrapper; + ExtFunctions["QueuePop"] = &QueuePopWrapper; Functions["DependsOn"] = &DependsOnWrapper; ExtFunctions["InnerDependsOn"] = &InnerDependsOnWrapper; Functions["Seq"] = &SeqWrapper; @@ -14290,6 +14402,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> ExtFunctions["QueuePeek"] = &QueuePeekWrapper; ///< Ext for ParseTypeWrapper compatibility ExtFunctions["QueueRange"] = 
&QueueRangeWrapper; ///< Ext for ParseTypeWrapper compatibility ExtFunctions["PreserveStream"] = &PreserveStreamWrapper; + + Functions["WinFramesCollector"] = &WinFramesCollectorWrapper; + ExtFunctions["WinFrame"] = &WinFrameWrapper; + ExtFunctions["FilePath"] = &FilePathWrapper; ExtFunctions["FileContent"] = &FileContentWrapper; ExtFunctions["FolderPath"] = &FolderPathWrapper; diff --git a/yql/essentials/core/type_ann/type_ann_list.cpp b/yql/essentials/core/type_ann/type_ann_list.cpp index 4b585e7a1f8..ac9fed7133b 100644 --- a/yql/essentials/core/type_ann/type_ann_list.cpp +++ b/yql/essentials/core/type_ann/type_ann_list.cpp @@ -8,6 +8,7 @@ #include <yql/essentials/core/yql_opt_utils.h> #include <yql/essentials/core/yql_opt_window.h> #include <yql/essentials/core/yql_type_helpers.h> +#include <yql/essentials/core/yql_window_features.h> #include <yql/essentials/parser/pg_catalog/catalog.h> @@ -550,8 +551,13 @@ namespace { isUniversal = true; return IGraphTransformer::TStatus::Ok; } - bool frameCanBeEmpty = !TWindowFrameSettings::Parse(*winOn, ctx).IsNonEmpty(); - + bool isFrameUniversal; + auto frame = TWindowFrameSettings::TryParse(*winOn, ctx, isFrameUniversal); + if (isFrameUniversal) { + isUniversal = true; + return IGraphTransformer::TStatus::Ok; + } + YQL_ENSURE(frame, "Frame expected to be non-empty."); for (auto iterFunc = winOn->Children().begin() + 1; iterFunc != winOn->Children().end(); ++iterFunc) { auto func = *iterFunc; YQL_ENSURE(func->IsList()); @@ -619,7 +625,7 @@ namespace { if (calcSpec->IsCallable("WindowTraits")) { auto finishType = calcSpec->Child(4)->GetTypeAnn(); - if (frameCanBeEmpty) { + if (!frame->IsNonEmpty()) { auto defVal = calcSpec->Child(5); if (!defVal->IsCallable("Null")) { finishType = defVal->GetTypeAnn(); @@ -6423,7 +6429,7 @@ namespace { return IGraphTransformer::TStatus::Ok; } - IGraphTransformer::TStatus WinOnWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + IGraphTransformer::TStatus 
WinOnWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { if (!EnsureMinArgsCount(*input, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -6444,24 +6450,25 @@ namespace { bool isUniversal; auto frameSettings = TWindowFrameSettings::TryParse(*input, ctx.Expr, isUniversal); - if (!frameSettings) { - return IGraphTransformer::TStatus::Error; - } if (isUniversal) { input->SetTypeAnn(ctx.Expr.MakeType<TUniversalExprType>()); return IGraphTransformer::TStatus::Ok; } + if (!frameSettings) { + return IGraphTransformer::TStatus::Error; + } + auto frameType = frameSettings->GetFrameType(); if (frameType == EFrameType::FrameByGroups) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "GROUPS in frame specification are not supported yet")); return IGraphTransformer::TStatus::Error; } - if (frameType == EFrameType::FrameByRange) { + if (frameType == EFrameType::FrameByRange && !IsRangeWindowFrameEnabled(ctx.Types)) { // only UNBOUNDED PRECEDING -> CURRENT ROW is currently supported - if (!(IsUnbounded(frameSettings->GetFirst()) && IsCurrentRow(frameSettings->GetLast()))) { + if (!(frameSettings->IsLeftInf() && frameSettings->IsRightCurrent())) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "RANGE in frame specification is not supported yet")); return IGraphTransformer::TStatus::Error; } diff --git a/yql/essentials/core/type_ann/type_ann_list.h b/yql/essentials/core/type_ann/type_ann_list.h index e6adce4b300..0d9478d5b65 100644 --- a/yql/essentials/core/type_ann/type_ann_list.h +++ b/yql/essentials/core/type_ann/type_ann_list.h @@ -123,7 +123,7 @@ namespace NTypeAnnImpl { IGraphTransformer::TStatus SkipNullMembersWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus FilterNullElementsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus SkipNullElementsWrapper(const TExprNode::TPtr& input, 
TExprNode::TPtr& output, TContext& ctx); - IGraphTransformer::TStatus WinOnWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus WinOnWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx); IGraphTransformer::TStatus WindowTraitsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx); IGraphTransformer::TStatus ToWindowTraitsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus CalcOverWindowWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); diff --git a/yql/essentials/core/type_ann/type_ann_types.cpp b/yql/essentials/core/type_ann/type_ann_types.cpp index 1b7e21d498c..efe1220984f 100644 --- a/yql/essentials/core/type_ann/type_ann_types.cpp +++ b/yql/essentials/core/type_ann/type_ann_types.cpp @@ -1017,7 +1017,7 @@ namespace NTypeAnnImpl { return IGraphTransformer::TStatus::Repeat; } - IGraphTransformer::TStatus ParseTypeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { + IGraphTransformer::TStatus ParseTypeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { Y_UNUSED(output); if (!EnsureArgsCount(*input, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; diff --git a/yql/essentials/core/type_ann/type_ann_types.h b/yql/essentials/core/type_ann/type_ann_types.h index 8c3d30f05f9..eb8bc49a4f0 100644 --- a/yql/essentials/core/type_ann/type_ann_types.h +++ b/yql/essentials/core/type_ann/type_ann_types.h @@ -35,7 +35,7 @@ namespace NTypeAnnImpl { template <ETypeArgument> IGraphTransformer::TStatus TypeArgWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx); - IGraphTransformer::TStatus ParseTypeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx); + IGraphTransformer::TStatus ParseTypeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); 
IGraphTransformer::TStatus FormatTypeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus FormatTypeDiffWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus TypeHandleWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); diff --git a/yql/essentials/core/ya.make b/yql/essentials/core/ya.make index a34248819e2..d6517b1ac00 100644 --- a/yql/essentials/core/ya.make +++ b/yql/essentials/core/ya.make @@ -64,6 +64,10 @@ SRCS( yql_user_data.h yql_user_data_storage.cpp yql_user_data_storage.h + yql_window_features.cpp + yql_window_features.h + yql_window_frames_collector_params_serializer.cpp + yql_window_frames_collector_params_serializer.h ) PEERDIR( diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp index 72d44ed6789..ddd783d3d58 100644 --- a/yql/essentials/core/yql_opt_utils.cpp +++ b/yql/essentials/core/yql_opt_utils.cpp @@ -133,6 +133,10 @@ TExprNode::TPtr MakeOptionalBool(TPositionHandle position, bool value, TExprCont return ctx.NewCallable(position, "Just", { MakeBool(position, value, ctx)}); } +TExprNode::TPtr MakeString(TPositionHandle position, TStringBuf buf, TExprContext& ctx) { + return ctx.Builder(position).Callable("String").Atom(0, buf).Seal().Build(); +} + TExprNode::TPtr MakePgBool(TPositionHandle position, bool value, TExprContext& ctx) { return ctx.NewCallable(position, "PgConst", { ctx.NewAtom(position, value ? 
"t" : "f", TNodeFlags::Default), @@ -2814,8 +2818,12 @@ bool IsNormalizedDependsOn(const TExprNode& node) { return false; } +bool IsForbidConstantDependsEnabled(const TTypeAnnotationContext& types) { + return IsOptimizerEnabled<ForbidConstantDependsOnFuseOptName>(types) && !IsOptimizerDisabled<ForbidConstantDependsOnFuseOptName>(types); +} + bool CanFuseLambdas(const TExprNode& outer, const TExprNode& inner, const TTypeAnnotationContext& types) { - if (!IsOptimizerEnabled<ForbidConstantDependsOnFuseOptName>(types) || IsOptimizerDisabled<ForbidConstantDependsOnFuseOptName>(types)) { + if (!IsForbidConstantDependsEnabled(types)) { return true; } diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 72b9afffe31..4e0ccac7878 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -110,6 +110,7 @@ TExprNode::TPtr MakeNull(TPositionHandle position, TExprContext& ctx); TExprNode::TPtr MakeConstMap(TPositionHandle position, const TExprNode::TPtr& input, const TExprNode::TPtr& value, TExprContext& ctx); TExprNode::TPtr MakeBoolNothing(TPositionHandle position, TExprContext& ctx); TExprNode::TPtr MakeBool(TPositionHandle position, bool value, TExprContext& ctx); +TExprNode::TPtr MakeString(TPositionHandle position, TStringBuf buf, TExprContext& ctx); TExprNode::TPtr MakeOptionalBool(TPositionHandle position, bool value, TExprContext& ctx); template <bool Bool> TExprNode::TPtr MakeBool(TPositionHandle position, TExprContext& ctx); @@ -224,6 +225,7 @@ TExprNode::TPtr ReplaceUnessentials(TExprNode::TPtr predicate, TExprNode::TPtr r bool IsDependsOnUsage(const TExprNode& node, const TParentsMap& parentsMap); bool IsNormalizedDependsOn(const TExprNode& node); +bool IsForbidConstantDependsEnabled(const TTypeAnnotationContext& types); bool CanFuseLambdas(const TExprNode& outer, const TExprNode& inner, const TTypeAnnotationContext& types); bool CanApplyExtractMembersToPartitionsByKeys(const 
TTypeAnnotationContext* types); diff --git a/yql/essentials/core/yql_opt_window.cpp b/yql/essentials/core/yql_opt_window.cpp index cac27f67e28..d2a5c2a6296 100644 --- a/yql/essentials/core/yql_opt_window.cpp +++ b/yql/essentials/core/yql_opt_window.cpp @@ -3,32 +3,564 @@ #include "yql_expr_type_annotation.h" #include <yql/essentials/core/yql_expr_optimize.h> +#include <yql/essentials/core/yql_window_features.h> +#include <yql/essentials/core/sql_types/window_frame_bounds.h> #include <yql/essentials/utils/log/log.h> +#include <yql/essentials/core/sql_types/window_frames_collector_params.h> +#include <yql/essentials/core/yql_window_frames_collector_params_serializer.h> + +#include <expected> namespace NYql { using namespace NNodes; +using NWindow::TCoreWinFrameCollectorBounds; +using NWindow::TNumberAndDirection; +using NWindow::EDirection; +using NWindow::TInputRow; +using NWindow::TInputRowWindowFrame; +using NWindow::TCoreWinFramesCollectorParams; + +using THandle = TCoreWinFrameCollectorBounds<TString>::THandle; + namespace { -const TStringBuf SessionStartMemberName = "_yql_window_session_start"; -const TStringBuf SessionParamsMemberName = "_yql_window_session_params"; +constexpr TStringBuf SessionStartMemberName = "_yql_window_session_start"; +constexpr TStringBuf SessionParamsMemberName = "_yql_window_session_params"; +constexpr TStringBuf SortedColumnMemberName = "_yql_sorted_column"; -enum class EFrameBoundsType : ui8 { - EMPTY, - LAGGING, - CURRENT, - LEADING, - FULL, - GENERIC, +struct TUnsortedTag {}; +struct TManyColumnsInSort {}; + +struct TSorted { + enum class ESortDir { + Asc, + Desc, + }; + const TTypeAnnotationNode* SortedColumnType; + ESortDir SortDir; }; -EFrameBoundsType FrameBoundsType(const TWindowFrameSettings& settings) { - auto first = settings.GetFirstOffset(); - auto last = settings.GetLastOffset(); +bool CheckRowFrameNeverEmpty(const TWindowFrameSettings::TRowFrame& frame) { + if (!frame.first) { + return !frame.second.Defined() || 
*frame.second >= 0; + } else if (!frame.second.Defined()) { + return !frame.first.Defined() || *frame.first <= 0; + } else { + return *frame.first <= *frame.second && *frame.first <= 0 && *frame.second >= 0; + } +} + +template <typename T> +bool CheckRangeFrameNeverEmpty(TNumberAndDirection<T> left, TNumberAndDirection<T> right) { + auto zero = TNumberAndDirection<T>::Zero(); + return left <= zero && right >= zero; +} + +bool CheckRowFrameIsAlwaysEmpty(const TWindowFrameSettings::TRowFrame& frame) { + return frame.first.Defined() && frame.second.Defined() && *frame.first > *frame.second; +} + +template <typename T> +bool CheckRangeFrameIsAlwaysEmpty(TNumberAndDirection<T> left, TNumberAndDirection<T> right) { + return left > right; +} + +using TSortTraitsInfo = std::variant<TUnsortedTag,TManyColumnsInSort, TSorted>; + +TSorted::ESortDir ExtractSortDirectionFromBool(TExprNode::TPtr sortDirection) { + YQL_ENSURE(sortDirection->IsAtom()); + auto direction = sortDirection->Content(); + YQL_ENSURE(direction == "true" || direction == "false"); + return (direction == "true") ? 
TSorted::ESortDir::Asc : TSorted::ESortDir::Desc; +} + +TSorted::ESortDir ExtractSortDirection(TExprNode::TPtr sortDirections) { + if (sortDirections->IsCallable("Bool")) { + YQL_ENSURE(sortDirections->ChildrenSize() > 0); + return ExtractSortDirectionFromBool(sortDirections->HeadPtr()); + } else { + YQL_ENSURE(sortDirections->IsList(), "List or bool expected."); + YQL_ENSURE(sortDirections->ChildrenSize() > 0, "At least one child expected."); + return ExtractSortDirection(sortDirections->ChildPtr(0)); + } +} + +TSortTraitsInfo ExtractSortTraitsInfo(const TExprNode::TPtr& sortTraits) { + if (!sortTraits || sortTraits->IsCallable("Void")) { + return TUnsortedTag{}; + } + + YQL_ENSURE(sortTraits->IsCallable("SortTraits"), "Expected SortTraits or Void."); + YQL_ENSURE(sortTraits->ChildrenSize() == 3, "Expected exactly three arguments."); + + auto sortDirections = sortTraits->ChildPtr(1); + auto sortKeyLambda = sortTraits->ChildPtr(2); + + YQL_ENSURE(sortKeyLambda->IsLambda(), "Expected lambda as sort traits."); + + const TTypeAnnotationNode* lambdaType = sortKeyLambda->GetTypeAnn(); + YQL_ENSURE(lambdaType, "Expected to have non null lambda type."); + const TTypeAnnotationNode* firstColumnType = lambdaType; + if (lambdaType->GetKind() == ETypeAnnotationKind::Tuple) { + return TManyColumnsInSort{}; + } + + return TSorted{.SortedColumnType = firstColumnType, + .SortDir = ExtractSortDirection(sortDirections)}; +} + +template <typename T> +class TNumberAndDirectionWithSerialized { +public: + TNumberAndDirectionWithSerialized(const TString& str, EDirection direction) + : Value_(FromString<T>(str), direction) + , String_(str, direction) + { + } + + static TNumberAndDirectionWithSerialized<T> Inf(EDirection direction) { + return TNumberAndDirectionWithSerialized<T>(direction); + } + + const TNumberAndDirection<T>& Value() const { + return Value_; + } + + const TNumberAndDirection<TString>& StringValue() const { + return String_; + } + +private: + 
// Builds the infinite bound: both the typed and the serialized value come from Inf(direction).
TNumberAndDirectionWithSerialized(EDirection direction)
        : Value_(TNumberAndDirection<T>::Inf(direction))
        , String_(TNumberAndDirection<TString>::Inf(direction))
    {
    }

    TNumberAndDirection<T> Value_;
    TNumberAndDirection<TString> String_;
};

// Specialization for sort keys without an arithmetic type: only the serialized
// (string) form of the bound is stored, there is no typed value.
template <>
class TNumberAndDirectionWithSerialized<void> {
public:
    TNumberAndDirectionWithSerialized(const TString& str, EDirection direction)
        : String_(str, direction)
    {
    }

    const TNumberAndDirection<TString>& StringValue() const {
        return String_;
    }

    // Factory for the infinite bound in the given direction.
    static TNumberAndDirectionWithSerialized<void> Inf(EDirection direction) {
        return TNumberAndDirectionWithSerialized<void>(direction);
    }

private:
    TNumberAndDirectionWithSerialized(EDirection direction)
        : String_(TNumberAndDirection<TString>::Inf(direction))
    {
    }

    TNumberAndDirection<TString> String_;
};

// Result of parsing one RANGE bound: the parsed literal plus the node it was parsed from.
template <typename T>
struct TParseFrameBoundResult {
    TNumberAndDirectionWithSerialized<T> BoundLiteral;
    TExprNode::TPtr BoundNode;
};

// Describes a node for an error message: "lambda" when the node has no type annotation,
// "<callable name> with type <type>" for callables, and just the type otherwise.
TString SerializeActualNodeForError(const TExprNode& node) {
    const TTypeAnnotationNode* type = node.GetTypeAnn();
    TStringBuilder errMsg;
    if (!type) {
        errMsg << "lambda";
    } else if (node.IsCallable()) {
        errMsg << node.Content() << " with type " << *type;
    } else {
        errMsg << *type;
    }
    return TString(errMsg);
}

// Parses a single RANGE frame bound given as a tuple:
//   ('currentRow)                        -> serialized "0" with EDirection::Following;
//   ('preceding | 'following 'unbounded) -> Inf(direction);
//   ('preceding | 'following <TLiteral>) -> the non-negative literal with its direction.
// On a malformed bound the issue is returned instead of a result (it is NOT added to ctx here
// by this function itself). With TLiteral = void every explicit offset is rejected.
template <typename TLiteral, typename TArithmetic>
std::expected<TParseFrameBoundResult<TArithmetic>, TIssue> ParseFrameRangeBound(TExprNode::TPtr frameBound, TExprContext& ctx) {
    YQL_ENSURE(frameBound->IsList(), "List expected");

    if (!EnsureTupleMinSize(*frameBound, 1, ctx)) {
        return std::unexpected(TIssue(ctx.GetPosition(frameBound->Pos()), "Expected tuple with at least one size."));
    }
    if (!EnsureAtom(frameBound->Head(), ctx)) {
        return std::unexpected(TIssue(ctx.GetPosition(frameBound->Pos()), "Head must be an atom."));
    }

    auto type = frameBound->Head().Content();
    if (type == "currentRow") {
        if (frameBound->ChildrenSize() == 1) {
            return TParseFrameBoundResult<TArithmetic>{.BoundLiteral = TNumberAndDirectionWithSerialized<TArithmetic>("0", EDirection::Following), .BoundNode = frameBound};
        }
        return std::unexpected(TIssue(ctx.GetPosition(frameBound->Pos()), TStringBuilder() << "Expecting no value for '" << type << "'"));
    }

    if (!(type == "preceding" || type == "following")) {
        return std::unexpected(TIssue(ctx.GetPosition(frameBound->Pos()), TStringBuilder() << "Expecting preceding or following, but got '" << type << "'"));
    }

    EDirection direction = (type == "preceding") ? EDirection::Preceding : EDirection::Following;

    // EnsureTupleSize checks the exact size, so the message says "exactly".
    if (!EnsureTupleSize(*frameBound, 2, ctx)) {
        return std::unexpected(TIssue(ctx.GetPosition(frameBound->Pos()), "Expected tuple of exactly 2 elements for frame bounds."));
    }

    auto boundValue = frameBound->ChildPtr(1);
    if (boundValue->IsAtom()) {
        if (boundValue->Content() == "unbounded") {
            return TParseFrameBoundResult<TArithmetic>{.BoundLiteral = TNumberAndDirectionWithSerialized<TArithmetic>::Inf(direction), .BoundNode = frameBound};
        }
        return std::unexpected(TIssue(ctx.GetPosition(boundValue->Pos()), TStringBuilder() << "Expecting unbounded, but got '" << boundValue->Content() << "'"));
    }

    if (!EnsureDataType(*boundValue, ctx)) {
        return std::unexpected(TIssue(ctx.GetPosition(boundValue->Pos()), "Expected data type."));
    }

    if constexpr (!std::is_void_v<TLiteral>) {
        auto maybeIntLiteral = TMaybeNode<TLiteral>(boundValue);
        if (!maybeIntLiteral) {
            return std::unexpected(TIssue(ctx.GetPosition(boundValue->Pos()), TStringBuilder() << "Expecting " << TLiteral::CallableName() << " literal, but got: " << SerializeActualNodeForError(*boundValue)));
        }

        TString strLiteralValue(maybeIntLiteral.Cast().Literal().Value());
        // Zero is accepted (it is canonicalized below), so the requirement is "non-negative".
        if (FromString<TArithmetic>(strLiteralValue) < 0) {
            return std::unexpected(TIssue(ctx.GetPosition(boundValue->Pos()), TStringBuilder() << "Expecting non-negative literal values, but got " << strLiteralValue));
        }
        auto value = TNumberAndDirectionWithSerialized<TArithmetic>(strLiteralValue, direction);
        // A zero offset is rewritten to the same canonical form as 'currentRow.
        // NOTE(review): whether "0 PRECEDING" also compares equal here depends on
        // TNumberAndDirection::operator==, which is not visible in this chunk - confirm.
        if (value.Value() == TNumberAndDirection<TArithmetic>(0, EDirection::Following)) {
            return TParseFrameBoundResult<TArithmetic>{.BoundLiteral = TNumberAndDirectionWithSerialized<TArithmetic>("0", EDirection::Following), .BoundNode = frameBound};
        }
        return TParseFrameBoundResult<TArithmetic>{.BoundLiteral = value, .BoundNode = frameBound};
    } else {
        return std::unexpected(TIssue(ctx.GetPosition(boundValue->Pos()), TStringBuilder() << "Specifying an offset is not allowed here since this column type does not support offsets in RANGE mode."));
    }
}

// Returns the last child of the first setting tuple whose head atom equals `name`,
// or nullptr when there is no such setting. For a one-element tuple the last child
// is the head atom itself, so the result is still non-null.
TExprNode::TPtr GetSettingByName(const TExprNode::TChildrenType& settings, TStringBuf name) {
    for (const auto& setting : settings) {
        const auto settingName = setting->Head().Content();
        if (settingName == name) {
            return setting->TailPtr();
        }
    }
    return nullptr;
}

// Maps sort traits to a sort order: unsorted and multi-column sorts are Unimportant,
// a single sorted column keeps its direction.
ESortOrder GetSortOrder(const TSortTraitsInfo& info) {
    return std::visit(TOverloaded{
                          [](const TUnsortedTag&) {
                              return ESortOrder::Unimportant;
                          },
                          [](const TManyColumnsInSort&) {
                              return ESortOrder::Unimportant;
                          },
                          [](const TSorted& sorted) {
                              switch (sorted.SortDir) {
                                  case TSorted::ESortDir::Asc:
                                      return ESortOrder::Asc;
                                  case TSorted::ESortDir::Desc:
                                      return ESortOrder::Desc;
                              }
                              // Both enumerators are handled above; do not fall off the end
                              // of a value-returning lambda.
                              Y_UNREACHABLE();
                          }}, info);
}

// Parses the "begin"/"end" settings of a RANGE frame into TWindowFrameSettings.
// Returns an empty TMaybe and adds an error to ctx when a bound is missing or malformed.
// With TLiteral = void the frame is marked non-numeric, the bounds callable name is empty,
// and the frame is treated as never empty.
template <typename TLiteral, typename TArithmetic>
TMaybe<TWindowFrameSettings> ParseFrameRangeBounds(TExprNode::TPtr frameSpec, TExprContext& ctx) {
    constexpr bool isNumeric = !std::is_void_v<TLiteral>;

    auto begin = GetSettingByName(frameSpec->Children(), "begin");
    auto end = GetSettingByName(frameSpec->Children(), "end");
    if (!begin || !end) {
        ctx.AddError(TIssue(ctx.GetPosition(frameSpec->Pos()),
                            TStringBuilder() << "Expected begin and end for range frames."));
        return {};
    }
    auto beginParse = ParseFrameRangeBound<TLiteral, TArithmetic>(begin, ctx);
    if (!beginParse.has_value()) {
        ctx.AddError(beginParse.error());
        return {};
    }
    auto endParse = ParseFrameRangeBound<TLiteral, TArithmetic>(end, ctx);
    if (!endParse.has_value()) {
        ctx.AddError(endParse.error());
        return {};
    }

    // Defaults describe the non-numeric case; they are refined only for numeric bounds.
    bool isAlwaysEmpty = false;
    bool isNeverEmpty = true;
    TString boundsCallable;
    if constexpr (isNumeric) {
        boundsCallable = TLiteral::CallableName();
        isAlwaysEmpty = CheckRangeFrameIsAlwaysEmpty(beginParse.value().BoundLiteral.Value(), endParse.value().BoundLiteral.Value());
        isNeverEmpty = CheckRangeFrameNeverEmpty(beginParse.value().BoundLiteral.Value(), endParse.value().BoundLiteral.Value());
    }

    auto sortTraits = ExtractSortTraitsInfo(GetSettingByName(frameSpec->Children(), "sortSpec"));
    auto range = TWindowFrameSettings::TRangeFrame(
        {beginParse->BoundLiteral.StringValue(), endParse->BoundLiteral.StringValue()},
        /*isNumeric=*/isNumeric,
        /*sortOrder=*/GetSortOrder(sortTraits),
        /*boundsCallable=*/boundsCallable);

    return TWindowFrameSettings{range,
                                /*neverEmpty=*/isNeverEmpty,
                                /*compact=*/GetSettingByName(frameSpec->Children(), "compact") != nullptr,
                                /*isAlwaysEmpty=*/isAlwaysEmpty};
}

// Parses the value of a ROWS frame bound: Int32 callable -> its offset, Void -> empty TMaybe
// (no offset). Any other node yields an issue.
// `setting` is the bound value node itself (callers pass the result of GetSettingByName),
// so the error is reported against `setting`, not against its children.
std::expected<TMaybe<i32>, TIssue> ParseFrameRowsBounds(TExprNode::TPtr setting, TExprContext& ctx) {
    if (setting->IsCallable("Int32")) {
        auto& valNode = setting->Head();
        YQL_ENSURE(valNode.IsAtom());
        i32 value;
        YQL_ENSURE(TryFromString(valNode.Content(), value));
        return value;
    }

    if (setting->IsCallable("Void")) {
        return TMaybe<i32>();
    }

    return std::unexpected(TIssue(ctx.GetPosition(setting->Pos()),
                                  TStringBuilder() << "Invalid frame bound - expecting Void or Int32 callable, but got: "
                                                   << SerializeActualNodeForError(*setting)));
}

bool VerifySettings(const TExprNode::TChildrenType& settings, TExprContext& ctx) {
    for (const auto& setting : settings) {
        if (!EnsureTupleMinSize(*setting, 1, ctx)) {
            return
false; + } + + if (!EnsureAtom(setting->Head(), ctx)) { + return false; + } + } + return true; +} + +constexpr TStringBuf ErrorNonNumeric = "Range frame for not sorted frames is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW."; +constexpr TStringBuf ErrorMultipleColumns = "Range frame for multiple expressions is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW."; +constexpr TStringBuf ErrorNonNumericSingleColumn = "Range frame for non numeric expressions is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW."; + +TMaybe<TWindowFrameSettings> TryParseRangeForNotNumericFrameSettings(TExprNode::TPtr frameSpec, TStringBuf error, TExprContext& ctx) { + auto result = ParseFrameRangeBounds<void, void>(frameSpec, ctx); + if (!result) { + return result; + } + auto left = result->GetRangeFrame().GetFirst(); + auto right = result->GetRangeFrame().GetLast(); + + if (left.IsInf() && !right.IsInf() && right.GetUnderlyingValue() == "0") { + return result; + } + + ctx.AddError(TIssue(ctx.GetPosition(frameSpec->Pos()), error)); + return {}; +} + +TMaybe<TWindowFrameSettings> TryParseRangeWindowFrameSettings(TExprNode::TPtr frameSpec, TExprContext& ctx) { + auto sortTraits = ExtractSortTraitsInfo(GetSettingByName(frameSpec->Children(), "sortSpec")); + if (std::holds_alternative<TUnsortedTag>(sortTraits)) { + return TryParseRangeForNotNumericFrameSettings(frameSpec, ErrorNonNumeric, ctx); + } else if (std::holds_alternative<TManyColumnsInSort>(sortTraits)) { + return TryParseRangeForNotNumericFrameSettings(frameSpec, ErrorMultipleColumns, ctx); + } + YQL_ENSURE(std::holds_alternative<TSorted>(sortTraits)); + auto sortedTraits = std::get<TSorted>(sortTraits); + auto* type = sortedTraits.SortedColumnType; + if (type->GetKind() == ETypeAnnotationKind::Optional) { + type = type->Cast<TOptionalExprType>()->GetItemType(); + } + if (type->GetKind() == ETypeAnnotationKind::Data) { + switch (type->Cast<TDataExprType>()->GetSlot()) { + case NUdf::EDataSlot::Int8: + return 
ParseFrameRangeBounds<TCoInt8, i8>(frameSpec, ctx); + case NUdf::EDataSlot::Uint8: + return ParseFrameRangeBounds<TCoUint8, ui8>(frameSpec, ctx); + case NUdf::EDataSlot::Int16: + return ParseFrameRangeBounds<TCoInt16, i16>(frameSpec, ctx); + case NUdf::EDataSlot::Uint16: + return ParseFrameRangeBounds<TCoUint16, ui16>(frameSpec, ctx); + case NUdf::EDataSlot::Int32: + return ParseFrameRangeBounds<TCoInt32, i32>(frameSpec, ctx); + case NUdf::EDataSlot::Uint32: + return ParseFrameRangeBounds<TCoUint32, ui32>(frameSpec, ctx); + case NUdf::EDataSlot::Int64: + return ParseFrameRangeBounds<TCoInt64, i64>(frameSpec, ctx); + case NUdf::EDataSlot::Uint64: + return ParseFrameRangeBounds<TCoUint64, ui64>(frameSpec, ctx); + case NUdf::EDataSlot::Double: + return ParseFrameRangeBounds<TCoDouble, double>(frameSpec, ctx); + case NUdf::EDataSlot::Float: + return ParseFrameRangeBounds<TCoFloat, float>(frameSpec, ctx); + case NUdf::EDataSlot::Date: + case NUdf::EDataSlot::Datetime: + case NUdf::EDataSlot::Timestamp: + case NUdf::EDataSlot::Interval: + case NUdf::EDataSlot::TzDate: + case NUdf::EDataSlot::TzDatetime: + case NUdf::EDataSlot::TzTimestamp: + return ParseFrameRangeBounds<TCoInterval, NUdf::TDataType<NUdf::TInterval>::TLayout>(frameSpec, ctx); + case NUdf::EDataSlot::Date32: + case NUdf::EDataSlot::Datetime64: + case NUdf::EDataSlot::Timestamp64: + case NUdf::EDataSlot::Interval64: + case NUdf::EDataSlot::TzDate32: + case NUdf::EDataSlot::TzDatetime64: + case NUdf::EDataSlot::TzTimestamp64: + return ParseFrameRangeBounds<TCoInterval64, NUdf::TDataType<NUdf::TInterval64>::TLayout>(frameSpec, ctx); + default: + return TryParseRangeForNotNumericFrameSettings(frameSpec, ErrorNonNumericSingleColumn, ctx); + } + } + return TryParseRangeForNotNumericFrameSettings(frameSpec, ErrorNonNumericSingleColumn, ctx); +} + +TMaybe<TWindowFrameSettings> TryParseWindowFrameSettingsFromList(const TExprNode& node, TExprContext& ctx) { + auto frameSpec = node.Child(0); + bool isCompact = 
GetSettingByName(frameSpec->Children(), "compact") != nullptr; + + if (node.IsCallable("WinOnRows")) { + if (!GetSettingByName(frameSpec->Children(), "begin") || !GetSettingByName(frameSpec->Children(), "end")) { + ctx.AddError(TIssue(ctx.GetPosition(frameSpec->Pos()), + TStringBuilder() << "Expected begin and end for row frames.")); + return {}; + } + auto leftParse = ParseFrameRowsBounds(GetSettingByName(frameSpec->Children(), "begin"), ctx); + if (!leftParse.has_value()) { + ctx.AddError(leftParse.error()); + return {}; + } + + auto rightParse = ParseFrameRowsBounds(GetSettingByName(frameSpec->Children(), "end"), ctx); + if (!rightParse.has_value()) { + ctx.AddError(rightParse.error()); + return {}; + } + + auto frame = TWindowFrameSettings::TRowFrame{leftParse.value(), rightParse.value()}; + return TWindowFrameSettings(frame, /*neverEmpty=*/CheckRowFrameNeverEmpty(frame), /*compact=*/isCompact, /*isAlwaysEmpty=*/CheckRowFrameIsAlwaysEmpty(frame)); + } else if (node.IsCallable("WinOnRange")) { + return TryParseRangeWindowFrameSettings(frameSpec, ctx); + } else { + YQL_ENSURE(node.IsCallable("WinOnGroups")); + TWindowFrameSettings::TGroupsFrame frame{}; + return TWindowFrameSettings(frame, /*neverEmpty=*/false, /*compact=*/isCompact, /*isAlwaysEmpty=*/false); + } +} + +EFrameBoundsNewType GetFrameTypeNew(const TWindowFrameSettings& frameSettings) { + if (frameSettings.IsFullPartition()) { + return EFrameBoundsNewType::FULL; + } + if (frameSettings.IsAlwaysEmpty()) { + return EFrameBoundsNewType::EMPTY; + } + if (frameSettings.IsLeftInf() && !frameSettings.IsRightInf()) { + return EFrameBoundsNewType::INCREMENTAL; + } + return EFrameBoundsNewType::GENERIC; +} + +const TItemExprType* GetSortedColumnType(const TExprNode::TPtr& sortTraits, TExprContext& ctx) { + YQL_ENSURE(sortTraits->IsCallable("SortTraits")); + + auto sortKeyLambda = sortTraits->ChildPtr(2); + YQL_ENSURE(sortKeyLambda->IsLambda()); + const TTypeAnnotationNode* sortKeyType = 
sortKeyLambda->GetTypeAnn(); + YQL_ENSURE(sortKeyType); + return ctx.MakeType<TItemExprType>(SortedColumnMemberName, sortKeyType); +} + +bool ShouldAddSortedColumn(ESortOrder sortOrder) { + return sortOrder != ESortOrder::Unimportant; +} + +TExprNode::TPtr PushSortedColumnInsideStream(const TExprNode::TPtr& partitionsByKeys, TExprContext& ctx) { + YQL_ENSURE(partitionsByKeys->IsCallable("PartitionsByKeys")); + YQL_ENSURE(partitionsByKeys->ChildrenSize() == 5); + + auto pos = partitionsByKeys->Pos(); + auto list = partitionsByKeys->ChildPtr(0); + auto keySelector = partitionsByKeys->ChildPtr(1); + auto sortDirection = partitionsByKeys->ChildPtr(2); + auto sortKeySelector = partitionsByKeys->ChildPtr(3); + auto handler = partitionsByKeys->ChildPtr(4); + + // If sortKeySelector is Void, nothing to do. + if (sortKeySelector->IsCallable("Void")) { + return partitionsByKeys; + } + + // Add sorted column to the input stream using Map. + auto rowArg = ctx.NewArgument(pos, "row"); + auto addMemberBody = ctx.Builder(pos) + .Callable("AddMember") + .Add(0, rowArg) + .Atom(1, SortedColumnMemberName) + .Apply(2, ctx.DeepCopyLambda(*sortKeySelector)) + .With(0, rowArg) + .Seal() + .Seal() + .Build(); + + auto addMemberLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, {rowArg}), std::move(addMemberBody)); + + auto listWithSortedColumn = ctx.Builder(pos) + .Callable("Map") + .Add(0, list) + .Add(1, addMemberLambda) + .Seal() + .Build(); - if (first.Defined() && last.Defined() && first > last) { +#if 0 // TODO(atarasov5): Decide what to do with double lambda computation here and in non numeric range pipeline. + // Create new sortKeySelector that just extracts the sorted column. 
+ auto newSortKeySelector = ctx.Builder(pos) + .Lambda() + .Param("item") + .Callable("Member") + .Arg(0, "item") + .Atom(1, SortedColumnMemberName) + .Seal() + .Seal() + .Build(); +#else // #if 0 + auto newSortKeySelector = sortKeySelector; +#endif // #if 0 + + // Build new PartitionsByKeys with modified arguments. + return ctx.Builder(pos) + .Callable("PartitionsByKeys") + .Add(0, listWithSortedColumn) + .Add(1, keySelector) + .Add(2, sortDirection) + .Add(3, newSortKeySelector) + .Add(4, handler) + .Seal() + .Build(); +} + +EFrameBoundsType FrameBoundsType(const TWindowFrameSettings::TRowFrame& settings) { + auto first = settings.first; + auto last = settings.second; + + if (CheckRowFrameIsAlwaysEmpty(settings)) { return EFrameBoundsType::EMPTY; } @@ -147,11 +679,15 @@ struct TRawTrait { TWindowFrameSettings FrameSettings; }; -struct TCalcOverWindowTraits { - TMap<TStringBuf, TRawTrait> RawTraits; +struct TQueueParamsFromTraits { ui64 MaxDataOutpace = 0; ui64 MaxDataLag = 0; ui64 MaxUnboundedPrecedingLag = 0; +}; + +struct TCalcOverWindowTraits { + TMap<TStringBuf, TRawTrait> RawTraits; + TQueueParamsFromTraits QueueParams; const TTypeAnnotationNode* LagQueueItemType = nullptr; }; @@ -413,13 +949,24 @@ TExprNode::TPtr ApplyDistinctForCalculateLambda(TExprNode::TPtr calculateLambda, .Build(); } +TInputRow FromSettingsNumbers(i64 number) { + if (number >= 0) { + return TInputRow{static_cast<ui64>(number), EDirection::Following}; + } else { + return TInputRow{static_cast<ui64>(-number), EDirection::Preceding}; + } +} + +TInputRow FromSettingsNumbers(TMaybe<i32> number, EDirection directionIfInf) { + if (!number) { + return TInputRow(TInputRow::TUnbounded{}, directionIfInf); + } + return FromSettingsNumbers(*number); +} + TCalcOverWindowTraits ExtractCalcOverWindowTraits(const TExprNode::TPtr& frames, const TStructExprType& rowType, TExprContext& ctx) { TCalcOverWindowTraits result; - auto& maxDataOutpace = result.MaxDataOutpace; - auto& maxDataLag = 
result.MaxDataLag; - auto& maxUnboundedPrecedingLag = result.MaxUnboundedPrecedingLag; - TVector<const TItemExprType*> lagQueueStructItems; for (auto& winOn : frames->ChildrenList()) { TWindowFrameSettings frameSettings = TWindowFrameSettings::Parse(*winOn, ctx); @@ -429,11 +976,10 @@ TCalcOverWindowTraits ExtractCalcOverWindowTraits(const TExprNode::TPtr& frames, const EFrameType ft = frameSettings.GetFrameType(); if (ft == EFrameType::FrameByRows) { - const EFrameBoundsType frameType = FrameBoundsType(frameSettings); - const auto frameFirst = frameSettings.GetFirstOffset(); - const auto frameLast = frameSettings.GetLastOffset(); + const auto frameFirst = frameSettings.GetRowFrame().first; + const auto frameLast = frameSettings.GetRowFrame().second; - if (frameType != EFrameBoundsType::EMPTY) { + if (!frameSettings.IsAlwaysEmpty()) { if (!frameLast.Defined() || *frameLast > 0) { frameOutpace = frameLast.Defined() ? ui64(*frameLast) : Max<ui64>(); } @@ -442,11 +988,6 @@ TCalcOverWindowTraits ExtractCalcOverWindowTraits(const TExprNode::TPtr& frames, frameLag = ui64(0 - *frameFirst); } } - } else { - // The only frame we currently support - YQL_ENSURE(ft == EFrameType::FrameByRange); - YQL_ENSURE(IsUnbounded(frameSettings.GetFirst())); - YQL_ENSURE(IsCurrentRow(frameSettings.GetLast())); } const auto& winOnChildren = winOn->ChildrenList(); YQL_ENSURE(winOnChildren.size() > 1); @@ -462,15 +1003,14 @@ TCalcOverWindowTraits ExtractCalcOverWindowTraits(const TExprNode::TPtr& frames, YQL_ENSURE(!result.RawTraits.contains(name)); auto traits = item->Child(1); - - auto& rawTraits = result.RawTraits[name]; - rawTraits.FrameSettings = frameSettings; + result.RawTraits.insert({name, TRawTrait{.FrameSettings = frameSettings}}); + auto& rawTraits = result.RawTraits.find(name)->second; rawTraits.Pos = traits->Pos(); YQL_ENSURE(traits->IsCallable({"WindowTraits","CumeDist"}) || ft == EFrameType::FrameByRows, "Non-canonical frame for window functions"); if 
(traits->IsCallable("WindowTraits")) { - maxDataOutpace = Max(maxDataOutpace, frameOutpace); - maxDataLag = Max(maxDataLag, frameLag); + result.QueueParams.MaxDataOutpace = Max(result.QueueParams.MaxDataOutpace, frameOutpace); + result.QueueParams.MaxDataLag = Max(result.QueueParams.MaxDataLag, frameLag); auto initLambda = traits->ChildPtr(1); auto updateLambda = traits->ChildPtr(2); @@ -509,10 +1049,10 @@ TCalcOverWindowTraits ExtractCalcOverWindowTraits(const TExprNode::TPtr& frames, rawTraits.DefaultValue = traits->ChildPtr(5); if (ft == EFrameType::FrameByRows) { - const EFrameBoundsType frameType = FrameBoundsType(frameSettings); - const auto frameLast = frameSettings.GetLastOffset(); + const EFrameBoundsType frameType = FrameBoundsType(frameSettings.GetRowFrame()); + const auto frameLast = frameSettings.GetRowFrame().second; if (frameType == EFrameBoundsType::LAGGING) { - maxUnboundedPrecedingLag = Max(maxUnboundedPrecedingLag, ui64(abs(*frameLast))); + result.QueueParams.MaxUnboundedPrecedingLag = Max(result.QueueParams.MaxUnboundedPrecedingLag, ui64(abs(*frameLast))); lagQueueStructItems.push_back(ctx.MakeType<TItemExprType>(name, rawTraits.OutputType)); } } @@ -528,9 +1068,9 @@ TCalcOverWindowTraits ExtractCalcOverWindowTraits(const TExprNode::TPtr& frames, } if (lead < 0) { - maxDataLag = Max(maxDataLag, ui64(abs(lead))); + result.QueueParams.MaxDataLag = Max(result.QueueParams.MaxDataLag, ui64(abs(lead))); } else { - maxDataOutpace = Max<ui64>(maxDataOutpace, lead); + result.QueueParams.MaxDataOutpace = Max<ui64>(result.QueueParams.MaxDataOutpace, lead); } auto lambdaInputType = @@ -578,8 +1118,11 @@ TExprNode::TPtr BuildDouble(TPositionHandle pos, double value, TExprContext& ctx .Build(); } -TExprNode::TPtr BuildQueuePeek(TPositionHandle pos, const TExprNode::TPtr& queue, ui64 index, const TExprNode::TPtr& dependsOn, - TExprContext& ctx) +TExprNode::TPtr BuildQueuePeek(TPositionHandle pos, + const TExprNode::TPtr& queue, + ui64 index, + const 
TExprNode::TPtr& dependsOn, + TExprContext& ctx) { return ctx.Builder(pos) .Callable("QueuePeek") @@ -613,9 +1156,93 @@ TExprNode::TPtr BuildQueueRange(TPositionHandle pos, const TExprNode::TPtr& queu .Build(); } -TExprNode::TPtr BuildQueue(TPositionHandle pos, const TTypeAnnotationNode& itemType, ui64 queueSize, ui64 initSize, - const TExprNode::TPtr& dependsOn, TExprContext& ctx) +TExprNode::TPtr BuildWinFrame(TPositionHandle pos, + const TExprNode::TPtr& queue, + THandle handle, + const TExprNode::TPtr& dependsOn, + TExprContext& ctx, + bool isSingleElement) +{ + auto queueData = ctx.Builder(pos) + .Callable("WinFrame") + .Add(0, queue) + .Add(1, BuildUint64(pos, handle.Index(), ctx)) + .Add(2, MakeBool(pos, handle.IsIncremental(), ctx)) + .Add(3, MakeBool(pos, handle.IsRange(), ctx)) + .Add(4, MakeBool(pos, isSingleElement, ctx)) + .Callable(5, "DependsOn") + .Add(0, dependsOn) + .Seal() + .Seal() + .Build(); + + if (!isSingleElement) { + return ctx.Builder(pos) + .Callable("OrderedMap") + .Add(0, queueData) + .Lambda(1) + .Param("item") + .Arg("item") + .Seal() + .Seal() + .Build(); + } + + return queueData; +} + +struct TWinFramesCollectorBuildResult { + TExprNodePtr Queue; + TExprNodePtr WinFramesCollector; +}; + +TWinFramesCollectorBuildResult BuildWinFramesCollector(TPositionHandle pos, + TExprNode::TPtr stream, + TExprNode::TPtr itemType, + const NWindow::TStringCoreWinFramesCollectorParams& params, + TStringBuf rangeCallableName, + TExprNode::TPtr dependsOn, + TExprContext& ctx) { + auto unboundedQueue = ctx.Builder(pos) + .Callable("QueueCreate") + .Add(0, itemType) + .Add(1, ctx.NewCallable(pos, "Void", {})) + .Add(2, BuildUint64(pos, 0, ctx)) + .Callable(3, "DependsOn") + .Add(0, dependsOn) + .Seal() + .Seal() + .Build(); + + auto winFramesCollector = ctx.Builder(pos) + .Callable("WinFramesCollector") + .Add(0, stream) + .Add(1, unboundedQueue) + .Add(2, SerializeWindowAggregatorParamsToExpr(params, pos, rangeCallableName, ctx)) + .Seal() + .Build(); 
+ + return {.Queue = std::move(unboundedQueue), .WinFramesCollector = std::move(winFramesCollector)}; +} + +TWinFramesCollectorBuildResult BuildWinFramesCollector(TPositionHandle pos, + TExprNode::TPtr stream, + const TTypeAnnotationNode& itemType, + const NWindow::TStringCoreWinFramesCollectorParams& params, + TStringBuf rangeCallableName, + TExprNode::TPtr dependsOn, + TExprContext& ctx) +{ + return BuildWinFramesCollector(pos, stream, ExpandType(pos, itemType, ctx), params, rangeCallableName, dependsOn, ctx); +} + +TExprNode::TPtr BuildQueue(TPositionHandle pos, + const TExprNode::TPtr& itemType, + ui64 queueSize, + ui64 initSize, + const TExprNode::TPtr& dependsOn, + TExprContext& ctx) { TExprNode::TPtr size; if (queueSize == Max<ui64>()) { size = ctx.NewCallable(pos, "Void", {}); @@ -625,7 +1252,7 @@ TExprNode::TPtr BuildQueue(TPositionHandle pos, const TTypeAnnotationNode& itemT return ctx.Builder(pos) .Callable("QueueCreate") - .Add(0, ExpandType(pos, itemType, ctx)) + .Add(0, itemType) .Add(1, size) .Add(2, BuildUint64(pos, initSize, ctx)) .Callable(3, "DependsOn") @@ -635,10 +1262,16 @@ TExprNode::TPtr BuildQueue(TPositionHandle pos, const TTypeAnnotationNode& itemT .Build(); } +TExprNode::TPtr BuildQueue(TPositionHandle pos, const TTypeAnnotationNode& itemType, ui64 queueSize, ui64 initSize, + const TExprNode::TPtr& dependsOn, TExprContext& ctx) +{ + return BuildQueue(pos, ExpandType(pos, itemType, ctx), queueSize, initSize, dependsOn, ctx); +} + TExprNode::TPtr CoalesceQueueOutput(TPositionHandle pos, const TExprNode::TPtr& output, bool rawOutputIsOptional, const TExprNode::TPtr& defaultValue, TExprContext& ctx) { - // output is has type Optional<RawOutputType> + // Output has type Optional<RawOutputType>. 
if (!rawOutputIsOptional) { return ctx.Builder(pos) .Callable("Coalesce") @@ -712,6 +1345,28 @@ TExprNode::TPtr BuildInitLambdaForChain1Map(TPositionHandle pos, const TExprNode .Build(); } +TExprNode::TPtr Unwrap(TPositionHandle pos, TExprNode::TPtr output, TExprNode::TPtr calculate, TExprNode::TPtr originalInit, TExprNode::TPtr rowArg, TExprContext& ctx) { + // Output is always non-empty optional in this case + // we do IfPresent with some fake output value to remove optional + // this will have exactly the same result as Unwrap(output). + return ctx.Builder(pos) + .Callable("IfPresent") + .Add(0, output) + .Lambda(1) + .Param("unwrapped") + .Arg("unwrapped") + .Seal() + .Apply(2, calculate) + .With(0) + .Apply(originalInit) + .With(0, rowArg) + .Seal() + .Done() + .Seal() + .Seal() + .Build(); +} + TExprNode::TPtr BuildUpdateLambdaForChain1Map(TPositionHandle pos, const TExprNode::TPtr& updateStateLambda, const TExprNode::TPtr& calculateLambda, TExprContext& ctx) { @@ -778,8 +1433,17 @@ public: virtual TExprNode::TPtr BuildUpdateLambda(const TExprNode::TPtr& dataQueue, TExprContext& ctx) const = 0; virtual TExprNode::TPtr ExtractLaggingOutput(const TExprNode::TPtr& lagQueue, - const TExprNode::TPtr& dependsOn, TExprContext& ctx) const - { + const TExprNode::TPtr& dependsOn, + TExprContext& ctx) const { + Y_UNUSED(lagQueue); + Y_UNUSED(dependsOn); + Y_UNUSED(ctx); + return {}; + } + + virtual TExprNode::TPtr ExtractShiftedOutput(const TExprNode::TPtr& lagQueue, + const TExprNode::TPtr& dependsOn, + TExprContext& ctx) const { Y_UNUSED(lagQueue); Y_UNUSED(dependsOn); Y_UNUSED(ctx); @@ -794,7 +1458,9 @@ private: class TChain1MapTraitsLagLead : public TChain1MapTraits { public: - TChain1MapTraitsLagLead(TStringBuf name, const TRawTrait& raw, TMaybe<ui64> queueOffset) + using TQueueParam = std::variant<ui64, THandle>; + + TChain1MapTraitsLagLead(TStringBuf name, const TRawTrait& raw, TMaybe<TQueueParam> queueOffset) : TChain1MapTraits(name, raw.Pos) , 
QueueOffset_(queueOffset) , LeadLagLambda_(raw.CalculateLambda) @@ -845,7 +1511,7 @@ private: auto body = ctx.Builder(GetPos()) .Callable("IfPresent") - .Add(0, BuildQueuePeek(GetPos(), dataQueue, *QueueOffset_, rowArg, ctx)) + .Add(0, GetSingleElement(dataQueue, rowArg, ctx)) .Add(1, AddOptionalIfNotAlreadyOptionalOrNull(LeadLagLambda_, ctx)) .Callable(2, "Null") .Seal() @@ -855,7 +1521,15 @@ private: return ctx.NewLambda(GetPos(), ctx.NewArguments(GetPos(), {rowArg}), std::move(body)); } - const TMaybe<ui64> QueueOffset_; + TExprNode::TPtr GetSingleElement(TExprNode::TPtr dataQueue, TExprNode::TPtr rowArg, TExprContext& ctx) const { + if (std::holds_alternative<THandle>(*QueueOffset_)) { + return ::NYql::BuildWinFrame(GetPos(), dataQueue, std::get<THandle>(*QueueOffset_), rowArg, ctx, /*isSingleElement=*/true); + } else { + return BuildQueuePeek(GetPos(), dataQueue, std::get<ui64>(*QueueOffset_), rowArg, ctx); + } + } + + const TMaybe<TQueueParam> QueueOffset_; const TExprNode::TPtr LeadLagLambda_; }; @@ -1338,6 +2012,10 @@ public: , CalculateLambda_(raw.CalculateLambda) , DefaultValue_(raw.DefaultValue) { + YQL_ENSURE(InitLambda_); + YQL_ENSURE(UpdateLambda_); + YQL_ENSURE(CalculateLambda_); + YQL_ENSURE(DefaultValue_); } protected: @@ -1503,9 +2181,11 @@ private: class TChain1MapTraitsFull : public TChain1MapTraitsStateBase { public: - TChain1MapTraitsFull(TStringBuf name, const TRawTrait& raw, ui64 currentRowIndex) + using TQueueParam = std::variant<ui64, THandle>; + + TChain1MapTraitsFull(TStringBuf name, const TRawTrait& raw, TQueueParam queueParam) : TChain1MapTraitsStateBase(name, raw) - , QueueBegin_(currentRowIndex + 1) + , QueueParam_(queueParam) { } @@ -1519,7 +2199,7 @@ public: auto rowArg = ctx.NewArgument(GetPos(), "row"); auto state = ctx.Builder(GetPos()) .Callable("Fold") - .Add(0, BuildQueueRange(GetPos(), dataQueue, QueueBegin_, Max<ui64>(), rowArg, ctx)) + .Add(0, BuildQueueRange(dataQueue, rowArg, ctx)) .Apply(1, originalInit) .With(0, 
rowArg) .Seal() @@ -1559,15 +2239,96 @@ public: } private: - const ui64 QueueBegin_; + TExprNode::TPtr BuildQueueRange(const TExprNode::TPtr& queue, + const TExprNode::TPtr& dependsOn, + TExprContext& ctx) const { + if (std::holds_alternative<ui64>(QueueParam_)) { + return ::NYql::BuildQueueRange(GetPos(), queue, std::get<ui64>(QueueParam_), Max<ui64>(), dependsOn, ctx); + } else { + return ctx.Builder(GetPos()) + .Callable("ListSkip") + .Add(0, ::NYql::BuildWinFrame(GetPos(), queue, std::get<THandle>(QueueParam_), dependsOn, ctx, /*isSingleElement=*/false)) + .Add(1, BuildUint64(GetPos(), 1, ctx)) + .Seal() + .Build(); + } + } + + const TQueueParam QueueParam_; +}; + +class TChain1MapTraitsIncremental : public TChain1MapTraitsStateBase { +public: + TChain1MapTraitsIncremental(TStringBuf name, const TRawTrait& raw, TMaybe<THandle> handle) + : TChain1MapTraitsStateBase(name, raw) + , Handle_(handle) + , OutputIsOptional_(raw.OutputType->IsOptionalOrNull()) + { + } + + // Lambda(row) -> AsTuple(output, state) + TExprNode::TPtr BuildInitLambda(const TExprNode::TPtr& dataQueue, TExprContext& ctx) const override { + Y_UNUSED(dataQueue); + return BuildInitLambdaForChain1Map(GetPos(), GetInitLambda(), GetCalculateLambda(), ctx); + } + + // Lambda(row, state) -> AsTuple(output, state) + TExprNode::TPtr BuildUpdateLambda(const TExprNode::TPtr& dataQueue, TExprContext& ctx) const override { + Y_UNUSED(dataQueue); + return BuildUpdateLambdaForChain1Map(GetPos(), GetUpdateLambda(), GetCalculateLambda(), ctx); + } + + TExprNode::TPtr ExtractShiftedOutput(const TExprNode::TPtr& queue, + const TExprNode::TPtr& dependsOn, + TExprContext& ctx) const override + { + if (!Handle_.Defined()) { + return {}; + } + + if (FrameNeverEmpty_) { + return ctx.Builder(GetPos()) + .Callable("Member") + .Callable(0, "Unwrap") + .Add(0, ::NYql::BuildWinFrame(GetPos(), queue, *Handle_, dependsOn, ctx, /*isSingleElement=*/true)) + .Seal() + .Atom(1, GetName()) + .Seal() + .Build(); + } + + auto 
output = ctx.Builder(GetPos()) + .Callable("Map") + .Add(0, ::NYql::BuildWinFrame(GetPos(), queue, *Handle_, dependsOn, ctx, /*isSingleElement=*/true)) + .Lambda(1) + .Param("struct") + .Callable("Member") + .Arg(0, "struct") + .Atom(1, GetName()) + .Seal() + .Seal() + .Seal() + .Build(); + return CoalesceQueueOutput(GetPos(), output, OutputIsOptional_, GetDefaultValue(), ctx); + } + +private: + const TMaybe<THandle> Handle_; + const bool OutputIsOptional_; }; class TChain1MapTraitsGeneric : public TChain1MapTraitsStateBase { public: - TChain1MapTraitsGeneric(TStringBuf name, const TRawTrait& raw, ui64 queueBegin, ui64 queueEnd) + struct TFixedQueueRange { + ui64 QueueBegin; + ui64 QueueEnd; + }; + + using TInputQueueRange = std::variant<THandle, TFixedQueueRange>; + + TChain1MapTraitsGeneric(TStringBuf name, const TRawTrait& raw, TInputQueueRange queueParam) : TChain1MapTraitsStateBase(name, raw) - , QueueBegin_(queueBegin) - , QueueEnd_(queueEnd) + , QueueParam_(queueParam) , OutputIsOptional_(raw.OutputType->IsOptionalOrNull()) { } @@ -1599,6 +2360,18 @@ public: } private: + TExprNode::TPtr BuildQueueRange(TPositionHandle pos, const TExprNode::TPtr& queue, + const TExprNode::TPtr& dependsOn, TExprContext& ctx) const { + if (std::holds_alternative<TFixedQueueRange>(QueueParam_)) { + auto [from, to] = std::get<TFixedQueueRange>(QueueParam_); + return ::NYql::BuildQueueRange(pos, queue, from, to, dependsOn, ctx); + } else { + auto handle = std::get<THandle>(QueueParam_); + return ::NYql::BuildWinFrame(pos, queue, handle, dependsOn, ctx, /*isSingleElement=*/false); + } + return nullptr; + } + TExprNode::TPtr BuildFinalOutput(const TExprNode::TPtr& rowArg, const TExprNode::TPtr& dataQueue, TExprContext& ctx) const { YQL_ENSURE(dataQueue); auto originalInit = GetInitLambda(); @@ -1607,7 +2380,7 @@ private: auto fold1 = ctx.Builder(GetPos()) .Callable("Fold1") - .Add(0, BuildQueueRange(GetPos(), dataQueue, QueueBegin_, QueueEnd_, rowArg, ctx)) + .Add(0, 
BuildQueueRange(GetPos(), dataQueue, rowArg, ctx)) .Add(1, ctx.DeepCopyLambda(*originalInit)) .Add(2, ctx.DeepCopyLambda(*originalUpdate)) .Seal() @@ -1623,32 +2396,13 @@ private: .Build(); if (FrameNeverEmpty_) { - // output is always non-empty optional in this case - // we do IfPresent with some fake output value to remove optional - // this will have exactly the same result as Unwrap(output) - return ctx.Builder(GetPos()) - .Callable("IfPresent") - .Add(0, output) - .Lambda(1) - .Param("unwrapped") - .Arg("unwrapped") - .Seal() - .Apply(2, calculate) - .With(0) - .Apply(originalInit) - .With(0, rowArg) - .Seal() - .Done() - .Seal() - .Seal() - .Build(); + return Unwrap(GetPos(), /*output=*/output, /*calculate=*/calculate, /*originalInit=*/originalInit, /*rowArg=*/rowArg, ctx); } return CoalesceQueueOutput(GetPos(), output, OutputIsOptional_, GetDefaultValue(), ctx); } - const ui64 QueueBegin_; - const ui64 QueueEnd_; + TInputQueueRange QueueParam_; const bool OutputIsOptional_; }; @@ -1721,8 +2475,198 @@ struct TQueueParams { const TTypeAnnotationNode* LagQueueItemType = nullptr; }; -TVector<TChain1MapTraits::TPtr> BuildFoldMapTraits(TQueueParams& queueParams, const TExprNode::TPtr& frames, - const TMaybe<TString>& partitionRowsColumn, const TStructExprType& rowType, TExprContext& ctx) { +TChain1MapTraits::TPtr ProcessRowFrameAggregateTraitNewPipeline(const TRawTrait& trait, + TStringBuf name, + TCoreWinFrameCollectorBounds<TString>& bounds, + TCoreWinFrameCollectorBounds<TString>& incrementalBounds) { + switch (GetFrameTypeNew(trait.FrameSettings)) { + case EFrameBoundsNewType::INCREMENTAL: { + auto last = trait.FrameSettings.GetRowFrame().second; + MKQL_ENSURE(last.Defined(), "Last offset required."); + auto getIncrementalHandle = [&]() -> TMaybe<THandle> { + if (*last == 0) { + return TMaybe<THandle>(); + } + return incrementalBounds.AddRowIncremental(FromSettingsNumbers(*last)); + + }; + TMaybe<THandle> handle = getIncrementalHandle(); + return new 
TChain1MapTraitsIncremental(name, trait, handle); + } + case EFrameBoundsNewType::FULL: { + auto handle = bounds.AddRow(TInputRowWindowFrame(TInputRow{TInputRow::TUnbounded{}, EDirection::Preceding}, TInputRow{TInputRow::TUnbounded{}, EDirection::Following})); + return new TChain1MapTraitsFull(name, trait, handle); + } + case EFrameBoundsNewType::GENERIC: { + auto first = trait.FrameSettings.GetRowFrame().first; + auto last = trait.FrameSettings.GetRowFrame().second; + YQL_ENSURE(first, "First offset must be defined."); + auto handle = bounds.AddRow({FromSettingsNumbers(*first), FromSettingsNumbers(last, EDirection::Following)}); + return new TChain1MapTraitsGeneric(name, trait, handle); + } + case EFrameBoundsNewType::EMPTY: { + return new TChain1MapTraitsEmpty(name, trait); + } + } +} + +TChain1MapTraits::TPtr ProcessRangeFrameAggregateTraitNewPipeline(const TRawTrait& trait, + TStringBuf name, + TCoreWinFrameCollectorBounds<TString>& bounds, + TCoreWinFrameCollectorBounds<TString>& incrementalBounds) { + switch (GetFrameTypeNew(trait.FrameSettings)) { + case EFrameBoundsNewType::INCREMENTAL: { + auto last = trait.FrameSettings.GetRangeFrame().GetLast(); + MKQL_ENSURE(!last.IsInf(), "Last offset required."); + auto getIncrementalHandle = [&]() -> TMaybe<THandle> { + return incrementalBounds.AddRangeIncremental(last); + }; + TMaybe<THandle> handle = getIncrementalHandle(); + return new TChain1MapTraitsIncremental(name, trait, handle); + } + case EFrameBoundsNewType::FULL: { + // Note: AddRow since UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING are the same for all frame types. 
+ auto handle = bounds.AddRow(TInputRowWindowFrame(TInputRow{TInputRow::TUnbounded{}, EDirection::Preceding}, TInputRow{TInputRow::TUnbounded{}, EDirection::Following})); + return new TChain1MapTraitsFull(name, trait, handle); + } + case EFrameBoundsNewType::GENERIC: { + auto first = trait.FrameSettings.GetRangeFrame().GetFirst(); + auto last = trait.FrameSettings.GetRangeFrame().GetLast(); + YQL_ENSURE(!first.IsInf(), "First offset must be defined."); + auto handle = bounds.AddRange({first, last}); + return new TChain1MapTraitsGeneric(name, trait, handle); + } + case EFrameBoundsNewType::EMPTY: { + return new TChain1MapTraitsEmpty(name, trait); + } + } +} + +TChain1MapTraits::TPtr ProcessRowFrameAggregateTraitOldPipeline(TQueueParams& queueParams, + const TRawTrait& trait, + TStringBuf name, + ui64 currentRowIndex) { + auto first = trait.FrameSettings.GetRowFrame().first; + auto last = trait.FrameSettings.GetRowFrame().second; + switch (FrameBoundsType(trait.FrameSettings.GetRowFrame())) { + case EFrameBoundsType::CURRENT: + case EFrameBoundsType::LAGGING: { + TMaybe<ui64> lagQueueIndex; + auto end = *last; + YQL_ENSURE(end <= 0); + if (end < 0) { + YQL_ENSURE(queueParams.LagQueueSize >= ui64(0 - end)); + lagQueueIndex = queueParams.LagQueueSize + end; + } + return new TChain1MapTraitsCurrentOrLagging(name, trait, lagQueueIndex); + } + case EFrameBoundsType::LEADING: { + YQL_ENSURE(last, "Last offset must be specified."); + auto end = *last; + YQL_ENSURE(end > 0); + ui64 lastRowIndex = currentRowIndex + ui64(end); + return new TChain1MapTraitsLeading(name, trait, currentRowIndex, lastRowIndex); + } + case EFrameBoundsType::FULL: { + return new TChain1MapTraitsFull(name, trait, currentRowIndex + 1); + } + case EFrameBoundsType::GENERIC: { + queueParams.DataQueueNeeded = true; + YQL_ENSURE(first.Defined()); + ui64 beginIndex = currentRowIndex + *first; + ui64 endIndex = last.Defined() ? 
(currentRowIndex + *last + 1) : Max<ui64>(); + return new TChain1MapTraitsGeneric(name, trait, TChain1MapTraitsGeneric::TFixedQueueRange{beginIndex, endIndex}); + } + case EFrameBoundsType::EMPTY: { + return new TChain1MapTraitsEmpty(name, trait); + } + } +} + +TChain1MapTraits::TPtr ProcessLeadLag(const TRawTrait& trait, + TStringBuf name, + TCoreWinFrameCollectorBounds<TString>& bounds, + ui64 currentRowIndex, + TTypeAnnotationContext& types) { + YQL_ENSURE(!trait.UpdateLambda); + YQL_ENSURE(!trait.DefaultValue); + if (!IsWindowNewPipelineEnabled(types)) { + TMaybe<ui64> queueOffset; + if (*trait.CalculateLambdaLead != 0) { + queueOffset = currentRowIndex + *trait.CalculateLambdaLead; + } + return new TChain1MapTraitsLagLead(name, trait, queueOffset); + } else { + if (*trait.CalculateLambdaLead == 0) { + return new TChain1MapTraitsLagLead(name, trait, {}); + } else { + auto handle = bounds.AddRow({FromSettingsNumbers(*trait.CalculateLambdaLead), FromSettingsNumbers(*trait.CalculateLambdaLead)}); + return new TChain1MapTraitsLagLead(name, trait, handle); + } + } +} + +TChain1MapTraits::TPtr ProcessRowShiftIndependetTraits(const TRawTrait& trait, + TStringBuf name, + const TMaybe<TString>& partitionRowsColumn) { + YQL_ENSURE(!trait.UpdateLambda); + YQL_ENSURE(!trait.DefaultValue); + if (trait.CalculateLambda->IsCallable("RowNumber")) { + return new TChain1MapTraitsRowNumber(name, trait); + } else if (trait.CalculateLambda->IsCallable("Rank")) { + return new TChain1MapTraitsRank(name, trait); + } else if (trait.CalculateLambda->IsCallable("CumeDist")) { + return new TChain1MapTraitsCumeDist(name, trait, *partitionRowsColumn); + } else if (trait.CalculateLambda->IsCallable("NTile")) { + return new TChain1MapTraitsNTile(name, trait, *partitionRowsColumn); + } else if (trait.CalculateLambda->IsCallable("PercentRank")) { + return new TChain1MapTraitsPercentRank(name, trait, *partitionRowsColumn); + } else { + YQL_ENSURE(trait.CalculateLambda->IsCallable("DenseRank")); + 
return new TChain1MapTraitsDenseRank(name, trait); + } +} + +TChain1MapTraits::TPtr ProcessFrameIndependedTraits(const TRawTrait& trait, + TStringBuf name, + TCoreWinFrameCollectorBounds<TString>& bounds, + const TMaybe<TString>& partitionRowsColumn, + ui64 currentRowIndex, + TTypeAnnotationContext& types) { + YQL_ENSURE(!trait.UpdateLambda); + YQL_ENSURE(!trait.DefaultValue); + if (trait.CalculateLambdaLead.Defined()) { + return ProcessLeadLag(trait, name, bounds, currentRowIndex, types); + } else { + return ProcessRowShiftIndependetTraits(trait, name, partitionRowsColumn); + } +} + +TVector<TChain1MapTraits::TPtr> BuildFoldMapTraitsForNonNumericRange(const TExprNode::TPtr& frames, const TStructExprType& rowType, const TMaybe<TString>& partitionRowsColumn, TExprContext& ctx) { + TVector<TChain1MapTraits::TPtr> result; + TCalcOverWindowTraits traits = ExtractCalcOverWindowTraits(frames, rowType, ctx); + for (const auto& item : traits.RawTraits) { + TStringBuf name = item.first; + const TRawTrait& trait = item.second; + if (!trait.InitLambda) { + result.push_back(ProcessRowShiftIndependetTraits(trait, name, partitionRowsColumn)); + continue; + } + YQL_ENSURE(trait.FrameSettings.GetFrameType() == EFrameType::FrameByRange); + YQL_ENSURE(trait.FrameSettings.IsLeftInf() && trait.FrameSettings.IsRightCurrent()); + result.push_back(new TChain1MapTraitsIncremental(name, trait, {})); + } + return result; +} + +TVector<TChain1MapTraits::TPtr> BuildFoldMapTraitsForRowsAndNumericRanges(TQueueParams& queueParams, + TCoreWinFrameCollectorBounds<TString>& bounds, + TCoreWinFrameCollectorBounds<TString>& incrementalBounds, + const TExprNode::TPtr& frames, + const TMaybe<TString>& partitionRowsColumn, + const TStructExprType& rowType, + TExprContext& ctx, + TTypeAnnotationContext& typeCtx) { queueParams = {}; TVector<TChain1MapTraits::TPtr> result; @@ -1730,15 +2674,15 @@ TVector<TChain1MapTraits::TPtr> BuildFoldMapTraits(TQueueParams& queueParams, co TCalcOverWindowTraits traits = 
ExtractCalcOverWindowTraits(frames, rowType, ctx); if (traits.LagQueueItemType->Cast<TStructExprType>()->GetSize()) { - YQL_ENSURE(traits.MaxUnboundedPrecedingLag > 0); - queueParams.LagQueueSize = traits.MaxUnboundedPrecedingLag; + YQL_ENSURE(traits.QueueParams.MaxUnboundedPrecedingLag > 0); + queueParams.LagQueueSize = traits.QueueParams.MaxUnboundedPrecedingLag; queueParams.LagQueueItemType = traits.LagQueueItemType; } ui64 currentRowIndex = 0; - if (traits.MaxDataOutpace || traits.MaxDataLag) { - queueParams.DataOutpace = traits.MaxDataOutpace; - queueParams.DataLag = traits.MaxDataLag; + if (traits.QueueParams.MaxDataOutpace || traits.QueueParams.MaxDataLag) { + queueParams.DataOutpace = traits.QueueParams.MaxDataOutpace; + queueParams.DataLag = traits.QueueParams.MaxDataLag; currentRowIndex = queueParams.DataLag; queueParams.DataQueueNeeded = true; } @@ -1748,79 +2692,22 @@ TVector<TChain1MapTraits::TPtr> BuildFoldMapTraits(TQueueParams& queueParams, co const TRawTrait& trait = item.second; if (!trait.InitLambda) { - YQL_ENSURE(!trait.UpdateLambda); - YQL_ENSURE(!trait.DefaultValue); - - if (trait.CalculateLambdaLead.Defined()) { - TMaybe<ui64> queueOffset; - if (*trait.CalculateLambdaLead) { - queueOffset = currentRowIndex + *trait.CalculateLambdaLead; - } - - result.push_back(new TChain1MapTraitsLagLead(name, trait, queueOffset)); - } else if (trait.CalculateLambda->IsCallable("RowNumber")) { - result.push_back(new TChain1MapTraitsRowNumber(name, trait)); - } else if (trait.CalculateLambda->IsCallable("Rank")) { - result.push_back(new TChain1MapTraitsRank(name, trait)); - } else if (trait.CalculateLambda->IsCallable("CumeDist")) { - result.push_back(new TChain1MapTraitsCumeDist(name, trait, *partitionRowsColumn)); - } else if (trait.CalculateLambda->IsCallable("NTile")) { - result.push_back(new TChain1MapTraitsNTile(name, trait, *partitionRowsColumn)); - } else if (trait.CalculateLambda->IsCallable("PercentRank")) { - result.push_back(new 
TChain1MapTraitsPercentRank(name, trait, *partitionRowsColumn)); - } else { - YQL_ENSURE(trait.CalculateLambda->IsCallable("DenseRank")); - result.push_back(new TChain1MapTraitsDenseRank(name, trait)); - } - + result.push_back(ProcessFrameIndependedTraits(trait, name, bounds, partitionRowsColumn, currentRowIndex, typeCtx)); continue; } - if (trait.FrameSettings.GetFrameType() == EFrameType::FrameByRange) { - result.push_back(new TChain1MapTraitsCurrentOrLagging(name, trait, {})); - continue; - } - - YQL_ENSURE(trait.FrameSettings.GetFrameType() == EFrameType::FrameByRows); - switch(FrameBoundsType(trait.FrameSettings)) { - case EFrameBoundsType::CURRENT: - case EFrameBoundsType::LAGGING: { - TMaybe<ui64> lagQueueIndex; - auto end = *trait.FrameSettings.GetLastOffset(); - YQL_ENSURE(end <= 0); - if (end < 0) { - YQL_ENSURE(queueParams.LagQueueSize >= ui64(0 - end)); - lagQueueIndex = queueParams.LagQueueSize + end; - } - - result.push_back(new TChain1MapTraitsCurrentOrLagging(name, trait, lagQueueIndex)); - break; - } - case EFrameBoundsType::LEADING: { - auto end = *trait.FrameSettings.GetLastOffset(); - YQL_ENSURE(end > 0); - ui64 lastRowIndex = currentRowIndex + ui64(end); - result.push_back(new TChain1MapTraitsLeading(name, trait, currentRowIndex, lastRowIndex)); - break; - } - case EFrameBoundsType::FULL: { - result.push_back(new TChain1MapTraitsFull(name, trait, currentRowIndex)); - break; - } - case EFrameBoundsType::GENERIC: { - queueParams.DataQueueNeeded = true; - auto first = trait.FrameSettings.GetFirstOffset(); - auto last = trait.FrameSettings.GetLastOffset(); - YQL_ENSURE(first.Defined()); - ui64 beginIndex = currentRowIndex + *first; - ui64 endIndex = last.Defined() ? 
(currentRowIndex + *last + 1) : Max<ui64>(); - result.push_back(new TChain1MapTraitsGeneric(name, trait, beginIndex, endIndex)); - break; - } - case EFrameBoundsType::EMPTY: { - result.push_back(new TChain1MapTraitsEmpty(name, trait)); - break; + if (IsWindowNewPipelineEnabled(typeCtx)) { + if (trait.FrameSettings.GetFrameType() == EFrameType::FrameByRows) { + result.push_back(ProcessRowFrameAggregateTraitNewPipeline(trait, name, bounds, incrementalBounds)); + } else { + YQL_ENSURE(trait.FrameSettings.GetFrameType() == EFrameType::FrameByRange); + YQL_ENSURE(IsRangeWindowFrameEnabled(typeCtx)); + result.push_back(ProcessRangeFrameAggregateTraitNewPipeline(trait, name, bounds, incrementalBounds)); } + } else { + YQL_ENSURE(trait.FrameSettings.GetFrameType() == EFrameType::FrameByRows); + result.push_back(ProcessRowFrameAggregateTraitOldPipeline(queueParams, trait, name, currentRowIndex)); + continue; } } @@ -1899,11 +2786,32 @@ TExprNode::TPtr SelectMembers(TPositionHandle pos, const T& members, const TExpr return ctx.NewCallable(pos, "AsStruct", std::move(structItems)); } -TExprNode::TPtr HandleLaggingItems(TPositionHandle pos, const TExprNode::TPtr& rowArg, - const TExprNode::TPtr& tupleOfOutputAndState, const TVector<TChain1MapTraits::TPtr>& traits, - const TExprNode::TPtr& lagQueue, TExprContext& ctx) -{ +template<typename T> +TExprNode::TPtr RemoveMembers(TPositionHandle pos, const T& members, const TExprNode::TPtr& structNode, TExprContext& ctx) { + return ctx.Builder(pos) + .Callable("RemoveMembers") + .Add(0, structNode) + .List(1) + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { + size_t i = 0; + for (auto name : members) { + parent.Atom(i++, name); + } + return parent; + }) + .Seal() + .Seal() + .Build(); +} +TExprNode::TPtr HandleLaggingItems(TPositionHandle pos, + const TExprNode::TPtr& rowArg, + const TExprNode::TPtr& tupleOfOutputAndState, + const TVector<TChain1MapTraits::TPtr>& traits, + const TExprNode::TPtr& lagQueue, + TExprContext& ctx, 
+ TTypeAnnotationContext& typeCtx) +{ TExprNodeList laggingStructItems; TSet<TStringBuf> laggingNames; TSet<TStringBuf> otherNames; @@ -1928,7 +2836,7 @@ TExprNode::TPtr HandleLaggingItems(TPositionHandle pos, const TExprNode::TPtr& r if (laggingStructItems.empty()) { return tupleOfOutputAndState; } - + YQL_ENSURE(!IsWindowNewPipelineEnabled(typeCtx)); YQL_ENSURE(lagQueue); auto output = ctx.NewCallable(pos, "Nth", { tupleOfOutputAndState, ctx.NewAtom(pos, "0")}); @@ -1972,9 +2880,59 @@ TExprNode::TPtr HandleLaggingItems(TPositionHandle pos, const TExprNode::TPtr& r .Seal() .Build(); } +TExprNode::TPtr ReplaceAllShiftedElements(TPositionHandle pos, const TExprNode::TPtr& rowArg, const TExprNodeList& laggingStructItems, TSet<TStringBuf> laggingNames, TExprContext& ctx) { + auto otherOutput = RemoveMembers(pos, laggingNames, rowArg, ctx); + auto laggingOutput = ctx.NewCallable(pos, "AsStruct", TExprNodeList(laggingStructItems)); + return ctx.Builder(pos) + .Callable("FlattenMembers") + .List(0) + .Atom(0, "") + .Add(1, laggingOutput) + .Seal() + .List(1) + .Atom(0, "") + .Add(1, otherOutput) + .Seal() + .Seal() + .Build(); +} -TExprNode::TPtr BuildChain1MapInitLambda(TPositionHandle pos, const TVector<TChain1MapTraits::TPtr>& traits, - const TExprNode::TPtr& dataQueue, ui64 lagQueueSize, const TTypeAnnotationNode* lagQueueItemType, TExprContext& ctx) +TExprNode::TPtr HandleIncrementalOutput(TPositionHandle pos, + const TExprNode::TPtr& rowArg, + const TVector<TChain1MapTraits::TPtr>& traits, + const TExprNode::TPtr& dataQueue, + TExprContext& ctx) +{ + TExprNodeList laggingStructItems; + TSet<TStringBuf> laggingNames; + for (auto& trait : traits) { + auto name = trait->GetName(); + auto laggingOutput = trait->ExtractShiftedOutput(dataQueue, rowArg, ctx); + if (laggingOutput) { + laggingNames.insert(name); + laggingStructItems.push_back( + ctx.Builder(pos) + .List() + .Atom(0, name) + .Add(1, laggingOutput) + .Seal() + .Build() + ); + } + } + + 
YQL_ENSURE(!laggingStructItems.empty()); + YQL_ENSURE(dataQueue); + return ReplaceAllShiftedElements(pos, rowArg, laggingStructItems, laggingNames, ctx);; +} + +TExprNode::TPtr BuildChain1MapInitLambda(TPositionHandle pos, + const TVector<TChain1MapTraits::TPtr>& traits, + const TExprNode::TPtr& dataQueue, + ui64 lagQueueSize, + const TTypeAnnotationNode* lagQueueItemType, + TExprContext& ctx, + TTypeAnnotationContext& typeCtx) { auto rowArg = ctx.NewArgument(pos, "row"); @@ -2001,14 +2959,18 @@ TExprNode::TPtr BuildChain1MapInitLambda(TPositionHandle pos, const TVector<TCha auto asStruct = ctx.NewCallable(pos, "AsStruct", std::move(structItems)); auto tupleOfOutputAndState = ConvertStructOfTuplesToTupleOfStructs(pos, asStruct, ctx); - tupleOfOutputAndState = HandleLaggingItems(pos, rowArg, tupleOfOutputAndState, traits, lagQueue, ctx); + tupleOfOutputAndState = HandleLaggingItems(pos, rowArg, tupleOfOutputAndState, traits, lagQueue, ctx, typeCtx); auto finalBody = AddInputMembersToOutput(pos, tupleOfOutputAndState, rowArg, ctx); return ctx.NewLambda(pos, ctx.NewArguments(pos, {rowArg}), std::move(finalBody)); } -TExprNode::TPtr BuildChain1MapUpdateLambda(TPositionHandle pos, const TVector<TChain1MapTraits::TPtr>& traits, - const TExprNode::TPtr& dataQueue, bool haveLagQueue, TExprContext& ctx) +TExprNode::TPtr BuildChain1MapUpdateLambda(TPositionHandle pos, + const TVector<TChain1MapTraits::TPtr>& traits, + const TExprNode::TPtr& dataQueue, + bool haveLagQueue, + TExprContext& ctx, + TTypeAnnotationContext& typeCtx) { const auto rowArg = ctx.NewArgument(pos, "row"); const auto stateArg = ctx.NewArgument(pos, "state"); @@ -2058,7 +3020,7 @@ TExprNode::TPtr BuildChain1MapUpdateLambda(TPositionHandle pos, const TVector<TC auto asStruct = ctx.NewCallable(pos, "AsStruct", std::move(structItems)); auto tupleOfOutputAndState = ConvertStructOfTuplesToTupleOfStructs(pos, asStruct, ctx); - tupleOfOutputAndState = HandleLaggingItems(pos, rowArg, tupleOfOutputAndState, 
traits, lagQueue, ctx); + tupleOfOutputAndState = HandleLaggingItems(pos, rowArg, tupleOfOutputAndState, traits, lagQueue, ctx, typeCtx); auto finalBody = AddInputMembersToOutput(pos, tupleOfOutputAndState, rowArg, ctx); return ctx.NewLambda(pos, ctx.NewArguments(pos, {rowArg, stateArg}), std::move(finalBody)); @@ -2066,8 +3028,7 @@ TExprNode::TPtr BuildChain1MapUpdateLambda(TPositionHandle pos, const TVector<TC bool IsNonCompactFullFrame(const TExprNode& winOnRows, TExprContext& ctx) { TWindowFrameSettings frameSettings = TWindowFrameSettings::Parse(winOnRows, ctx); - return frameSettings.GetFrameType() == FrameByRows && - !frameSettings.IsCompact() && !frameSettings.GetFirstOffset().Defined() && !frameSettings.GetLastOffset().Defined(); + return frameSettings.IsFullPartition() && !frameSettings.IsCompact(); } TExprNode::TPtr DeduceCompatibleSort(const TExprNode::TPtr& traitsOne, const TExprNode::TPtr& traitsTwo) { @@ -2771,24 +3732,66 @@ TExprNode::TPtr TryExpandNonCompactFullFrames(TPositionHandle pos, const TExprNo .Done().Ptr(); } -void SplitFramesByType(const TExprNode::TPtr& frames, TExprNode::TPtr& rowFrames, TExprNode::TPtr& rangeFrames, TExprNode::TPtr& groupFrames, TExprContext& ctx) { - TExprNodeList rows; - TExprNodeList range; - TExprNodeList groups; +struct TSplitResult { + TExprNode::TPtr Rows; + TExprNode::TPtr NonNumericRanges; + TExprNode::TPtr NumericRangesAndRows; + TString NumericBoundsCallableName; +}; + +TSplitResult SplitFramesByType(const TExprNode::TPtr& frames, TExprContext& ctx, TTypeAnnotationContext& typeCtx) { + TExprNodeList nonNumericRanges; + TExprNodeList numbericRangesAndRows; + TMaybe<TString> numericBoundsCallableName; for (auto& winOn : frames->ChildrenList()) { if (TCoWinOnRows::Match(winOn.Get())) { - rows.push_back(std::move(winOn)); + numbericRangesAndRows.push_back(std::move(winOn)); } else if (TCoWinOnRange::Match(winOn.Get())) { - range.push_back(std::move(winOn)); + auto settings = TWindowFrameSettings::Parse(*winOn, 
ctx); + if (settings.GetRangeFrame().IsNumeric() && IsRangeWindowFrameEnabled(typeCtx)) { + auto currentCallableName = settings.GetRangeFrame().BoundsCallable(); + if (!numericBoundsCallableName.Defined()) { + numericBoundsCallableName = currentCallableName; + } else { + YQL_ENSURE(*numericBoundsCallableName == currentCallableName, + "All numeric range frames must have the same BoundsCallable, got: " + << *numericBoundsCallableName << " and " << currentCallableName); + } + numbericRangesAndRows.push_back(std::move(winOn)); + } else { + nonNumericRanges.push_back(std::move(winOn)); + } } else { YQL_ENSURE(TCoWinOnGroups::Match(winOn.Get())); - groups.push_back(std::move(winOn)); + YQL_ENSURE(0, "Unexpected WinOnGroups."); } } - rowFrames = ctx.NewList(frames->Pos(), std::move(rows)); - rangeFrames = ctx.NewList(frames->Pos(), std::move(range)); - groupFrames = ctx.NewList(frames->Pos(), std::move(groups)); + return TSplitResult { + .NonNumericRanges = ctx.NewList(frames->Pos(), std::move(nonNumericRanges)), + .NumericRangesAndRows = ctx.NewList(frames->Pos(), std::move(numbericRangesAndRows)), + .NumericBoundsCallableName = numericBoundsCallableName.GetOrElse(TString()), + }; +} + +ESortOrder ExtractAndVerifyRangeSortOrder(const TExprNode::TPtr& frames, TExprContext& ctx) { + TMaybe<ESortOrder> sortOrder; + for (auto& winOn : frames->ChildrenList()) { + if (TCoWinOnRange::Match(winOn.Get())) { + auto settings = TWindowFrameSettings::Parse(*winOn, ctx); + if (settings.GetFrameType() != EFrameType::FrameByRange) { + continue; + } + auto currentSortOrder = settings.GetRangeFrame().GetSortOrder(); + if (!sortOrder) { + sortOrder = currentSortOrder; + } else { + YQL_ENSURE(*sortOrder == currentSortOrder, "All Range frames must have the same SortOrder"); + } + } + } + + return sortOrder.GetOrElse(ESortOrder::Unimportant); } const TStructExprType* ApplyFramesToType(const TStructExprType& inputType, const TStructExprType& finalOutputType, const TExprNode& frames, 
TExprContext& ctx) { @@ -2895,7 +3898,7 @@ TExprNode::TPtr AddPartitionRowsColumn(TPositionHandle pos, const TExprNode::TPt .Build(); } -TExprNode::TPtr RemovePartitionRowsColumn(TPositionHandle pos, const TExprNode::TPtr& input, const TString& columnName, TExprContext& ctx) { +TExprNode::TPtr RemoveRowsColumn(TPositionHandle pos, const TExprNode::TPtr& input, const TString& columnName, TExprContext& ctx) { return ctx.Builder(pos) .Callable("Map") .Add(0, input) @@ -2910,8 +3913,46 @@ TExprNode::TPtr RemovePartitionRowsColumn(TPositionHandle pos, const TExprNode:: .Build(); } -TExprNode::TPtr ProcessRowsFrames(TPositionHandle pos, const TExprNode::TPtr& input, const TStructExprType& rowType, const TExprNode::TPtr& dependsOn, - const TExprNode::TPtr& frames, const TMaybe<TString>& partitionRowsColumn, TExprContext& ctx) +TExprNode::TPtr ProccessAllIncrementalShifts(TPositionHandle pos, + const TExprNode::TPtr& stream, + const TCoreWinFrameCollectorBounds<TString>& incrementalBounds, + TExprNode::TPtr streamDependsOn, + const TVector<TChain1MapTraits::TPtr>& traits, + TMaybe<ESortOrder> sortOrder, + TStringBuf rangeCallableName, + TExprContext& ctx) { + TCoreWinFramesCollectorParams params(incrementalBounds, sortOrder.GetOrElse(ESortOrder::Unimportant), TString(SortedColumnMemberName)); + auto processedItemType = ctx.Builder(pos) + .Callable("StreamItemType") + .Callable(0, "TypeOf") + .Add(0, stream) + .Seal() + .Seal() + .Build(); + auto WinFramesCollectorResult = BuildWinFramesCollector(pos, stream, processedItemType, params, rangeCallableName, streamDependsOn, ctx); + auto arg = ctx.NewArgument(pos, "row"); + + auto body = HandleIncrementalOutput(pos, arg, traits, WinFramesCollectorResult.Queue, ctx); + auto lambda = ctx.NewLambda(pos, ctx.NewArguments(pos, {arg}), std::move(body)); + + return ctx.Builder(pos) + .Callable("OrderedMap") + .Add(0, WinFramesCollectorResult.WinFramesCollector) + .Add(1, lambda) + .Seal() + .Build(); +} + +TExprNode::TPtr 
ProcessRowsAndNumericRangeFrames(TPositionHandle pos, + const TExprNode::TPtr& input, + const TStructExprType& rowType, + const TExprNode::TPtr& dependsOn, + const TExprNode::TPtr& frames, + const TMaybe<TString>& partitionRowsColumn, + TMaybe<ESortOrder> sortOrder, + TStringBuf rangeCallableName, + TExprContext& ctx, + TTypeAnnotationContext& typeCtx) { if (frames->ChildrenSize() == 0) { return input; @@ -2919,25 +3960,42 @@ TExprNode::TPtr ProcessRowsFrames(TPositionHandle pos, const TExprNode::TPtr& in TExprNode::TPtr processed = input; TExprNode::TPtr dataQueue; TQueueParams queueParams; - TVector<TChain1MapTraits::TPtr> traits = BuildFoldMapTraits(queueParams, frames, partitionRowsColumn, rowType, ctx); - if (queueParams.DataQueueNeeded) { - ui64 queueSize = (queueParams.DataOutpace == Max<ui64>()) ? Max<ui64>() : (queueParams.DataOutpace + queueParams.DataLag + 2); - dataQueue = BuildQueue(pos, rowType, queueSize, queueParams.DataLag, dependsOn, ctx); - processed = ctx.Builder(pos) - .Callable("PreserveStream") - .Add(0, processed) - .Add(1, dataQueue) - .Add(2, BuildUint64(pos, queueParams.DataOutpace, ctx)) - .Seal() - .Build(); + // Deduplicate all same bounds. 
+ TCoreWinFrameCollectorBounds<TString> bounds(/*dedup=*/true); + TCoreWinFrameCollectorBounds<TString> incrementalBounds(/*dedup=*/true); + TVector<TChain1MapTraits::TPtr> traits = BuildFoldMapTraitsForRowsAndNumericRanges(queueParams, bounds, incrementalBounds, frames, partitionRowsColumn, rowType, ctx, typeCtx); + + if (IsWindowNewPipelineEnabled(typeCtx)) { + if (!bounds.Empty()) { + TCoreWinFramesCollectorParams params(bounds, sortOrder.GetOrElse(ESortOrder::Unimportant), TString(SortedColumnMemberName)); + auto WinFramesCollectorResult = BuildWinFramesCollector(pos, processed, rowType, params, rangeCallableName, dependsOn, ctx); + dataQueue = WinFramesCollectorResult.Queue; + processed = WinFramesCollectorResult.WinFramesCollector; + } + } else { + YQL_ENSURE(bounds.Empty(), "Bounds should be filled only inside new pipeline."); + if (queueParams.DataQueueNeeded) { + ui64 queueSize = (queueParams.DataOutpace == Max<ui64>()) ? Max<ui64>() : (queueParams.DataOutpace + queueParams.DataLag + 2); + dataQueue = BuildQueue(pos, rowType, queueSize, queueParams.DataLag, dependsOn, ctx); + processed = ctx.Builder(pos) + .Callable("PreserveStream") + .Add(0, processed) + .Add(1, dataQueue) + .Add(2, BuildUint64(pos, queueParams.DataOutpace, ctx)) + .Seal() + .Build(); + } } + bool haveLagQueue = !IsWindowNewPipelineEnabled(typeCtx) && queueParams.LagQueueSize != 0; + ui64 lagQueueSize = IsWindowNewPipelineEnabled(typeCtx) ? 
0: queueParams.LagQueueSize; + processed = ctx.Builder(pos) .Callable("OrderedMap") .Callable(0, "Chain1Map") .Add(0, std::move(processed)) - .Add(1, BuildChain1MapInitLambda(pos, traits, dataQueue, queueParams.LagQueueSize, queueParams.LagQueueItemType, ctx)) - .Add(2, BuildChain1MapUpdateLambda(pos, traits, dataQueue, queueParams.LagQueueSize != 0, ctx)) + .Add(1, BuildChain1MapInitLambda(pos, traits, dataQueue, lagQueueSize, queueParams.LagQueueItemType, ctx, typeCtx)) + .Add(2, BuildChain1MapUpdateLambda(pos, traits, dataQueue, haveLagQueue, ctx, typeCtx)) .Seal() .Lambda(1) .Param("pair") @@ -2949,29 +4007,39 @@ TExprNode::TPtr ProcessRowsFrames(TPositionHandle pos, const TExprNode::TPtr& in .Seal() .Build(); + if (IsWindowNewPipelineEnabled(typeCtx)) { + if (!incrementalBounds.Empty()) { + processed = ProccessAllIncrementalShifts(pos, processed, incrementalBounds, dependsOn, traits, sortOrder, rangeCallableName, ctx); + } + } else { + YQL_ENSURE(incrementalBounds.Empty(), "Incremental bounds should be filled only inside new pipeline."); + } + return WrapWithWinContext(processed, ctx); } -TExprNode::TPtr ProcessRangeFrames(TPositionHandle pos, const TExprNode::TPtr& input, const TStructExprType& rowType, const TExprNode::TPtr& sortKey, const TExprNode::TPtr& frames, - const TMaybe<TString>& partitionRowsColumn, TExprContext& ctx) { +TExprNode::TPtr ProcessRangeNonNumericFrames(TPositionHandle pos, + const TExprNode::TPtr& input, + const TStructExprType& rowType, + const TExprNode::TPtr& sortKey, + const TExprNode::TPtr& frames, + const TMaybe<TString>& partitionRowsColumn, + TExprContext& ctx, + TTypeAnnotationContext& typeCtx) { if (frames->ChildrenSize() == 0) { return input; } TExprNode::TPtr processed = input; - TQueueParams queueParams; - TVector<TChain1MapTraits::TPtr> traits = BuildFoldMapTraits(queueParams, frames, partitionRowsColumn, rowType, ctx); - YQL_ENSURE(!queueParams.DataQueueNeeded); - YQL_ENSURE(queueParams.LagQueueSize == 0); - 
YQL_ENSURE(queueParams.LagQueueItemType == nullptr); + TVector<TChain1MapTraits::TPtr> traits = BuildFoldMapTraitsForNonNumericRange(frames, rowType, partitionRowsColumn, ctx); // same processing as in WinOnRows processed = ctx.Builder(pos) .Callable("OrderedMap") .Callable(0, "Chain1Map") .Add(0, std::move(processed)) - .Add(1, BuildChain1MapInitLambda(pos, traits, nullptr, 0, nullptr, ctx)) - .Add(2, BuildChain1MapUpdateLambda(pos, traits, nullptr, false, ctx)) + .Add(1, BuildChain1MapInitLambda(pos, traits, nullptr, 0, nullptr, ctx, typeCtx)) + .Add(2, BuildChain1MapUpdateLambda(pos, traits, nullptr, false, ctx, typeCtx)) .Seal() .Lambda(1) .Param("pair") @@ -3178,10 +4246,16 @@ TExprNode::TPtr ProcessRangeFrames(TPositionHandle pos, const TExprNode::TPtr& i return processed; } -TExprNode::TPtr ExpandSingleCalcOverWindow(TPositionHandle pos, const TExprNode::TPtr& inputList, const TExprNode::TPtr& keyColumns, - const TExprNode::TPtr& sortTraits, const TExprNode::TPtr& frames, const TExprNode::TPtr& sessionTraits, - const TExprNode::TPtr& sessionColumns, const TStructExprType& outputRowType, TExprContext& ctx, TTypeAnnotationContext& types) -{ +TExprNode::TPtr ExpandSingleCalcOverWindow(TPositionHandle pos, + const TExprNode::TPtr& inputList, + const TExprNode::TPtr& keyColumns, + const TExprNode::TPtr& sortTraits, + const TExprNode::TPtr& frames, + const TExprNode::TPtr& sessionTraits, + const TExprNode::TPtr& sessionColumns, + const TStructExprType& outputRowType, + TExprContext& ctx, + TTypeAnnotationContext& types) { if (auto expanded = TryExpandNonCompactFullFrames(pos, inputList, keyColumns, sortTraits, frames, sessionTraits, sessionColumns, ctx)) { YQL_CLOG(INFO, Core) << "Expanded non-compact CalcOverWindow"; return expanded; @@ -3194,7 +4268,8 @@ TExprNode::TPtr ExpandSingleCalcOverWindow(TPositionHandle pos, const TExprNode: TExprNode::TPtr sessionInit; TExprNode::TPtr sessionUpdate; ExtractSessionWindowParams(pos, sessionTraits, sessionKey, 
sessionKeyType, sessionParamsType, sessionSortTraits, sessionInit, sessionUpdate, ctx); - + auto splitResult = SplitFramesByType(frames, ctx, types); + auto sortOrderForNumeric = ExtractAndVerifyRangeSortOrder(splitResult.NumericRangesAndRows, ctx); const auto originalRowType = inputList->GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>(); TVector<const TItemExprType*> rowItems = originalRowType->GetItems(); if (sessionKeyType) { @@ -3202,6 +4277,9 @@ TExprNode::TPtr ExpandSingleCalcOverWindow(TPositionHandle pos, const TExprNode: rowItems.push_back(ctx.MakeType<TItemExprType>(SessionStartMemberName, sessionKeyType)); rowItems.push_back(ctx.MakeType<TItemExprType>(SessionParamsMemberName, sessionParamsType)); } + if (ShouldAddSortedColumn(sortOrderForNumeric)) { + rowItems.push_back(GetSortedColumnType(sortTraits, ctx)); + } auto rowType = ctx.MakeType<TStructExprType>(rowItems); auto keySelector = BuildKeySelector(pos, *rowType->Cast<TStructExprType>(), keyColumns, ctx); @@ -3247,12 +4325,6 @@ TExprNode::TPtr ExpandSingleCalcOverWindow(TPositionHandle pos, const TExprNode: sessionKey = sessionInit = sessionUpdate = {}; } - TExprNode::TPtr rowsFrames; - TExprNode::TPtr rangeFrames; - TExprNode::TPtr groupsFrames; - SplitFramesByType(frames, rowsFrames, rangeFrames, groupsFrames, ctx); - YQL_ENSURE(groupsFrames->ChildrenSize() == 0); - auto topLevelStreamArg = ctx.NewArgument(pos, "stream"); TExprNode::TPtr processed = topLevelStreamArg; @@ -3265,22 +4337,47 @@ TExprNode::TPtr ExpandSingleCalcOverWindow(TPositionHandle pos, const TExprNode: // All RANGE frames (even simplest RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) // will require additional memory to store TableRow()'s - so we want to start with minimum size of row // (i.e. 
process range frames first) - processed = ProcessRangeFrames(pos, processed, *rowType, originalSortKey, rangeFrames, partitionRowsColumn, ctx); - rowType = ApplyFramesToType(*rowType, outputRowType, *rangeFrames, ctx); - processed = ProcessRowsFrames(pos, processed, *rowType, topLevelStreamArg, rowsFrames, partitionRowsColumn, ctx); + processed = ProcessRangeNonNumericFrames(pos, processed, *rowType, originalSortKey, splitResult.NonNumericRanges, partitionRowsColumn, ctx, types); + rowType = ApplyFramesToType(*rowType, outputRowType, *splitResult.NonNumericRanges, ctx); + processed = ProcessRowsAndNumericRangeFrames(pos, processed, *rowType, topLevelStreamArg, splitResult.NumericRangesAndRows, partitionRowsColumn, sortOrderForNumeric, splitResult.NumericBoundsCallableName, ctx, types); auto topLevelStreamProcessingLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, {topLevelStreamArg}), std::move(processed)); YQL_CLOG(INFO, Core) << "Expanded compact CalcOverWindow"; auto res = BuildPartitionsByKeys(pos, input, keySelector, sortOrder, sortKey, topLevelStreamProcessingLambda, sessionKey, sessionInit, sessionUpdate, sessionColumns, ctx); + + + if (ShouldAddSortedColumn(sortOrderForNumeric)) { + res = PushSortedColumnInsideStream(res, ctx); + res = RemoveRowsColumn(pos, res, TString(SortedColumnMemberName), ctx); + } + if (partitionRowsColumn) { - res = RemovePartitionRowsColumn(pos, res, *partitionRowsColumn, ctx); + res = RemoveRowsColumn(pos, res, *partitionRowsColumn, ctx); } return res; } +bool IsUniversal(const TExprNode::TPtr& frameSpec) { + auto bounds = {GetSettingByName(frameSpec->Children(), "begin"), GetSettingByName(frameSpec->Children(), "end")}; + for (auto bound: bounds) { + if (!bound) { + continue; + } + + if (bound->GetTypeAnn() && bound->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Universal) { + return true; + } + + if (bound->IsList() && bound->ChildrenSize() >= 2 && bound->Child(1)->GetTypeAnn() && bound->Child(1)->GetTypeAnn()->GetKind() == 
ETypeAnnotationKind::Universal) { + return true; + } + } + return false; +} + } // namespace TExprNode::TPtr ExpandCalcOverWindow(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& types) { @@ -3460,8 +4557,12 @@ bool IsUnbounded(const NNodes::TCoFrameBound& bound) { return false; } -bool IsCurrentRow(const NNodes::TCoFrameBound& bound) { - return bound.Setting().Value() == "currentRow"; +TWindowFrameSettings::TWindowFrameSettings(const TFrame& frameBounds, bool neverEmpty, bool compact, bool isAlwaysEmpty) + : FrameBounds_(frameBounds) + , NeverEmpty_(neverEmpty) + , Compact_(compact) + , IsAlwaysEmpty_(isAlwaysEmpty) +{ } TWindowFrameSettings TWindowFrameSettings::Parse(const TExprNode& node, TExprContext& ctx) { @@ -3472,218 +4573,101 @@ TWindowFrameSettings TWindowFrameSettings::Parse(const TExprNode& node, TExprCon } TMaybe<TWindowFrameSettings> TWindowFrameSettings::TryParse(const TExprNode& node, TExprContext& ctx, bool& isUniversal) { - isUniversal = false; - TWindowFrameSettings settings; - - if (node.IsCallable("WinOnRows")) { - settings.Type_ = EFrameType::FrameByRows; - } else if (node.IsCallable("WinOnRange")) { - settings.Type_ = EFrameType::FrameByRange; - } else { - YQL_ENSURE(node.IsCallable("WinOnGroups")); - settings.Type_ = EFrameType::FrameByGroups; - } auto frameSpec = node.Child(0); + isUniversal = false; if (frameSpec->Type() == TExprNode::List) { - bool hasBegin = false; - bool hasEnd = false; - - for (const auto& setting : frameSpec->Children()) { - if (!EnsureTupleMinSize(*setting, 1, ctx)) { - return {}; - } - - if (!EnsureAtom(setting->Head(), ctx)) { - return {}; - } - - const auto settingName = setting->Head().Content(); - if (settingName != "begin" && settingName != "end" && settingName != "compact") { - ctx.AddError( - TIssue(ctx.GetPosition(setting->Pos()), TStringBuilder() << "Invalid frame bound '" << settingName << "'")); - return {}; - } - - if (settingName == "compact") { - settings.Compact_ = true; - 
continue; - } - - if (!EnsureTupleSize(*setting, 2, ctx)) { - return {}; - } - - bool& hasBound = (settingName == "begin") ? hasBegin : hasEnd; - if (hasBound) { - ctx.AddError( - TIssue(ctx.GetPosition(setting->Pos()), TStringBuilder() << "Duplicate " << settingName << " frame bound detected")); - return {}; - } - - hasBound = true; - TMaybe<i32>& boundOffset = (settingName == "begin") ? settings.FirstOffset_ : settings.LastOffset_; - TExprNode::TPtr& frameBound = (settingName == "begin") ? settings.First_ : settings.Last_; - - if (setting->Tail().IsList()) { - TExprNode::TPtr fb = setting->TailPtr(); - if (!EnsureTupleMinSize(*fb, 1, ctx)) { - return {}; - } - if (!EnsureAtom(fb->Head(), ctx)) { - return {}; - } - - auto type = fb->Head().Content(); - if (type == "currentRow") { - if (fb->ChildrenSize() == 1) { - if (!node.IsCallable("WinOnRange")) { - ctx.AddError(TIssue(ctx.GetPosition(fb->Pos()), TStringBuilder() << "currentRow should only be used for RANGE")); - return {}; - } - frameBound = fb; - continue; - } - ctx.AddError(TIssue(ctx.GetPosition(fb->Pos()), TStringBuilder() << "Expecting no value for '" << type << "'")); - return {}; - } - - if (!(type == "preceding" || type == "following")) { - ctx.AddError(TIssue(ctx.GetPosition(fb->Pos()), TStringBuilder() << "Expecting preceding or following, but got '" << type << "'")); - return {}; - } - - if (!EnsureTupleSize(*fb, 2, ctx)) { - return {}; - } - - auto boundValue = fb->ChildPtr(1); - if (boundValue->IsAtom()) { - if (boundValue->Content() == "unbounded") { - frameBound = fb; - continue; - } - ctx.AddError(TIssue(ctx.GetPosition(fb->Pos()), TStringBuilder() << "Expecting unbounded, but got '" << boundValue->Content() << "'")); - return {}; - } - - if (node.IsCallable({"WinOnRows", "WinOnGroups"})) { - if (!EnsureDataType(*boundValue, ctx)) { - return {}; - } - - auto slot = boundValue->GetTypeAnn()->Cast<TDataExprType>()->GetSlot(); - bool groups = node.IsCallable("WinOnGroups"); - if 
(!IsDataTypeIntegral(slot)) { - ctx.AddError(TIssue(ctx.GetPosition(boundValue->Pos()), - TStringBuilder() << "Expecting integral values for " << (groups ? "GROUPS" : "ROWS") << " but got " << *boundValue->GetTypeAnn())); - return {}; - } - - if (!groups) { - auto maybeIntLiteral = TMaybeNode<TCoIntegralCtor>(boundValue); - if (!maybeIntLiteral) { - // TODO: this is not strictly necessary, and only needed for current implementation via Queue - ctx.AddError(TIssue(ctx.GetPosition(boundValue->Pos()), - TStringBuilder() << "Expecting literal values for ROWS")); - return {}; - } - auto strLiteralValue = maybeIntLiteral.Cast().Literal().Value(); - if (strLiteralValue.StartsWith("-")) { - ctx.AddError(TIssue(ctx.GetPosition(boundValue->Pos()), - TStringBuilder() << "Expecting positive literal values for ROWS, but got " << strLiteralValue)); - return {}; - } - - ui64 literalValue = FromString<ui64>(strLiteralValue); - if (literalValue > std::numeric_limits<i32>::max()) { - ctx.AddError(TIssue(ctx.GetPosition(boundValue->Pos()), - TStringBuilder() << "ROWS offset too big: " << strLiteralValue << ", maximum is " << std::numeric_limits<i32>::max())); - return {}; - } - - i32 castedValue = (i32)literalValue; - if (type == "preceding") { - castedValue = -castedValue; - } - boundOffset = castedValue; - } - } else if (!EnsureComparableType(boundValue->Pos(), *boundValue->GetTypeAnn(), ctx)) { - return {}; - } - frameBound = fb; - } else if (setting->Tail().IsCallable("Int32")) { - auto& valNode = setting->Tail().Head(); - YQL_ENSURE(valNode.IsAtom()); - i32 value; - YQL_ENSURE(TryFromString(valNode.Content(), value)); - boundOffset = value; - } else if (!setting->Tail().IsCallable("Void")) { - const TTypeAnnotationNode* type = setting->Tail().GetTypeAnn(); - if (type && type->GetKind() == ETypeAnnotationKind::Universal) { - isUniversal = true; - return TWindowFrameSettings{}; - } - - TStringBuilder errMsg; - if (!type) { - errMsg << "lambda"; - } else if 
(setting->Tail().IsCallable()) { - errMsg << setting->Tail().Content() << " with type " << *type; - } else { - errMsg << *type; - } - - ctx.AddError(TIssue(ctx.GetPosition(setting->Tail().Pos()), - TStringBuilder() << "Invalid " << settingName << " frame bound - expecting Void or Int32 callable, but got: " << errMsg)); - return {}; - } + if (!VerifySettings(frameSpec->Children(), ctx)) { + return {}; } - - if (!hasBegin || !hasEnd) { - ctx.AddError(TIssue(ctx.GetPosition(frameSpec->Pos()), - TStringBuilder() << "Missing " << (!hasBegin ? "begin" : "end") << " bound in frame definition")); + isUniversal = IsUniversal(frameSpec); + if (isUniversal) { return {}; } - } else if (frameSpec->IsCallable("Void")) { - settings.FirstOffset_ = {}; - settings.LastOffset_ = 0; + return TryParseWindowFrameSettingsFromList(node, ctx); } else { const TTypeAnnotationNode* type = frameSpec->GetTypeAnn(); ctx.AddError(TIssue(ctx.GetPosition(frameSpec->Pos()), - TStringBuilder() << "Invalid window frame - expecting Tuple or Void, but got: " << (type ? FormatType(type) : "lambda"))); + TStringBuilder() << "Invalid window frame - expecting Tuple, but got: " << (type ? 
FormatType(type) : "lambda"))); return {}; } +} - // frame will always contain rows if it includes current row - if (!settings.FirstOffset_) { - settings.NeverEmpty_ = !settings.LastOffset_.Defined() || *settings.LastOffset_ >= 0; - } else if (!settings.LastOffset_.Defined()) { - settings.NeverEmpty_ = !settings.FirstOffset_.Defined() || *settings.FirstOffset_ <= 0; - } else { - settings.NeverEmpty_ = *settings.FirstOffset_ <= *settings.LastOffset_ && *settings.FirstOffset_ <= 0 && *settings.LastOffset_ >= 0; - } +bool TWindowFrameSettings::IsFullPartition() const { + return IsLeftInf() && IsRightInf(); +} - return settings; +EFrameType TWindowFrameSettings::GetFrameType() const { + return std::visit(TOverloaded{ + [&](const TRowFrame&) { + return FrameByRows; + }, + [&](const TRangeFrame&) { + return FrameByRange; + }, + [&](const TGroupsFrame&) { + return FrameByGroups; + }, + }, FrameBounds_); } -TMaybe<i32> TWindowFrameSettings::GetFirstOffset() const { - YQL_ENSURE(Type_ == FrameByRows); - return FirstOffset_; +bool TWindowFrameSettings::IsLeftInf() const { + return std::visit(TOverloaded{ + [&](const TRowFrame& rowFrame) { + return !rowFrame.first.Defined(); + }, + [&](const TRangeFrame& rangeFrame) { + return rangeFrame.GetFirst().IsInf(); + }, + [&](const TGroupsFrame&) { + YQL_ENSURE(0, "Not implemented."); + return false; + }, + }, FrameBounds_); } -TMaybe<i32> TWindowFrameSettings::GetLastOffset() const { - YQL_ENSURE(Type_ == FrameByRows); - return LastOffset_; +bool TWindowFrameSettings::IsRightInf() const { + return std::visit(TOverloaded{ + [&](const TRowFrame& rowFrame) { + return !rowFrame.second.Defined(); + }, + [&](const TRangeFrame& rangeFrame) { + return rangeFrame.GetLast().IsInf(); + }, + [&](const TGroupsFrame&) { + YQL_ENSURE(0, "Not implemented."); + return false; + }, + }, FrameBounds_); } -TCoFrameBound TWindowFrameSettings::GetFirst() const { - YQL_ENSURE(First_); - return TCoFrameBound(First_); +bool 
TWindowFrameSettings::IsLeftCurrent() const { + return std::visit(TOverloaded{ + [&](const TRowFrame& rowFrame) { + return rowFrame.first.Defined() && rowFrame.first == 0; + }, + [&](const TRangeFrame& rangeFrame) { + return !rangeFrame.GetFirst().IsInf() && rangeFrame.GetFirst().GetUnderlyingValue() == "0"; + }, + [&](const TGroupsFrame&) { + YQL_ENSURE(0, "Not implemented."); + return false; + }, + }, FrameBounds_); } -TCoFrameBound TWindowFrameSettings::GetLast() const { - YQL_ENSURE(Last_); - return TCoFrameBound(Last_); +bool TWindowFrameSettings::IsRightCurrent() const { + return std::visit(TOverloaded{ + [&](const TRowFrame& rowFrame) { + return rowFrame.second.Defined() && rowFrame.second == 0; + }, + [&](const TRangeFrame& rangeFrame) { + return !rangeFrame.GetLast().IsInf() && rangeFrame.GetLast().GetUnderlyingValue() == "0"; + }, + [&](const TGroupsFrame&) { + YQL_ENSURE(0, "Not implemented."); + return false; + }, + }, FrameBounds_); } TExprNode::TPtr ZipWithSessionParamsLambda(TPositionHandle pos, const TExprNode::TPtr& partitionKeySelector, diff --git a/yql/essentials/core/yql_opt_window.h b/yql/essentials/core/yql_opt_window.h index 3d7912aa870..41d0ab15297 100644 --- a/yql/essentials/core/yql_opt_window.h +++ b/yql/essentials/core/yql_opt_window.h @@ -1,5 +1,10 @@ #pragma once + #include <yql/essentials/core/expr_nodes/yql_expr_nodes.h> +#include <yql/essentials/core/sql_types/window_number_and_direction.h> +#include <yql/essentials/core/sql_types/sort_order.h> + +#include <util/generic/overloaded.h> namespace NYql { @@ -18,32 +23,117 @@ enum EFrameType { using NNodes::TCoWinOnBase; using NNodes::TCoFrameBound; -bool IsUnbounded(const NNodes::TCoFrameBound& bound); -bool IsCurrentRow(const NNodes::TCoFrameBound& bound); +enum class EFrameBoundsType: ui8 { + EMPTY, + LAGGING, + CURRENT, + LEADING, + FULL, + GENERIC, +}; + +enum class EFrameBoundsNewType: ui8 { + EMPTY, + INCREMENTAL, + FULL, + GENERIC, +}; class TWindowFrameSettings { public: + using 
TRowFrame = std::pair<TMaybe<i32>, TMaybe<i32>>; + + class TRangeFrame { + public: + using ESortOrder = NYql::ESortOrder; + using TBoundType = NYql::NWindow::TNumberAndDirection<TString>; + + TRangeFrame(std::pair<TBoundType, TBoundType> frame, bool isNumeric, ESortOrder sortOrder, const TString& boundsCallable) + : Frame_(frame) + , IsNumeric_(isNumeric) + , SortOrder_(sortOrder) + , BoundsCallable_(boundsCallable) + { + } + + const TBoundType& GetFirst() const { + return Frame_.first; + } + const TBoundType& GetLast() const { + return Frame_.second; + } + + bool IsNumeric() const { + return IsNumeric_; + } + + ESortOrder GetSortOrder() const { + return SortOrder_; + } + + TStringBuf BoundsCallable() const { + return BoundsCallable_; + } + + private: + std::pair<TBoundType, TBoundType> Frame_; + bool IsNumeric_; + ESortOrder SortOrder_; + TString BoundsCallable_; + }; + + using TGroupsFrame = std::monostate; + + using TFrame = std::variant<TRowFrame, TRangeFrame, TGroupsFrame>; + + TWindowFrameSettings(const TFrame& frameBounds, bool neverEmpty, bool compact, bool isAlwaysEmpty); + static TWindowFrameSettings Parse(const TExprNode& node, TExprContext& ctx); static TMaybe<TWindowFrameSettings> TryParse(const TExprNode& node, TExprContext& ctx, bool& isUniversal); - // This two functions can only be used for FrameByRows or FrameByGroups - TMaybe<i32> GetFirstOffset() const; - TMaybe<i32> GetLastOffset() const; + bool IsNonEmpty() const { + return NeverEmpty_; + } + + bool IsCompact() const { + return Compact_; + } + + bool IsAlwaysEmpty() const { + return IsAlwaysEmpty_; + } + + EFrameType GetFrameType() const; - TCoFrameBound GetFirst() const; - TCoFrameBound GetLast() const; + bool IsFullPartition() const; + + const TRowFrame& GetRowFrame() const { + YQL_ENSURE(GetFrameType() == FrameByRows); + return std::get<TRowFrame>(FrameBounds_); + } + + const TRangeFrame& GetRangeFrame() const { + YQL_ENSURE(GetFrameType() == FrameByRange); + return 
std::get<TRangeFrame>(FrameBounds_); + } + + const TGroupsFrame& GetGroupsFrame() const { + YQL_ENSURE(GetFrameType() == FrameByGroups); + return std::get<TGroupsFrame>(FrameBounds_); + } + + bool IsLeftInf() const; + bool IsRightInf() const; + + bool IsLeftCurrent() const; + bool IsRightCurrent() const; - bool IsNonEmpty() const { return NeverEmpty_; } - bool IsCompact() const { return Compact_; } - EFrameType GetFrameType() const { return Type_; } private: - EFrameType Type_ = FrameByRows; - TExprNode::TPtr First_; - TMaybe<i32> FirstOffset_; - TExprNode::TPtr Last_; - TMaybe<i32> LastOffset_; + TFrame FrameBounds_; + bool NeverEmpty_ = false; bool Compact_ = false; + bool IsAlwaysEmpty_ = false; }; struct TSessionWindowParams { diff --git a/yql/essentials/core/yql_type_annotation.h b/yql/essentials/core/yql_type_annotation.h index 52d0d7cfbe5..4d74e8a33a2 100644 --- a/yql/essentials/core/yql_type_annotation.h +++ b/yql/essentials/core/yql_type_annotation.h @@ -443,6 +443,7 @@ struct TTypeAnnotationContext: public TThrRefBase { THashMap<std::tuple<TString, TString, const TTypeAnnotationNode*>, TUdfCachedInfo> UdfTypeCache; // (name,typecfg,type)->info bool UseTableMetaFromGraph = false; bool DiscoveryMode = false; + bool WindowNewPipeline = false; bool ForceDq = false; bool DqCaptured = false; // TODO: Add before/after recapture transformers EFallbackPolicy DqFallbackPolicy = EFallbackPolicy::Default; diff --git a/yql/essentials/core/yql_window_features.cpp b/yql/essentials/core/yql_window_features.cpp new file mode 100644 index 00000000000..e71705e404d --- /dev/null +++ b/yql/essentials/core/yql_window_features.cpp @@ -0,0 +1,23 @@ +#include "yql_window_features.h" + +#include <yql/essentials/core/yql_opt_utils.h> +#include <yql/essentials/core/yql_type_annotation.h> +#include <yql/essentials/minikql/mkql_runtime_version.h> + +namespace NYql { + +bool IsRangeWindowFrameEnabled(TTypeAnnotationContext& types) { + return IsWindowNewPipelineEnabled(types) && 
types.LangVer >= MakeLangVersion(2025, 5); +} + +bool IsWindowNewPipelineEnabled(TTypeAnnotationContext& types) { + if (types.WindowNewPipeline && NKikimr::NMiniKQL::RuntimeVersion >= 72u) { + // The new window pipeline generates code that is not robust to the absence of ForbidConstantDepends. + // Therefore, we must ensure that it is enabled. + YQL_ENSURE(IsForbidConstantDependsEnabled(types), "This feature must be enabled."); + return true; + } + return false; +} + +} // namespace NYql diff --git a/yql/essentials/core/yql_window_features.h b/yql/essentials/core/yql_window_features.h new file mode 100644 index 00000000000..7bba4a8cc3d --- /dev/null +++ b/yql/essentials/core/yql_window_features.h @@ -0,0 +1,11 @@ +#pragma once + +namespace NYql { + +struct TTypeAnnotationContext; + +bool IsRangeWindowFrameEnabled(TTypeAnnotationContext& types); + +bool IsWindowNewPipelineEnabled(TTypeAnnotationContext& types); + +} // namespace NYql diff --git a/yql/essentials/core/yql_window_frames_collector_params_serializer.cpp b/yql/essentials/core/yql_window_frames_collector_params_serializer.cpp new file mode 100644 index 00000000000..2f7d307479b --- /dev/null +++ b/yql/essentials/core/yql_window_frames_collector_params_serializer.cpp @@ -0,0 +1,190 @@ +#include <yql/essentials/core/yql_window_frames_collector_params_serializer.h> + +namespace NYql::NWindow { + +namespace { + +constexpr TStringBuf KeyMin = "Min"; +constexpr TStringBuf KeyMax = "Max"; +constexpr TStringBuf KeyRangeIntervals = "RangeIntervals"; +constexpr TStringBuf KeyRowIntervals = "RowIntervals"; +constexpr TStringBuf KeyRangeIncrementals = "RangeIncrementals"; +constexpr TStringBuf KeyRowIncrementals = "RowIncrementals"; +constexpr TStringBuf KeySortOrder = "SortOrder"; +constexpr TStringBuf KeyBounds = "Bounds"; +constexpr TStringBuf KeySortColumnName = "SortColumnName"; +constexpr TStringBuf KeyDirection = "Direction"; +constexpr TStringBuf KeyNumber = "Number"; + +TExprNode::TPtr 
BuildNumberVariantType(TPositionHandle pos, TStringBuf dataTypeName, TExprContext& ctx) { + return ctx.Builder(pos) + .Callable("VariantType") + .Callable(0, "StructType") + .List(0) + .Atom(0, "Unbounded") + .Callable(1, "VoidType") + .Seal() + .Seal() + .List(1) + .Atom(0, "Bounded") + .Callable(1, "DataType") + .Atom(0, dataTypeName) + .Seal() + .Seal() + .Seal() + .Seal() + .Build(); +} + +template <typename T> +TExprNode::TPtr SerializeNumberAndDirection( + TPositionHandle pos, + const TNumberAndDirection<T>& value, + TStringBuf dataTypeName, + TStringBuf callableName, + TExprContext& ctx) +{ + auto variantType = BuildNumberVariantType(pos, dataTypeName, ctx); + + TExprNode::TPtr numberVariant; + if (value.IsInf()) { + numberVariant = ctx.Builder(pos) + .Callable("Variant") + .Callable(0, "Void") + .Seal() + .Atom(1, "Unbounded") + .Add(2, variantType) + .Seal() + .Build(); + } else { + numberVariant = ctx.Builder(pos) + .Callable("Variant") + .Callable(0, callableName) + .Atom(0, ToString(value.GetUnderlyingValue())) + .Seal() + .Atom(1, "Bounded") + .Add(2, variantType) + .Seal() + .Build(); + } + + return ctx.Builder(pos) + .Callable("AsStruct") + .List(0) + .Atom(0, KeyDirection) + .Callable(1, "String") + .Atom(0, DirectionToString(value.GetDirection())) + .Seal() + .Seal() + .List(1) + .Atom(0, KeyNumber) + .Add(1, numberVariant) + .Seal() + .Seal() + .Build(); +} + +template <typename T> +TExprNode::TPtr SerializeWindowFrame( + TPositionHandle pos, + const TWindowFrame<TNumberAndDirection<T>>& frame, + TStringBuf dataTypeName, + TStringBuf callableName, + TExprContext& ctx) +{ + return ctx.Builder(pos) + .Callable("AsStruct") + .List(0) + .Atom(0, KeyMin) + .Add(1, SerializeNumberAndDirection(pos, frame.Min(), dataTypeName, callableName, ctx)) + .Seal() + .List(1) + .Atom(0, KeyMax) + .Add(1, SerializeNumberAndDirection(pos, frame.Max(), dataTypeName, callableName, ctx)) + .Seal() + .Seal() + .Build(); +} + +TExprNode::TPtr 
SerializeWindowFrameAggregatedBounds(TPositionHandle pos, const TCoreWinFrameCollectorBounds<TString>& bounds, TStringBuf rangeCallableName, TExprContext& ctx) { + TExprNodeList rangeIntervalItems; + for (const auto& frame : bounds.RangeIntervals()) { + rangeIntervalItems.push_back(SerializeWindowFrame(pos, frame, rangeCallableName, rangeCallableName, ctx)); + } + + TExprNodeList rowIntervalItems; + for (const auto& frame : bounds.RowIntervals()) { + rowIntervalItems.push_back(SerializeWindowFrame(pos, frame, "Uint64", "Uint64", ctx)); + } + + TExprNodeList rangeIncrementalItems; + for (const auto& item : bounds.RangeIncrementals()) { + rangeIncrementalItems.push_back(SerializeNumberAndDirection(pos, item, rangeCallableName, rangeCallableName, ctx)); + } + + TExprNodeList rowIncrementalItems; + for (const auto& item : bounds.RowIncrementals()) { + rowIncrementalItems.push_back(SerializeNumberAndDirection(pos, item, "Uint64", "Uint64", ctx)); + } + + return ctx.Builder(pos) + .Callable("AsStruct") + .List(0) + .Atom(0, KeyRangeIntervals) + .Callable(1, "AsList") + .Add(std::move(rangeIntervalItems)) + .Seal() + .Seal() + .List(1) + .Atom(0, KeyRowIntervals) + .Callable(1, "AsList") + .Add(std::move(rowIntervalItems)) + .Seal() + .Seal() + .List(2) + .Atom(0, KeyRangeIncrementals) + .Callable(1, "AsList") + .Add(std::move(rangeIncrementalItems)) + .Seal() + .Seal() + .List(3) + .Atom(0, KeyRowIncrementals) + .Callable(1, "AsList") + .Add(std::move(rowIncrementalItems)) + .Seal() + .Seal() + .Seal() + .Build(); +} + +} // anonymous namespace + +TExprNode::TPtr SerializeWindowAggregatorParamsToExpr( + const TStringCoreWinFramesCollectorParams& params, + TPositionHandle pos, + TStringBuf rangeCallableName, + TExprContext& ctx) +{ + return ctx.Builder(pos) + .Callable("AsStruct") + .List(0) + .Atom(0, KeySortOrder) + .Callable(1, "String") + .Atom(0, ToString(params.GetSortOrder())) + .Seal() + .Seal() + .List(1) + .Atom(0, KeyBounds) + .Add(1, 
SerializeWindowFrameAggregatedBounds(pos, params.GetBounds(), rangeCallableName, ctx)) + .Seal() + .List(2) + .Atom(0, KeySortColumnName) + .Callable(1, "String") + .Atom(0, params.GetSortColumnName()) + .Seal() + .Seal() + .Seal() + .Build(); +} + +} // namespace NYql::NWindow diff --git a/yql/essentials/core/yql_window_frames_collector_params_serializer.h b/yql/essentials/core/yql_window_frames_collector_params_serializer.h new file mode 100644 index 00000000000..7887acbf7db --- /dev/null +++ b/yql/essentials/core/yql_window_frames_collector_params_serializer.h @@ -0,0 +1,14 @@ +#pragma once + +#include <yql/essentials/core/sql_types/window_frames_collector_params.h> +#include <yql/essentials/ast/yql_expr.h> + +namespace NYql::NWindow { + +TExprNode::TPtr SerializeWindowAggregatorParamsToExpr( + const TStringCoreWinFramesCollectorParams& params, + TPositionHandle pos, + TStringBuf rangeCallableName, + TExprContext& ctx); + +} // namespace NYql::NWindow diff --git a/yql/essentials/data/language/pragmas_opensource.json b/yql/essentials/data/language/pragmas_opensource.json index 6b61b13e210..19c63ec83c0 100644 --- a/yql/essentials/data/language/pragmas_opensource.json +++ b/yql/essentials/data/language/pragmas_opensource.json @@ -1059,6 +1059,9 @@ "name": "DisableWarnUntypedStringLiterals" }, { + "name": "DisableWindowNewPipeline" + }, + { "name": "DiscoveryMode" }, { @@ -1239,6 +1242,9 @@ "name": "WarningMsg" }, { + "name": "WindowNewPipeline" + }, + { "name": "YqlSelect" }, { diff --git a/yql/essentials/minikql/comp_nodes/mkql_factory.cpp b/yql/essentials/minikql/comp_nodes/mkql_factory.cpp index d96bb0ff200..094878197d3 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_factory.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_factory.cpp @@ -275,6 +275,8 @@ struct TCallableComputationNodeBuilderFuncMapFiller { {"QueueRange", &WrapQueueRange}, {"Seq", &WrapSeq}, {"PreserveStream", &WrapPreserveStream}, + {"WinFramesCollector", &WrapWinFramesCollector}, + 
{"WinFrame", &WrapWinFrame}, {"FromYsonSimpleType", &WrapFromYsonSimpleType}, {"TryWeakMemberFromDict", &WrapTryWeakMemberFromDict}, {"TimezoneId", &WrapTimezoneId}, diff --git a/yql/essentials/minikql/comp_nodes/mkql_queue.cpp b/yql/essentials/minikql/comp_nodes/mkql_queue.cpp index 32345df9859..ecd3782d68f 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_queue.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_queue.cpp @@ -1,8 +1,12 @@ #include "mkql_queue.h" +#include "mkql_window_frames_collector_params_deserializer.h" #include <yql/essentials/minikql/computation/mkql_computation_node_holders.h> #include <yql/essentials/minikql/mkql_node_cast.h> #include <yql/essentials/minikql/mkql_program_builder.h> +#include <yql/essentials/core/sql_types/window_frame_bounds.h> +#include <yql/essentials/minikql/mkql_core_win_frames_collector.h> +#include <yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h> #include <yql/essentials/public/udf/udf_string.h> namespace NKikimr { @@ -19,25 +23,28 @@ public: , Buffer(capacity, TUnboxedValue(), initSize) , BufferBytes(CurrentMemUsage()) { - MKQL_MEM_TAKE(memInfo, &Buffer, BufferBytes); } ~TQueueResource() { - Y_DEBUG_ABORT_UNLESS(BufferBytes == CurrentMemUsage()); - MKQL_MEM_RETURN(GetMemInfo(), &Buffer, CurrentMemUsage()); Buffer.Clear(); } void UpdateBufferStats() { - MKQL_MEM_RETURN(GetMemInfo(), &Buffer, BufferBytes); BufferBytes = CurrentMemUsage(); - MKQL_MEM_TAKE(GetMemInfo(), &Buffer, BufferBytes); } TSafeCircularBuffer<TUnboxedValue>& GetBuffer() { return Buffer; } + const TFrameBoundsIndices& GetFrameBoundsIndices() const { + return FrameBoundsIndices; + } + + TFrameBoundsIndices& GetFrameBoundsIndices() { + return FrameBoundsIndices; + } + private: NUdf::TStringRef GetResourceTag() const override { return NUdf::TStringRef(ResourceTag); @@ -53,6 +60,7 @@ private: const TStringBuf ResourceTag; TSafeCircularBuffer<TUnboxedValue> Buffer; + TFrameBoundsIndices FrameBoundsIndices; size_t BufferBytes; }; @@ -60,6 
+68,8 @@ class TQueueResourceUser { public: TQueueResourceUser(TStringBuf&& tag, IComputationNode* resource); TSafeCircularBuffer<NUdf::TUnboxedValue>& CheckAndGetBuffer(const NUdf::TUnboxedValuePod& resource) const; + TFrameBoundsIndices& CheckAndGetFrameBoundsIndices(const NUdf::TUnboxedValuePod& resource); + const TFrameBoundsIndices& CheckAndGetFrameBoundsIndices(const NUdf::TUnboxedValuePod& resource) const; void UpdateBufferStats(const NUdf::TUnboxedValuePod& resource) const; protected: @@ -79,6 +89,14 @@ TSafeCircularBuffer<TUnboxedValue>& TQueueResourceUser::CheckAndGetBuffer(const return GetResource(resource).GetBuffer(); } +TFrameBoundsIndices& TQueueResourceUser::CheckAndGetFrameBoundsIndices(const NUdf::TUnboxedValuePod& resource) { + return GetResource(resource).GetFrameBoundsIndices(); +} + +const TFrameBoundsIndices& TQueueResourceUser::CheckAndGetFrameBoundsIndices(const NUdf::TUnboxedValuePod& resource) const { + return GetResource(resource).GetFrameBoundsIndices(); +} + void TQueueResourceUser::UpdateBufferStats(const TUnboxedValuePod& resource) const { GetResource(resource).UpdateBufferStats(); } @@ -89,6 +107,85 @@ TQueueResource& TQueueResourceUser::GetResource(const TUnboxedValuePod& resource return *static_cast<TQueueResource*>(resource.GetResource()); } +template <bool AlwaysExist> +class TQueueRange: public TComputationValue<TQueueRange<AlwaysExist>>, public TQueueResourceUser { +public: + class TIterator: public TComputationValue<TIterator>, public TQueueResourceUser { + public: + TIterator(TMemoryUsageInfo* memInfo, TUnboxedValue queue, size_t begin, size_t end, ui64 generation, TStringBuf tag, IComputationNode* resource) + : TComputationValue<TIterator>(memInfo) + , TQueueResourceUser(std::move(tag), resource) + , Queue(queue) + , Buffer(CheckAndGetBuffer(queue)) + , Current(begin) + , End(end) + , Generation(generation) + { + } + + private: + bool Next(NUdf::TUnboxedValue& value) override { + MKQL_ENSURE(Generation == 
Buffer.Generation(), + "Queue generation changed while doing QueueRange: expected " << Generation << ", got: " << Buffer.Generation()); + if (Current >= End) { + return false; + } + + const auto& valRef = Buffer.Get(Current++); + value = !valRef ? NUdf::TUnboxedValuePod() : valRef.MakeOptional(); + return true; + } + + bool Skip() override { + if (Current >= End) { + return false; + } + Current++; + return true; + } + + const TUnboxedValue Queue; + const TSafeCircularBuffer<TUnboxedValue>& Buffer; + size_t Current; + const size_t End; + const ui64 Generation; + }; + + TQueueRange(TMemoryUsageInfo* memInfo, TComputationContext& compCtx, TUnboxedValue queue, size_t begin, size_t end, TStringBuf tag, IComputationNode* resource) + : TComputationValue<TQueueRange<AlwaysExist>>(memInfo) + , TQueueResourceUser(std::move(tag), resource) + , CompCtx(compCtx) + , Queue(queue) + , Begin(begin) + , End(std::min(end, CheckAndGetBuffer(Queue).Size())) + , Generation(CheckAndGetBuffer(Queue).Generation()) + { + } + +private: + ui64 GetListLength() const final { + return Begin < End ? 
(End - Begin) : 0; + } + + bool HasListItems() const final { + return GetListLength() != 0; + } + + bool HasFastListLength() const final { + return true; + } + + NUdf::TUnboxedValue GetListIterator() const final { + return CompCtx.HolderFactory.Create<TIterator>(Queue, Begin, End, Generation, Tag, Resource); + } + + TComputationContext& CompCtx; + const TUnboxedValue Queue; + const size_t Begin; + const size_t End; + const ui64 Generation; +}; + class TQueueCreateWrapper: public TMutableComputationNode<TQueueCreateWrapper> { typedef TMutableComputationNode<TQueueCreateWrapper> TBaseComputation; @@ -203,84 +300,6 @@ class TQueueRangeWrapper: public TMutableComputationNode<TQueueRangeWrapper>, pu typedef TMutableComputationNode<TQueueRangeWrapper> TBaseComputation; public: - class TValue: public TComputationValue<TValue>, public TQueueResourceUser { - public: - class TIterator: public TComputationValue<TIterator>, public TQueueResourceUser { - public: - TIterator(TMemoryUsageInfo* memInfo, TUnboxedValue queue, size_t begin, size_t end, ui64 generation, TStringBuf tag, IComputationNode* resource) - : TComputationValue<TIterator>(memInfo) - , TQueueResourceUser(std::move(tag), resource) - , Queue(queue) - , Buffer(CheckAndGetBuffer(queue)) - , Current(begin) - , End(end) - , Generation(generation) - { - } - - private: - bool Next(NUdf::TUnboxedValue& value) override { - MKQL_ENSURE(Generation == Buffer.Generation(), - "Queue generation changed while doing QueueRange: expected " << Generation << ", got: " << Buffer.Generation()); - if (Current >= End) { - return false; - } - - const auto& valRef = Buffer.Get(Current++); - value = !valRef ? 
NUdf::TUnboxedValuePod() : valRef.MakeOptional(); - return true; - } - - bool Skip() override { - if (Current >= End) { - return false; - } - Current++; - return true; - } - - const TUnboxedValue Queue; - const TSafeCircularBuffer<TUnboxedValue>& Buffer; - size_t Current; - const size_t End; - const ui64 Generation; - }; - - TValue(TMemoryUsageInfo* memInfo, TComputationContext& compCtx, TUnboxedValue queue, size_t begin, size_t end, TStringBuf tag, IComputationNode* resource) - : TComputationValue<TValue>(memInfo) - , TQueueResourceUser(std::move(tag), resource) - , CompCtx(compCtx) - , Queue(queue) - , Begin(begin) - , End(std::min(end, CheckAndGetBuffer(Queue).Size())) - , Generation(CheckAndGetBuffer(Queue).Generation()) - { - } - - private: - ui64 GetListLength() const final { - return Begin < End ? (End - Begin) : 0; - } - - bool HasListItems() const final { - return GetListLength() != 0; - } - - bool HasFastListLength() const final { - return true; - } - - NUdf::TUnboxedValue GetListIterator() const final { - return CompCtx.HolderFactory.Create<TIterator>(Queue, Begin, End, Generation, Tag, Resource); - } - - TComputationContext& CompCtx; - const TUnboxedValue Queue; - const size_t Begin; - const size_t End; - const ui64 Generation; - }; - TQueueRangeWrapper(TComputationMutables& mutables, TComputationNodePtrVector&& dependentNodes, const TResourceType* resourceType, IComputationNode* resource, IComputationNode* begin, IComputationNode* end) : TBaseComputation(mutables) @@ -297,7 +316,7 @@ public: auto begin = Begin->GetValue(ctx).Get<ui64>(); auto end = End->GetValue(ctx).Get<ui64>(); - return ctx.HolderFactory.Create<TValue>(ctx, queue, begin, end, Tag, Resource); + return ctx.HolderFactory.Create<TQueueRange</*AlwaysExist=*/false>>(ctx, queue, begin, end, Tag, Resource); } private: @@ -414,6 +433,152 @@ private: const ui64 Outpace; }; +template <typename TFactory, ESortOrder SortOrder> +class TAggregateWindowValue: public 
TComputationValue<TAggregateWindowValue<TFactory, SortOrder>>, public TQueueResourceUser { +public: + using TBase = TComputationValue<TAggregateWindowValue<TFactory, SortOrder>>; + + TAggregateWindowValue(TMemoryUsageInfo* memInfo, + NUdf::TUnboxedValue&& stream, + NUdf::TUnboxedValue&& queue, + TStringBuf tag, + IComputationNode* resource, + const TFactory& factory) + : TBase(memInfo) + , TQueueResourceUser(std::move(tag), resource) + , Stream(std::move(stream)) + , Queue(std::move(queue)) + , Buffer(TQueueResourceUser::CheckAndGetBuffer(Queue)) + , AggregatedBounds(factory(Buffer, + std::bind(&TAggregateWindowValue::ConsumeStream, this, std::placeholders::_1), + TQueueResourceUser::CheckAndGetFrameBoundsIndices(Queue))) + { + } + +private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& value) override { + switch (AggregatedBounds.Next()) { + case EConsumeStatus::Ok: + value = AggregatedBounds.GetCurrentElement(); + return NUdf::EFetchStatus::Ok; + case EConsumeStatus::Wait: + return NUdf::EFetchStatus::Yield; + case EConsumeStatus::End: + if (!Cleaned_) { + AggregatedBounds.Clean(); + Cleaned_ = true; + } + return NUdf::EFetchStatus::Finish; + } + } + + EConsumeStatus ConsumeStream(TUnboxedValue& value) { + switch (Stream.Fetch(value)) { + case EFetchStatus::Ok: + return EConsumeStatus::Ok; + case EFetchStatus::Finish: + return EConsumeStatus::End; + case EFetchStatus::Yield: + return EConsumeStatus::Wait; + } + } + + const NUdf::TUnboxedValue Stream; + const NUdf::TUnboxedValue Queue; + TSafeCircularBuffer<TUnboxedValue>& Buffer; + bool Cleaned_ = false; + std::invoke_result_t<TFactory, TSafeCircularBuffer<TUnboxedValue>&, std::function<EConsumeStatus(TUnboxedValue&)>, TFrameBoundsIndices&> AggregatedBounds; +}; + +template <typename TFactory, ESortOrder SortOrder> +class WinFramesCollector: public TMutableComputationNode<WinFramesCollector<TFactory, SortOrder>>, public TQueueResourceUser { + typedef TMutableComputationNode<WinFramesCollector> TBaseComputation; 
+ +public: + WinFramesCollector(TComputationMutables& mutables, + IComputationNode* stream, + const TResourceType* resourceType, + IComputationNode* resource, + TFactory&& factory) + : TBaseComputation(mutables) + , TQueueResourceUser(resourceType->GetTag(), resource) + , Stream(stream) + , Factory(std::move(factory)) + { + } + + NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const { + return ctx.HolderFactory.Create<TAggregateWindowValue<TFactory, SortOrder>>(Stream->GetValue(ctx), Resource->GetValue(ctx), Tag, Resource, Factory); + } + +private: + void RegisterDependencies() const final { + this->DependsOn(Resource); + this->DependsOn(Stream); + } + + IComputationNode* const Stream; + const TFactory Factory; +}; + +template <bool IsRange, bool IsIncremental, bool ReturnSingleElement> +class TWinFrame: public TMutableComputationNode<TWinFrame<IsRange, IsIncremental, ReturnSingleElement>>, public TQueueResourceUser { + typedef TMutableComputationNode<TWinFrame<IsRange, IsIncremental, ReturnSingleElement>> TBaseComputation; + +public: + TWinFrame(TComputationMutables& mutables, TComputationNodePtrVector&& dependentNodes, const TResourceType* resourceType, IComputationNode* resource, + ui64 handle) + : TBaseComputation(mutables) + , TQueueResourceUser(resourceType->GetTag(), resource) + , Handle(handle) + , DependentNodes(std::move(dependentNodes)) + { + } + + NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const { + auto queue = Resource->GetValue(ctx); + + auto windows = this->CheckAndGetFrameBoundsIndices(queue); + auto frame = this->GetWindowFrame(Handle, windows); + if constexpr (ReturnSingleElement) { + if (frame.Size() == 0) { + return TUnboxedValuePod(); + } else { + const auto& valRef = CheckAndGetBuffer(queue).Get(frame.Max() - 1); + return valRef.MakeOptional(); + } + return CheckAndGetBuffer(queue).Get(frame.Min()); + } else { + return ctx.HolderFactory.Create<TQueueRange</*AlwaysExist=*/true>>(ctx, queue, frame.Min(), 
frame.Max(), Tag, Resource); + } + } + + TRowWindowFrame GetWindowFrame(ui64 handle, const TFrameBoundsIndices& windows) const { + if constexpr (IsRange) { + if constexpr (IsIncremental) { + return windows.GetIntervalInQueueByRangeIncremental(handle); + } else { + return windows.GetIntervalInQueueByRange(handle); + } + } else { + if constexpr (IsIncremental) { + return windows.GetIntervalInQueueByRowIncremental(handle); + } else { + return windows.GetIntervalInQueueByRow(handle); + } + } + } + +private: + void RegisterDependencies() const final { + this->DependsOn(Resource); + std::for_each(DependentNodes.cbegin(), DependentNodes.cend(), std::bind(&TWinFrame::DependsOn, this, std::placeholders::_1)); + } + + const ui64 Handle; + const TComputationNodePtrVector DependentNodes; +}; + template <class T, class... Args> IComputationNode* MakeNodeWithDeps(TCallable& callable, const TComputationNodeFactoryContext& ctx, unsigned reqArgs, Args... args) { TComputationNodePtrVector dependentNodes(callable.GetInputsCount() - reqArgs); @@ -423,6 +588,167 @@ IComputationNode* MakeNodeWithDeps(TCallable& callable, const TComputationNodeFa return new T(ctx.Mutables, std::move(dependentNodes), std::forward<Args>(args)...); } +template <ESortOrder SortOrder, typename TStreamType, typename TBoundType, typename StreamScale, typename RangeBoundScale> +IComputationNode* DispatchWinStreamCollectorBasedOnSortedColumn(const TRuntimeNode& paramsNode, + const TComputationNodeFactoryContext& ctx, + IComputationNode* stream, + TResourceType* resourceType, + IComputationNode* resource, + ui32 memberIndex, + StreamScale streamScale, + RangeBoundScale boundScale) { + using TStream = NUdf::TDataType<TStreamType>::TLayout; + using TBound = NUdf::TDataType<TBoundType>::TLayout; + + static_assert(std::is_same_v<TBound, decltype(boundScale(TBound{}))>, "Scaled bound type must match original bound type"); + + // Verify that the range type from params matches the expected TBoundType. 
+ auto rangeDataType = ExtractRangeDataTypeFromWindowAggregatorParams(paramsNode); + MKQL_ENSURE(rangeDataType != nullptr, "Range type must be present for sorted window frames."); + MKQL_ENSURE(rangeDataType->GetSchemeType() == NUdf::TDataType<TBoundType>::Id, + "Range type from params must match the expected bound type."); + + auto bounds = DeserializeBounds<TBound>(paramsNode); + + using TScaledStream = decltype(streamScale(TStream{})); + auto streamElementGetter = [memberIndex, streamScale](const TUnboxedValuePod& pod) -> TMaybe<TScaledStream> { + auto structElement = pod.GetElement(memberIndex); + if (!structElement) { + return {}; + } + return std::invoke(streamScale, structElement.Get<TStream>()); + }; + + auto factory = TCoreWinFramesCollector<TUnboxedValue, decltype(streamElementGetter), SortOrder>::CreateFactory( + bounds, std::move(streamElementGetter)); + + return new WinFramesCollector<decltype(factory), SortOrder>(ctx.Mutables, + stream, + resourceType, + resource, + std::move(factory)); +} + +template <typename T> +T NoScale(T elem) { + return elem; +} + +template <ESortOrder SortOrder> +IComputationNode* DispatchWinStreamCollectorBasedOnStreamType(const TRuntimeNode& paramsNode, + const TComputationNodeFactoryContext& ctx, + IComputationNode* stream, + TResourceType* resourceType, + IComputationNode* resource, + TType* sortColumnType, + ui32 memberIndex) { + bool isOptional; + sortColumnType = UnpackOptional(sortColumnType, isOptional); + + MKQL_ENSURE(sortColumnType->IsData(), "Expected data type."); + switch (*AS_TYPE(TDataType, sortColumnType)->GetDataSlot()) { + case EDataSlot::Int8: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, i8, i8>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<i8>, NoScale<i8>); + case EDataSlot::Uint8: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, ui8, ui8>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<ui8>, NoScale<ui8>); + case 
EDataSlot::Int16: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, i16, i16>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<i16>, NoScale<i16>); + case EDataSlot::Uint16: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, ui16, ui16>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<ui16>, NoScale<ui16>); + case EDataSlot::Int32: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, i32, i32>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<i32>, NoScale<i32>); + case EDataSlot::Uint32: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, ui32, ui32>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<ui32>, NoScale<ui32>); + case EDataSlot::Int64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, i64, i64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<i64>, NoScale<i64>); + case EDataSlot::Uint64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, ui64, ui64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<ui64>, NoScale<ui64>); + case EDataSlot::Double: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, double, double>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<double>, NoScale<double>); + case EDataSlot::Float: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, float, float>(paramsNode, ctx, stream, resourceType, resource, memberIndex, NoScale<float>, NoScale<float>); + case EDataSlot::Date: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TDate, NUdf::TInterval>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TDate>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::Datetime: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TDatetime, NUdf::TInterval>(paramsNode, ctx, stream, 
resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TDatetime>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::Timestamp: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTimestamp, NUdf::TInterval>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTimestamp>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::Interval: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TInterval, NUdf::TInterval>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::TzDate: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTzDate, NUdf::TInterval>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTzDate>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::TzDatetime: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTzDatetime, NUdf::TInterval>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTzDatetime>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::TzTimestamp: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTzTimestamp, NUdf::TInterval>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTzTimestamp>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval>>); + case EDataSlot::Date32: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TDate32, NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TDate32>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + case EDataSlot::Datetime64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TDatetime64, 
NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TDatetime64>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + case EDataSlot::Timestamp64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTimestamp64, NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTimestamp64>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + case EDataSlot::Interval64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TInterval64, NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + case EDataSlot::TzDate32: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTzDate32, NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTzDate32>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + case EDataSlot::TzDatetime64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTzDatetime64, NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTzDatetime64>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + case EDataSlot::TzTimestamp64: + return DispatchWinStreamCollectorBasedOnSortedColumn<SortOrder, NUdf::TTzTimestamp64, NUdf::TInterval64>(paramsNode, ctx, stream, resourceType, resource, memberIndex, ToScaledDate<NUdf::TDataType<NUdf::TTzTimestamp64>>, ToScaledDate<NUdf::TDataType<NUdf::TInterval64>>); + default: + MKQL_ENSURE(false, "Unexpected type for window collecting."); + return nullptr; + } +} + +IComputationNode* DispatchWinStreamCollectorBasedOnOrderedColumn(const TRuntimeNode& paramsNode, + const TComputationNodeFactoryContext& ctx, + TType* streamType, + IComputationNode* stream, + TResourceType* 
resourceType, + IComputationNode* resource) { + auto sortOrder = DeserializeSortOrder(paramsNode); + auto sortColumnName = DeserializeSortColumnName(paramsNode); + + // No range data type in params means only row-based frames were requested, so the sort column is never read. + if (!ExtractRangeDataTypeFromWindowAggregatorParams(paramsNode)) { + auto bounds = DeserializeBounds<ui64>(paramsNode); + MKQL_ENSURE(bounds.RangeIntervals().empty() && bounds.RangeIncrementals().empty(), "Unexpected bounds."); + // TODO(atarasov5): Remove the fake getter in favor of explicitly specifying a void template. + auto elementGetter = [](const TUnboxedValue&) -> TMaybe<ui64> { + MKQL_ENSURE(0, "Shouldn't be called."); + return ui64(0); + }; + + auto factory = TCoreWinFramesCollector<TUnboxedValue, decltype(elementGetter), ESortOrder::Unimportant>::CreateFactory( + bounds, std::move(elementGetter)); + return new WinFramesCollector<decltype(factory), ESortOrder::Unimportant>(ctx.Mutables, + stream, + resourceType, + resource, + std::move(factory)); + } + + MKQL_ENSURE(streamType->IsStream(), "Expected stream type."); + auto streamItemType = AS_TYPE(TStreamType, streamType)->GetItemType(); + MKQL_ENSURE(streamItemType->IsStruct(), "Expected stream of struct type."); + auto structType = AS_TYPE(TStructType, streamItemType); + + // The column name comes from the "SortColumnName" member of the params struct. + auto memberIndex = structType->FindMemberIndex(sortColumnName); + MKQL_ENSURE(memberIndex, "Stream struct must have a field named '" << sortColumnName << "' (params.SortColumnName)"); + + auto sortColumnType = structType->GetMemberType(*memberIndex); + + // Lift the runtime sort order into a template parameter; only Asc and Desc are valid for range frames. + switch (sortOrder) { + case ESortOrder::Asc: + return DispatchWinStreamCollectorBasedOnStreamType<ESortOrder::Asc>(paramsNode, ctx, stream, resourceType, resource, sortColumnType, *memberIndex); + case ESortOrder::Desc: + return DispatchWinStreamCollectorBasedOnStreamType<ESortOrder::Desc>(paramsNode, ctx, stream, resourceType, resource, sortColumnType, *memberIndex); + default: + MKQL_ENSURE(false, "Unexpected sort order"); + return nullptr; + } +} + +} // namespace IComputationNode* WrapQueueCreate(TCallable& callable, 
const TComputationNodeFactoryContext& ctx) { @@ -494,5 +820,72 @@ IComputationNode* WrapPreserveStream(TCallable& callable, const TComputationNode return new TPreserveStreamWrapper(ctx.Mutables, stream, resourceType, resource, outpace); } +// ############################################################################# +// ###### Wrappers that are used by CoreWinFramesCollector API ####### +// ############################################################################# + +IComputationNode* WrapWinFramesCollector(TCallable& callable, const TComputationNodeFactoryContext& ctx) { + MKQL_ENSURE(callable.GetInputsCount() == 3, "WinFramesCollector: Expected 3 args"); + auto stream = LocateNode(ctx.NodeLocator, callable, 0); + auto streamType = callable.GetInput(0).GetStaticType(); + auto resource = LocateNode(ctx.NodeLocator, callable, 1); + auto resourceType = AS_TYPE(TResourceType, callable.GetInput(1)); + auto paramsNode = callable.GetInput(2); + + return DispatchWinStreamCollectorBasedOnOrderedColumn(paramsNode, ctx, streamType, stream, resourceType, resource); +} + +// Builds a TWinFrame node. Inputs: 0 - queue resource, 1 - ui64 handle, 2 - IsIncremental (bool), +// 3 - IsRange (bool), 4 - IsSingleElement (bool); any further inputs are passed on as dependent nodes. +IComputationNode* WrapWinFrame(TCallable& callable, const TComputationNodeFactoryContext& ctx) { + const unsigned reqArgs = 5; + MKQL_ENSURE(callable.GetInputsCount() >= reqArgs, "WinFrame: Expected at least " << reqArgs << " args"); + auto resourceType = AS_TYPE(TResourceType, callable.GetInput(0)); + + TDataType* handleDataType = AS_TYPE(TDataType, callable.GetInput(1)); + MKQL_ENSURE(handleDataType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64 as handle."); + TDataType* isIncrementalDataType = AS_TYPE(TDataType, callable.GetInput(2)); + MKQL_ENSURE(isIncrementalDataType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool as IsIncremental marker."); + TDataType* isRangeDataType = AS_TYPE(TDataType, callable.GetInput(3)); + MKQL_ENSURE(isRangeDataType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool as IsRange marker."); + TDataType* isSingleElementDataType = 
AS_TYPE(TDataType, callable.GetInput(4)); + MKQL_ENSURE(isSingleElementDataType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool as IsSingleElement marker."); + auto resource = LocateNode(ctx.NodeLocator, callable, 0); + + auto handle = AS_VALUE(TDataLiteral, callable.GetInput(1))->AsValue().Get<ui64>(); + auto IsIncremental = AS_VALUE(TDataLiteral, callable.GetInput(2))->AsValue().Get<bool>(); + auto isRange = AS_VALUE(TDataLiteral, callable.GetInput(3))->AsValue().Get<bool>(); + bool isSingleElement = AS_VALUE(TDataLiteral, callable.GetInput(4))->AsValue().Get<bool>(); + + // Instantiate the correct template specialization based on runtime values + if (isRange) { + if (IsIncremental) { + if (isSingleElement) { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/true, /*IsIncremental=*/true, /*ReturnSingleElement=*/true>>(callable, ctx, reqArgs, resourceType, resource, handle); + } else { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/true, /*IsIncremental=*/true, /*ReturnSingleElement=*/false>>(callable, ctx, reqArgs, resourceType, resource, handle); + } + } else { + if (isSingleElement) { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/true, /*IsIncremental=*/false, /*ReturnSingleElement=*/true>>(callable, ctx, reqArgs, resourceType, resource, handle); + } else { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/true, /*IsIncremental=*/false, /*ReturnSingleElement=*/false>>(callable, ctx, reqArgs, resourceType, resource, handle); + } + } + } else { + if (IsIncremental) { + if (isSingleElement) { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/false, /*IsIncremental=*/true, /*ReturnSingleElement=*/true>>(callable, ctx, reqArgs, resourceType, resource, handle); + } else { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/false, /*IsIncremental=*/true, /*ReturnSingleElement=*/false>>(callable, ctx, reqArgs, resourceType, resource, handle); + } + } else { + if (isSingleElement) { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/false, /*IsIncremental=*/false, 
/*ReturnSingleElement=*/true>>(callable, ctx, reqArgs, resourceType, resource, handle); + } else { + return MakeNodeWithDeps<TWinFrame</*IsRange=*/false, /*IsIncremental=*/false, /*ReturnSingleElement=*/false>>(callable, ctx, reqArgs, resourceType, resource, handle); + } + } + } +} + } // namespace NMiniKQL } // namespace NKikimr diff --git a/yql/essentials/minikql/comp_nodes/mkql_queue.h b/yql/essentials/minikql/comp_nodes/mkql_queue.h index 36685a11591..ef29c6d5e7e 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_queue.h +++ b/yql/essentials/minikql/comp_nodes/mkql_queue.h @@ -14,5 +14,11 @@ IComputationNode* WrapQueuePeek(TCallable& callable, const TComputationNodeFacto IComputationNode* WrapQueueRange(TCallable& callable, const TComputationNodeFactoryContext& ctx); IComputationNode* WrapPreserveStream(TCallable& callable, const TComputationNodeFactoryContext& ctx); +// ############################################################################# +// ###### Wrappers that are used by CoreWinFramesCollector API ####### +// ############################################################################# +IComputationNode* WrapWinFramesCollector(TCallable& callable, const TComputationNodeFactoryContext& ctx); +IComputationNode* WrapWinFrame(TCallable& callable, const TComputationNodeFactoryContext& ctx); + } // namespace NMiniKQL } // namespace NKikimr diff --git a/yql/essentials/minikql/comp_nodes/mkql_safe_circular_buffer.h b/yql/essentials/minikql/comp_nodes/mkql_safe_circular_buffer.h index 12865d8d012..ee6f6fed3dc 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_safe_circular_buffer.h +++ b/yql/essentials/minikql/comp_nodes/mkql_safe_circular_buffer.h @@ -33,8 +33,8 @@ public: if (IsFull()) { Grow(); } - Buffer_[RealIndex(Size_)] = std::move(data); Size_++; + Buffer_[RealIndex(Size_ - 1)] = std::move(data); } const T& Get(size_t index) const { @@ -92,6 +92,13 @@ public: Buffer_.shrink_to_fit(); } + void Reserve(size_t capacity) { + if (capacity <= 
Capacity()) { + return; + } + Grow(capacity); + } + private: static inline constexpr size_t GrowFactor = 2; @@ -99,21 +106,28 @@ private: return Size() == Capacity(); } - void Grow() { + void Grow(size_t to) { YQL_ENSURE(Unbounded_, "Cannot reallocate buffer in Bounded mode"); // Rotate elements so that logical first element is at position 0. std::rotate(Buffer_.begin(), Buffer_.begin() + Head_, Buffer_.end()); - // Double buffer size. - Buffer_.resize(Capacity() * GrowFactor + 1, EmptyValue_); + Buffer_.resize(to, EmptyValue_); // Reset Head since elements now start at position 0. Head_ = 0; } + void Grow() { + if (Capacity() == 0) { + Grow(1); + } + // Double buffer size. + Grow(Capacity() * GrowFactor); + } + size_t RealIndex(size_t index, bool mayOutOfBounds = false) const { auto capacity = Capacity(); Y_ABORT_UNLESS(capacity); if (!mayOutOfBounds) { - Y_ABORT_UNLESS(index < capacity); + Y_ABORT_UNLESS(index < Size()); } return (Head_ + index) % capacity; } diff --git a/yql/essentials/minikql/comp_nodes/mkql_window_frames_collector_params_deserializer.cpp b/yql/essentials/minikql/comp_nodes/mkql_window_frames_collector_params_deserializer.cpp new file mode 100644 index 00000000000..e2ca43307d5 --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/mkql_window_frames_collector_params_deserializer.cpp @@ -0,0 +1,180 @@ +#include "mkql_window_frames_collector_params_deserializer.h" + +#include <yql/essentials/minikql/mkql_node_cast.h> +#include <yql/essentials/core/sql_types/window_direction.h> + +namespace NKikimr::NMiniKQL { + +using namespace NYql::NWindow; +using NYql::ESortOrder; + +namespace { + +constexpr TStringBuf KeyMin = "Min"; +constexpr TStringBuf KeyMax = "Max"; +constexpr TStringBuf KeyRangeIntervals = "RangeIntervals"; +constexpr TStringBuf KeyRowIntervals = "RowIntervals"; +constexpr TStringBuf KeyRangeIncrementals = "RangeIncrementals"; +constexpr TStringBuf KeyRowIncrementals = "RowIncrementals"; +constexpr TStringBuf KeySortOrder = "SortOrder"; 
+constexpr TStringBuf KeyBounds = "Bounds"; +constexpr TStringBuf KeySortColumnName = "SortColumnName"; +constexpr TStringBuf KeyDirection = "Direction"; +constexpr TStringBuf KeyNumber = "Number"; +constexpr TStringBuf KeyUnbounded = "Unbounded"; +constexpr TStringBuf KeyBounded = "Bounded"; + +TRuntimeNode GetMember(const TStructLiteral* structLit, TStringBuf name) { + auto index = structLit->GetType()->FindMemberIndex(name); + MKQL_ENSURE(index, "Member '" << name << "' not found"); + return structLit->GetValue(*index); +} + +const TListLiteral* GetList(const TRuntimeNode& node) { + return node.GetStaticType()->IsEmptyList() ? nullptr : AS_VALUE(TListLiteral, node); +} + +TString GetString(const TRuntimeNode& node) { + return TString(AS_VALUE(TDataLiteral, node)->AsValue().AsStringRef()); +} + +template <typename T> +T GetValue(const TRuntimeNode& node) { + return AS_VALUE(TDataLiteral, node)->AsValue().Get<T>(); +} + +template <typename T> +TNumberAndDirection<T> DeserializeNumberAndDirection(const TRuntimeNode& node) { + auto structLit = AS_VALUE(TStructLiteral, node); + + EDirection direction; + MKQL_ENSURE(TryFromString(GetString(GetMember(structLit, KeyDirection)), direction), "Unknown direction"); + + auto variantLit = AS_VALUE(TVariantLiteral, GetMember(structLit, KeyNumber)); + auto alternatives = AS_TYPE(TStructType, variantLit->GetType()->GetUnderlyingType()); + auto unboundedIndex = alternatives->FindMemberIndex(KeyUnbounded); + MKQL_ENSURE(unboundedIndex, "Unbounded not found"); + + return variantLit->GetIndex() == *unboundedIndex + ? 
TNumberAndDirection<T>(typename TNumberAndDirection<T>::TUnbounded{}, direction) + : TNumberAndDirection<T>(GetValue<T>(variantLit->GetItem()), direction); +} + +template <typename T> +TWindowFrame<TNumberAndDirection<T>> DeserializeWindowFrame(const TRuntimeNode& node) { + auto structLit = AS_VALUE(TStructLiteral, node); + return {DeserializeNumberAndDirection<T>(GetMember(structLit, KeyMin)), + DeserializeNumberAndDirection<T>(GetMember(structLit, KeyMax))}; +} + +template <typename T, typename Deserializer> +TVector<T> DeserializeList(const TRuntimeNode& node, Deserializer deserializer) { + auto listLit = GetList(node); + if (!listLit) { + return {}; + } + + TVector<T> result; + result.reserve(listLit->GetItemsCount()); + for (ui32 i = 0; i < listLit->GetItemsCount(); ++i) { + result.push_back(deserializer(listLit->GetItems()[i])); + } + return result; +} + +template <typename TRangeType> +TCoreWinFrameCollectorBounds<TRangeType> DeserializeBoundsImpl(const TRuntimeNode& node) { + auto structLit = AS_VALUE(TStructLiteral, node); + // No deduplication is allowed here. We must add as much bounds as provided by |node|. 
+ TCoreWinFrameCollectorBounds<TRangeType> bounds(/*dedup=*/false); + + for (auto& frame : DeserializeList<TWindowFrame<TNumberAndDirection<TRangeType>>>( + GetMember(structLit, KeyRangeIntervals), DeserializeWindowFrame<TRangeType>)) { + bounds.AddRange(std::move(frame)); + } + for (auto& frame : DeserializeList<TInputRowWindowFrame>( + GetMember(structLit, KeyRowIntervals), DeserializeWindowFrame<ui64>)) { + bounds.AddRow(std::move(frame)); + } + for (auto& delta : DeserializeList<TNumberAndDirection<TRangeType>>( + GetMember(structLit, KeyRangeIncrementals), DeserializeNumberAndDirection<TRangeType>)) { + bounds.AddRangeIncremental(std::move(delta)); + } + for (auto& delta : DeserializeList<TInputRow>( + GetMember(structLit, KeyRowIncrementals), DeserializeNumberAndDirection<ui64>)) { + bounds.AddRowIncremental(std::move(delta)); + } + + return bounds; +} + +TDataType* ExtractTypeFromNumberAndDirection(TType* type) { + auto structType = AS_TYPE(TStructType, type); + auto numberIndex = structType->FindMemberIndex(KeyNumber); + MKQL_ENSURE(numberIndex, "Number not found"); + + auto alternatives = AS_TYPE(TStructType, AS_TYPE(TVariantType, structType->GetMemberType(*numberIndex))->GetUnderlyingType()); + auto boundedIndex = alternatives->FindMemberIndex(KeyBounded); + MKQL_ENSURE(boundedIndex, "Bounded not found"); + + return AS_TYPE(TDataType, alternatives->GetMemberType(*boundedIndex)); +} + +TDataType* ExtractTypeFromWindowFrame(TType* type) { + auto structType = AS_TYPE(TStructType, type); + auto minIndex = structType->FindMemberIndex(KeyMin); + MKQL_ENSURE(minIndex, "Min not found"); + return ExtractTypeFromNumberAndDirection(structType->GetMemberType(*minIndex)); +} + +} // anonymous namespace + +ESortOrder DeserializeSortOrder(const TRuntimeNode& node) { + auto structLit = AS_VALUE(TStructLiteral, node); + ESortOrder sortOrder; + MKQL_ENSURE(TryFromString(GetString(GetMember(structLit, KeySortOrder)), sortOrder), "Unknown sort order"); + return sortOrder; +} 
+ +TString DeserializeSortColumnName(const TRuntimeNode& node) { + auto structLit = AS_VALUE(TStructLiteral, node); + return GetString(GetMember(structLit, KeySortColumnName)); +} + +template <typename TRangeType> +TCoreWinFrameCollectorBounds<TRangeType> DeserializeBounds(const TRuntimeNode& node) { + auto structLit = AS_VALUE(TStructLiteral, node); + return DeserializeBoundsImpl<TRangeType>(GetMember(structLit, KeyBounds)); +} + +TDataType* ExtractRangeDataTypeFromWindowAggregatorParams(const TRuntimeNode& node) { + auto boundsLit = AS_VALUE(TStructLiteral, GetMember(AS_VALUE(TStructLiteral, node), KeyBounds)); + + auto extractFromList = [&](TStringBuf key, auto extractor) -> TDataType* { + if (auto listLit = GetList(GetMember(boundsLit, key))) { + return extractor(listLit->GetType()->GetItemType()); + } + return nullptr; + }; + + auto intervalsType = extractFromList(KeyRangeIntervals, ExtractTypeFromWindowFrame); + auto incrementalsType = extractFromList(KeyRangeIncrementals, ExtractTypeFromNumberAndDirection); + + if (intervalsType && incrementalsType) { + MKQL_ENSURE(intervalsType->IsSameType(*incrementalsType), "RangeIntervals and RangeIncrementals type mismatch"); + } + return intervalsType ? 
intervalsType : incrementalsType; +} + +template TCoreWinFrameCollectorBounds<i8> DeserializeBounds<i8>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<ui8> DeserializeBounds<ui8>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<i16> DeserializeBounds<i16>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<ui16> DeserializeBounds<ui16>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<i32> DeserializeBounds<i32>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<ui32> DeserializeBounds<ui32>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<i64> DeserializeBounds<i64>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<ui64> DeserializeBounds<ui64>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<float> DeserializeBounds<float>(const TRuntimeNode&); +template TCoreWinFrameCollectorBounds<double> DeserializeBounds<double>(const TRuntimeNode&); + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/comp_nodes/mkql_window_frames_collector_params_deserializer.h b/yql/essentials/minikql/comp_nodes/mkql_window_frames_collector_params_deserializer.h new file mode 100644 index 00000000000..95a53b33b2a --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/mkql_window_frames_collector_params_deserializer.h @@ -0,0 +1,18 @@ +#pragma once + +#include <yql/essentials/core/sql_types/sort_order.h> +#include <yql/essentials/core/sql_types/window_frame_bounds.h> +#include <yql/essentials/minikql/mkql_node.h> + +namespace NKikimr::NMiniKQL { + +NYql::ESortOrder DeserializeSortOrder(const TRuntimeNode& node); + +TString DeserializeSortColumnName(const TRuntimeNode& node); + +template <typename TRangeType> +NYql::NWindow::TCoreWinFrameCollectorBounds<TRangeType> DeserializeBounds(const TRuntimeNode& node); + +TDataType* ExtractRangeDataTypeFromWindowAggregatorParams(const TRuntimeNode& node); + +} // namespace NKikimr::NMiniKQL diff --git 
a/yql/essentials/minikql/comp_nodes/ya.make.inc b/yql/essentials/minikql/comp_nodes/ya.make.inc index 7f91cbcdd16..b4b0999ee53 100644 --- a/yql/essentials/minikql/comp_nodes/ya.make.inc +++ b/yql/essentials/minikql/comp_nodes/ya.make.inc @@ -96,6 +96,7 @@ SET(ORIG_SOURCES mkql_pickle.cpp mkql_prepend.cpp mkql_queue.cpp + mkql_window_frames_collector_params_deserializer.cpp mkql_random.cpp mkql_range.cpp mkql_reduce.cpp diff --git a/yql/essentials/minikql/mkql_core_win_frames_collector.h b/yql/essentials/minikql/mkql_core_win_frames_collector.h new file mode 100644 index 00000000000..58e2942a94f --- /dev/null +++ b/yql/essentials/minikql/mkql_core_win_frames_collector.h @@ -0,0 +1,711 @@ +#pragma once + +#include <yql/essentials/minikql/comp_nodes/mkql_safe_circular_buffer.h> +#include <yql/essentials/minikql/defs.h> +#include <yql/essentials/minikql/mkql_saturated_math.h> +#include <yql/essentials/core/sql_types/sort_order.h> +#include <yql/essentials/core/sql_types/window_frame_bounds.h> +#include <yql/essentials/core/sql_types/window_frames_collector_params.h> + +#include <util/generic/vector.h> +#include <util/generic/maybe.h> +#include <util/system/types.h> +#include <util/system/yassert.h> +#include <util/generic/scope.h> + +#include <utility> +#include <algorithm> + +namespace NKikimr::NMiniKQL { + +using NYql::ESortOrder; +using NYql::NWindow::EDirection; +using NYql::NWindow::TCoreWinFrameCollectorBounds; +using NYql::NWindow::TCoreWinFramesCollectorParams; +using NYql::NWindow::TInputRange; +using NYql::NWindow::TInputRangeWindowFrame; +using NYql::NWindow::TInputRow; +using NYql::NWindow::TInputRowWindowFrame; +using NYql::NWindow::TRow; +using NYql::NWindow::TRowWindowFrame; + +enum class EConsumeStatus { + Ok, + Wait, + End, +}; + +inline IOutputStream& operator<<(IOutputStream& out, EConsumeStatus status) { + switch (status) { + case EConsumeStatus::Ok: + return out << "Ok"; + case EConsumeStatus::Wait: + return out << "Wait"; + case 
EConsumeStatus::End: + return out << "End"; + } +} + +template <typename TStream, typename TStreamElement> +class TStreamConsumer { +public: + explicit TStreamConsumer(TStream stream) + : Stream_(std::move(stream)) + { + } + + void Consume(TStreamElement& elem) { + LastConsumeStatus_ = Stream_(elem); + switch (LastConsumedStatus()) { + case EConsumeStatus::Ok: + ConsumedElements_++; + break; + case EConsumeStatus::Wait: + case EConsumeStatus::End: + break; + }; + return; + } + + EConsumeStatus LastConsumedStatus() const { + return LastConsumeStatus_; + } + + bool AllElementsAreConsumed() const { + return LastConsumedStatus() == EConsumeStatus::End; + } + + TRow ConsumedElements() const { + return ConsumedElements_; + } + +private: + TStream Stream_; + TRow ConsumedElements_ = 0; + EConsumeStatus LastConsumeStatus_ = EConsumeStatus::Ok; +}; + +// Structure to hold current window states for all window types. +class TFrameBoundsIndices { +public: + bool IsEmpty() const { + return RangeIntervals_.empty() && RowIntervals_.empty() && + RangeIncrementals_.empty() && RowIncrementals_.empty(); + } + + TRowWindowFrame GetIntervalInQueueByRange(size_t idx) const { + return RangeIntervals_.at(idx); + } + + TRowWindowFrame GetIntervalInQueueByRow(size_t idx) const { + return RowIntervals_.at(idx); + } + + TRowWindowFrame GetIntervalInQueueByRangeIncremental(size_t idx) const { + return RangeIncrementals_.at(idx); + } + + TRowWindowFrame GetIntervalInQueueByRowIncremental(size_t idx) const { + return RowIncrementals_.at(idx); + } + +private: + template <typename, typename, ESortOrder> + friend class TCoreWinFramesCollector; + + TVector<TRowWindowFrame> RangeIntervals_; + TVector<TRowWindowFrame> RowIntervals_; + TVector<TRowWindowFrame> RangeIncrementals_; + TVector<TRowWindowFrame> RowIncrementals_; +}; + +// Manages window frames bounds collection over a stream of elements with queue-based buffering. 
+// Processes elements from a stream, maintains a circular buffer queue, and tracks multiple +// window frame intervals over that queue. +// +// Capabilities: +// - Range-based windows: Define window bounds based on actual element values. +// - Row-based windows: Define window bounds based on row positions relative to the current row. +// - Incremental mode: Track only the right boundary for incremental collection; the frame is empty until the first element is tracked. +// Once at least one element has appeared inside the window frame bounds, the current state can never become empty again. +// - Multiple simultaneous windows: Support multiple window specifications with different bounds in a single pass. +// - Automatic queue management: Efficiently removes elements no longer needed by any window frame. +// - Sort order awareness: Handles both ascending and descending sort orders. +// +// Example with 4 elements [10, 20, 30, 40]: +// +// Row-based window (ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING): +// Position 0 (value=10): window [0, 2) includes [10, 20] +// Position 1 (value=20): window [0, 3) includes [10, 20, 30] +// Position 2 (value=30): window [1, 4) includes [20, 30, 40] +// Position 3 (value=40): window [2, 4) includes [30, 40] +// +// Range-based window (RANGE BETWEEN 15 PRECEDING AND 15 FOLLOWING): +// Position 0 (value=10): window [0, 2) includes [10, 20] // values in [-5, 25] +// Position 1 (value=20): window [0, 3) includes [10, 20, 30] // values in [5, 35] +// Position 2 (value=30): window [1, 4) includes [20, 30, 40] // values in [15, 45] +// Position 3 (value=40): window [2, 4) includes [30, 40] // values in [25, 55] +// +// Row incremental (1 FOLLOWING) - tracks only the right boundary: +// At position 0: incremental [1, 2) includes [20] +// At position 1: incremental [2, 3) includes [30] +// At position 2: incremental [3, 4) includes [40] +// At position 3: incremental [3, 4) includes [40] // Since this is the last available state for that boundary. 
+// +// Range incremental (15 FOLLOWING) - tracks only right boundary: +// At position 0 (value=10): incremental [1, 2) includes [20] // new values in (10, 25) +// At position 1 (value=20): incremental [2, 3) includes [30] // new values in (20, 35) +// At position 2 (value=30): incremental [3, 4) includes [40] // new values in (30, 45) +// At position 3 (value=40): incremental [3, 4) includes [40] // Since this is last available state for that boundary. +// +// Important: All indices in the examples above are relative to the original stream positions, +// not positions inside the currently maintained queue. TCoreWinFramesCollector automatically removes +// elements that are no longer needed by any window frame, causing indices to be rebased after each cleanup. +// So actual indices that methods return will be relative to the current queue state. +template <typename TStreamElement, typename TElementGetter, ESortOrder SortOrder> +class TCoreWinFramesCollector { +public: + using TElementGetterResult = std::invoke_result_t<TElementGetter, const TStreamElement&>; + using TRangeElement = typename TElementGetterResult::value_type; + using TQueue = TSafeCircularBuffer<TStreamElement>; + using TStream = std::function<EConsumeStatus(TStreamElement&)>; + + static auto CreateFactory(TCoreWinFrameCollectorBounds<TRangeElement> inputBounds, + TElementGetter elementGetter) { + // Validate intervals + for (const auto& interval : inputBounds.RangeIntervals()) { + ValidateInterval(interval, "Range"); + } + for (const auto& interval : inputBounds.RowIntervals()) { + ValidateInterval(interval, "Row"); + } + + TPrecomputedBoundsData precomputed; + precomputed.RangeWindowFrames = inputBounds.RangeIntervals(); + precomputed.RangeIncrementals = inputBounds.RangeIncrementals(); + precomputed.RowWindowFrames = ConvertRowIntervals(inputBounds.RowIntervals()); + precomputed.RowIncrementals = ConvertRowIntervals(inputBounds.RowIncrementals()); + + if (!precomputed.RangeIncrementals.empty()) { 
+ auto maxIt = std::ranges::max_element(precomputed.RangeIncrementals); + precomputed.MaxIncrementalRangeIntervals = std::distance(precomputed.RangeIncrementals.begin(), maxIt); + } + + if (!precomputed.RowIncrementals.empty()) { + auto maxIt = std::ranges::max_element(precomputed.RowIncrementals); + precomputed.MaxIncrementalRowIntervals = std::distance(precomputed.RowIncrementals.begin(), maxIt); + } + + if (!precomputed.RowWindowFrames.empty()) { + auto minBegin = std::ranges::min_element(precomputed.RowWindowFrames, + [](const auto& a, const auto& b) { + return a.Min() < b.Min(); + }) + ->Min(); + auto maxEnd = std::ranges::max_element(precomputed.RowWindowFrames, + [](const auto& a, const auto& b) { + return a.Max() < b.Max(); + }) + ->Max(); + precomputed.MaxRowInterval = TRowWindowFrame(minBegin, maxEnd); + } + + if (!precomputed.RangeWindowFrames.empty()) { + auto minElement = std::ranges::min_element(precomputed.RangeWindowFrames, + [](const auto& a, const auto& b) { + return a.Min() < b.Min(); + }); + auto maxElement = std::ranges::max_element(precomputed.RangeWindowFrames, + [](const auto& a, const auto& b) { + return a.Max() < b.Max(); + }); + precomputed.MaxRangeInterval = TInputRangeWindowFrame<TRangeElement>{ + minElement->Min(), + maxElement->Max()}; + } + + return [precomputed, elementGetter](TQueue& outputQueue, TStream stream, TFrameBoundsIndices& currentFrameBoundsIndices) { + return TCoreWinFramesCollector( + outputQueue, + stream, + elementGetter, + currentFrameBoundsIndices, + precomputed); + }; + } + + EConsumeStatus Next() + { + MKQL_ENSURE(QueueGeneration_ == OutputQueue_.Generation(), "Unexpected change."); + Y_DEFER { + QueueGeneration_ = OutputQueue_.Generation(); + }; + if (!ConsumeStreamUntilAllIntervalsAreSatisfied(CurrentPositionInQueue_ + 1)) { + return EConsumeStatus::Wait; + }; + + CurrentPositionInQueue_++; + CurrentPositionInStream_++; + if (StreamConsumer_.LastConsumedStatus() == EConsumeStatus::End && CurrentPositionInStream_ 
>= StreamConsumer_.ConsumedElements()) { + return EConsumeStatus::End; + } + + UpdateAllRowsIntervals(); + UpdateAllRowIncrementalsIntervals(); + if constexpr (IsRangeSupported()) { + UpdateAllRangesIntervals(); + UpdateAllRangeIncrementalsIntervals(); + } + + RemoveAllReduntantElementsFromQueue(); + + return EConsumeStatus::Ok; + } + + void Clean() { + MKQL_ENSURE(QueueGeneration_ == OutputQueue_.Generation(), "Unexpected change."); + Y_DEFER { + QueueGeneration_ = OutputQueue_.Generation(); + }; + OutputQueue_.Clean(); + } + + TStreamElement GetCurrentElement() const { + return OutputQueue_.Get(CurrentPositionInQueue_); + } + +private: + struct TPrecomputedBoundsData { + TVector<TInputRangeWindowFrame<TRangeElement>> RangeWindowFrames; + TVector<TRowWindowFrame> RowWindowFrames; + TVector<TInputRange<TRangeElement>> RangeIncrementals; + TVector<TRow> RowIncrementals; + TMaybe<TRowWindowFrame> MaxRowInterval; + TMaybe<TInputRangeWindowFrame<TRangeElement>> MaxRangeInterval; + TMaybe<TRow> MaxIncrementalRangeIntervals; + TMaybe<TRow> MaxIncrementalRowIntervals; + }; + + TCoreWinFramesCollector(TQueue& outputQueue, + TStream stream, + TElementGetter elementGetter, + TFrameBoundsIndices& currentFrameBoundsIndices, + const TPrecomputedBoundsData& precomputed) + : OutputQueue_(outputQueue) + , MaxRowInterval_(precomputed.MaxRowInterval) + , MaxRangeInterval_(precomputed.MaxRangeInterval) + , RangeWindowFrames_(precomputed.RangeWindowFrames) + , RowWindowFrames_(precomputed.RowWindowFrames) + , RangeIncrementals_(precomputed.RangeIncrementals) + , RowIncrementals_(precomputed.RowIncrementals) + , MaxIncrementalRangeIntervals_(precomputed.MaxIncrementalRangeIntervals) + , MaxIncrementalRowIntervals_(precomputed.MaxIncrementalRowIntervals) + , StreamConsumer_(std::move(stream)) + , ElementGetter_(std::move(elementGetter)) + , CurrentFrameBoundsIndices_(currentFrameBoundsIndices) + { + MKQL_ENSURE(CurrentFrameBoundsIndices_.IsEmpty(), "FrameBoundsIndices must be empty on 
construction."); + MKQL_ENSURE(OutputQueue_.Size() == 0, "Queue must be empty."); + QueueGeneration_ = OutputQueue_.Generation(); + + CurrentFrameBoundsIndices_.RangeIntervals_.reserve(RangeWindowFrames_.size()); + CurrentFrameBoundsIndices_.RowIntervals_.reserve(RowWindowFrames_.size()); + CurrentFrameBoundsIndices_.RangeIncrementals_.reserve(RangeIncrementals_.size()); + CurrentFrameBoundsIndices_.RowIncrementals_.reserve(RowIncrementals_.size()); + + for (size_t i = 0; i < RangeWindowFrames_.size(); ++i) { + CurrentFrameBoundsIndices_.RangeIntervals_.emplace_back(TRowWindowFrame::CreateEmpty()); + } + for (size_t i = 0; i < RowWindowFrames_.size(); ++i) { + CurrentFrameBoundsIndices_.RowIntervals_.emplace_back(TRowWindowFrame::CreateEmpty()); + } + for (size_t i = 0; i < RangeIncrementals_.size(); ++i) { + CurrentFrameBoundsIndices_.RangeIncrementals_.emplace_back(TRowWindowFrame::CreateEmpty()); + } + for (size_t i = 0; i < RowIncrementals_.size(); ++i) { + CurrentFrameBoundsIndices_.RowIncrementals_.emplace_back(TRowWindowFrame::CreateEmpty()); + } + } + + using TStreamConsumer = TStreamConsumer<TStream, TStreamElement>; + + TMaybe<TInputRange<TRangeElement>> GetMaxRangeIncrementalElement() const { + if (MaxIncrementalRangeIntervals_.Defined()) { + return RangeIncrementals_[*MaxIncrementalRangeIntervals_]; + } + return {}; + } + + TMaybe<TRow> GetMaxRowIncrementalElement() const { + if (MaxIncrementalRowIntervals_.Defined()) { + return RowIncrementals_[*MaxIncrementalRowIntervals_]; + } + return {}; + } + + template <typename TInterval> + static void ValidateInterval(const TInterval& interval, const char* intervalType) { + const auto& minBound = interval.Min(); + const auto& maxBound = interval.Max(); + + if (!minBound.IsInf()) { + MKQL_ENSURE(minBound.GetUnderlyingValue() >= 0, + TStringBuilder() << intervalType << " interval Min value must be positive"); + } + if (minBound.IsInf() && minBound.GetDirection() != EDirection::Preceding) { + MKQL_ENSURE(false, 
TStringBuilder() << intervalType << " interval Min cannot be Unbounded Right"); + } + + if (!maxBound.IsInf()) { + MKQL_ENSURE(maxBound.GetUnderlyingValue() >= 0, + TStringBuilder() << intervalType << " interval Max value must be positive"); + } + if (maxBound.IsInf() && maxBound.GetDirection() != EDirection::Following) { + MKQL_ENSURE(false, TStringBuilder() << intervalType << " interval Max cannot be Unbounded Left"); + } + } + + TMaybe<TRangeElement> GetQueueValue(TRow idx) const { + return ElementGetter_(OutputQueue_.Get(idx)); + } + + consteval static EInfBoundary InfBoundaryAddElements() { + static_assert(IsRangeSupported()); + if constexpr (SortOrder == ESortOrder::Asc) { + return EInfBoundary::Left; + } else { + return EInfBoundary::Right; + } + } + + consteval static EInfBoundary InfBoundaryRemoveElements() { + static_assert(IsRangeSupported()); + if constexpr (SortOrder == ESortOrder::Asc) { + return EInfBoundary::Right; + } else { + return EInfBoundary::Left; + } + } + + template <typename T> + constexpr static bool IsNan(T value) { + if constexpr (std::is_floating_point_v<T>) { + return std::isnan(value); + } else { + return false; + } + } + + bool ShouldAddElement(TInputRange<TRangeElement> range, + TRow fromIdx, + TRow elemToTestIdx) const { + // Unbounded preceding or following case. Should be added no matter what. + if (range.IsInf()) { + return true; + } + + auto from = GetQueueValue(fromIdx); + auto elemToTest = GetQueueValue(elemToTestIdx); + + if (from.Defined() != elemToTest.Defined()) { + return fromIdx > elemToTestIdx; + } + // Two empty optionals are inside same interval always. Since they are equal. + if (!from.Defined() && !elemToTest.Defined()) { + return true; + } + + if (IsNan(*from) != IsNan(*elemToTest)) { + return fromIdx > elemToTestIdx; + } + + if (IsNan(*from) && IsNan(*elemToTest)) { + return true; + } + + // Just compare elements to match intervals. 
+ return IsBelongToInterval<InfBoundaryAddElements()>(GetComparationDirection(range.GetDirection()), *from, range.GetUnderlyingValue(), *elemToTest); + } + + bool ShouldRemoveElement(TInputRange<TRangeElement> range, + TRow fromIdx, + TRow elemToTestIdx) const { + // Unbounded preceding or following case. Should be added no matter what. + if (range.IsInf()) { + return false; + } + + auto from = GetQueueValue(fromIdx); + auto elemToTest = GetQueueValue(elemToTestIdx); + if (from.Defined() != elemToTest.Defined()) { + return fromIdx > elemToTestIdx; + } + + // Two empty optionals are inside same interval always. Since they are equal. + if (!from.Defined() && !elemToTest.Defined()) { + return false; + } + + if (IsNan(*from) != IsNan(*elemToTest)) { + return fromIdx > elemToTestIdx; + } + + if (IsNan(*from) && IsNan(*elemToTest)) { + return false; + } + + // Just compare elements to match intervals. + return !IsBelongToInterval<InfBoundaryRemoveElements()>(GetComparationDirection(range.GetDirection()), *from, range.GetUnderlyingValue(), *elemToTest); + } + + consteval static bool IsRangeSupported() { + return SortOrder != ESortOrder::Unimportant; + } + + static constexpr EDirection GetComparationDirection(EDirection dir) { + if constexpr (SortOrder == ESortOrder::Asc) { + return dir; + } else { + return InvertDirection(dir); + } + } + + // Consumes elements from the stream until all window frame intervals can be satisfied. + // Checks if the queue has enough elements to cover all window specifications (both range and row based), + // and fetches more elements from the stream if needed. Returns false if stream returns Wait status. 
+ bool ConsumeStreamUntilAllIntervalsAreSatisfied(TRow currentPositionInQueue) { + while (ShouldAddNewElement(currentPositionInQueue)) { + TStreamElement element; + StreamConsumer_.Consume(element); + if (StreamConsumer_.LastConsumedStatus() == EConsumeStatus::Wait) { + return false; + } + + if (StreamConsumer_.LastConsumedStatus() == EConsumeStatus::End) { + break; + } + OutputQueue_.PushBack(std::move(element)); + } + return true; + }; + + bool ShouldAddNewElement(TRow currentPositionInQueue) { + if (StreamConsumer_.LastConsumedStatus() == EConsumeStatus::End) { + return false; + } + + if (currentPositionInQueue >= static_cast<TRow>(OutputQueue_.Size())) { + return true; + } + + auto maxIncrementalRow = GetMaxRowIncrementalElement(); + + if (maxIncrementalRow && currentPositionInQueue + *maxIncrementalRow >= static_cast<TRow>(OutputQueue_.Size())) { + return true; + } + + if (MaxRowInterval_ && currentPositionInQueue + MaxRowInterval_->Max() >= static_cast<TRow>(OutputQueue_.Size())) { + return true; + } + + if constexpr (IsRangeSupported()) { + if (MaxRangeInterval_ && ShouldAddElement(MaxRangeInterval_->Max(), + currentPositionInQueue, + OutputQueue_.Size() - 1)) { + return true; + } + + auto maxIncrementalRange = GetMaxRangeIncrementalElement(); + + if (maxIncrementalRange && ShouldAddElement( + *maxIncrementalRange, + currentPositionInQueue, + OutputQueue_.Size() - 1)) { + return true; + } + } + + return false; + }; + + // Removes elements from the queue that are no longer needed by any window frame. + // Finds the minimum index still required across all active windows and removes all elements + // before that index. Updates all interval indices to reflect the new queue state after removal. 
+ void RemoveAllReduntantElementsFromQueue() { + TRow removedElements = std::numeric_limits<TRow>::max(); + for (const auto* vector : GetAllFrameBoundsIndices()) { + for (const auto& interval : *vector) { + removedElements = std::min(removedElements, interval.Min()); + } + } + + removedElements = std::min(removedElements, CurrentPositionInQueue_); + + Y_ENSURE(removedElements != std::numeric_limits<TRow>::max()); + + for (TRow i = 0; i < removedElements; i++) { + OutputQueue_.PopFront(); + } + + CurrentPositionInQueue_ -= removedElements; + + for (auto& vector : GetAllFrameBoundsIndices()) { + for (auto& interval : *vector) { + interval.Min() = interval.Min() - removedElements; + interval.Max() = std::clamp(interval.Max() - removedElements, TRowWindowFrame::TBoundType(0), std::numeric_limits<TRowWindowFrame::TBoundType>::max()); + } + } + }; + + // Updates all range-based window frame intervals for the current position. + // Expands or contracts interval boundaries based on element values to match the specified + // RANGE BETWEEN conditions. + void UpdateAllRangesIntervals() + requires(IsRangeSupported()) + { + for (size_t idx = 0; idx < RangeWindowFrames_.size(); idx++) { + const auto& interval = RangeWindowFrames_[idx]; + auto& currentFrame = CurrentFrameBoundsIndices_.RangeIntervals_[idx]; + + while (currentFrame.Max() < static_cast<TRow>(OutputQueue_.Size()) && + ShouldAddElement(interval.Max(), + CurrentPositionInQueue_, + currentFrame.Max())) { + currentFrame = TRowWindowFrame(currentFrame.Min(), currentFrame.Max() + 1); + } + + while (currentFrame.Min() < static_cast<TRow>(OutputQueue_.Size()) && + ShouldRemoveElement(interval.Min(), + CurrentPositionInQueue_, + currentFrame.Min())) { + currentFrame = TRowWindowFrame(currentFrame.Min() + 1, currentFrame.Max()); + } + } + } + + // Updates all range-based incremental intervals for the current position. + // Incremental mode tracks only the right boundary. 
+ void UpdateAllRangeIncrementalsIntervals() + requires(IsRangeSupported()) + { + for (size_t idx = 0; idx < RangeIncrementals_.size(); idx++) { + const auto& incremental = RangeIncrementals_[idx]; + auto currentFrameCopy = CurrentFrameBoundsIndices_.RangeIncrementals_[idx]; + + while (currentFrameCopy.Max() < static_cast<TRow>(OutputQueue_.Size()) && + ShouldAddElement(incremental, + CurrentPositionInQueue_, + currentFrameCopy.Max())) { + currentFrameCopy.Max()++; + } + if (!currentFrameCopy.Empty()) { + // Store only last element. + CurrentFrameBoundsIndices_.RangeIncrementals_[idx] = TRowWindowFrame(currentFrameCopy.Max() - 1, currentFrameCopy.Max()); + } else { + CurrentFrameBoundsIndices_.RangeIncrementals_[idx] = TRowWindowFrame(currentFrameCopy.Max(), currentFrameCopy.Max()); + } + } + } + + // Updates all row-based incremental intervals for the current position. + // Incremental mode tracks only the right boundary. + void UpdateAllRowIncrementalsIntervals() { + for (size_t idx = 0; idx < RowIncrementals_.size(); ++idx) { + auto& rowInterval = CurrentFrameBoundsIndices_.RowIncrementals_[idx]; + const auto& rowIncremental = RowIncrementals_[idx]; + TRow maxPos = CurrentPositionInQueue_ + rowIncremental + 1; + maxPos = ClampToQueue(maxPos); + if (maxPos > 0) { + rowInterval = TRowWindowFrame(maxPos - 1, maxPos); + } else { + rowInterval = TRowWindowFrame::CreateEmpty(); + } + } + } + + // Updates all row-based window frame intervals for the current position. + // Calculates interval boundaries based on row positions relative to the current row, + // implementing ROWS BETWEEN conditions. Simply adds/subtracts row offsets from current position. 
+ void UpdateAllRowsIntervals() { + for (size_t idx = 0; idx < RowWindowFrames_.size(); ++idx) { + const auto& rowInterval = RowWindowFrames_[idx]; + auto& rowWindow = CurrentFrameBoundsIndices_.RowIntervals_[idx]; + + TRow minPos = CurrentPositionInQueue_ + rowInterval.Min(); + TRow maxPos = CurrentPositionInQueue_ + rowInterval.Max() + 1; + + rowWindow = TRowWindowFrame(ClampToQueue(minPos), ClampToQueue(maxPos)); + } + } + + TRow ClampToQueue(TRow value) { + return std::clamp(value, TRow(0), static_cast<TRow>(OutputQueue_.Size())); + } + + // Converts TInputRow (with direction and optional unbounded) to signed |TRow| offset. + // UNBOUNDED PRECEDING or UNBOUNDED FOLLOWING could be represented with max integer value, + // but we use a large enough bound (max/2) to avoid overflows when adding with other values. + // Left direction (PRECEDING) produces negative offsets, Right direction (FOLLOWING) produces positive offsets. + static TRow ConvertRowValue(const TInputRow& input) { + auto clampToValid = [](auto input) { + using T = decltype(input); + return std::clamp(input, T(0), T(std::numeric_limits<TRowWindowFrame::TBoundType>::max()) / 2); + }; + + TRowWindowFrame::TBoundType value = input.IsInf() ? std::numeric_limits<TRowWindowFrame::TBoundType>::max() : clampToValid(input.GetUnderlyingValue()); + value = clampToValid(value); + return (input.GetDirection() == EDirection::Preceding) ? 
-value : value; + } + + static TVector<TRowWindowFrame> ConvertRowIntervals(const TVector<TInputRowWindowFrame>& inputIntervals) { + TVector<TRowWindowFrame> result; + result.reserve(inputIntervals.size()); + + for (const auto& interval : inputIntervals) { + result.emplace_back(ConvertRowValue(interval.Min()), + ConvertRowValue(interval.Max())); + } + + return result; + } + + static TVector<TRow> ConvertRowIntervals(const TVector<TInputRow>& inputIntervals) { + TVector<TRow> result; + result.reserve(inputIntervals.size()); + + for (const auto& interval : inputIntervals) { + result.emplace_back(ConvertRowValue(interval)); + } + + return result; + } + + std::array<TVector<TRowWindowFrame>*, 4> GetAllFrameBoundsIndices() { + return {&CurrentFrameBoundsIndices_.RangeIntervals_, &CurrentFrameBoundsIndices_.RowIntervals_, &CurrentFrameBoundsIndices_.RangeIncrementals_, &CurrentFrameBoundsIndices_.RowIncrementals_}; + } + + TQueue& OutputQueue_; + + TRow CurrentPositionInQueue_ = -1; + TRow CurrentPositionInStream_ = -1; + + TMaybe<TRowWindowFrame> MaxRowInterval_; + TMaybe<TInputRangeWindowFrame<TRangeElement>> MaxRangeInterval_; + + TVector<TInputRangeWindowFrame<TRangeElement>> RangeWindowFrames_; + TVector<TRowWindowFrame> RowWindowFrames_; + + TVector<TInputRange<TRangeElement>> RangeIncrementals_; + TVector<TRow> RowIncrementals_; + + TMaybe<TRow> MaxIncrementalRangeIntervals_; + TMaybe<TRow> MaxIncrementalRowIntervals_; + + TStreamConsumer StreamConsumer_; + TElementGetter ElementGetter_; + + ui64 QueueGeneration_ = 0; + TFrameBoundsIndices& CurrentFrameBoundsIndices_; +}; + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/mkql_core_win_frames_collector_test_helper.h b/yql/essentials/minikql/mkql_core_win_frames_collector_test_helper.h new file mode 100644 index 00000000000..e9e123030b1 --- /dev/null +++ b/yql/essentials/minikql/mkql_core_win_frames_collector_test_helper.h @@ -0,0 +1,245 @@ +#pragma once + +#include 
<yql/essentials/minikql/mkql_core_win_frames_collector.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/fwd.h> + +namespace NKikimr::NMiniKQL::NTest::NWindow { + +struct TYield {}; + +struct TEmptyInterval {}; + +struct TIntervalCheck { + i64 ExpectedMin; + i64 ExpectedMax; +}; + +struct TIntervalCheckOrEmpty: std::variant<TIntervalCheck, TEmptyInterval> { + using TBase = std::variant<TIntervalCheck, TEmptyInterval>; + using TBase::TBase; + using TBase::operator=; + + // Allow brace initialization like {{0, 3}}. + TIntervalCheckOrEmpty(std::initializer_list<i64> init) { + Y_ENSURE(init.size() == 2); + auto it = init.begin(); + i64 min = *it++; + i64 max = *it; + *this = TIntervalCheck{min, max}; + } +}; + +template <typename TElement, ESortOrder SortOrder, typename TRangeElement = TElement> +struct TTestCase { + // Input interval bounds. Requires two: left and right. + TVector<TInputRowWindowFrame> RowIntervals; + TVector<TInputRangeWindowFrame<TRangeElement>> RangeIntervals; + + // Incremental bounds - for tracking only right boundary. + TVector<TInputRange<TRangeElement>> RangeIncrementals; + TVector<TInputRow> RowIncrementals; + + // Element getter function - by default returns element as-is (identity) wrapped in TMaybe. + std::function<TMaybe<TRangeElement>(const TElement&)> ElementGetter = [](const TElement& elem) -> TMaybe<TRangeElement> { + Y_ABORT_UNLESS((std::is_same_v<TElement, TRangeElement>), "ElementGetter must be provided when TRangeElement != TElement"); + return TMaybe<TRangeElement>(elem); + }; + + TVector<std::variant<TYield, TElement>> InputElements; + + // Expected results after each Next() call. 
+ struct TExpectedState { + TElement CurrentElement; + TVector<TElement> QueueContent; + + TVector<TIntervalCheckOrEmpty> RowIntervalChecks; + TVector<TIntervalCheckOrEmpty> RangeIntervalChecks; + TVector<TIntervalCheckOrEmpty> RangeIncrementalChecks; + TVector<TIntervalCheckOrEmpty> RowIncrementalChecks; + }; + + TVector<TExpectedState> ExpectedStates; +}; + +template <typename TElement> +bool ElementsEqual(const TElement& a, const TElement& b) { + if constexpr (std::is_floating_point_v<TElement>) { + if (std::isnan(a) && std::isnan(b)) { + return true; + } + } + return a == b; +} + +template <typename TElement> +TString FormatQueueContent(const TSafeCircularBuffer<TElement>& queue) { + TStringBuilder result; + result << "["; + for (size_t i = 0; i < queue.Size(); ++i) { + if (i > 0) { + result << ", "; + } + result << queue.Get(i); + } + result << "]"; + return result; +} + +// Helper function to run a single test case. +template <typename TElement, ESortOrder SortOrder, typename TRangeElement = TElement> +void RunTestCase(const TTestCase<TElement, SortOrder, TRangeElement>& testCase) { + // Setup bounds. + TCoreWinFrameCollectorBounds<TRangeElement> bounds(/*dedup=*/false); + for (const auto& interval : testCase.RowIntervals) { + bounds.AddRow(interval); + } + for (const auto& interval : testCase.RangeIntervals) { + bounds.AddRange(interval); + } + for (const auto& incremental : testCase.RangeIncrementals) { + bounds.AddRangeIncremental(incremental); + } + for (const auto& incremental : testCase.RowIncrementals) { + bounds.AddRowIncremental(incremental); + } + + // Create queue and stream (unbounded buffer). 
+ TSafeCircularBuffer<TElement> outputQueue(TMaybe<size_t>(), TElement{}); + + size_t inputIdx = 0; + auto stream = [&](TElement& elem) -> EConsumeStatus { + if (inputIdx >= testCase.InputElements.size()) { + return EConsumeStatus::End; + } + const auto& input = testCase.InputElements[inputIdx++]; + if (std::holds_alternative<TYield>(input)) { + return EConsumeStatus::Wait; + } + elem = std::get<TElement>(input); + return EConsumeStatus::Ok; + }; + + // Create current windows structure. + TFrameBoundsIndices currentWindows; + + // Create aggregator using factory method. + auto factory = TCoreWinFramesCollector<TElement, decltype(testCase.ElementGetter), SortOrder>::CreateFactory( + bounds, testCase.ElementGetter); + auto aggregator = factory(outputQueue, stream, currentWindows); + + // Process elements and check states + for (size_t i = 0; i < testCase.ExpectedStates.size(); ++i) { + EConsumeStatus status; + // Keep calling Next() until we get Ok (skipping Wait statuses) + do { + status = aggregator.Next(); + } while (status == EConsumeStatus::Wait); + + UNIT_ASSERT_VALUES_EQUAL_C( + status, + EConsumeStatus::Ok, + TStringBuilder() << "Step: " << (i + 1) << ", Status should be Ok"); + + const auto& expectedState = testCase.ExpectedStates[i]; + + // Check current element + auto currentElement = aggregator.GetCurrentElement(); + UNIT_ASSERT_C( + ElementsEqual(currentElement, expectedState.CurrentElement), + TStringBuilder() << "Step: " << (i + 1) + << ", Current element mismatch. Expected: " << expectedState.CurrentElement + << ", Got: " << currentElement); + + // Check queue content + if (outputQueue.Size() != expectedState.QueueContent.size()) { + UNIT_ASSERT_VALUES_EQUAL_C( + outputQueue.Size(), + expectedState.QueueContent.size(), + TStringBuilder() << "Step: " << (i + 1) + << ", Queue size mismatch. 
Actual queue: " << FormatQueueContent(outputQueue)); + } + + for (size_t j = 0; j < expectedState.QueueContent.size(); ++j) { + UNIT_ASSERT_C( + ElementsEqual(outputQueue.Get(j), expectedState.QueueContent[j]), + TStringBuilder() << "Step: " << (i + 1) + << ", Queue[" << j << "] mismatch. Expected: " << expectedState.QueueContent[j] + << ", Got: " << outputQueue.Get(j)); + } + + // Helper lambda to check interval + auto checkInterval = [&](const TIntervalCheckOrEmpty& checkVariant, auto getIntervalFunc, size_t idx, const char* intervalType) { + if (std::holds_alternative<TEmptyInterval>(checkVariant)) { + // Empty interval - verify it's actually empty (Min > Max) + auto interval = getIntervalFunc(idx); + UNIT_ASSERT_C( + interval.Min() >= interval.Max(), + TStringBuilder() << "Step: " << (i + 1) + << ", " << intervalType << " " << idx + << " should be empty (Min >= Max), but Min=" << interval.Min() + << ", Max=" << interval.Max()); + return; + } + const auto& check = std::get<TIntervalCheck>(checkVariant); + auto interval = getIntervalFunc(idx); + UNIT_ASSERT_VALUES_EQUAL_C( + interval.Min(), + check.ExpectedMin, + TStringBuilder() << "Step: " << (i + 1) + << ", " << intervalType << " " << idx << " Min"); + UNIT_ASSERT_VALUES_EQUAL_C( + interval.Max(), + check.ExpectedMax, + TStringBuilder() << "Step: " << (i + 1) + << ", " << intervalType << " " << idx << " Max"); + }; + + // Check row intervals + for (size_t idx = 0; idx < expectedState.RowIntervalChecks.size(); ++idx) { + checkInterval(expectedState.RowIntervalChecks[idx], + [&](size_t i) { return currentWindows.GetIntervalInQueueByRow(i); }, + idx, "Row interval"); + } + + // Check range intervals + for (size_t idx = 0; idx < expectedState.RangeIntervalChecks.size(); ++idx) { + checkInterval(expectedState.RangeIntervalChecks[idx], + [&](size_t i) { return currentWindows.GetIntervalInQueueByRange(i); }, + idx, "Range interval"); + } + + // Check range incremental intervals + for (size_t idx = 0; idx < 
expectedState.RangeIncrementalChecks.size(); ++idx) { + checkInterval(expectedState.RangeIncrementalChecks[idx], + [&](size_t i) { return currentWindows.GetIntervalInQueueByRangeIncremental(i); }, + idx, "Range incremental"); + } + + // Check row incremental intervals + for (size_t idx = 0; idx < expectedState.RowIncrementalChecks.size(); ++idx) { + checkInterval(expectedState.RowIncrementalChecks[idx], + [&](size_t i) { return currentWindows.GetIntervalInQueueByRowIncremental(i); }, + idx, "Row incremental"); + } + } + + // Check that after processing all elements, aggregator returns End + // Skip any remaining Wait statuses and verify we get End + constexpr size_t timesToCheckEnd = 10; + for (size_t j = 0; j < timesToCheckEnd; j++) { + EConsumeStatus finalStatus; + do { + finalStatus = aggregator.Next(); + } while (finalStatus == EConsumeStatus::Wait); + + UNIT_ASSERT_VALUES_EQUAL_C( + finalStatus, + EConsumeStatus::End, + TStringBuilder() << ", After all elements processed, status should be End"); + } +} + +} // namespace NKikimr::NMiniKQL::NTest::NWindow diff --git a/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part1.cpp b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part1.cpp new file mode 100644 index 00000000000..87d02bba8b4 --- /dev/null +++ b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part1.cpp @@ -0,0 +1,714 @@ +#include "mkql_core_win_frames_collector_test_helper.h" + +using namespace NKikimr::NMiniKQL; +using namespace NKikimr::NMiniKQL::NTest::NWindow; + +// clang-format off +Y_UNIT_TEST_SUITE(TCoreWinFramesCollectorTest) { + +Y_UNIT_TEST(BasicRowInterval) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 3}}, 
+ }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 30, + .QueueContent = {10, 20, 30, 40, 50}, + .RowIntervalChecks = {{0, 5}}, + }, + { + .CurrentElement = 40, + .QueueContent = {20, 30, 40, 50}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 50, + .QueueContent = {30, 40, 50}, + .RowIntervalChecks = {{0, 3}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(BasicRowIntervalUnimportant) { + TTestCase<ui64, ESortOrder::Unimportant> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 3}}, + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 30, + .QueueContent = {10, 20, 30, 40, 50}, + .RowIntervalChecks = {{0, 5}}, + }, + { + .CurrentElement = 40, + .QueueContent = {20, 30, 40, 50}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 50, + .QueueContent = {30, 40, 50}, + .RowIntervalChecks = {{0, 3}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(BothLeftRowIntervals) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(3, EDirection::Preceding), + TInputRow(2, EDirection::Preceding) + ) + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10}, + .RowIntervalChecks = {{0, 0}}, + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20}, + .RowIntervalChecks = {{0, 0}}, + }, + { + .CurrentElement = 30, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 1}}, + }, + { + .CurrentElement = 40, + .QueueContent = {10, 20, 30, 40}, + .RowIntervalChecks = 
{{0, 2}}, + }, + { + .CurrentElement = 50, + .QueueContent = {20, 30, 40, 50}, + .RowIntervalChecks = {{0, 2}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(BothRightRowIntervals) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(1, EDirection::Following), + TInputRow(3, EDirection::Following) + ) + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30, 40}, + .RowIntervalChecks = {{1, 4}}, + }, + { + .CurrentElement = 20, + .QueueContent = {20, 30, 40, 50}, + .RowIntervalChecks = {{1, 4}}, + }, + { + .CurrentElement = 30, + .QueueContent = {30, 40, 50}, + .RowIntervalChecks = {{1, 3}}, + }, + { + .CurrentElement = 40, + .QueueContent = {40, 50}, + .RowIntervalChecks = {{1, 2}}, + }, + { + .CurrentElement = 50, + .QueueContent = {50}, + .RowIntervalChecks = {{1, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(MaxValueLeftInterval) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(std::numeric_limits<ui64>::max(), EDirection::Preceding), + TInputRow(0, EDirection::Preceding) + ) + }, + .InputElements = {TYield(), ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10}, + .RowIntervalChecks = {{0, 1}}, + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20}, + .RowIntervalChecks = {{0, 2}}, + }, + { + .CurrentElement = 30, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 3}}, + }, + { + .CurrentElement = 40, + .QueueContent = {10, 20, 30, 40}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 50, + .QueueContent = {10, 20, 30, 40, 50}, + .RowIntervalChecks = {{0, 5}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(MaxValueRightInterval) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + 
TInputRowWindowFrame( + TInputRow(0, EDirection::Following), + TInputRow(std::numeric_limits<ui32>::max(), EDirection::Following) + ) + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30, 40, 50}, + .RowIntervalChecks = {{0, 5}}, + }, + { + .CurrentElement = 20, + .QueueContent = {20, 30, 40, 50}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 30, + .QueueContent = {30, 40, 50}, + .RowIntervalChecks = {{0, 3}}, + }, + { + .CurrentElement = 40, + .QueueContent = {40, 50}, + .RowIntervalChecks = {{0, 2}}, + }, + { + .CurrentElement = 50, + .QueueContent = {50}, + .RowIntervalChecks = {{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(BasicRangeInterval) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(15), TYield(), TYield(), ui64(20), ui64(25), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 15, + .QueueContent = {10, 15, 20, 25}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 20, + .QueueContent = {15, 20, 25, 30}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 25, + .QueueContent = {20, 25, 30}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 30, + .QueueContent = {25, 30}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeIntervalPowersOfTwo) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{4, EDirection::Preceding}, + TInputRange<ui64>{4, EDirection::Following} + } + }, + .InputElements = {TYield(), TYield(), ui64(1), TYield(), ui64(2), ui64(4), ui64(8), ui64(16), 
ui64(32)}, + .ExpectedStates = { + { + .CurrentElement = 1, + .QueueContent = {1, 2, 4, 8}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 2, + .QueueContent = {1, 2, 4, 8}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 4, + .QueueContent = {1, 2, 4, 8, 16}, + .RangeIntervalChecks = {{0, 4}} + }, + { + .CurrentElement = 8, + .QueueContent = {4, 8, 16}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 16, + .QueueContent = {16, 32}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = 32, + .QueueContent = {32}, + .RangeIntervalChecks = {{0, 1}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(SingleElement) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + .InputElements = {ui64(42), TYield(), TYield()}, + .ExpectedStates = { + { + .CurrentElement = 42, + .QueueContent = {42}, + .RowIntervalChecks = {{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(EmptyStream) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(YieldOnlyStream) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + .InputElements = {TYield(), TYield()}, + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(TwoRangeIntervals_OneInsideAnother) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{10, EDirection::Preceding}, + TInputRange<ui64>{10, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements 
= {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RangeIntervalChecks = { + {0, 2}, + {0, 1} + } + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40}, + .RangeIntervalChecks = { + {0, 3}, + {1, 2} + } + }, + { + .CurrentElement = 30, + .QueueContent = {20, 30, 40, 50}, + .RangeIntervalChecks = { + {0, 3}, + {1, 2} + } + }, + { + .CurrentElement = 40, + .QueueContent = {30, 40, 50}, + .RangeIntervalChecks = { + {0, 3}, + {1, 2} + } + }, + { + .CurrentElement = 50, + .QueueContent = {40, 50}, + .RangeIntervalChecks = { + {0, 2}, + {1, 2} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(TwoRangeIntervals_PartiallyOverlapLeft) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{15, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{15, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RangeIntervalChecks = { + {0, 1}, + {0, 2} + } + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 30, + .QueueContent = {20, 30, 40, 50}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 40, + .QueueContent = {30, 40, 50}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 50, + .QueueContent = {40, 50}, + .RangeIntervalChecks = { + {0, 2}, + {1, 2} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(TwoRangeIntervals_CompletelyDisjointLeft) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{30, 
EDirection::Preceding}, + TInputRange<ui64>{20, EDirection::Preceding} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{10, EDirection::Preceding}, + TInputRange<ui64>{10, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50), ui64(60)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RangeIntervalChecks = { + {0, 0}, + {0, 2} + } + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40}, + .RangeIntervalChecks = { + {0, 0}, + {0, 3} + } + }, + { + .CurrentElement = 30, + .QueueContent = {10, 20, 30, 40, 50}, + .RangeIntervalChecks = { + {0, 1}, + {1, 4} + } + }, + { + .CurrentElement = 40, + .QueueContent = {10, 20, 30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {2, 5} + } + }, + { + .CurrentElement = 50, + .QueueContent = {20, 30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {2, 5} + } + }, + { + .CurrentElement = 60, + .QueueContent = {30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {2, 4} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(TwoRangeIntervals_CompletelyDisjointRight) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{10, EDirection::Preceding}, + TInputRange<ui64>{10, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{20, EDirection::Following}, + TInputRange<ui64>{30, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50), ui64(60)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30, 40, 50}, + .RangeIntervalChecks = { + {0, 2}, + {2, 4} + } + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 3}, + {3, 5} + } + }, + { + .CurrentElement = 30, + .QueueContent = {20, 30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 3}, + {3, 5} + } + }, + { + .CurrentElement = 40, + .QueueContent = 
{30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 3}, + {3, 4} + } + }, + { + .CurrentElement = 50, + .QueueContent = {40, 50, 60}, + .RangeIntervalChecks = { + {0, 3}, + {3, 3} + } + }, + { + .CurrentElement = 60, + .QueueContent = {50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {2, 2} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(TwoRangeIntervals_PartiallyOverlapRight) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{15, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Following}, + TInputRange<ui64>{25, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50), ui64(60)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30, 40}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 20, + .QueueContent = {20, 30, 40, 50}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 30, + .QueueContent = {30, 40, 50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 40, + .QueueContent = {40, 50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {1, 3} + } + }, + { + .CurrentElement = 50, + .QueueContent = {50, 60}, + .RangeIntervalChecks = { + {0, 2}, + {1, 2} + } + }, + { + .CurrentElement = 60, + .QueueContent = {60}, + .RangeIntervalChecks = { + {0, 1}, + {1, 1} + } + } + } + }; + + RunTestCase(testCase); +} + +} diff --git a/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part2.cpp b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part2.cpp new file mode 100644 index 00000000000..136bd075cdc --- /dev/null +++ b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part2.cpp @@ -0,0 +1,899 @@ +#include "mkql_core_win_frames_collector_test_helper.h" + +#include <library/cpp/testing/unittest/registar.h> + +using 
namespace NKikimr::NMiniKQL; +using namespace NKikimr::NMiniKQL::NTest::NWindow; + +// clang-format off +Y_UNIT_TEST_SUITE(TCoreWinFramesCollectorTestPart2) { + +Y_UNIT_TEST(RowAndRangeIntervals_Combined) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(1, EDirection::Preceding), + TInputRow(1, EDirection::Following) + ) + }, + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(30), ui64(40)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20}, + .RowIntervalChecks = {{0, 2}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 15, + .QueueContent = {10, 15, 20, 30}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 20, + .QueueContent = {15, 20, 30}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 30, + .QueueContent = {20, 30, 40}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{1, 2}} + }, + { + .CurrentElement = 40, + .QueueContent = {30, 40}, + .RowIntervalChecks = {{0, 2}}, + .RangeIntervalChecks = {{1, 2}} + } + } + }; + + RunTestCase(testCase); + +} + +Y_UNIT_TEST(RowAndRangeIntervals_RowCompletelyLeft) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(3, EDirection::Preceding), + TInputRow(1, EDirection::Preceding) + ) + }, + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(25), ui64(30), ui64(35)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20}, + .RowIntervalChecks = {{0, 0}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 15, + 
.QueueContent = {10, 15, 20, 25}, + .RowIntervalChecks = {{0, 1}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 20, + .QueueContent = {10, 15, 20, 25, 30}, + .RowIntervalChecks = {{0, 2}}, + .RangeIntervalChecks = {{1, 4}} + }, + { + .CurrentElement = 25, + .QueueContent = {10, 15, 20, 25, 30, 35}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{2, 5}} + }, + { + .CurrentElement = 30, + .QueueContent = {15, 20, 25, 30, 35}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{2, 5}} + }, + { + .CurrentElement = 35, + .QueueContent = {20, 25, 30, 35}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{2, 4}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowAndRangeIntervals_RowCompletelyRight) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(1, EDirection::Following), + TInputRow(3, EDirection::Following) + ) + }, + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(25), ui64(30), ui64(35)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20, 25}, + .RowIntervalChecks = {{1, 4}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 15, + .QueueContent = {10, 15, 20, 25, 30}, + .RowIntervalChecks = {{2, 5}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 20, + .QueueContent = {15, 20, 25, 30, 35}, + .RowIntervalChecks = {{2, 5}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 25, + .QueueContent = {20, 25, 30, 35}, + .RowIntervalChecks = {{2, 4}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 30, + .QueueContent = {25, 30, 35}, + .RowIntervalChecks = {{2, 3}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 35, + .QueueContent = {30, 35}, + .RowIntervalChecks = {{2, 2}}, + .RangeIntervalChecks = 
{{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowAndRangeIntervals_RowPartiallyLeft) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(0, EDirection::Following) + ) + }, + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{3, EDirection::Preceding}, + TInputRange<ui64>{7, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(25), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20}, + .RowIntervalChecks = {{0, 1}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 15, + .QueueContent = {10, 15, 20, 25}, + .RowIntervalChecks = {{0, 2}}, + .RangeIntervalChecks = {{1, 3}} + }, + { + .CurrentElement = 20, + .QueueContent = {10, 15, 20, 25, 30}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{2, 4}} + }, + { + .CurrentElement = 25, + .QueueContent = {15, 20, 25, 30}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{2, 4}} + }, + { + .CurrentElement = 30, + .QueueContent = {20, 25, 30}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{2, 3}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowAndRangeIntervals_RowPartiallyRight) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(0, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{7, EDirection::Preceding}, + TInputRange<ui64>{3, EDirection::Following} + } + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(25), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = 15, + .QueueContent = {10, 15, 20, 25}, + .RowIntervalChecks = {{1, 4}}, + .RangeIntervalChecks = {{0, 2}} + 
}, + { + .CurrentElement = 20, + .QueueContent = {15, 20, 25, 30}, + .RowIntervalChecks = {{1, 4}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 25, + .QueueContent = {20, 25, 30}, + .RowIntervalChecks = {{1, 3}}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 30, + .QueueContent = {25, 30}, + .RowIntervalChecks = {{1, 2}}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowInterval_Uint64MaxValuesRow) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(std::numeric_limits<ui32>::max(), EDirection::Preceding), + TInputRow(std::numeric_limits<ui32>::max(), EDirection::Following) + ) + }, + .InputElements = {ui64(0), std::numeric_limits<ui64>::max()}, + .ExpectedStates = { + { + .CurrentElement = 0, + .QueueContent = {0, std::numeric_limits<ui64>::max()}, + .RowIntervalChecks = {{0, 2}}, + }, + { + .CurrentElement = std::numeric_limits<ui64>::max(), + .QueueContent = {0, std::numeric_limits<ui64>::max()}, + .RowIntervalChecks = {{0, 2}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowInterval_Uint64MaxValuesRange) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>( + TInputRange<ui64>(std::numeric_limits<ui64>::max(), EDirection::Preceding), + TInputRange<ui64>(std::numeric_limits<ui64>::max(), EDirection::Following) + ) + }, + .InputElements = {ui64(0), std::numeric_limits<ui64>::max()}, + .ExpectedStates = { + { + .CurrentElement = 0, + .QueueContent = {0, std::numeric_limits<ui64>::max()}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = std::numeric_limits<ui64>::max(), + .QueueContent = {0, std::numeric_limits<ui64>::max()}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_FloatExtremeValues) { + float negMax = -std::numeric_limits<float>::max(); + float posMax = std::numeric_limits<float>::max(); 
+ float nan1 = std::numeric_limits<float>::quiet_NaN(); + float nan2 = std::numeric_limits<float>::quiet_NaN(); + + TTestCase<float, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<float>{ + TInputRange<float>{100, EDirection::Preceding}, + TInputRange<float>{100, EDirection::Following} + } + }, + .InputElements = {negMax, posMax, nan1, nan2}, + .ExpectedStates = { + { + .CurrentElement = negMax, + .QueueContent = {negMax, posMax}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = posMax, + .QueueContent = {posMax, nan1}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = nan1, + .QueueContent = {nan1, nan2}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = nan2, + .QueueContent = {nan1, nan2}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + + +Y_UNIT_TEST(TwoRangeEmptyIntervals_BothDirections_Asc) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Following}, + TInputRange<ui64>{6, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{6, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Preceding} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20}, + .RangeIntervalChecks = { + {1, 1}, + {0, 0} + } + }, + { + .CurrentElement = 20, + .QueueContent = {20, 30}, + .RangeIntervalChecks = { + {1, 1}, + {0, 0} + } + }, + { + .CurrentElement = 30, + .QueueContent = {30, 40}, + .RangeIntervalChecks = { + {1, 1}, + {0, 0} + } + }, + { + .CurrentElement = 40, + .QueueContent = {40}, + .RangeIntervalChecks = { + {1, 1}, + {0, 0} + } + } + } + }; + + RunTestCase(testCase); +} + + +Y_UNIT_TEST(TwoRangeEmptyIntervals_LargeOffsets_Asc) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{14, 
EDirection::Following}, + TInputRange<ui64>{15, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{15, EDirection::Preceding}, + TInputRange<ui64>{14, EDirection::Preceding} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RangeIntervalChecks = { + {2, 2}, + {0, 0} + } + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30, 40}, + .RangeIntervalChecks = { + {3, 3}, + {0, 0} + } + }, + { + .CurrentElement = 30, + .QueueContent = {20, 30, 40}, + .RangeIntervalChecks = { + {3, 3}, + {0, 0} + } + }, + { + .CurrentElement = 40, + .QueueContent = {30, 40}, + .RangeIntervalChecks = { + {2, 2}, + {0, 0} + } + } + } + }; + + RunTestCase(testCase); +} + + +Y_UNIT_TEST(RangeInterval_InvalidMinMaxOrder_Asc) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{10, EDirection::Following}, + TInputRange<ui64>{10, EDirection::Preceding} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10}, + .RangeIntervalChecks = { + {1, 0} + } + }, + { + .CurrentElement = 20, + .QueueContent = {20}, + .RangeIntervalChecks = { + {1, 0} + } + }, + { + .CurrentElement = 30, + .QueueContent = {30}, + .RangeIntervalChecks = { + {1, 0} + } + } + } + }; + + RunTestCase(testCase); +} + + +Y_UNIT_TEST(RangeInterval_InvalidMinMaxOrderLarge_Asc) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{25, EDirection::Following}, + TInputRange<ui64>{25, EDirection::Preceding} + } + }, + .InputElements = {ui64(10), ui64(20), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10}, + .RangeIntervalChecks = { + {1, 0} + } + }, + { + .CurrentElement = 20, + .QueueContent = {20}, + .RangeIntervalChecks = { + {1, 0} + } + }, + { + 
.CurrentElement = 30, + .QueueContent = {30}, + .RangeIntervalChecks = { + {1, 0} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(BasicRowInterval_Desc) { + TTestCase<ui64, ESortOrder::Desc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(2, EDirection::Preceding), + TInputRow(2, EDirection::Following) + ) + }, + .InputElements = {ui64(50), ui64(40), ui64(30), ui64(20), ui64(10)}, + .ExpectedStates = { + { + .CurrentElement = 50, + .QueueContent = {50, 40, 30}, + .RowIntervalChecks = {{0, 3}}, + }, + { + .CurrentElement = 40, + .QueueContent = {50, 40, 30, 20}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 30, + .QueueContent = {50, 40, 30, 20, 10}, + .RowIntervalChecks = {{0, 5}}, + }, + { + .CurrentElement = 20, + .QueueContent = {40, 30, 20, 10}, + .RowIntervalChecks = {{0, 4}}, + }, + { + .CurrentElement = 10, + .QueueContent = {30, 20, 10}, + .RowIntervalChecks = {{0, 3}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(BasicRangeInterval_Desc) { + TTestCase<ui64, ESortOrder::Desc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements = {ui64(30), ui64(25), TYield(), ui64(20), ui64(15), TYield(), ui64(10)}, + .ExpectedStates = { + { + .CurrentElement = 30, + .QueueContent = {30, 25, 20}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 25, + .QueueContent = {30, 25, 20, 15}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 20, + .QueueContent = {25, 20, 15, 10}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 15, + .QueueContent = {20, 15, 10}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 10, + .QueueContent = {15, 10}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowAndRangeIntervals_Desc) { + TTestCase<ui64, ESortOrder::Desc> testCase = { + .RowIntervals = { + 
TInputRowWindowFrame( + TInputRow(1, EDirection::Preceding), + TInputRow(1, EDirection::Following) + ) + }, + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{5, EDirection::Following} + } + }, + .InputElements = {ui64(40), ui64(30), ui64(20), ui64(15), ui64(10)}, + .ExpectedStates = { + { + .CurrentElement = 40, + .QueueContent = {40, 30}, + .RowIntervalChecks = {{0, 2}}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = 30, + .QueueContent = {40, 30, 20}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{1, 2}} + }, + { + .CurrentElement = 20, + .QueueContent = {30, 20, 15, 10}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{1, 3}} + }, + { + .CurrentElement = 15, + .QueueContent = {20, 15, 10}, + .RowIntervalChecks = {{0, 3}}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 10, + .QueueContent = {15, 10}, + .RowIntervalChecks = {{0, 2}}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_StringLength) { + TTestCase<TString, ESortOrder::Asc, size_t> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<size_t>{ + TInputRange<size_t>{1, EDirection::Preceding}, + TInputRange<size_t>{0, EDirection::Following} + } + }, + .ElementGetter = [](const TString& str) -> TMaybe<size_t> { return TMaybe<size_t>(str.size()); }, + .InputElements = {TString("a"), TString("aa"), TString("aaa"), TString("aaaa")}, + .ExpectedStates = { + { + .CurrentElement = "a", + .QueueContent = {"a", "aa"}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = "aa", + .QueueContent = {"a", "aa", "aaa"}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = "aaa", + .QueueContent = {"aa", "aaa", "aaaa"}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = "aaaa", + .QueueContent = {"aaa", "aaaa"}, + .RangeIntervalChecks = {{0, 2}} + } + } + }; + + RunTestCase(testCase); +} + 
+Y_UNIT_TEST(RangeInterval_WithOptionals_Asc) { + TTestCase<TMaybe<ui64>, ESortOrder::Asc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{0, EDirection::Following}, + TInputRange<ui64>{1, EDirection::Following} + } + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(2), TMaybe<ui64>(3)}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(1), TMaybe<ui64>(2), TMaybe<ui64>(3)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(2), + .QueueContent = {TMaybe<ui64>(2), TMaybe<ui64>(3)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(3), + .QueueContent = {TMaybe<ui64>(3)}, + .RangeIntervalChecks = {{0, 1}} + } + } + }; + + RunTestCase(testCase); +} + + +Y_UNIT_TEST(RangeInterval_WithOptionals_Desc) { + TTestCase<TMaybe<ui8>, ESortOrder::Desc, ui8> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui8>{ + TInputRange<ui8>{2, EDirection::Following}, + TInputRange<ui8>{10, EDirection::Following} + } + }, + .ElementGetter = [](const TMaybe<ui8>& elem) -> TMaybe<ui8> { + return elem; + }, + .InputElements = {TMaybe<ui8>(), TMaybe<ui8>(), TMaybe<ui8>(3), TMaybe<ui8>(2), TMaybe<ui8>(1)}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui8>(), + .QueueContent = {TMaybe<ui8>(), TMaybe<ui8>(), TMaybe<ui8>(3)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui8>(), + .QueueContent = {TMaybe<ui8>(), TMaybe<ui8>(), TMaybe<ui8>(3)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = 
TMaybe<ui8>(3), + .QueueContent = {TMaybe<ui8>(3), TMaybe<ui8>(2), TMaybe<ui8>(1)}, + .RangeIntervalChecks = {{2, 3}} + }, + { + .CurrentElement = TMaybe<ui8>(2), + .QueueContent = {TMaybe<ui8>(2), TMaybe<ui8>(1)}, + .RangeIntervalChecks = {{2, 2}} + }, + { + .CurrentElement = TMaybe<ui8>(1), + .QueueContent = {TMaybe<ui8>(1)}, + .RangeIntervalChecks = {{1, 1}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_RepeatedElements) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{10, EDirection::Preceding}, + TInputRange<ui64>{0, EDirection::Following} + } + }, + .InputElements = {ui64(1), ui64(2), ui64(2), ui64(4)}, + .ExpectedStates = { + { + .CurrentElement = 1, + .QueueContent = {1, 2}, + .RangeIntervalChecks = {{0, 1}} + }, + { + .CurrentElement = 2, + .QueueContent = {1, 2, 2, 4}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 2, + .QueueContent = {1, 2, 2, 4}, + .RangeIntervalChecks = {{0, 3}} + }, + { + .CurrentElement = 4, + .QueueContent = {1, 2, 2, 4}, + .RangeIntervalChecks = {{0, 4}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithOptionals_Desc_EmptyInterval) { + TTestCase<TMaybe<ui8>, ESortOrder::Desc, ui8> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui8>{ + TInputRange<ui8>{10, EDirection::Following}, + TInputRange<ui8>{2, EDirection::Following} + } + }, + .ElementGetter = [](const TMaybe<ui8>& elem) -> TMaybe<ui8> { + return elem; + }, + .InputElements = {TMaybe<ui8>(), TMaybe<ui8>(), TMaybe<ui64>(3), TMaybe<ui64>(2), TMaybe<ui64>(1)}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(3)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(3)}, + .RangeIntervalChecks = {{0, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(3), + 
.QueueContent = {TMaybe<ui64>(3), TMaybe<ui64>(2), TMaybe<ui64>(1)}, + .RangeIntervalChecks = {{3, 3}} + }, + { + .CurrentElement = TMaybe<ui64>(2), + .QueueContent = {TMaybe<ui64>(2), TMaybe<ui64>(1)}, + .RangeIntervalChecks = {{2, 2}} + }, + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(1)}, + .RangeIntervalChecks = {{1, 1}} + } + } + }; + + RunTestCase(testCase); +} + +} diff --git a/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part3.cpp b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part3.cpp new file mode 100644 index 00000000000..2b66ad7b003 --- /dev/null +++ b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part3.cpp @@ -0,0 +1,556 @@ +#include "mkql_core_win_frames_collector_test_helper.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NKikimr::NMiniKQL; +using namespace NKikimr::NMiniKQL::NTest::NWindow; + +// clang-format off +Y_UNIT_TEST_SUITE(TCoreWinFramesCollectorTestPart3) { + +Y_UNIT_TEST(RangeInterval_WithNulls_MultipleIntervals_Asc) { + TTestCase<TMaybe<ui64>, ESortOrder::Asc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Preceding}, + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Preceding}, + TInputRange<ui64>{3, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{3, EDirection::Following} + } + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + 
.ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {0, 6}, + {0, 2}, + {0, 2} + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {0, 6}, + {0, 2}, + {0, 2} + } + }, + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {2, 6}, + {0, 4}, + {2, 4} + } + }, + { + .CurrentElement = TMaybe<ui64>(4), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {2, 6}, + {0, 5}, + {2, 5} + } + }, + { + .CurrentElement = TMaybe<ui64>(7), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {3, 6}, + {0, 5}, + {3, 5} + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1), TMaybe<ui64>(4), TMaybe<ui64>(7), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {5, 6}, + {0, 6}, + {5, 6} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNullsLast_Following_Desc) { + TTestCase<TMaybe<ui64>, ESortOrder::Desc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Following}, + TInputRange<ui64>{7, EDirection::Following} + }, + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(1), TMaybe<ui64>(), TMaybe<ui64>()}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = 
{TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 2}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNullsFirst_Following_Asc) { + TTestCase<TMaybe<ui64>, ESortOrder::Asc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Following}, + TInputRange<ui64>{7, EDirection::Following} + }, + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNullsLast_Preceding_Asc) { /* NOTE(review): the name ends in _Asc, but this case is instantiated with ESortOrder::Desc and its input puts the empty values last. */ + TTestCase<TMaybe<ui64>, ESortOrder::Desc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{7, EDirection::Preceding} + }, + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(1), TMaybe<ui64>(), TMaybe<ui64>()}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = 
{TMaybe<ui64>(), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 2}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNullsFirst_Preceding_Asc) { + TTestCase<TMaybe<ui64>, ESortOrder::Asc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{7, EDirection::Preceding} + }, + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(1)}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNaNsLast_Following_Desc) { + const float NaN = std::numeric_limits<float>::quiet_NaN(); + + TTestCase<float, ESortOrder::Desc, float> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<float>{ + TInputRange<float>{5.0f, EDirection::Following}, + TInputRange<float>{7.0f, EDirection::Following} + }, + }, + .ElementGetter = [](float elem) -> float { + return elem; + }, + .InputElements = {1.0f, NaN, NaN}, + .ExpectedStates = { + { + .CurrentElement = 1.0f, + .QueueContent = {1.0f, NaN}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + }, + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN}, + .RangeIntervalChecks = { + {0, 2}, + } + } + } + 
}; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNaNsFirst_Following_Asc) { + const float NaN = std::numeric_limits<float>::quiet_NaN(); + + TTestCase<float, ESortOrder::Asc, float> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<float>{ + TInputRange<float>{5.0f, EDirection::Following}, + TInputRange<float>{7.0f, EDirection::Following} + }, + }, + .ElementGetter = [](float elem) -> float { + return elem; + }, + .InputElements = {NaN, NaN, 1.0f}, + .ExpectedStates = { + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN, 1.0f}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN, 1.0f}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = 1.0f, + .QueueContent = {1.0f}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNaNsLast_Preceding_Asc) { + const float NaN = std::numeric_limits<float>::quiet_NaN(); + + // Keep the same sort order (Desc) as in the original test, even though this test's name ends in _Asc + TTestCase<float, ESortOrder::Desc, float> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<float>{ + TInputRange<float>{5.0f, EDirection::Preceding}, + TInputRange<float>{7.0f, EDirection::Preceding} + }, + }, + .ElementGetter = [](float elem) -> float { + return elem; + }, + .InputElements = {1.0f, NaN, NaN}, + .ExpectedStates = { + { + .CurrentElement = 1.0f, + .QueueContent = {1.0f}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + }, + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN}, + .RangeIntervalChecks = { + {0, 2}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNaNsFirst_Preceding_Asc) { + const float NaN = std::numeric_limits<float>::quiet_NaN(); + + TTestCase<float, ESortOrder::Asc, float> testCase = { + .RangeIntervals = { + 
TInputRangeWindowFrame<float>{ + TInputRange<float>{5.0f, EDirection::Preceding}, + TInputRange<float>{7.0f, EDirection::Preceding} + }, + }, + .ElementGetter = [](float elem) -> float { + return elem; + }, + .InputElements = {NaN, NaN, 1.0f}, + .ExpectedStates = { + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN, 1.0f}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = NaN, + .QueueContent = {NaN, NaN, 1.0f}, + .RangeIntervalChecks = { + {0, 2}, + } + }, + { + .CurrentElement = 1.0f, + .QueueContent = {1.0f}, + .RangeIntervalChecks = { + TEmptyInterval{}, + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeInterval_WithNulls_MultipleIntervals_Desc) { + TTestCase<TMaybe<ui64>, ESortOrder::Desc, ui64> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Preceding}, + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{TInputRange<ui64>::TUnbounded{}, EDirection::Preceding}, + TInputRange<ui64>{3, EDirection::Following} + }, + TInputRangeWindowFrame<ui64>{ + TInputRange<ui64>{5, EDirection::Preceding}, + TInputRange<ui64>{3, EDirection::Following} + } + }, + .ElementGetter = [](const TMaybe<ui64>& elem) -> TMaybe<ui64> { + return elem; + }, + .InputElements = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .ExpectedStates = { + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {0, 6}, + {0, 2}, + {0, 2} + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), 
TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {0, 6}, + {0, 2}, + {0, 2} + } + }, + { + .CurrentElement = TMaybe<ui64>(7), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {2, 6}, + {0, 4}, + {2, 4} + } + }, + { + .CurrentElement = TMaybe<ui64>(4), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {2, 6}, + {0, 5}, + {2, 5} + } + }, + { + .CurrentElement = TMaybe<ui64>(1), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {3, 6}, + {0, 5}, + {3, 5} + } + }, + { + .CurrentElement = TMaybe<ui64>(), + .QueueContent = {TMaybe<ui64>(), TMaybe<ui64>(), TMaybe<ui64>(7), TMaybe<ui64>(4), TMaybe<ui64>(1), TMaybe<ui64>()}, + .RangeIntervalChecks = { + {0, 6}, + {5, 6}, + {0, 6}, + {5, 6} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowIntervalUnbounded) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(TInputRow::TUnbounded{}, EDirection::Preceding), + TInputRow(TInputRow::TUnbounded{}, EDirection::Following) + ), + TInputRowWindowFrame( + TInputRow(1, EDirection::Preceding), + TInputRow(1, EDirection::Preceding) + ) + }, + .InputElements = {ui64(10), ui64(20), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 3}, {0, 0}}, + }, + { + .CurrentElement = 20, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 3}, {0, 1}}, + },{ + .CurrentElement = 30, + .QueueContent = {10, 20, 30}, + .RowIntervalChecks = {{0, 3}, {1, 2}}, + }, + } + }; + + RunTestCase(testCase); +} + +} diff --git a/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part4.cpp 
b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part4.cpp new file mode 100644 index 00000000000..acead4ed333 --- /dev/null +++ b/yql/essentials/minikql/mkql_core_win_frames_collector_ut_part4.cpp @@ -0,0 +1,391 @@ +#include "mkql_core_win_frames_collector_test_helper.h" + +using namespace NKikimr::NMiniKQL; +using namespace NKikimr::NMiniKQL::NTest::NWindow; + +// clang-format off +Y_UNIT_TEST_SUITE(TCoreWinFramesCollectorTestPart4) { + +Y_UNIT_TEST(RangeIncremental_BasicIncremental) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIncrementals = { + TInputRange<ui64>{5, EDirection::Following} + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(25), ui64(30)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 15, + .QueueContent = {15, 20, 25}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 20, + .QueueContent = {20, 25, 30}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 25, + .QueueContent = {25, 30}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 30, + .QueueContent = {30}, + .RangeIncrementalChecks = {{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowIncremental_BasicIncremental) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RowIncrementals = { + TInputRow{2, EDirection::Following} + }, + .InputElements = {ui64(10), ui64(20), ui64(30), ui64(40), ui64(50)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 20, 30}, + .RowIncrementalChecks = {{2, 3}}, + }, + { + .CurrentElement = 20, + .QueueContent = {20, 30, 40}, + .RowIncrementalChecks = {{2, 3}}, + }, + { + .CurrentElement = 30, + .QueueContent = {30, 40, 50}, + .RowIncrementalChecks = {{2, 3}}, + }, + { + .CurrentElement = 40, + .QueueContent = {40, 50}, + .RowIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 50, + .QueueContent = {50}, + .RowIncrementalChecks = 
{{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeIncremental_WithGaps) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIncrementals = { + TInputRange<ui64>{10, EDirection::Following} + }, + .InputElements = {ui64(10), ui64(25), ui64(40), ui64(55)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 25}, + .RangeIncrementalChecks = {{0, 1}}, + }, + { + .CurrentElement = 25, + .QueueContent = {25, 40}, + .RangeIncrementalChecks = {{0, 1}}, + }, + { + .CurrentElement = 40, + .QueueContent = {40, 55}, + .RangeIncrementalChecks = {{0, 1}}, + }, + { + .CurrentElement = 55, + .QueueContent = {55}, + .RangeIncrementalChecks = {{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeIncremental_MultipleNewElements) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIncrementals = { + TInputRange<ui64>{15, EDirection::Following} + }, + .InputElements = {ui64(10), ui64(12), ui64(14), ui64(16), ui64(18), ui64(20)}, + .ExpectedStates = { + { + .CurrentElement = 10, + .QueueContent = {10, 12, 14, 16, 18, 20}, + .RangeIncrementalChecks = {{5, 6}}, + }, + { + .CurrentElement = 12, + .QueueContent = {12, 14, 16, 18, 20}, + .RangeIncrementalChecks = {{4, 5}}, + }, + { + .CurrentElement = 14, + .QueueContent = {14, 16, 18, 20}, + .RangeIncrementalChecks = {{3, 4}}, + }, + { + .CurrentElement = 16, + .QueueContent = {16, 18, 20}, + .RangeIncrementalChecks = {{2, 3}}, + }, + { + .CurrentElement = 18, + .QueueContent = {18, 20}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 20, + .QueueContent = {20}, + .RangeIncrementalChecks = {{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(CombinedDelta_RangeAndRow) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIncrementals = { + TInputRange<ui64>{10, EDirection::Following} + }, + .RowIncrementals = { + TInputRow{1, EDirection::Following} + }, + .InputElements = {ui64(10), ui64(15), ui64(20), ui64(30)}, + .ExpectedStates 
= { + { + .CurrentElement = 10, + .QueueContent = {10, 15, 20, 30}, + .RangeIncrementalChecks = {{2, 3}}, + .RowIncrementalChecks = {{1, 2}} + }, + { + .CurrentElement = 15, + .QueueContent = {15, 20, 30}, + .RangeIncrementalChecks = {{1, 2}}, + .RowIncrementalChecks = {{1, 2}} + }, + { + .CurrentElement = 20, + .QueueContent = {20, 30}, + .RangeIncrementalChecks = {{1, 2}}, + .RowIncrementalChecks = {{1, 2}} + }, + { + .CurrentElement = 30, + .QueueContent = {30}, + .RangeIncrementalChecks = {{0, 1}}, + .RowIncrementalChecks = {{0, 1}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeIncremental_DescendingOrder) { + TTestCase<ui64, ESortOrder::Desc> testCase = { + .RangeIncrementals = { + TInputRange<ui64>{5, EDirection::Following} + }, + .InputElements = {ui64(30), TYield(), TYield(), ui64(25), ui64(20), ui64(15), ui64(10)}, + .ExpectedStates = { + { + .CurrentElement = 30, + .QueueContent = {30, 25, 20}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 25, + .QueueContent = {25, 20, 15}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 20, + .QueueContent = {20, 15, 10}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 15, + .QueueContent = {15, 10}, + .RangeIncrementalChecks = {{1, 2}}, + }, + { + .CurrentElement = 10, + .QueueContent = {10}, + .RangeIncrementalChecks = {{0, 1}}, + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(TwoRangeIntervalsAndTwoRowIntervalsDeltas_MixedDirections) { + TTestCase<ui64, ESortOrder::Asc> testCase = { + .RangeIncrementals = { + TInputRange<ui64>{3, EDirection::Following}, + TInputRange<ui64>{5, EDirection::Preceding}, + }, + .RowIncrementals = { + TInputRow(1, EDirection::Following), + TInputRow(1, EDirection::Preceding), + }, + .InputElements = {ui64(1), ui64(2), ui64(4), ui64(8), TYield()}, + .ExpectedStates = { + { + .CurrentElement = 1, + .QueueContent = {1, 2, 4, 8}, + .RangeIncrementalChecks = { + {2, 3}, + TEmptyInterval{} + }, + 
.RowIncrementalChecks = { + {1, 2}, + TEmptyInterval{} + }, + }, + { + .CurrentElement = 2, + .QueueContent = {1, 2, 4, 8}, + .RangeIncrementalChecks = { + {2, 3}, + TEmptyInterval{} + }, + .RowIncrementalChecks = { + {2, 3}, + {0, 1} + }, + }, + { + .CurrentElement = 4, + .QueueContent = {1, 2, 4, 8}, + .RangeIncrementalChecks = { + {2, 3}, + TEmptyInterval{} + }, + .RowIncrementalChecks = { + {3, 4}, + {1, 2} + }, + }, + { + .CurrentElement = 8, + .QueueContent = {2, 4, 8}, + .RangeIncrementalChecks = { + {2, 3}, + {0, 1} + }, + .RowIncrementalChecks = { + {2, 3}, + {1, 2} + } + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RowIntervalAndRowDelta_Desc) { + TTestCase<i64, ESortOrder::Desc> testCase = { + .RowIntervals = { + TInputRowWindowFrame( + TInputRow(1, EDirection::Preceding), + TInputRow(1, EDirection::Following) + ) + }, + .RowIncrementals = { + TInputRow{1, EDirection::Following} + }, + .InputElements = {TYield(), i64(8), i64(5), TYield(), i64(2), i64(-5)}, + .ExpectedStates = { + { + .CurrentElement = 8, + .QueueContent = {8, 5}, + .RowIntervalChecks = {{0, 2}}, + .RowIncrementalChecks = {{1, 2}} + }, + { + .CurrentElement = 5, + .QueueContent = {8, 5, 2}, + .RowIntervalChecks = {{0, 3}}, + .RowIncrementalChecks = {{2, 3}} + }, + { + .CurrentElement = 2, + .QueueContent = {5, 2, -5}, + .RowIntervalChecks = {{0, 3}}, + .RowIncrementalChecks = {{2, 3}} + }, + { + .CurrentElement = -5, + .QueueContent = {2, -5}, + .RowIntervalChecks = {{0, 2}}, + .RowIncrementalChecks = {{1, 2}} + } + } + }; + + RunTestCase(testCase); +} + +Y_UNIT_TEST(RangeIntervalAndRangeDelta_Desc) { + TTestCase<i64, ESortOrder::Desc> testCase = { + .RangeIntervals = { + TInputRangeWindowFrame<i64>{ + TInputRange<i64>{3, EDirection::Preceding}, + TInputRange<i64>{3, EDirection::Following} + } + }, + .RangeIncrementals = { + TInputRange<i64>{3, EDirection::Following} + }, + .InputElements = {i64(8), i64(5), TYield(), i64(2), i64(-5)}, + .ExpectedStates = { + { + .CurrentElement 
= 8, + .QueueContent = {8, 5, 2}, + .RangeIntervalChecks = {{0, 2}}, + .RangeIncrementalChecks = {{1, 2}} + }, + { + .CurrentElement = 5, + .QueueContent = {8, 5, 2, -5}, + .RangeIntervalChecks = {{0, 3}}, + .RangeIncrementalChecks = {{2, 3}} + }, + { + .CurrentElement = 2, + .QueueContent = {5, 2, -5}, + .RangeIntervalChecks = {{0, 2}}, + .RangeIncrementalChecks = {{1, 2}} + }, + { + .CurrentElement = -5, + .QueueContent = {-5}, + .RangeIntervalChecks = {{0, 1}}, + .RangeIncrementalChecks = {{0, 1}} + } + } + }; + + RunTestCase(testCase); +} + +} diff --git a/yql/essentials/minikql/mkql_program_builder.cpp b/yql/essentials/minikql/mkql_program_builder.cpp index 54a4523801a..2788580a86f 100644 --- a/yql/essentials/minikql/mkql_program_builder.cpp +++ b/yql/essentials/minikql/mkql_program_builder.cpp @@ -3679,6 +3679,55 @@ TRuntimeNode TProgramBuilder::PreserveStream(TRuntimeNode stream, TRuntimeNode q return TRuntimeNode(callableBuilder.Build(), false); } +TRuntimeNode TProgramBuilder::WinFramesCollector(TRuntimeNode stream, TRuntimeNode storage, TRuntimeNode winBounds) { + auto streamType = AS_TYPE(TStreamType, stream); + auto storageType = AS_TYPE(TResourceType, storage); + auto winBoundsType = AS_TYPE(TStructType, winBounds); + MKQL_ENSURE(winBoundsType != nullptr, "WinFramesCollector: winBounds must be struct literal."); + const auto tag = storageType->GetTag(); + MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "WinFramesCollector: Expected queue resource."); + TCallableBuilder callableBuilder(Env_, __func__, streamType); + callableBuilder.Add(stream); + callableBuilder.Add(storage); + callableBuilder.Add(winBounds); + return TRuntimeNode(callableBuilder.Build(), /*isImmediate=*/false); +} + +TRuntimeNode TProgramBuilder::WinFrame(TRuntimeNode queue, + TRuntimeNode handle, + TRuntimeNode isIncremental, + TRuntimeNode isRange, + TRuntimeNode isSingleElement, + const TArrayRef<const TRuntimeNode>& dependentNodes, + TType* returnType) { + auto queueType = 
AS_TYPE(TResourceType, queue); + auto handleType = AS_TYPE(TDataType, handle); + MKQL_ENSURE(handleType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "WinFrame: handle must be ui64"); + + auto isIncrementalType = AS_TYPE(TDataType, isIncremental); + MKQL_ENSURE(isIncrementalType->GetSchemeType() == NUdf::TDataType<bool>::Id, "WinFrame: isIncremental must be bool"); + + auto isRangeType = AS_TYPE(TDataType, isRange); + MKQL_ENSURE(isRangeType->GetSchemeType() == NUdf::TDataType<bool>::Id, "WinFrame: isRange must be bool"); + + auto isSingleElementType = AS_TYPE(TDataType, isSingleElement); + MKQL_ENSURE(isSingleElementType->GetSchemeType() == NUdf::TDataType<bool>::Id, "WinFrame: isSingleElement must be bool"); + + const auto tag = queueType->GetTag(); + MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "WinFrame: Expected Queue resource"); + + TCallableBuilder callableBuilder(Env_, __func__, returnType); + callableBuilder.Add(queue); + callableBuilder.Add(handle); + callableBuilder.Add(isIncremental); + callableBuilder.Add(isRange); + callableBuilder.Add(isSingleElement); + for (auto node : dependentNodes) { + callableBuilder.Add(node); + } + return TRuntimeNode(callableBuilder.Build(), false); +} + TRuntimeNode TProgramBuilder::Seq(const TArrayRef<const TRuntimeNode>& args, TType* returnType) { TCallableBuilder callableBuilder(Env_, __func__, returnType); for (auto node : args) { diff --git a/yql/essentials/minikql/mkql_program_builder.h b/yql/essentials/minikql/mkql_program_builder.h index fad2ca51ec9..10e4411b2f4 100644 --- a/yql/essentials/minikql/mkql_program_builder.h +++ b/yql/essentials/minikql/mkql_program_builder.h @@ -686,7 +686,16 @@ public: TRuntimeNode QueuePeek(TRuntimeNode resource, TRuntimeNode index, const TArrayRef<const TRuntimeNode>& dependentNodes, TType* returnType); TRuntimeNode QueueRange(TRuntimeNode resource, TRuntimeNode begin, TRuntimeNode end, const TArrayRef<const TRuntimeNode>& dependentNodes, TType* returnType); - TRuntimeNode 
PreserveStream(TRuntimeNode stream, TRuntimeNode preserve, TRuntimeNode outpace); + TRuntimeNode PreserveStream(TRuntimeNode stream, TRuntimeNode queue, TRuntimeNode outpace); + + TRuntimeNode WinFramesCollector(TRuntimeNode stream, TRuntimeNode storage, TRuntimeNode winBounds); + /* WinFrame: the definition in mkql_program_builder.cpp requires queue to be a Queue resource, handle to be ui64, and isIncremental / isRange / isSingleElement to be bool. */ TRuntimeNode WinFrame(TRuntimeNode queue, + TRuntimeNode handle, + TRuntimeNode isIncremental, + TRuntimeNode isRange, + TRuntimeNode isSingleElement, + const TArrayRef<const TRuntimeNode>& dependentNodes, + TType* returnType); TRuntimeNode Seq(const TArrayRef<const TRuntimeNode>& items, TType* returnType); diff --git a/yql/essentials/minikql/mkql_runtime_version.h b/yql/essentials/minikql/mkql_runtime_version.h index 59e40802240..e132061b337 100644 --- a/yql/essentials/minikql/mkql_runtime_version.h +++ b/yql/essentials/minikql/mkql_runtime_version.h @@ -24,7 +24,7 @@ namespace NMiniKQL { // 1. Bump this version every time incompatible runtime nodes are introduced. // 2. Make sure you provide runtime node generation for previous runtime versions. 
#ifndef MKQL_RUNTIME_VERSION - #define MKQL_RUNTIME_VERSION 71U + #define MKQL_RUNTIME_VERSION 72U #endif // History: diff --git a/yql/essentials/minikql/mkql_saturated_math.h b/yql/essentials/minikql/mkql_saturated_math.h new file mode 100644 index 00000000000..de817e0fd63 --- /dev/null +++ b/yql/essentials/minikql/mkql_saturated_math.h @@ -0,0 +1,67 @@ +#pragma once + +#include <yql/essentials/core/sql_types/window_direction.h> +#include <yql/essentials/public/decimal/yql_decimal.h> + +#include <util/system/compiler.h> +#include <util/system/types.h> +#include <util/system/yassert.h> + +#include <cmath> +#include <limits> +#include <type_traits> + +namespace NKikimr::NMiniKQL { + +using NYql::NWindow::EDirection; + +// clang-format off +template <class T> +using TNextWiderSigned = + std::conditional_t<sizeof(T) == sizeof(i8), i16, + std::conditional_t<sizeof(T) == sizeof(i16), i32, + std::conditional_t<sizeof(T) == sizeof(i32), i64, + NYql::NDecimal::TInt128>>>; +// clang-format on + +enum class EInfBoundary { + Left, + Right, +}; + +// InfBoundary == Right -> [boundary, +inf) +// InfBoundary == Left -> (-inf, boundary] +template <EInfBoundary InfBoundary, class T> +Y_FORCE_INLINE constexpr bool IsBelongToInterval(EDirection dir, T from, T delta, T x) { + if constexpr (std::is_floating_point_v<T>) { + Y_DEBUG_ABORT_UNLESS(!std::isnan(delta)); + Y_DEBUG_ABORT_UNLESS(!std::isnan(from)); + Y_DEBUG_ABORT_UNLESS(!std::isnan(x)); + const T b = (dir == EDirection::Following) ? 
(from + delta) : (from - delta); + if constexpr (InfBoundary == EInfBoundary::Right) { + return x >= b; + } else { + return x <= b; + } + } else { + static_assert(std::is_integral_v<T>, "T must be integral or floating"); + static_assert(!std::is_same_v<T, bool>, "bool is not supported"); + static_assert(sizeof(T) == sizeof(i8) || sizeof(T) == sizeof(i16) || + sizeof(T) == sizeof(i32) || sizeof(T) == sizeof(i64), + "Only 8/16/32/64-bit integers are supported"); + + using W = TNextWiderSigned<T>; + const W wf = static_cast<W>(from); + const W wd = static_cast<W>(delta); + const W wx = static_cast<W>(x); + const W b = (dir == EDirection::Following) ? (wf + wd) : (wf - wd); + + if constexpr (InfBoundary == EInfBoundary::Right) { + return wx >= b; // [b, +inf) + } else { + return wx <= b; // (-inf, b] + } + } +} + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/mkql_saturated_math_ut.cpp b/yql/essentials/minikql/mkql_saturated_math_ut.cpp new file mode 100644 index 00000000000..1921921b322 --- /dev/null +++ b/yql/essentials/minikql/mkql_saturated_math_ut.cpp @@ -0,0 +1,107 @@ +#include "mkql_saturated_math.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <limits> + +namespace NKikimr::NMiniKQL { + +Y_UNIT_TEST_SUITE(SaturatedMathTest) { + +Y_UNIT_TEST(IsBelongToInterval_RightDirection_Normal) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, 5, 15)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, 5, 10)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, 5, 0)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, 5, 16)); +} + +Y_UNIT_TEST(IsBelongToInterval_RightDirection_Overflow) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<ui32>::max(), 1u, std::numeric_limits<ui32>::max())); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 
std::numeric_limits<ui32>::max(), 1u, 0u)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<ui32>::max(), 1u, 100u)); +} + +Y_UNIT_TEST(IsBelongToInterval_LeftDirection_Normal) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, 5, 5)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, 5, 0)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, 5, -100)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, 5, 6)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, 5, 10)); +} + +Y_UNIT_TEST(IsBelongToInterval_LeftDirection_Underflow_Unsigned) { + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 5u, 10u, 0u)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 5u, 10u, 1u)); +} + +Y_UNIT_TEST(IsBelongToInterval_LeftDirection_Underflow_Signed) { + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, std::numeric_limits<i32>::min(), 1, std::numeric_limits<i32>::min())); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, std::numeric_limits<i32>::min(), 1, 0)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, std::numeric_limits<i32>::min(), 1, std::numeric_limits<i32>::min() + 1)); +} + +Y_UNIT_TEST(IsBelongToInterval_RightDirection_NegativeDelta_Signed) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, -5, 5)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, -5, 0)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10, -5, 6)); +} + +Y_UNIT_TEST(IsBelongToInterval_LeftDirection_NegativeDelta_Signed) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, -5, 15)); + 
UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, -5, 10)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10, -5, 16)); +} + +Y_UNIT_TEST(IsBelongToInterval_BoundaryValues_ui64) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<ui64>::max(), ui64(0), std::numeric_limits<ui64>::max())); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<ui64>::max(), ui64(0), ui64(0))); + + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 0ul, 0ul, 0ul)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 0ul, 0ul, 1ul)); +} + +Y_UNIT_TEST(IsBelongToInterval_BoundaryValues_i64) { + UNIT_ASSERT((IsBelongToInterval<EInfBoundary::Left, i64>(EDirection::Following, std::numeric_limits<i64>::max(), 0ll, std::numeric_limits<i64>::max()))); + UNIT_ASSERT((IsBelongToInterval<EInfBoundary::Left, i64>(EDirection::Following, std::numeric_limits<i64>::max(), 0ll, 0ll))); + UNIT_ASSERT((IsBelongToInterval<EInfBoundary::Left, i64>(EDirection::Following, std::numeric_limits<i64>::max(), 0ll, std::numeric_limits<i64>::min()))); + + UNIT_ASSERT((IsBelongToInterval<EInfBoundary::Left, i64>(EDirection::Preceding, std::numeric_limits<i64>::min(), 0ll, std::numeric_limits<i64>::min()))); + UNIT_ASSERT((!IsBelongToInterval<EInfBoundary::Left, i64>(EDirection::Preceding, std::numeric_limits<i64>::min(), 0ll, std::numeric_limits<i64>::min() + 1))); +} + +Y_UNIT_TEST(IsBelongToInterval_Float) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10.0f, 5.0f, 15.0f)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10.0f, 5.0f, 10.0f)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10.0f, 5.0f, 15.1f)); + + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10.0f, 5.0f, 5.0f)); + 
UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10.0f, 5.0f, 0.0f)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10.0f, 5.0f, 5.1f)); +} + +Y_UNIT_TEST(IsBelongToInterval_Double) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10.0, 5.0, 15.0)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10.0, 5.0, 10.0)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, 10.0, 5.0, 15.1)); + + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10.0, 5.0, 5.0)); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10.0, 5.0, 0.0)); + UNIT_ASSERT(!IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, 10.0, 5.0, 5.1)); +} + +Y_UNIT_TEST(IsBelongToInterval_Float_BoundaryValues) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<float>::max(), 0.0f, std::numeric_limits<float>::max())); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<float>::max(), 0.0f, 0.0f)); + + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, std::numeric_limits<float>::lowest(), 0.0f, std::numeric_limits<float>::lowest())); +} + +Y_UNIT_TEST(IsBelongToInterval_Double_BoundaryValues) { + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<double>::max(), 0.0, std::numeric_limits<double>::max())); + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Following, std::numeric_limits<double>::max(), 0.0, 0.0)); + + UNIT_ASSERT(IsBelongToInterval<EInfBoundary::Left>(EDirection::Preceding, std::numeric_limits<double>::lowest(), 0.0, std::numeric_limits<double>::lowest())); +} + +} // Y_UNIT_TEST_SUITE(SaturatedMathTest) + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/ut/ya.make b/yql/essentials/minikql/ut/ya.make index 
fa0f14f8d0e..b79fd2bf6d8 100644 --- a/yql/essentials/minikql/ut/ya.make +++ b/yql/essentials/minikql/ut/ya.make @@ -20,11 +20,16 @@ SRCS( mkql_node_printer_ut.cpp mkql_node_ut.cpp mkql_opt_literal_ut.cpp + mkql_saturated_math_ut.cpp mkql_stats_registry_ut.cpp mkql_string_util_ut.cpp mkql_type_builder_ut.cpp mkql_type_ops_ut.cpp mkql_unboxed_value_stream_ut.cpp + mkql_core_win_frames_collector_ut_part1.cpp + mkql_core_win_frames_collector_ut_part2.cpp + mkql_core_win_frames_collector_ut_part3.cpp + mkql_core_win_frames_collector_ut_part4.cpp pack_num_ut.cpp watermark_tracker_ut.cpp ) diff --git a/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp b/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp index 1e169fdc586..3444d0d93c8 100644 --- a/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp +++ b/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp @@ -576,6 +576,7 @@ TMkqlCommonCallableCompiler::TShared::TShared() { {"ListFromRange", &TProgramBuilder::ListFromRange}, {"PreserveStream", &TProgramBuilder::PreserveStream}, + {"WinFramesCollector", &TProgramBuilder::WinFramesCollector}, {"BlockIf", &TProgramBuilder::BlockIf}, }); @@ -762,6 +763,23 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return ctx.ProgramBuilder.WideLastCombiner(flow, keyExtractor, init, update, finish); }); + AddCallable("WinFrame", [](const TExprNode& node, TMkqlBuildContext& ctx) { + auto queue = MkqlBuildExpr(*node.Child(0), ctx); + auto handle = MkqlBuildExpr(*node.Child(1), ctx); + auto isIncremental = MkqlBuildExpr(*node.Child(2), ctx); + auto isRange = MkqlBuildExpr(*node.Child(3), ctx); + auto isSingleElement = MkqlBuildExpr(*node.Child(4), ctx); + const auto& args = GetArgumentsFrom<5U>(node, ctx); + const auto returnType = ctx.BuildType(node, *node.GetTypeAnn()); + return ctx.ProgramBuilder.WinFrame(queue, + handle, + isIncremental, + isRange, + isSingleElement, + args, + returnType); + }); + AddCallable("WideChopper", [](const TExprNode& node, 
TMkqlBuildContext& ctx) { const auto flow = MkqlBuildExpr(node.Head(), ctx); diff --git a/yql/essentials/providers/config/yql_config_provider.cpp b/yql/essentials/providers/config/yql_config_provider.cpp index d556752e5f8..1757a47d5d3 100644 --- a/yql/essentials/providers/config/yql_config_provider.cpp +++ b/yql/essentials/providers/config/yql_config_provider.cpp @@ -759,6 +759,12 @@ private: return false; } Types_.DiscoveryMode = true; + } else if (name == "WindowNewPipeline" || name == "DisableWindowNewPipeline") { + if (args.size() != 0) { + ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); + return false; + } + Types_.WindowNewPipeline = (name == "WindowNewPipeline"); } else if (name == "EnableSystemColumns") { if (args.size() != 0) { ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp index 5ff1d3d8594..5e5359a598c 100644 --- a/yql/essentials/sql/v1/context.cpp +++ b/yql/essentials/sql/v1/context.cpp @@ -77,6 +77,7 @@ THashMap<TStringBuf, TPragmaField> CTX_PRAGMA_FIELDS = { {"OptimizeSimpleILIKE", &TContext::OptimizeSimpleIlike}, {"DebugPositions", &TContext::DebugPositions}, {"ExceptIntersectBefore202503", &TContext::ExceptIntersectBefore202503}, + {"WindowNewPipeline", &TContext::WindowNewPipeline}, }; typedef TMaybe<bool> TContext::*TPragmaMaybeField; diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h index 158d9e891eb..e360b369ac3 100644 --- a/yql/essentials/sql/v1/context.h +++ b/yql/essentials/sql/v1/context.h @@ -410,6 +410,7 @@ public: bool DisableLegacyNotNull = false; bool DebugPositions = false; bool StrictWarningAsError = false; + bool WindowNewPipeline = false; TMaybe<bool> DirectRowDependsOn; TVector<size_t> ForAllStatementsParts; TMaybe<TString> Engine; diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp index 
2896cac686c..d96d6b2a4bc 100644 --- a/yql/essentials/sql/v1/query.cpp +++ b/yql/essentials/sql/v1/query.cpp @@ -3496,6 +3496,11 @@ public: BuildQuotedAtom(Pos_, "DebugPositions")))); } + if (ctx.WindowNewPipeline) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos_, "WindowNewPipeline")))); + } + if (ctx.DirectRowDependsOn.Defined()) { const TString pragmaName = *ctx.DirectRowDependsOn ? "DirectRowDependsOn" : "DisableDirectRowDependsOn"; currentWorlds->Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos_, pragmaName)))); diff --git a/yql/essentials/sql/v1/source.cpp b/yql/essentials/sql/v1/source.cpp index cb3c5ea8dbf..5acc2069e35 100644 --- a/yql/essentials/sql/v1/source.cpp +++ b/yql/essentials/sql/v1/source.cpp @@ -745,7 +745,7 @@ TNodePtr BuildFrameNode(const TFrameBound& frame, EFrameType frameType) { return node; } -TNodePtr ISource::BuildWindowFrame(const TFrameSpecification& spec, bool isCompact) { +TNodePtr ISource::BuildWindowFrame(TContext& ctx, const TFrameSpecification& spec, bool isCompact, TNodePtr sortSpec) { YQL_ENSURE(spec.FrameExclusion == FrameExclNone); YQL_ENSURE(spec.FrameBegin); YQL_ENSURE(spec.FrameEnd); @@ -755,8 +755,12 @@ TNodePtr ISource::BuildWindowFrame(const TFrameSpecification& spec, bool isCompa auto begin = Q(Y(Q("begin"), frameBeginNode)); auto end = Q(Y(Q("end"), frameEndNode)); - - return isCompact ? Q(Y(begin, end, Q(Y(Q("compact"))))) : Q(Y(begin, end)); + auto sortSpecNode = Q(Y(Q("sortSpec"), sortSpec)); + if (ctx.WindowNewPipeline) { + return isCompact ? Q(Y(begin, end, Q(Y(Q("compact"))), sortSpecNode)) : Q(Y(begin, end, sortSpecNode)); + } else { + return isCompact ? 
Q(Y(begin, end, Q(Y(Q("compact"))))) : Q(Y(begin, end)); + } } class TSessionWindowTraits final: public TCallNode { @@ -836,7 +840,9 @@ TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label) { break; } YQL_ENSURE(frameType); - auto callOnFrame = Y(frameType, BuildWindowFrame(*spec->Frame, spec->IsCompact)); + auto sortSpec = spec->OrderBy.empty() ? Y("Void") : BuildSortSpec(spec->OrderBy, useLabel, true, false); + + auto callOnFrame = Y(frameType, BuildWindowFrame(ctx, *spec->Frame, spec->IsCompact, sortSpec)); for (auto& agg : aggs) { auto winTraits = agg->WindowTraits(listType, ctx); callOnFrame = L(callOnFrame, winTraits); @@ -854,7 +860,6 @@ TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label) { } } - auto sortSpec = spec->OrderBy.empty() ? Y("Void") : BuildSortSpec(spec->OrderBy, useLabel, true, false); if (spec->Session) { TString label = spec->Session->GetLabel(); YQL_ENSURE(label); diff --git a/yql/essentials/sql/v1/source.h b/yql/essentials/sql/v1/source.h index 82386100e0e..36f62d86f70 100644 --- a/yql/essentials/sql/v1/source.h +++ b/yql/essentials/sql/v1/source.h @@ -126,7 +126,7 @@ protected: TVector<TNodePtr>& Expressions(EExprSeat exprSeat); TNodePtr AliasOrColumn(const TNodePtr& node, bool withSource); - TNodePtr BuildWindowFrame(const TFrameSpecification& spec, bool isCompact); + TNodePtr BuildWindowFrame(TContext& ctx, const TFrameSpecification& spec, bool isCompact, TNodePtr sortSpec); THashSet<TString> ExprAliases_; THashSet<TString> FlattenByAliases_; diff --git a/yql/essentials/sql/v1/sql_query.cpp b/yql/essentials/sql/v1/sql_query.cpp index 148a05dccd2..d3d236fdf46 100644 --- a/yql/essentials/sql/v1/sql_query.cpp +++ b/yql/essentials/sql/v1/sql_query.cpp @@ -3720,6 +3720,7 @@ THashMap<TString, TPragmaDescr> PragmaDescrs{ TABLE_ELEM("UseTablePrefixForEach", PragmaUseTablePrefixForEach, true), PAIRED_TABLE_ELEM("SimpleColumns", SimpleColumns), PAIRED_TABLE_ELEM("DebugPositions", DebugPositions), + 
PAIRED_TABLE_ELEM("WindowNewPipeline", WindowNewPipeline), PAIRED_TABLE_ELEM("CoalesceJoinKeysOnQualifiedAll", CoalesceJoinKeysOnQualifiedAll), PAIRED_TABLE_ELEM("PullUpFlatMapOverJoin", PragmaPullUpFlatMapOverJoin), PAIRED_TABLE_ELEM("FilterPushdownOverJoinOptionalSide", FilterPushdownOverJoinOptionalSide), diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json index daeba65f049..57c9accf0b7 100644 --- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json @@ -1978,6 +1978,62 @@ "uri": "https://{canondata_backend}/1777230/9825a8684763af6f5aebf2f2145914be57ea6969/resource.tar.gz#test.test_window-win_func_mutable_resource-default.txt-Results_/results.txt" } ], + "test.test[window-win_range_datetime--Debug]": [ + { + "checksum": "a097b1bff7d23c34b28e0871c5a00d37", + "size": 1637, + "uri": "https://{canondata_backend}/1937367/f34c72b1cf97e78038e516c0ce86b7993e721cef/resource.tar.gz#test.test_window-win_range_datetime--Debug_/opt.yql" + } + ], + "test.test[window-win_range_datetime--Results]": [ + { + "checksum": "787d452ab683e65eee8e6bce811c35af", + "size": 987, + "uri": "https://{canondata_backend}/1937492/b5dd38b5f8f339a35fb473f566f1eaa247e2a363/resource.tar.gz#test.test_window-win_range_datetime--Results_/results.txt" + } + ], + "test.test[window-win_range_two_columns_success--Debug]": [ + { + "checksum": "681a330d11e8d8cac9db241c06bceb54", + "size": 1552, + "uri": "https://{canondata_backend}/1937367/f34c72b1cf97e78038e516c0ce86b7993e721cef/resource.tar.gz#test.test_window-win_range_two_columns_success--Debug_/opt.yql" + } + ], + "test.test[window-win_range_two_columns_success--Results]": [ + { + "checksum": "b3443155d73266ef1a1265bc1630e8d3", + "size": 914, + "uri": 
"https://{canondata_backend}/1937492/b5dd38b5f8f339a35fb473f566f1eaa247e2a363/resource.tar.gz#test.test_window-win_range_two_columns_success--Results_/results.txt" + } + ], + "test.test[window-win_range_uint16--Debug]": [ + { + "checksum": "c33a5a66c10f38ccbd0d3f4116a8afa8", + "size": 2169, + "uri": "https://{canondata_backend}/1937367/f34c72b1cf97e78038e516c0ce86b7993e721cef/resource.tar.gz#test.test_window-win_range_uint16--Debug_/opt.yql" + } + ], + "test.test[window-win_range_uint16--Results]": [ + { + "checksum": "ae4149d74cebf9c7610a4213050da604", + "size": 1703, + "uri": "https://{canondata_backend}/1937492/b5dd38b5f8f339a35fb473f566f1eaa247e2a363/resource.tar.gz#test.test_window-win_range_uint16--Results_/results.txt" + } + ], + "test.test[window-win_range_uint64--Debug]": [ + { + "checksum": "f63634271a387f2c84631c637dda9793", + "size": 2054, + "uri": "https://{canondata_backend}/1937367/f34c72b1cf97e78038e516c0ce86b7993e721cef/resource.tar.gz#test.test_window-win_range_uint64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_uint64--Results]": [ + { + "checksum": "895f0c71af1fb5043e3c1aca34e25e5e", + "size": 1715, + "uri": "https://{canondata_backend}/1937492/b5dd38b5f8f339a35fb473f566f1eaa247e2a363/resource.tar.gz#test.test_window-win_range_uint64--Results_/results.txt" + } + ], "test.test[window-yql-14179-default.txt-Debug]": [ { "checksum": "ff4b3d459a8abbe21625fc36854cff0a", diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json index 7e2961d9cc1..3e77c48e2fc 100644 --- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json @@ -1853,6 +1853,34 @@ "uri": "https://{canondata_backend}/1871102/ab8415873dd63e6dbefac4800c4fa4b5a54faba9/resource.tar.gz#test.test_window-win_peephole_double_usage-default.txt-Results_/results.txt" } ], + "test.test[window-win_range_date--Debug]": [ + { + "checksum": 
"0c91c5e23899806a7f2ad12f77da1bb6", + "size": 1586, + "uri": "https://{canondata_backend}/1599023/6ec8ac6b6f069ce55f95e839456b095494864ede/resource.tar.gz#test.test_window-win_range_date--Debug_/opt.yql" + } + ], + "test.test[window-win_range_date--Results]": [ + { + "checksum": "2e560cda041c0b1b0f04d0c104e379c9", + "size": 1060, + "uri": "https://{canondata_backend}/1900335/559b16c25ae1b1143411eb2a157b3820e72a378d/resource.tar.gz#test.test_window-win_range_date--Results_/results.txt" + } + ], + "test.test[window-win_range_no_sort_current_row--Debug]": [ + { + "checksum": "c8f4dee12e5befd7af519825d4f5571d", + "size": 1045, + "uri": "https://{canondata_backend}/1599023/6ec8ac6b6f069ce55f95e839456b095494864ede/resource.tar.gz#test.test_window-win_range_no_sort_current_row--Debug_/opt.yql" + } + ], + "test.test[window-win_range_no_sort_current_row--Results]": [ + { + "checksum": "52ce419458071c00982a662cb28a9d0a", + "size": 987, + "uri": "https://{canondata_backend}/1900335/559b16c25ae1b1143411eb2a157b3820e72a378d/resource.tar.gz#test.test_window-win_range_no_sort_current_row--Results_/results.txt" + } + ], "test.test[window-yql-14277-default.txt-Debug]": [ { "checksum": "f4c3c9898c84a6d7fe78ff589ea46fd5", diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json index 4a54c89b621..3cf7c5a5101 100644 --- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json @@ -1798,5 +1798,36 @@ "size": 924, "uri": "https://{canondata_backend}/1777230/0109601db9d14d0697bcd3bd39c4b6bd3fc9ce55/resource.tar.gz#test.test_window-win_func_cume_dist_ansi-default.txt-Results_/results.txt" } + ], + "test.test[window-win_range_always_empty--Debug]": [ + { + "checksum": "eda332ac0b1051f885e72c75e8b9cc8f", + "size": 2025, + "uri": 
"https://{canondata_backend}/1781765/7fcaa8c3cfd45c988d8c601c510553b10bd721cd/resource.tar.gz#test.test_window-win_range_always_empty--Debug_/opt.yql" + } + ], + "test.test[window-win_range_always_empty--Results]": [ + { + "checksum": "f3aaf06891c2a4c22ab670e89c170a4a", + "size": 1341, + "uri": "https://{canondata_backend}/1942278/bdb2b9c0113be9e14ec61c9f26a445e3ecc397de/resource.tar.gz#test.test_window-win_range_always_empty--Results_/results.txt" + }, + { + "uri": "file://test.test_window-win_range_always_empty--Results_/extracted" + } + ], + "test.test[window-win_range_interval64--Debug]": [ + { + "checksum": "7aed367b5940b4f3765e8f14cdf3d4c0", + "size": 1911, + "uri": "https://{canondata_backend}/1781765/7fcaa8c3cfd45c988d8c601c510553b10bd721cd/resource.tar.gz#test.test_window-win_range_interval64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_interval64--Results]": [ + { + "checksum": "f725705a8e0f1cd59215d9cf426723af", + "size": 1396, + "uri": "https://{canondata_backend}/1942278/bdb2b9c0113be9e14ec61c9f26a445e3ecc397de/resource.tar.gz#test.test_window-win_range_interval64--Results_/results.txt" + } ] } diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/test.test_window-win_range_always_empty--Results_/extracted b/yql/essentials/tests/sql/minirun/part2/canondata/test.test_window-win_range_always_empty--Results_/extracted new file mode 100644 index 00000000000..09fcd88f5d3 --- /dev/null +++ b/yql/essentials/tests/sql/minirun/part2/canondata/test.test_window-win_range_always_empty--Results_/extracted @@ -0,0 +1,11 @@ +<tmp_path>/program.sql:<main>: Warning: Type annotation + + <tmp_path>/program.sql:<main>:35:1: Warning: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At tuple, At function: SqlProjectItem, At lambda + SELECT + ^ + <tmp_path>/program.sql:<main>:36:5: Warning: At function: EnsureWarn + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + ^ 
+ <tmp_path>/program.sql:<main>:36:12: Warning: Consider using non-singular type as first argument of Ensure + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + ^
\ No newline at end of file diff --git a/yql/essentials/tests/sql/minirun/part3/canondata/result.json b/yql/essentials/tests/sql/minirun/part3/canondata/result.json index a49befd327f..7b06929c098 100644 --- a/yql/essentials/tests/sql/minirun/part3/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part3/canondata/result.json @@ -1521,5 +1521,47 @@ "size": 2434, "uri": "https://{canondata_backend}/1777230/5e7f4f59451d2786c03123facb2a9cec2ff42353/resource.tar.gz#test.test_window-win_group_peephole-default.txt-Results_/results.txt" } + ], + "test.test[window-win_range_float--Debug]": [ + { + "checksum": "0d2d82258bb88cfed82d2eca1d36aae8", + "size": 2013, + "uri": "https://{canondata_backend}/1942415/7b6d0aa3a54d3e4f890b6754f1cb4e6217c888d0/resource.tar.gz#test.test_window-win_range_float--Debug_/opt.yql" + } + ], + "test.test[window-win_range_float--Results]": [ + { + "checksum": "05574cedd3319b24bf77098bae8204a2", + "size": 1403, + "uri": "https://{canondata_backend}/1889210/66a223cf4e14995d44a609d001bc322262820d96/resource.tar.gz#test.test_window-win_range_float--Results_/results.txt" + } + ], + "test.test[window-win_range_string_success--Debug]": [ + { + "checksum": "33e46a0356f2a7955ecfd866f81afbe8", + "size": 1477, + "uri": "https://{canondata_backend}/1942415/7b6d0aa3a54d3e4f890b6754f1cb4e6217c888d0/resource.tar.gz#test.test_window-win_range_string_success--Debug_/opt.yql" + } + ], + "test.test[window-win_range_string_success--Results]": [ + { + "checksum": "d96ce6dec2fbc3457fcb0833ff24c873", + "size": 1060, + "uri": "https://{canondata_backend}/1889210/66a223cf4e14995d44a609d001bc322262820d96/resource.tar.gz#test.test_window-win_range_string_success--Results_/results.txt" + } + ], + "test.test[window-win_range_tzdatetime--Debug]": [ + { + "checksum": "942c5500cc850d33c016847ce06110fd", + "size": 1827, + "uri": 
"https://{canondata_backend}/1942415/7b6d0aa3a54d3e4f890b6754f1cb4e6217c888d0/resource.tar.gz#test.test_window-win_range_tzdatetime--Debug_/opt.yql" + } + ], + "test.test[window-win_range_tzdatetime--Results]": [ + { + "checksum": "e419ee4ba6c42b590fdccde3949ab61c", + "size": 1206, + "uri": "https://{canondata_backend}/1889210/66a223cf4e14995d44a609d001bc322262820d96/resource.tar.gz#test.test_window-win_range_tzdatetime--Results_/results.txt" + } ] } diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json index 71b83380fb2..17fc54855c2 100644 --- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json @@ -2079,6 +2079,62 @@ "uri": "https://{canondata_backend}/1899731/28742e9a38b138bb0af7f4cb8effc6378a2e6625/resource.tar.gz#test.test_union_all-union_all_incompatible-default.txt-Results_/results.txt" } ], + "test.test[window-win_range_tzdate32--Debug]": [ + { + "checksum": "85e2fc3beff38c1d9f149e4307e4fa95", + "size": 1564, + "uri": "https://{canondata_backend}/1599023/d60a483c8471811c129bd7bed1b7f6d295d581e4/resource.tar.gz#test.test_window-win_range_tzdate32--Debug_/opt.yql" + } + ], + "test.test[window-win_range_tzdate32--Results]": [ + { + "checksum": "787d452ab683e65eee8e6bce811c35af", + "size": 987, + "uri": "https://{canondata_backend}/1942278/7e85b51c46267e1bf6df91e2940f54ad3bb724d5/resource.tar.gz#test.test_window-win_range_tzdate32--Results_/results.txt" + } + ], + "test.test[window-win_range_uint32--Debug]": [ + { + "checksum": "41be6d47aeeb3189777ccc3b8357441f", + "size": 2110, + "uri": "https://{canondata_backend}/1599023/d60a483c8471811c129bd7bed1b7f6d295d581e4/resource.tar.gz#test.test_window-win_range_uint32--Debug_/opt.yql" + } + ], + "test.test[window-win_range_uint32--Results]": [ + { + "checksum": "723905774c33df624731afe4a7dc2d2f", + "size": 1412, + "uri": 
"https://{canondata_backend}/1942278/7e85b51c46267e1bf6df91e2940f54ad3bb724d5/resource.tar.gz#test.test_window-win_range_uint32--Results_/results.txt" + } + ], + "test.test[window-win_range_uint8--Debug]": [ + { + "checksum": "c83614de491b493f2623e8621604b478", + "size": 2131, + "uri": "https://{canondata_backend}/1599023/d60a483c8471811c129bd7bed1b7f6d295d581e4/resource.tar.gz#test.test_window-win_range_uint8--Debug_/opt.yql" + } + ], + "test.test[window-win_range_uint8--Results]": [ + { + "checksum": "a27ca8b8e6e9f7fe7b578e4bcbbc19ae", + "size": 1698, + "uri": "https://{canondata_backend}/1942278/7e85b51c46267e1bf6df91e2940f54ad3bb724d5/resource.tar.gz#test.test_window-win_range_uint8--Results_/results.txt" + } + ], + "test.test[window-win_range_unbounded_non_compact--Debug]": [ + { + "checksum": "c29a02bb9b6b2016616dca819474e3da", + "size": 1971, + "uri": "https://{canondata_backend}/1599023/d60a483c8471811c129bd7bed1b7f6d295d581e4/resource.tar.gz#test.test_window-win_range_unbounded_non_compact--Debug_/opt.yql" + } + ], + "test.test[window-win_range_unbounded_non_compact--Results]": [ + { + "checksum": "fd2a1086451c4b03d885b341c514e553", + "size": 1407, + "uri": "https://{canondata_backend}/1942278/7e85b51c46267e1bf6df91e2940f54ad3bb724d5/resource.tar.gz#test.test_window-win_range_unbounded_non_compact--Results_/results.txt" + } + ], "test.test[window-win_with_as_table-default.txt-Debug]": [ { "checksum": "7c2709c5c8d1842e45523f18744b56f6", diff --git a/yql/essentials/tests/sql/minirun/part5/canondata/result.json b/yql/essentials/tests/sql/minirun/part5/canondata/result.json index 0ef0c06f729..506f7cd1389 100644 --- a/yql/essentials/tests/sql/minirun/part5/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part5/canondata/result.json @@ -2183,5 +2183,47 @@ "size": 4954, "uri": "https://{canondata_backend}/1871102/08c95297ac754f0fe3fecfae40e6440beb75dd66/resource.tar.gz#test.test_window-udaf_with_def_value-default.txt-Results_/results.txt" } + ], + 
"test.test[window-win_range_date32--Debug]": [ + { + "checksum": "1d6c5d4ff922b3e50052cc5b3d0096bd", + "size": 1492, + "uri": "https://{canondata_backend}/1937367/d5c6f47da27e7bb1b38500104c909a1b0c5047e0/resource.tar.gz#test.test_window-win_range_date32--Debug_/opt.yql" + } + ], + "test.test[window-win_range_date32--Results]": [ + { + "checksum": "4ba8418e9e6a43f1c4208296d39eed41", + "size": 987, + "uri": "https://{canondata_backend}/1942173/6473d32519b28df787f5dcefdfa39d698f7ce5c8/resource.tar.gz#test.test_window-win_range_date32--Results_/results.txt" + } + ], + "test.test[window-win_range_interval--Debug]": [ + { + "checksum": "7512fa519cf26c1700d442325ad3d90a", + "size": 1516, + "uri": "https://{canondata_backend}/1937367/d5c6f47da27e7bb1b38500104c909a1b0c5047e0/resource.tar.gz#test.test_window-win_range_interval--Debug_/opt.yql" + } + ], + "test.test[window-win_range_interval--Results]": [ + { + "checksum": "d8b1c92636ad3bf40250f918d8988ed7", + "size": 987, + "uri": "https://{canondata_backend}/1942173/6473d32519b28df787f5dcefdfa39d698f7ce5c8/resource.tar.gz#test.test_window-win_range_interval--Results_/results.txt" + } + ], + "test.test[window-win_range_timestamp--Debug]": [ + { + "checksum": "ccc84c02f6174531092668f6591c4eb3", + "size": 1525, + "uri": "https://{canondata_backend}/1937367/d5c6f47da27e7bb1b38500104c909a1b0c5047e0/resource.tar.gz#test.test_window-win_range_timestamp--Debug_/opt.yql" + } + ], + "test.test[window-win_range_timestamp--Results]": [ + { + "checksum": "4ba8418e9e6a43f1c4208296d39eed41", + "size": 987, + "uri": "https://{canondata_backend}/1942173/6473d32519b28df787f5dcefdfa39d698f7ce5c8/resource.tar.gz#test.test_window-win_range_timestamp--Results_/results.txt" + } ] } diff --git a/yql/essentials/tests/sql/minirun/part6/canondata/result.json b/yql/essentials/tests/sql/minirun/part6/canondata/result.json index 0c4e4b83982..9804f808014 100644 --- a/yql/essentials/tests/sql/minirun/part6/canondata/result.json +++ 
b/yql/essentials/tests/sql/minirun/part6/canondata/result.json @@ -1860,5 +1860,61 @@ "size": 2663, "uri": "https://{canondata_backend}/1817427/83903caba057ddc7f2c6584714c26b7c6a60a3ab/resource.tar.gz#test.test_window-rank_nulls_legacy-default.txt-Results_/results.txt" } + ], + "test.test[window-win_range_datetime64--Debug]": [ + { + "checksum": "632c988ab8ebca9cba710c831e302b4a", + "size": 1530, + "uri": "https://{canondata_backend}/1599023/c6beed05a0654750d6dd35f156eb5e92e272fef2/resource.tar.gz#test.test_window-win_range_datetime64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_datetime64--Results]": [ + { + "checksum": "d8b1c92636ad3bf40250f918d8988ed7", + "size": 987, + "uri": "https://{canondata_backend}/1924537/09925e74ce5b206f610213346c641877087da325/resource.tar.gz#test.test_window-win_range_datetime64--Results_/results.txt" + } + ], + "test.test[window-win_range_int16--Debug]": [ + { + "checksum": "4f8babcb7143b6e9a7bd32b9ae5e5e9b", + "size": 2156, + "uri": "https://{canondata_backend}/1599023/c6beed05a0654750d6dd35f156eb5e92e272fef2/resource.tar.gz#test.test_window-win_range_int16--Debug_/opt.yql" + } + ], + "test.test[window-win_range_int16--Results]": [ + { + "checksum": "31be6a8314d9c22a8575e7936482336d", + "size": 1703, + "uri": "https://{canondata_backend}/1924537/09925e74ce5b206f610213346c641877087da325/resource.tar.gz#test.test_window-win_range_int16--Results_/results.txt" + } + ], + "test.test[window-win_range_int32--Debug]": [ + { + "checksum": "4d6a62c6bf21c2593b4879f184f1f5a1", + "size": 2939, + "uri": "https://{canondata_backend}/1599023/c6beed05a0654750d6dd35f156eb5e92e272fef2/resource.tar.gz#test.test_window-win_range_int32--Debug_/opt.yql" + } + ], + "test.test[window-win_range_int32--Results]": [ + { + "checksum": "99fd822e9f9fa847c80ca0ce300bc78b", + "size": 2781, + "uri": "https://{canondata_backend}/1924537/09925e74ce5b206f610213346c641877087da325/resource.tar.gz#test.test_window-win_range_int32--Results_/results.txt" + } + ], 
+ "test.test[window-win_range_int64--Debug]": [ + { + "checksum": "de9b719efb4d6442fe5e0d7751ae18a0", + "size": 2038, + "uri": "https://{canondata_backend}/1599023/c6beed05a0654750d6dd35f156eb5e92e272fef2/resource.tar.gz#test.test_window-win_range_int64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_int64--Results]": [ + { + "checksum": "c7dcff5bc39d0d5de7dff008a50bcfc4", + "size": 1423, + "uri": "https://{canondata_backend}/1924537/09925e74ce5b206f610213346c641877087da325/resource.tar.gz#test.test_window-win_range_int64--Results_/results.txt" + } ] } diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json index 40644dbb6ac..0078139c334 100644 --- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json @@ -1656,6 +1656,51 @@ "uri": "https://{canondata_backend}/1817427/cd7fe4c1c700931e8c564489ae0d616c780dd82b/resource.tar.gz#test.test_window-win_func_percent_rank-default.txt-Results_/results.txt" } ], + "test.test[window-win_range_double--Debug]": [ + { + "checksum": "d21780ec012c37ad4ebc77d4197f051c", + "size": 2598, + "uri": "https://{canondata_backend}/1942415/145bc86597611c9d3d2463dc11b7685e37f15121/resource.tar.gz#test.test_window-win_range_double--Debug_/opt.yql" + } + ], + "test.test[window-win_range_double--Results]": [ + { + "checksum": "4a684f17188cd2d3b302d7d2398be4d2", + "size": 2094, + "uri": "https://{canondata_backend}/1784826/43bd381e92058647bc0e86e95ac2ba41fdde5f73/resource.tar.gz#test.test_window-win_range_double--Results_/results.txt" + }, + { + "uri": "file://test.test_window-win_range_double--Results_/extracted" + } + ], + "test.test[window-win_range_int8--Debug]": [ + { + "checksum": "65f986fdaae957c8995aadbffa996d49", + "size": 2865, + "uri": "https://{canondata_backend}/1942415/145bc86597611c9d3d2463dc11b7685e37f15121/resource.tar.gz#test.test_window-win_range_int8--Debug_/opt.yql" 
+ } + ], + "test.test[window-win_range_int8--Results]": [ + { + "checksum": "2e3360cb27a204b4646f88f3a9e2f358", + "size": 2701, + "uri": "https://{canondata_backend}/1924537/bd5e62bd933224323303716cb90137141d0e39ad/resource.tar.gz#test.test_window-win_range_int8--Results_/results.txt" + } + ], + "test.test[window-win_range_tztimestamp64--Debug]": [ + { + "checksum": "c2752a85fb26b5bdc84fc14ee1cb0069", + "size": 1541, + "uri": "https://{canondata_backend}/1942415/145bc86597611c9d3d2463dc11b7685e37f15121/resource.tar.gz#test.test_window-win_range_tztimestamp64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_tztimestamp64--Results]": [ + { + "checksum": "a4e1edc209e5e7b458a3c4c1fedbe9df", + "size": 987, + "uri": "https://{canondata_backend}/1924537/bd5e62bd933224323303716cb90137141d0e39ad/resource.tar.gz#test.test_window-win_range_tztimestamp64--Results_/results.txt" + } + ], "test.test[window-yql-19709-default.txt-Debug]": [ { "checksum": "b4f5b1907698dfd26478f9b7345f7794", diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/test.test_window-win_range_double--Results_/extracted b/yql/essentials/tests/sql/minirun/part7/canondata/test.test_window-win_range_double--Results_/extracted new file mode 100644 index 00000000000..36cd19deff4 --- /dev/null +++ b/yql/essentials/tests/sql/minirun/part7/canondata/test.test_window-win_range_double--Results_/extracted @@ -0,0 +1,5 @@ +<tmp_path>/program.sql:<main>: Warning: Parse Sql + + <tmp_path>/program.sql:<main>:39:1: Warning: Symbol $str is not used + $str = ($x) -> { + ^
\ No newline at end of file diff --git a/yql/essentials/tests/sql/minirun/part8/canondata/result.json b/yql/essentials/tests/sql/minirun/part8/canondata/result.json index 65d538dc199..730eedead1e 100644 --- a/yql/essentials/tests/sql/minirun/part8/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part8/canondata/result.json @@ -1774,5 +1774,47 @@ "size": 14988, "uri": "https://{canondata_backend}/1937027/768c048f063cb934e42138f86a1cf134863bee57/resource.tar.gz#test.test_window-fuse_with_rename_session_start_first-default.txt-Results_/results.txt" } + ], + "test.test[window-win_range_timestamp64--Debug]": [ + { + "checksum": "ad308cf67de4aa7b6375363cd467950f", + "size": 1546, + "uri": "https://{canondata_backend}/1903280/07355a9d83b3f401ae28f4fdf7530075a1f5522a/resource.tar.gz#test.test_window-win_range_timestamp64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_timestamp64--Results]": [ + { + "checksum": "a575cbd7f19992ab67f1cfade2c77758", + "size": 987, + "uri": "https://{canondata_backend}/1942525/99c15fa016128c7fb56eed5e8923d2dab837b3cd/resource.tar.gz#test.test_window-win_range_timestamp64--Results_/results.txt" + } + ], + "test.test[window-win_range_tzdatetime64--Debug]": [ + { + "checksum": "dd86e84592a6a8aa0f24179147380168", + "size": 1783, + "uri": "https://{canondata_backend}/1903280/07355a9d83b3f401ae28f4fdf7530075a1f5522a/resource.tar.gz#test.test_window-win_range_tzdatetime64--Debug_/opt.yql" + } + ], + "test.test[window-win_range_tzdatetime64--Results]": [ + { + "checksum": "97ddb669e7c762b87a2e058349039a60", + "size": 1206, + "uri": "https://{canondata_backend}/1942525/99c15fa016128c7fb56eed5e8923d2dab837b3cd/resource.tar.gz#test.test_window-win_range_tzdatetime64--Results_/results.txt" + } + ], + "test.test[window-win_range_tztimestamp--Debug]": [ + { + "checksum": "e2daa8df0186eef70f55fa9d118d649c", + "size": 1548, + "uri": 
"https://{canondata_backend}/1903280/07355a9d83b3f401ae28f4fdf7530075a1f5522a/resource.tar.gz#test.test_window-win_range_tztimestamp--Debug_/opt.yql" + } + ], + "test.test[window-win_range_tztimestamp--Results]": [ + { + "checksum": "4e65f1b76331d053068bf8a6fc242084", + "size": 914, + "uri": "https://{canondata_backend}/1942525/99c15fa016128c7fb56eed5e8923d2dab837b3cd/resource.tar.gz#test.test_window-win_range_tztimestamp--Results_/results.txt" + } ] } diff --git a/yql/essentials/tests/sql/minirun/part9/canondata/result.json b/yql/essentials/tests/sql/minirun/part9/canondata/result.json index 32c417c69d5..31f91031c18 100644 --- a/yql/essentials/tests/sql/minirun/part9/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part9/canondata/result.json @@ -1873,5 +1873,47 @@ "size": 983, "uri": "https://{canondata_backend}/1871102/116385eb6fe562fc7bfbaa4f34a104024a4dae99/resource.tar.gz#test.test_window-win_over_joined-default.txt-Results_/results.txt" } + ], + "test.test[window-win_range_int64_order_by_multiply--Debug]": [ + { + "checksum": "4916124a2a600b1d21a7544a63d2b7d3", + "size": 1853, + "uri": "https://{canondata_backend}/1942415/abc8ff0bc7ce1b4382b676962a2321d49a930356/resource.tar.gz#test.test_window-win_range_int64_order_by_multiply--Debug_/opt.yql" + } + ], + "test.test[window-win_range_int64_order_by_multiply--Results]": [ + { + "checksum": "1b703b12cc68cead213448c9d445199a", + "size": 1195, + "uri": "https://{canondata_backend}/995452/f47564f6c57d297ad9007057d5687a16a72d1529/resource.tar.gz#test.test_window-win_range_int64_order_by_multiply--Results_/results.txt" + } + ], + "test.test[window-win_range_tzdate--Debug]": [ + { + "checksum": "e0a33e49d4226d5826f2ea159b72b043", + "size": 1506, + "uri": "https://{canondata_backend}/1942415/abc8ff0bc7ce1b4382b676962a2321d49a930356/resource.tar.gz#test.test_window-win_range_tzdate--Debug_/opt.yql" + } + ], + "test.test[window-win_range_tzdate--Results]": [ + { + "checksum": 
"a575cbd7f19992ab67f1cfade2c77758", + "size": 987, + "uri": "https://{canondata_backend}/1942278/0ddda7a1843b8f1bd86bd3f85a3608f01cefd34f/resource.tar.gz#test.test_window-win_range_tzdate--Results_/results.txt" + } + ], + "test.test[window-win_range_unbounded_compact--Debug]": [ + { + "checksum": "d6bf3ea2a4d79802126ffca10d6b63ba", + "size": 2005, + "uri": "https://{canondata_backend}/1942415/abc8ff0bc7ce1b4382b676962a2321d49a930356/resource.tar.gz#test.test_window-win_range_unbounded_compact--Debug_/opt.yql" + } + ], + "test.test[window-win_range_unbounded_compact--Results]": [ + { + "checksum": "fd2a1086451c4b03d885b341c514e553", + "size": 1407, + "uri": "https://{canondata_backend}/1936997/f1acad6ad8fea2ff9a9ddc4955b72dc215714c63/resource.tar.gz#test.test_window-win_range_unbounded_compact--Results_/results.txt" + } ] } diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index dccc1cbe3fb..4499fbf101c 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -9736,6 +9736,237 @@ "uri": "https://{canondata_backend}/1937150/ec0019724df75083b0e89cab22f57e10ef36744e/resource.tar.gz#test_sql2yql.test_window-win_peephole_double_usage_/sql.yql" } ], + "test_sql2yql.test[window-win_range_always_empty]": [ + { + "checksum": "2ef693d3b03cc8c3f4e7aae2bb3ad8bb", + "size": 3899, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_always_empty_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_date32]": [ + { + "checksum": "84d51512d1b4296e63bbc337e9633717", + "size": 3121, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_date32_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_date]": [ + { + "checksum": "52d60db7624fe371ef604361700d7322", + 
"size": 3234, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_date_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_datetime64]": [ + { + "checksum": "2e8aff0050289ba6c109e9cc314419a6", + "size": 3145, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_datetime64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_datetime]": [ + { + "checksum": "6eb63937b94d44377a08d68a182a30b1", + "size": 3286, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_datetime_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_double]": [ + { + "checksum": "a6c70091fabedabb808bc762184563b1", + "size": 5925, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_double_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_float]": [ + { + "checksum": "d8a950a21a1906f09488fee8a026eb5d", + "size": 3935, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_float_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_int16]": [ + { + "checksum": "3631c6caea719a0bac631b1f7a5ca820", + "size": 3903, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_int16_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_int32]": [ + { + "checksum": "99abd04f1ef9c008ac0123e8ef1e8afd", + "size": 6197, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_int32_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_int64]": [ + { + "checksum": "d6424c526be41be015300f479e4912b4", + "size": 
3923, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_int64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_int64_order_by_multiply]": [ + { + "checksum": "66343f5e37b62e24b2160c9ade25f3de", + "size": 3735, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_int64_order_by_multiply_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_int8]": [ + { + "checksum": "e76d148344c6a2035db99d27ddd2e853", + "size": 6141, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_int8_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_interval64]": [ + { + "checksum": "40763826db068ea9e07a0c62e14c8d49", + "size": 4530, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_interval64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_interval]": [ + { + "checksum": "be781f7e90a3b0ee39e95a6bf454ae39", + "size": 3133, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_interval_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_no_sort_current_row]": [ + { + "checksum": "37688fb1e53c8bd60bf8e4dcdb0bab46", + "size": 2951, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_no_sort_current_row_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_string_current_row_fail]": [ + { + "checksum": "594d135d591c0043ed9b4fa1dfefaad5", + "size": 2006, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_string_current_row_fail_/sql.yql" + } + ], + 
"test_sql2yql.test[window-win_range_string_success]": [ + { + "checksum": "f0ba74785bd3ff0a8696f2e1b73f3ae5", + "size": 3174, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_string_success_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_timestamp64]": [ + { + "checksum": "745e3f40873c24217cbad230c20abd50", + "size": 3203, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_timestamp64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_timestamp]": [ + { + "checksum": "96e73cba85259e13a07e8863248fa711", + "size": 3151, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_timestamp_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_two_columns_success]": [ + { + "checksum": "3883650132f04bd03b437993588ef499", + "size": 3205, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_two_columns_success_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_two_columns_with_current_row_fail]": [ + { + "checksum": "f527652b7b97b174cbc60d43bfb5bc01", + "size": 2092, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_two_columns_with_current_row_fail_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_tzdate32]": [ + { + "checksum": "a4365ef89f1049c8defda0e6b8dabf09", + "size": 3202, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_tzdate32_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_tzdate]": [ + { + "checksum": "8b5ed5ef6ec4638b4124aaf7c2383d2c", + "size": 3151, + "uri": 
"https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_tzdate_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_tzdatetime64]": [ + { + "checksum": "115f5d3e4966934a312d67658ae40812", + "size": 3610, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_tzdatetime64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_tzdatetime]": [ + { + "checksum": "6d8fef79eb6efdb4321cd228704bf899", + "size": 3594, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_tzdatetime_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_tztimestamp64]": [ + { + "checksum": "a4f5f46e05c4fecf196e3ad27f4bc797", + "size": 3167, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_tztimestamp64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_tztimestamp]": [ + { + "checksum": "eeab5e42c4eda6809499b5f1478ded52", + "size": 3108, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_tztimestamp_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_uint16]": [ + { + "checksum": "9e0277c25ec78b91a4ed4e781dac54bd", + "size": 3918, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_uint16_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_uint32]": [ + { + "checksum": "9cdbdd22c659581853def10f97aea862", + "size": 3956, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_uint32_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_uint64]": [ + { + "checksum": 
"b52a29c00d494ea8f67783b156e0add1", + "size": 3913, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_uint64_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_uint8]": [ + { + "checksum": "e438f141a5550ef90f83c93f1fab81c7", + "size": 3892, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_uint8_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_unbounded_compact]": [ + { + "checksum": "5669928ee21a86cba611a0d3e9ed428b", + "size": 3880, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_unbounded_compact_/sql.yql" + } + ], + "test_sql2yql.test[window-win_range_unbounded_non_compact]": [ + { + "checksum": "c9ae323ef7604e673b54e7394d731f7a", + "size": 3868, + "uri": "https://{canondata_backend}/1903280/397315720fa648d3b3b50280c855e13986b1a22e/resource.tar.gz#test_sql2yql.test_window-win_range_unbounded_non_compact_/sql.yql" + } + ], "test_sql2yql.test[window-win_with_as_table]": [ { "checksum": "a0854b2700bb786745d0d98dfeb2c4fa", @@ -15263,6 +15494,171 @@ "uri": "file://test_sql_format.test_window-win_peephole_double_usage_/formatted.sql" } ], + "test_sql_format.test[window-win_range_always_empty]": [ + { + "uri": "file://test_sql_format.test_window-win_range_always_empty_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_date32]": [ + { + "uri": "file://test_sql_format.test_window-win_range_date32_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_date]": [ + { + "uri": "file://test_sql_format.test_window-win_range_date_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_datetime64]": [ + { + "uri": "file://test_sql_format.test_window-win_range_datetime64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_datetime]": [ + { + "uri": 
"file://test_sql_format.test_window-win_range_datetime_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_double]": [ + { + "uri": "file://test_sql_format.test_window-win_range_double_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_float]": [ + { + "uri": "file://test_sql_format.test_window-win_range_float_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_int16]": [ + { + "uri": "file://test_sql_format.test_window-win_range_int16_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_int32]": [ + { + "uri": "file://test_sql_format.test_window-win_range_int32_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_int64]": [ + { + "uri": "file://test_sql_format.test_window-win_range_int64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_int64_order_by_multiply]": [ + { + "uri": "file://test_sql_format.test_window-win_range_int64_order_by_multiply_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_int8]": [ + { + "uri": "file://test_sql_format.test_window-win_range_int8_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_interval64]": [ + { + "uri": "file://test_sql_format.test_window-win_range_interval64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_interval]": [ + { + "uri": "file://test_sql_format.test_window-win_range_interval_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_no_sort_current_row]": [ + { + "uri": "file://test_sql_format.test_window-win_range_no_sort_current_row_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_string_current_row_fail]": [ + { + "uri": "file://test_sql_format.test_window-win_range_string_current_row_fail_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_string_success]": [ + { + "uri": "file://test_sql_format.test_window-win_range_string_success_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_timestamp64]": [ + { + "uri": 
"file://test_sql_format.test_window-win_range_timestamp64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_timestamp]": [ + { + "uri": "file://test_sql_format.test_window-win_range_timestamp_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_two_columns_success]": [ + { + "uri": "file://test_sql_format.test_window-win_range_two_columns_success_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_two_columns_with_current_row_fail]": [ + { + "uri": "file://test_sql_format.test_window-win_range_two_columns_with_current_row_fail_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_tzdate32]": [ + { + "uri": "file://test_sql_format.test_window-win_range_tzdate32_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_tzdate]": [ + { + "uri": "file://test_sql_format.test_window-win_range_tzdate_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_tzdatetime64]": [ + { + "uri": "file://test_sql_format.test_window-win_range_tzdatetime64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_tzdatetime]": [ + { + "uri": "file://test_sql_format.test_window-win_range_tzdatetime_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_tztimestamp64]": [ + { + "uri": "file://test_sql_format.test_window-win_range_tztimestamp64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_tztimestamp]": [ + { + "uri": "file://test_sql_format.test_window-win_range_tztimestamp_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_uint16]": [ + { + "uri": "file://test_sql_format.test_window-win_range_uint16_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_uint32]": [ + { + "uri": "file://test_sql_format.test_window-win_range_uint32_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_uint64]": [ + { + "uri": "file://test_sql_format.test_window-win_range_uint64_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_uint8]": [ + 
{ + "uri": "file://test_sql_format.test_window-win_range_uint8_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_unbounded_compact]": [ + { + "uri": "file://test_sql_format.test_window-win_range_unbounded_compact_/formatted.sql" + } + ], + "test_sql_format.test[window-win_range_unbounded_non_compact]": [ + { + "uri": "file://test_sql_format.test_window-win_range_unbounded_non_compact_/formatted.sql" + } + ], "test_sql_format.test[window-win_with_as_table]": [ { "uri": "file://test_sql_format.test_window-win_with_as_table_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_always_empty_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_always_empty_/formatted.sql new file mode 100644 index 00000000000..c4fcc04c2d0 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_always_empty_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 0|>, + <|a: NULL, b: 1, sum: NULL, count: 0|>, + <|a: uint8('8'), b: 1, sum: NULL, count: 0|>, + <|a: uint8('10'), b: 1, sum: NULL, count: 0|>, + <|a: uint8('11'), b: 1, sum: NULL, count: 0|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN uint8('1') PRECEDING AND uint8('2') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_date32_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_date32_/formatted.sql new file mode 100644 index 00000000000..dda5c6fb20f --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_date32_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: Date32('2017-11-25'), b: 1, count: 2|>, + <|a: Date32('2017-11-26'), b: 1, count: 3|>, + <|a: Date32('2017-11-27'), b: 1, count: 4|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND Interval64('P1D') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_date_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_date_/formatted.sql new file mode 100644 index 00000000000..dbc02715b0a --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_date_/formatted.sql @@ -0,0 +1,37 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Date('2017-11-24'), b: 1, count: 0|>, + <|a: Date('2017-11-25'), b: 1, count: 1|>, + <|a: Date('2017-11-26'), b: 1, count: 2|>, + <|a: Date('2017-11-27'), b: 1, count: 3|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval('P3D') PRECEDING AND Interval('P1D') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_datetime64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_datetime64_/formatted.sql new file mode 100644 index 00000000000..94a1b2a39ea --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_datetime64_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, + <|a: Datetime64('2017-11-27T13:22:00Z'), b: 1, count: 2|>, + <|a: Datetime64('2017-11-27T13:23:00Z'), b: 1, count: 1|>, + <|a: Datetime64('2017-11-27T13:24:00Z'), b: 1, count: 0|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64('PT1M') FOLLOWING AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_datetime_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_datetime_/formatted.sql new file mode 100644 index 00000000000..1ab6b9f5e0c --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_datetime_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Datetime('2017-11-27T13:22:00Z'), b: 1, count: 0|>, + <|a: Datetime('2017-11-27T13:23:00Z'), b: 1, count: 1|>, + <|a: Datetime('2017-11-27T13:24:00Z'), b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a DESC + RANGE BETWEEN Interval('PT1M') FOLLOWING AND Interval('PT3M') FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, 'Got: ' || $str(actual_count) || ', but expected: ' || $str(count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_double_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_double_/formatted.sql new file mode 100644 index 00000000000..1bcf29bd41d --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_double_/formatted.sql @@ -0,0 +1,54 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: double('-10.5'), b: 1, sum1: double('-10.5'), count1: 1, sum2: NULL, count2: 0|>, + <|a: double('-5.0'), b: 1, sum1: double('-15.5'), count1: 2, sum2: NULL, count2: 0|>, + <|a: double('0.0'), b: 1, sum1: double('-5.0'), count1: 2, sum2: double('-5.0'), count2: 1|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum1, + COUNT(*) OVER w1 AS actual_count1, + SUM(a) OVER w2 AS actual_sum2, + COUNT(*) OVER w2 AS actual_count2, + sum1, + count1, + sum2, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN double('10.0') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN double('5.0') PRECEDING AND double('0.5') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 'null'; +}; + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum1, sum1 IS NOT DISTINCT FROM actual_sum1, $str(actual_sum1)), + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(sum2, sum2 IS NOT DISTINCT FROM actual_sum2, $str(actual_sum2)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_float_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_float_/formatted.sql new file mode 100644 index 00000000000..313cd70eee2 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_float_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: float('1.5'), b: 1, sum: float('1.5'), count: 1|>, + <|a: float('2.0'), b: 1, sum: float('3.5'), count: 2|>, + <|a: float('2.8'), b: 1, sum: float('6.3'), count: 3|>, + <|a: float('5.0'), b: 1, sum: float('5.0'), count: 1|>, + <|a: float('6.0'), b: 1, sum: float('11.0'), count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN float('1.5') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int16_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int16_/formatted.sql new file mode 100644 index 00000000000..78f9811181e --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int16_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: int16('-1000'), b: 1, sum: int16('-1500'), count: 5|>, + <|a: int16('-500'), b: 1, sum: int16('-500'), count: 4|>, + <|a: int16('0'), b: 1, sum: int16('0'), count: 3|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + -a DESC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int32_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int32_/formatted.sql new file mode 100644 index 00000000000..d5da54eaece --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int32_/formatted.sql @@ -0,0 +1,52 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: int32('-50000'), b: 1, sum1: int32('-50000'), count1: 1, sum2: NULL, count2: 0|>, + <|a: int32('-10000'), b: 1, sum1: int32('-60000'), count1: 2, sum2: int32('-50000'), count2: 1|>, + <|a: int32('0'), b: 1, sum1: int32('-60000'), count1: 3, sum2: int32('-10000'), count2: 1|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum1, + COUNT(*) OVER w1 AS actual_count1, + SUM(a) OVER w2 AS actual_sum2, + COUNT(*) OVER w2 AS actual_count2, + sum1, + count1, + sum2, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int32('50000') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int32('40000') PRECEDING AND int32('10000') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum1, sum1 IS NOT DISTINCT FROM actual_sum1, $str(actual_sum1)), + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(sum2, sum2 IS NOT DISTINCT FROM actual_sum2, $str(actual_sum2)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int64_/formatted.sql new file mode 100644 index 00000000000..b09eb71db88 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int64_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: int64('-1500000'), count: 5|>, + <|a: NULL, b: 1, sum: int64('-1500000'), count: 5|>, + <|a: int64('-1000000'), b: 1, sum: int64('-1500000'), count: 3|>, + <|a: int64('-500000'), b: 1, sum: int64('-500000'), count: 2|>, + <|a: int64('0'), b: 1, sum: int64('0'), count: 1|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int64_order_by_multiply_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int64_order_by_multiply_/formatted.sql new file mode 100644 index 00000000000..5d7b6de4550 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int64_order_by_multiply_/formatted.sql @@ -0,0 +1,37 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: int64('-1'), b: 1, sum: int64('-3'), count: 2|>, + <|a: int64('-2'), b: 1, sum: int64('-6'), count: 3|>, + <|a: int64('-3'), b: 1, sum: int64('-5'), count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION BY + b + ORDER BY + a * 5 ASC + RANGE BETWEEN 5l PRECEDING AND 5l FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int8_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int8_/formatted.sql new file mode 100644 index 00000000000..4ecbd2bdf9e --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_int8_/formatted.sql @@ -0,0 +1,52 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: int8('-8'), b: 1, sum1: int8('-8'), count1: 1, sum2: NULL, count2: 0|>, + <|a: int8('-5'), b: 1, sum1: int8('-13'), count1: 2, sum2: int8('-8'), count2: 1|>, + <|a: int8('0'), b: 1, sum1: int8('-5'), count1: 2, sum2: NULL, count2: 0|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum1, + COUNT(*) OVER w1 AS actual_count1, + SUM(a) OVER w2 AS actual_sum2, + COUNT(*) OVER w2 AS actual_count2, + sum1, + count1, + sum2, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int8('5') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int8('3') PRECEDING AND int8('2') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum1, sum1 IS NOT DISTINCT FROM actual_sum1, $str(actual_sum1)), + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(sum2, sum2 IS NOT DISTINCT FROM actual_sum2, $str(actual_sum2)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_interval64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_interval64_/formatted.sql new file mode 100644 index 00000000000..2d5181462ef --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_interval64_/formatted.sql @@ -0,0 +1,46 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Interval64('P1DT2H3M4.567888S'), b: 1, count1: 1, count2: 2|>, + <|a: Interval64('P1DT2H3M4.567889S'), b: 1, count1: 2, count2: 2|>, + <|a: Interval64('P1DT2H3M4.567890S'), b: 1, count1: 2, count2: 1|>, + <|a: NULL, b: 1, count1: 2, count2: 2|>, + <|a: NULL, b: 1, count1: 2, count2: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count1, + COUNT(*) OVER w2 AS actual_count2, + count1, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64('PT0.000001S') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND Interval64('PT0.000001S') FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_interval_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_interval_/formatted.sql new file mode 100644 index 00000000000..2cb255ee45e --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_interval_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Interval('P1DT2H3M4.567888S'), b: 1, count: 2|>, + <|a: Interval('P1DT2H3M4.567889S'), b: 1, count: 1|>, + <|a: Interval('P1DT2H3M4.567890S'), b: 1, count: 0|>, + <|a: NULL, b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval('PT0.000001S') FOLLOWING AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_no_sort_current_row_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_no_sort_current_row_/formatted.sql new file mode 100644 index 00000000000..f15e0033008 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_no_sort_current_row_/formatted.sql @@ -0,0 +1,34 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Timestamp('2017-11-27T13:24:00.123454Z'), b: 1, count: 5|>, + <|a: Timestamp('2017-11-27T13:24:00.123455Z'), b: 1, count: 5|>, + <|a: Timestamp('2017-11-27T13:24:00.123456Z'), b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + RANGE BETWEEN CURRENT ROW AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_string_current_row_fail_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_string_current_row_fail_/formatted.sql new file mode 100644 index 00000000000..27b585f75a8 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_string_current_row_fail_/formatted.sql @@ -0,0 +1,23 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +/* custom error: Range frame for non numeric expressions is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW */ +$data = [ + <|a: 'apple', b: 1, count: 1|>, + <|a: 'banana', b: 1, count: 2|>, +]; + +SELECT + COUNT(*) OVER w1 AS actual_count, + count, +FROM + AS_TABLE($data) +WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND CURRENT ROW + ) +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_string_success_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_string_success_/formatted.sql new file mode 100644 index 00000000000..2d4b6902c21 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_string_success_/formatted.sql @@ -0,0 +1,37 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: 'apple', b: 1, count: 3|>, + <|a: 'banana', b: 1, count: 4|>, + <|a: 'cherry', b: 1, count: 6|>, + <|a: 'cherry', b: 1, count: 6|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC 
+ RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_timestamp64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_timestamp64_/formatted.sql new file mode 100644 index 00000000000..567ff24b6cd --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_timestamp64_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Timestamp64('2017-11-27T13:24:00.123454Z'), b: 1, count: 1|>, + <|a: Timestamp64('2017-11-27T13:24:00.123455Z'), b: 1, count: 2|>, + <|a: Timestamp64('2017-11-27T13:24:00.123456Z'), b: 1, count: 2|>, + <|a: Timestamp64('2017-11-27T13:24:00.123457Z'), b: 1, count: 2|>, + <|a: Timestamp64('2017-11-27T13:24:00.123458Z'), b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64('PT0.000001S') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_timestamp_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_timestamp_/formatted.sql new file mode 100644 index 00000000000..1aa8b5c73de --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_timestamp_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Timestamp('2017-11-27T13:24:00.123454Z'), b: 1, count: 2|>, + <|a: Timestamp('2017-11-27T13:24:00.123455Z'), b: 1, count: 3|>, + <|a: Timestamp('2017-11-27T13:24:00.123456Z'), b: 1, count: 4|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND Interval('PT0.000001S') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_two_columns_success_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_two_columns_success_/formatted.sql new file mode 100644 index 00000000000..c49e8e16ab1 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_two_columns_success_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: 'apple', c: 1, b: 1, count: 1|>, + <|a: 'apple', c: 2, b: 1, count: 2|>, + <|a: 'banana', c: 1, b: 1, count: 3|>, + <|a: 'banana', c: 2, b: 1, count: 4|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC, + c ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_two_columns_with_current_row_fail_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_two_columns_with_current_row_fail_/formatted.sql new file mode 100644 index 00000000000..787e7527f32 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_two_columns_with_current_row_fail_/formatted.sql @@ -0,0 +1,25 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +/* custom error: Range frame for multiple expressions is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW. 
*/ +$data = [ + <|a: 1, c: 1, b: 1|>, + <|a: 1, c: 2, b: 1|>, + <|a: 2, c: 1, b: 1|>, + <|a: 2, c: 2, b: 1|>, +]; + +SELECT + COUNT(*) OVER w1 AS cnt, +FROM + AS_TABLE($data) +WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC, + c ASC + RANGE BETWEEN CURRENT ROW AND CURRENT ROW + ) +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdate32_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdate32_/formatted.sql new file mode 100644 index 00000000000..be9ca56009a --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdate32_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzDate32('2017-11-25,Europe/Moscow'), b: 1, count: 0|>, + <|a: TzDate32('2017-11-26,Europe/Moscow'), b: 1, count: 1|>, + <|a: TzDate32('2017-11-27,Europe/Moscow'), b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64('P3D') PRECEDING AND Interval64('P1D') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdate_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdate_/formatted.sql new file mode 100644 index 00000000000..9bb5a15ac73 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdate_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzDate('2017-11-25,Europe/Moscow'), b: 1, count: 1|>, + <|a: TzDate('2017-11-26,Europe/Moscow'), b: 1, count: 2|>, + <|a: TzDate('2017-11-27,Europe/Moscow'), b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval('P1D') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdatetime64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdatetime64_/formatted.sql new file mode 100644 index 00000000000..9d7a7717dc4 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdatetime64_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzDatetime64('2017-11-27T13:22:00,America/Los_Angeles'), b: 1, count: 3|>, + <|a: TzDatetime64('2017-11-27T13:23:00,America/Los_Angeles'), b: 1, count: 3|>, + <|a: TzDatetime64('2017-11-27T13:24:00,America/Los_Angeles'), b: 1, count: 2|>, + <|a: TzDatetime64('2017-11-27T13:22:00,America/Los_Angeles'), b: 2, count: 3|>, + <|a: TzDatetime64('2017-11-27T13:23:00,America/Los_Angeles'), b: 2, count: 3|>, + <|a: TzDatetime64('2017-11-27T13:24:00,America/Los_Angeles'), b: 2, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64('PT1M') PRECEDING AND Interval64('PT3M') FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdatetime_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdatetime_/formatted.sql new file mode 100644 index 00000000000..9ba5e614335 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tzdatetime_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzDatetime('2017-11-27T13:22:00,America/Los_Angeles'), b: 1, count: 0|>, + <|a: TzDatetime('2017-11-27T13:23:00,America/Los_Angeles'), b: 1, count: 1|>, + <|a: TzDatetime('2017-11-27T13:24:00,America/Los_Angeles'), b: 1, count: 2|>, + <|a: TzDatetime('2017-11-27T13:22:00,America/Los_Angeles'), b: 2, count: 0|>, + <|a: TzDatetime('2017-11-27T13:23:00,America/Los_Angeles'), b: 2, count: 1|>, + <|a: TzDatetime('2017-11-27T13:24:00,America/Los_Angeles'), b: 3, count: 0|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval('PT3M') PRECEDING AND Interval('PT1M') PRECEDING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tztimestamp64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tztimestamp64_/formatted.sql new file mode 100644 index 00000000000..cb2f7a4edfe --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tztimestamp64_/formatted.sql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzTimestamp64('2017-11-27T13:24:00.123454,GMT'), b: 1, count: 2|>, + <|a: TzTimestamp64('2017-11-27T13:24:00.123455,GMT'), b: 1, count: 1|>, + <|a: TzTimestamp64('2017-11-27T13:24:00.123459,GMT'), b: 1, count: 1|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND Interval64('PT0.000001S') FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tztimestamp_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tztimestamp_/formatted.sql new file mode 100644 index 00000000000..3fd6f2dfa1e --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_tztimestamp_/formatted.sql @@ -0,0 +1,35 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzTimestamp('2017-11-27T13:24:00.123454,GMT'), b: 1, count: 2|>, + <|a: TzTimestamp('2017-11-27T13:24:00.123455,GMT'), b: 1, count: 1|>, + <|a: TzTimestamp('2017-11-27T13:24:00.123456,GMT'), b: 1, count: 0|>, + <|a: NULL, b: 1, count: 1|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval('PT0.000001S') FOLLOWING AND Interval('PT0.000003S') FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint16_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint16_/formatted.sql new file mode 100644 index 00000000000..b8370c61bd4 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint16_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: uint16('100'), b: 1, sum: uint16('100'), count: 3|>, + <|a: uint16('200'), b: 1, sum: uint16('300'), count: 4|>, + <|a: uint16('250'), b: 1, sum: uint16('550'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND uint16('10') FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint32_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint32_/formatted.sql new file mode 100644 index 00000000000..244f6530525 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint32_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: uint32('1000'), b: 1, sum: uint32('1000'), count: 1|>, + <|a: uint32('2000'), b: 1, sum: uint32('3000'), count: 2|>, + <|a: uint32('2500'), b: 1, sum: uint32('4500'), count: 2|>, + <|a: uint32('3000'), b: 1, sum: uint32('7500'), count: 3|>, + <|a: uint32('5000'), b: 1, sum: uint32('5000'), count: 1|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN uint32('1000') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint64_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint64_/formatted.sql new file mode 100644 index 00000000000..c8d95198094 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint64_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: uint64('1000000'), b: 1, sum: uint64('1000000'), count: 3|>, + <|a: uint64('2000000'), b: 1, sum: uint64('3000000'), count: 4|>, + <|a: uint64('3000000'), b: 1, sum: uint64('6000000'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint8_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint8_/formatted.sql new file mode 100644 index 00000000000..0559b41284e --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_uint8_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: uint8('8'), b: 1, sum: uint8('8'), count: 1|>, + <|a: uint8('10'), b: 1, sum: uint8('10'), count: 1|>, + <|a: uint8('11'), b: 1, sum: uint8('21'), count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN uint8('1') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_unbounded_compact_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_unbounded_compact_/formatted.sql new file mode 100644 index 00000000000..0efd929a58a --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_unbounded_compact_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('100'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('200'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('250'), b: 1, sum: uint16('550'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_unbounded_non_compact_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_unbounded_non_compact_/formatted.sql new file mode 100644 index 00000000000..47a5828b134 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_window-win_range_unbounded_non_compact_/formatted.sql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('100'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('200'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('250'), b: 1, sum: uint16('550'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + RETURN CAST($x AS String) ?? 
'null'; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/suites/window/win_range_always_empty.cfg b/yql/essentials/tests/sql/suites/window/win_range_always_empty.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_always_empty.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_always_empty.yql b/yql/essentials/tests/sql/suites/window/win_range_always_empty.yql new file mode 100644 index 00000000000..701b7451ce2 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_always_empty.yql @@ -0,0 +1,40 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 0|>, + <|a: NULL, b: 1, sum: NULL, count: 0|>, + <|a: uint8('8'), b: 1, sum: NULL, count: 0|>, + <|a: uint8('10'), b: 1, sum: NULL, count: 0|>, + <|a: uint8('11'), b: 1, sum: NULL, count: 0|>, + +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN uint8('1') PRECEDING AND uint8('2') PRECEDING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? 
"null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/suites/window/win_range_date.cfg b/yql/essentials/tests/sql/suites/window/win_range_date.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_date.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_date.yql b/yql/essentials/tests/sql/suites/window/win_range_date.yql new file mode 100644 index 00000000000..91dd8c07419 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_date.yql @@ -0,0 +1,37 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: Date("2017-11-24"), b: 1, count: 0|>, + <|a: Date("2017-11-25"), b: 1, count: 1|>, + <|a: Date("2017-11-26"), b: 1, count: 2|>, + <|a: Date("2017-11-27"), b: 1, count: 3|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval("P3D") PRECEDING AND Interval("P1D") PRECEDING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_date32.cfg b/yql/essentials/tests/sql/suites/window/win_range_date32.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_date32.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_date32.yql b/yql/essentials/tests/sql/suites/window/win_range_date32.yql new file mode 100644 index 00000000000..747b5df4c00 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_date32.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ /* fixture: a is the ORDER BY key, b the partition key, count the frame size expected for that row */ + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: Date32("2017-11-25"), b: 1, count: 2|>, + <|a: Date32("2017-11-26"), b: 1, count: 3|>, + <|a: Date32("2017-11-27"), b: 1, count: 4|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND Interval64("P1D") PRECEDING /* frame ends at keys one day or more before the current key; the fixture expects NULL keys to sort first and to count only each other */ + ) +); + +$str = ($x) -> { /* String form of $x, or the text null when $x is NULL; used as the Ensure message */ + return CAST($x as String) ?? "null"; +}; + +SELECT /* compares expected and computed count per row with a NULL-safe test; NOTE(review): assumes Ensure aborts the query with the message on mismatch, confirm in YQL docs */ + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_datetime.cfg b/yql/essentials/tests/sql/suites/window/win_range_datetime.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_datetime.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_datetime.yql b/yql/essentials/tests/sql/suites/window/win_range_datetime.yql new file mode 100644 index 00000000000..12a70dfaa4d --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_datetime.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ /* fixture: a is the ORDER BY key, b the partition key, count the frame size expected for that row */ + <|a: Datetime("2017-11-27T13:22:00Z"), b: 1, count: 0|>, + <|a: Datetime("2017-11-27T13:23:00Z"), b: 1, count: 1|>, + <|a: Datetime("2017-11-27T13:24:00Z"), b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a DESC + RANGE BETWEEN Interval("PT1M") FOLLOWING AND Interval("PT3M") FOLLOWING /* ordering is DESC, so FOLLOWING rows carry earlier timestamps: the frame spans keys one to three minutes before the current key, matching counts 0, 1, 2 in the fixture */ + ) +); + +$str = ($x) -> { /* String form of $x, or the text null when $x is NULL; used to build the Ensure message */ + return CAST($x as String) ?? "null"; +}; + +SELECT /* compares expected and computed count per row with a NULL-safe test; the message names both values; NOTE(review): assumes Ensure aborts the query with the message on mismatch, confirm in YQL docs */ + Ensure(count, count IS NOT DISTINCT FROM actual_count, "Got: " || $str(actual_count) || ", but expected: " || $str(count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_datetime64.cfg b/yql/essentials/tests/sql/suites/window/win_range_datetime64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_datetime64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_datetime64.yql b/yql/essentials/tests/sql/suites/window/win_range_datetime64.yql new file mode 100644 index 00000000000..68c2a7561b0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_datetime64.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a Datetime64 key with an Interval64 offset: 1 minute FOLLOWING .. UNBOUNDED FOLLOWING, a ASC. Each fixture row stores in "count" the COUNT(*) expected for it; two rows have a NULL key. */ +$data = [ + <|a: NULL, b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, + <|a: Datetime64("2017-11-27T13:22:00Z"), b: 1, count: 2|>, + <|a: Datetime64("2017-11-27T13:23:00Z"), b: 1, count: 1|>, + <|a: Datetime64("2017-11-27T13:24:00Z"), b: 1, count: 0|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64("PT1M") FOLLOWING AND UNBOUNDED FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_double.cfg b/yql/essentials/tests/sql/suites/window/win_range_double.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_double.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_double.yql b/yql/essentials/tests/sql/suites/window/win_range_double.yql new file mode 100644 index 00000000000..5207c207874 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_double.yql @@ -0,0 +1,50 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frames over a Double key, a ASC. w1: 10.0 PRECEDING .. CURRENT ROW; w2: 5.0 PRECEDING .. 0.5 PRECEDING. Each fixture row stores the SUM(a) and COUNT(*) expected for it under w1 (sum1, count1) and w2 (sum2, count2). */ +$data = [ + <|a: double('-10.5'), b: 1, sum1: double('-10.5'), count1: 1, sum2: NULL, count2: 0|>, + <|a: double('-5.0'), b: 1, sum1: double('-15.5'), count1: 2, sum2: NULL, count2: 0|>, + <|a: double('0.0'), b: 1, sum1: double('-5.0'), count1: 2, sum2: double('-5.0'), count2: 1|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum1, + COUNT(*) OVER w1 AS actual_count1, + SUM(a) OVER w2 AS actual_sum2, + COUNT(*) OVER w2 AS actual_count2, + sum1, + count1, + sum2, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN double('10.0') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN double('5.0') PRECEDING AND double('0.5') PRECEDING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ??
"null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum1, sum1 IS NOT DISTINCT FROM actual_sum1, $str(actual_sum1)), + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(sum2, sum2 IS NOT DISTINCT FROM actual_sum2, $str(actual_sum2)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/suites/window/win_range_float.cfg b/yql/essentials/tests/sql/suites/window/win_range_float.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_float.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_float.yql b/yql/essentials/tests/sql/suites/window/win_range_float.yql new file mode 100644 index 00000000000..ec69c8411a8 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_float.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a Float key: 1.5 PRECEDING .. CURRENT ROW, a ASC. Each fixture row stores the SUM(a) and COUNT(*) expected for it in "sum" and "count". */ +$data = [ + <|a: float('1.5'), b: 1, sum: float('1.5'), count: 1|>, + <|a: float('2.0'), b: 1, sum: float('3.5'), count: 2|>, + <|a: float('2.8'), b: 1, sum: float('6.3'), count: 3|>, + <|a: float('5.0'), b: 1, sum: float('5.0'), count: 1|>, + <|a: float('6.0'), b: 1, sum: float('11.0'), count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN float('1.5') PRECEDING AND CURRENT ROW + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_int16.cfg b/yql/essentials/tests/sql/suites/window/win_range_int16.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int16.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_int16.yql b/yql/essentials/tests/sql/suites/window/win_range_int16.yql new file mode 100644 index 00000000000..26e40e3e285 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int16.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame CURRENT ROW .. UNBOUNDED FOLLOWING over an Int16 column, ordered by the expression -a DESC. Each fixture row stores the SUM(a) and COUNT(*) expected for it in "sum" and "count"; two rows have a NULL key. */ +$data = [ + <|a: int16('-1000'), b: 1, sum: int16('-1500'), count: 5|>, + <|a: int16('-500'), b: 1, sum: int16('-500'), count: 4|>, + <|a: int16('0'), b: 1, sum: int16('0'), count: 3|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + -a DESC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_int32.cfg b/yql/essentials/tests/sql/suites/window/win_range_int32.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int32.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_int32.yql b/yql/essentials/tests/sql/suites/window/win_range_int32.yql new file mode 100644 index 00000000000..a9a15439877 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int32.yql @@ -0,0 +1,52 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frames over an Int32 key, a ASC. w1: 50000 PRECEDING .. CURRENT ROW; w2: 40000 PRECEDING .. 10000 PRECEDING. Each fixture row stores the SUM(a) and COUNT(*) expected for it under w1 (sum1, count1) and w2 (sum2, count2); two rows have a NULL key. */ +$data = [ + <|a: int32('-50000'), b: 1, sum1: int32('-50000'), count1: 1, sum2: NULL, count2: 0|>, + <|a: int32('-10000'), b: 1, sum1: int32('-60000'), count1: 2, sum2: int32('-50000'), count2: 1|>, + <|a: int32('0'), b: 1, sum1: int32('-60000'), count1: 3, sum2: int32('-10000'), count2: 1|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum1, + COUNT(*) OVER w1 AS actual_count1, + SUM(a) OVER w2 AS actual_sum2, + COUNT(*) OVER w2 AS actual_count2, + sum1, + count1, + sum2, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int32('50000') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int32('40000') PRECEDING AND int32('10000') PRECEDING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ??
"null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum1, sum1 IS NOT DISTINCT FROM actual_sum1, $str(actual_sum1)), + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(sum2, sum2 IS NOT DISTINCT FROM actual_sum2, $str(actual_sum2)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_int64.cfg b/yql/essentials/tests/sql/suites/window/win_range_int64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_int64.yql b/yql/essentials/tests/sql/suites/window/win_range_int64.yql new file mode 100644 index 00000000000..8a852fe1b54 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int64.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame CURRENT ROW .. UNBOUNDED FOLLOWING over an Int64 key, a ASC. Each fixture row stores the SUM(a) and COUNT(*) expected for it in "sum" and "count"; two rows have a NULL key. */ +$data = [ + <|a: NULL, b: 1, sum: int64('-1500000'), count: 5|>, + <|a: NULL, b: 1, sum: int64('-1500000'), count: 5|>, + <|a: int64('-1000000'), b: 1, sum: int64('-1500000'), count: 3|>, + <|a: int64('-500000'), b: 1, sum: int64('-500000'), count: 2|>, + <|a: int64('0'), b: 1, sum: int64('0'), count: 1|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_int64_order_by_multiply.cfg b/yql/essentials/tests/sql/suites/window/win_range_int64_order_by_multiply.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int64_order_by_multiply.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_int64_order_by_multiply.yql b/yql/essentials/tests/sql/suites/window/win_range_int64_order_by_multiply.yql new file mode 100644 index 00000000000..b6027791153 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int64_order_by_multiply.yql @@ -0,0 +1,37 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame whose sort key is the expression a * 5 (ASC): 5 PRECEDING .. 5 FOLLOWING, so the offsets apply to a * 5 rather than to a. Each fixture row stores the SUM(a) and COUNT(*) expected for it in "sum" and "count". Unlike the sibling cases this window uses PARTITION BY without COMPACT. */ +$data = [ + <|a: int64('-1'), b: 1, sum: int64('-3'), count: 2|>, + <|a: int64('-2'), b: 1, sum: int64('-6'), count: 3|>, + <|a: int64('-3'), b: 1, sum: int64('-5'), count: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION BY + b + ORDER BY + a * 5 ASC + RANGE BETWEEN 5l PRECEDING AND 5l FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_int8.cfg b/yql/essentials/tests/sql/suites/window/win_range_int8.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int8.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_int8.yql b/yql/essentials/tests/sql/suites/window/win_range_int8.yql new file mode 100644 index 00000000000..837d951d058 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_int8.yql @@ -0,0 +1,52 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frames over an Int8 key, a ASC. w1: 5 PRECEDING .. CURRENT ROW; w2: 3 PRECEDING .. 2 PRECEDING. Each fixture row stores the SUM(a) and COUNT(*) expected for it under w1 (sum1, count1) and w2 (sum2, count2); two rows have a NULL key. */ +$data = [ + <|a: int8('-8'), b: 1, sum1: int8('-8'), count1: 1, sum2: NULL, count2: 0|>, + <|a: int8('-5'), b: 1, sum1: int8('-13'), count1: 2, sum2: int8('-8'), count2: 1|>, + <|a: int8('0'), b: 1, sum1: int8('-5'), count1: 2, sum2: NULL, count2: 0|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, + <|a: NULL, b: 1, sum1: NULL, count1: 2, sum2: NULL, count2: 2|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum1, + COUNT(*) OVER w1 AS actual_count1, + SUM(a) OVER w2 AS actual_sum2, + COUNT(*) OVER w2 AS actual_count2, + sum1, + count1, + sum2, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int8('5') PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN int8('3') PRECEDING AND int8('2') PRECEDING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(sum1, sum1 IS NOT DISTINCT FROM actual_sum1, $str(actual_sum1)), + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(sum2, sum2 IS NOT DISTINCT FROM actual_sum2, $str(actual_sum2)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_interval.cfg b/yql/essentials/tests/sql/suites/window/win_range_interval.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_interval.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_interval.yql b/yql/essentials/tests/sql/suites/window/win_range_interval.yql new file mode 100644 index 00000000000..9c11cfaeccd --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_interval.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over an Interval key with an Interval offset: 1 microsecond FOLLOWING .. UNBOUNDED FOLLOWING, a ASC. The non-NULL keys differ by exactly one microsecond. Each fixture row stores in "count" the COUNT(*) expected for it; two rows have a NULL key. */ +$data = [ + <|a: Interval("P1DT2H3M4.567888S"), b: 1, count: 2|>, + <|a: Interval("P1DT2H3M4.567889S"), b: 1, count: 1|>, + <|a: Interval("P1DT2H3M4.567890S"), b: 1, count: 0|>, + <|a: NULL, b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval("PT0.000001S") FOLLOWING AND UNBOUNDED FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_interval64.cfg b/yql/essentials/tests/sql/suites/window/win_range_interval64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_interval64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_interval64.yql b/yql/essentials/tests/sql/suites/window/win_range_interval64.yql new file mode 100644 index 00000000000..0532c304f7f --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_interval64.yql @@ -0,0 +1,46 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frames over an Interval64 key with Interval64 offsets, a ASC. w1: 1 microsecond PRECEDING .. CURRENT ROW; w2: CURRENT ROW .. 1 microsecond FOLLOWING. Each fixture row stores the COUNT(*) expected for it under w1 (count1) and w2 (count2); two rows have a NULL key. */ +$data = [ + <|a: Interval64("P1DT2H3M4.567888S"), b: 1, count1: 1, count2: 2|>, + <|a: Interval64("P1DT2H3M4.567889S"), b: 1, count1: 2, count2: 2|>, + <|a: Interval64("P1DT2H3M4.567890S"), b: 1, count1: 2, count2: 1|>, + <|a: NULL, b: 1, count1: 2, count2: 2|>, + <|a: NULL, b: 1, count1: 2, count2: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count1, + COUNT(*) OVER w2 AS actual_count2, + count1, + count2, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64("PT0.000001S") PRECEDING AND CURRENT ROW + ), + w2 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND Interval64("PT0.000001S") FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row checks: each expected column must match its actual_* counterpart (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count1, count1 IS NOT DISTINCT FROM actual_count1, $str(actual_count1)), + Ensure(count2, count2 IS NOT DISTINCT FROM actual_count2, $str(actual_count2)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_no_sort_current_row.cfg b/yql/essentials/tests/sql/suites/window/win_range_no_sort_current_row.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_no_sort_current_row.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_no_sort_current_row.yql b/yql/essentials/tests/sql/suites/window/win_range_no_sort_current_row.yql new file mode 100644 index 00000000000..6516912c20b --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_no_sort_current_row.yql @@ -0,0 +1,34 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE BETWEEN CURRENT ROW AND CURRENT ROW on a window that has no ORDER BY. The fixture expects COUNT(*) = 5 for every row, i.e. the whole five-row partition. */ +$data = [ + <|a: Timestamp("2017-11-27T13:24:00.123454Z"), b: 1, count: 5|>, + <|a: Timestamp("2017-11-27T13:24:00.123455Z"), b: 1, count: 5|>, + <|a: Timestamp("2017-11-27T13:24:00.123456Z"), b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, + <|a: NULL, b: 1, count: 5|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + RANGE BETWEEN CURRENT ROW AND CURRENT ROW + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_string_current_row_fail.cfg b/yql/essentials/tests/sql/suites/window/win_range_string_current_row_fail.cfg new file mode 100644 index 00000000000..2bf6b4f432b --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_string_current_row_fail.cfg @@ -0,0 +1,2 @@ +xfail +langver 2025.05
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_string_current_row_fail.yql b/yql/essentials/tests/sql/suites/window/win_range_string_current_row_fail.yql new file mode 100644 index 00000000000..4aefb8a695c --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_string_current_row_fail.yql @@ -0,0 +1,23 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +/* custom error: Range frame for non numeric expressions is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW */ +/* Negative case (its .cfg marks it xfail): a String ORDER BY key combined with RANGE BETWEEN CURRENT ROW AND CURRENT ROW. NOTE(review): the directive comment above presumably gives the error text the test harness expects - confirm. */ +$data = [ + <|a: "apple", b: 1, count: 1|>, + <|a: "banana", b: 1, count: 2|>, +]; +/* Query under test; it is run directly, without the Ensure wrapper used by the passing cases. */ +SELECT + COUNT(*) OVER w1 AS actual_count, + count, +FROM + AS_TABLE($data) +WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND CURRENT ROW + )
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_string_success.cfg b/yql/essentials/tests/sql/suites/window/win_range_string_success.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_string_success.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_string_success.yql b/yql/essentials/tests/sql/suites/window/win_range_string_success.yql new file mode 100644 index 00000000000..7d6cc348107 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_string_success.yql @@ -0,0 +1,37 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW over a String key, a ASC. Each fixture row stores in "count" the COUNT(*) expected for it; rows with equal keys (the two NULLs, the two "cherry" rows) expect the same count. */ +$data = [ + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: "apple", b: 1, count: 3|>, + <|a: "banana", b: 1, count: 4|>, + <|a: "cherry", b: 1, count: 6|>, + <|a: "cherry", b: 1, count: 6|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_timestamp.cfg b/yql/essentials/tests/sql/suites/window/win_range_timestamp.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_timestamp.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_timestamp.yql b/yql/essentials/tests/sql/suites/window/win_range_timestamp.yql new file mode 100644 index 00000000000..8615b00c4c8 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_timestamp.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a Timestamp key with an Interval offset: UNBOUNDED PRECEDING .. 1 microsecond PRECEDING, a ASC. The non-NULL keys differ by exactly one microsecond. Each fixture row stores in "count" the COUNT(*) expected for it; two rows have a NULL key. */ +$data = [ + <|a: Timestamp("2017-11-27T13:24:00.123454Z"), b: 1, count: 2|>, + <|a: Timestamp("2017-11-27T13:24:00.123455Z"), b: 1, count: 3|>, + <|a: Timestamp("2017-11-27T13:24:00.123456Z"), b: 1, count: 4|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND Interval("PT0.000001S") PRECEDING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_timestamp64.cfg b/yql/essentials/tests/sql/suites/window/win_range_timestamp64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_timestamp64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_timestamp64.yql b/yql/essentials/tests/sql/suites/window/win_range_timestamp64.yql new file mode 100644 index 00000000000..469716d81a5 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_timestamp64.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a Timestamp64 key with an Interval64 offset: 1 microsecond PRECEDING .. CURRENT ROW, a ASC. The keys are five consecutive microseconds and none is NULL. Each fixture row stores in "count" the COUNT(*) expected for it. */ +$data = [ + <|a: Timestamp64("2017-11-27T13:24:00.123454Z"), b: 1, count: 1|>, + <|a: Timestamp64("2017-11-27T13:24:00.123455Z"), b: 1, count: 2|>, + <|a: Timestamp64("2017-11-27T13:24:00.123456Z"), b: 1, count: 2|>, + <|a: Timestamp64("2017-11-27T13:24:00.123457Z"), b: 1, count: 2|>, + <|a: Timestamp64("2017-11-27T13:24:00.123458Z"), b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64("PT0.000001S") PRECEDING AND CURRENT ROW + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_two_columns_success.cfg b/yql/essentials/tests/sql/suites/window/win_range_two_columns_success.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_two_columns_success.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_two_columns_success.yql b/yql/essentials/tests/sql/suites/window/win_range_two_columns_success.yql new file mode 100644 index 00000000000..53636599ec6 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_two_columns_success.yql @@ -0,0 +1,35 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW with a two-column sort key (a ASC, c ASC). Every (a, c) pair in the fixture is distinct; each row stores in "count" the COUNT(*) expected for it. */ +$data = [ + <|a: "apple", c: 1, b: 1, count: 1|>, + <|a: "apple", c: 2, b: 1, count: 2|>, + <|a: "banana", c: 1, b: 1, count: 3|>, + <|a: "banana", c: 2, b: 1, count: 4|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC, c ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_two_columns_with_current_row_fail.cfg b/yql/essentials/tests/sql/suites/window/win_range_two_columns_with_current_row_fail.cfg new file mode 100644 index 00000000000..2bf6b4f432b --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_two_columns_with_current_row_fail.cfg @@ -0,0 +1,2 @@ +xfail +langver 2025.05
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_two_columns_with_current_row_fail.yql b/yql/essentials/tests/sql/suites/window/win_range_two_columns_with_current_row_fail.yql new file mode 100644 index 00000000000..b1f0f491a77 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_two_columns_with_current_row_fail.yql @@ -0,0 +1,25 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +/* custom error: Range frame for multiple expressions is only allowed to be UNBOUNDED PRECEDING AND CURRENT ROW. */ +/* Negative case (its .cfg marks it xfail): a two-column sort key (a ASC, c ASC) combined with RANGE BETWEEN CURRENT ROW AND CURRENT ROW. NOTE(review): the directive comment above presumably gives the error text the test harness expects - confirm. */ +$data = [ + <|a: 1, c: 1, b: 1|>, + <|a: 1, c: 2, b: 1|>, + <|a: 2, c: 1, b: 1|>, + <|a: 2, c: 2, b: 1|>, +]; +/* Query under test; it is run directly, without the Ensure wrapper used by the passing cases. */ +SELECT + COUNT(*) OVER w1 AS cnt, +FROM + AS_TABLE($data) +WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC, c ASC + RANGE BETWEEN CURRENT ROW AND CURRENT ROW + ) +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdate.cfg b/yql/essentials/tests/sql/suites/window/win_range_tzdate.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdate.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdate.yql b/yql/essentials/tests/sql/suites/window/win_range_tzdate.yql new file mode 100644 index 00000000000..0c26c710360 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdate.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a TzDate key with an Interval offset: 1 day PRECEDING .. CURRENT ROW, a ASC. Each fixture row stores in "count" the COUNT(*) expected for it; two rows have a NULL key. */ +$data = [ + <|a: TzDate("2017-11-25,Europe/Moscow"), b: 1, count: 1|>, + <|a: TzDate("2017-11-26,Europe/Moscow"), b: 1, count: 2|>, + <|a: TzDate("2017-11-27,Europe/Moscow"), b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval("P1D") PRECEDING AND CURRENT ROW + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdate32.cfg b/yql/essentials/tests/sql/suites/window/win_range_tzdate32.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdate32.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdate32.yql b/yql/essentials/tests/sql/suites/window/win_range_tzdate32.yql new file mode 100644 index 00000000000..79dde992734 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdate32.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a TzDate32 key with Interval64 offsets: 3 days PRECEDING .. 1 day PRECEDING, a ASC. Each fixture row stores in "count" the COUNT(*) expected for it; two rows have a NULL key. */ +$data = [ + <|a: TzDate32("2017-11-25,Europe/Moscow"), b: 1, count: 0|>, + <|a: TzDate32("2017-11-26,Europe/Moscow"), b: 1, count: 1|>, + <|a: TzDate32("2017-11-27,Europe/Moscow"), b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64("P3D") PRECEDING AND Interval64("P1D") PRECEDING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdatetime.cfg b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdatetime.yql b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime.yql new file mode 100644 index 00000000000..4f0f69774ca --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a TzDatetime key with Interval offsets: 3 minutes PRECEDING .. 1 minute PRECEDING, a ASC. The fixture spans three partitions (b = 1, 2, 3); each row stores in "count" the COUNT(*) expected for it within its own partition. The two NULL-key rows belong to b = 1. */ +$data = [ + <|a: TzDatetime("2017-11-27T13:22:00,America/Los_Angeles"), b: 1, count: 0|>, + <|a: TzDatetime("2017-11-27T13:23:00,America/Los_Angeles"), b: 1, count: 1|>, + <|a: TzDatetime("2017-11-27T13:24:00,America/Los_Angeles"), b: 1, count: 2|>, + <|a: TzDatetime("2017-11-27T13:22:00,America/Los_Angeles"), b: 2, count: 0|>, + <|a: TzDatetime("2017-11-27T13:23:00,America/Los_Angeles"), b: 2, count: 1|>, + <|a: TzDatetime("2017-11-27T13:24:00,America/Los_Angeles"), b: 3, count: 0|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval("PT3M") PRECEDING AND Interval("PT1M") PRECEDING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdatetime64.cfg b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_tzdatetime64.yql b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime64.yql new file mode 100644 index 00000000000..c310de4ed54 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tzdatetime64.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); +/* RANGE frame over a TzDatetime64 key with Interval64 offsets: 1 minute PRECEDING .. 3 minutes FOLLOWING, a ASC. The fixture spans two partitions (b = 1, 2); each row stores in "count" the COUNT(*) expected for it within its own partition. The two NULL-key rows belong to b = 1. */ +$data = [ + <|a: TzDatetime64("2017-11-27T13:22:00,America/Los_Angeles"), b: 1, count: 3|>, + <|a: TzDatetime64("2017-11-27T13:23:00,America/Los_Angeles"), b: 1, count: 3|>, + <|a: TzDatetime64("2017-11-27T13:24:00,America/Los_Angeles"), b: 1, count: 2|>, + <|a: TzDatetime64("2017-11-27T13:22:00,America/Los_Angeles"), b: 2, count: 3|>, + <|a: TzDatetime64("2017-11-27T13:23:00,America/Los_Angeles"), b: 2, count: 3|>, + <|a: TzDatetime64("2017-11-27T13:24:00,America/Los_Angeles"), b: 2, count: 2|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval64("PT1M") PRECEDING AND Interval64("PT3M") FOLLOWING + ) +); +/* $str: renders a value as String for the Ensure message; a NULL cast result becomes "null". */ +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; +/* Per-row check: count must match actual_count (NULL-safe comparison); the third Ensure argument is the message text built from the actual value. */ +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_tztimestamp.cfg b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_tztimestamp.yql b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp.yql new file mode 100644 index 00000000000..fb506905e6e --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp.yql @@ -0,0 +1,35 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzTimestamp("2017-11-27T13:24:00.123454,GMT"), b: 1, count: 2|>, + <|a: TzTimestamp("2017-11-27T13:24:00.123455,GMT"), b: 1, count: 1|>, + <|a: TzTimestamp("2017-11-27T13:24:00.123456,GMT"), b: 1, count: 0|>, + <|a: NULL, b: 1, count: 1|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN Interval("PT0.000001S") FOLLOWING AND Interval("PT0.000003S") FOLLOWING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_tztimestamp64.cfg b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_tztimestamp64.yql b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp64.yql new file mode 100644 index 00000000000..20c2e518026 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_tztimestamp64.yql @@ -0,0 +1,36 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: TzTimestamp64("2017-11-27T13:24:00.123454,GMT"), b: 1, count: 2|>, + <|a: TzTimestamp64("2017-11-27T13:24:00.123455,GMT"), b: 1, count: 1|>, + <|a: TzTimestamp64("2017-11-27T13:24:00.123459,GMT"), b: 1, count: 1|>, + <|a: NULL, b: 1, count: 2|>, + <|a: NULL, b: 1, count: 2|>, +]; + +$win_result = ( + SELECT + COUNT(*) OVER w1 AS actual_count, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN CURRENT ROW AND Interval64("PT0.000001S") FOLLOWING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint16.cfg b/yql/essentials/tests/sql/suites/window/win_range_uint16.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint16.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint16.yql b/yql/essentials/tests/sql/suites/window/win_range_uint16.yql new file mode 100644 index 00000000000..0a566be4e8c --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint16.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: uint16('100'), b: 1, sum: uint16('100'), count: 3|>, + <|a: uint16('200'), b: 1, sum: uint16('300'), count: 4|>, + <|a: uint16('250'), b: 1, sum: uint16('550'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND uint16("10") FOLLOWING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint32.cfg b/yql/essentials/tests/sql/suites/window/win_range_uint32.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint32.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint32.yql b/yql/essentials/tests/sql/suites/window/win_range_uint32.yql new file mode 100644 index 00000000000..43786333579 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint32.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: uint32('1000'), b: 1, sum: uint32('1000'), count: 1|>, + <|a: uint32('2000'), b: 1, sum: uint32('3000'), count: 2|>, + <|a: uint32('2500'), b: 1, sum: uint32('4500'), count: 2|>, + <|a: uint32('3000'), b: 1, sum: uint32('7500'), count: 3|>, + <|a: uint32('5000'), b: 1, sum: uint32('5000'), count: 1|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN uint32('1000') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint64.cfg b/yql/essentials/tests/sql/suites/window/win_range_uint64.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint64.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint64.yql b/yql/essentials/tests/sql/suites/window/win_range_uint64.yql new file mode 100644 index 00000000000..ae1e9503be4 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint64.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: uint64('1000000'), b: 1, sum: uint64('1000000'), count: 3|>, + <|a: uint64('2000000'), b: 1, sum: uint64('3000000'), count: 4|>, + <|a: uint64('3000000'), b: 1, sum: uint64('6000000'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint8.cfg b/yql/essentials/tests/sql/suites/window/win_range_uint8.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint8.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_uint8.yql b/yql/essentials/tests/sql/suites/window/win_range_uint8.yql new file mode 100644 index 00000000000..3a8a713aff8 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_uint8.yql @@ -0,0 +1,40 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: NULL, b: 1, sum: NULL, count: 2|>, + <|a: uint8('8'), b: 1, sum: uint8('8'), count: 1|>, + <|a: uint8('10'), b: 1, sum: uint8('10'), count: 1|>, + <|a: uint8('11'), b: 1, sum: uint8('21'), count: 2|>, + +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN uint8('1') PRECEDING AND CURRENT ROW + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? 
"null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +; diff --git a/yql/essentials/tests/sql/suites/window/win_range_unbounded_compact.cfg b/yql/essentials/tests/sql/suites/window/win_range_unbounded_compact.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_unbounded_compact.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_unbounded_compact.yql b/yql/essentials/tests/sql/suites/window/win_range_unbounded_compact.yql new file mode 100644 index 00000000000..b69a3b10c07 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_unbounded_compact.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('100'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('200'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('250'), b: 1, sum: uint16('550'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION COMPACT BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/window/win_range_unbounded_non_compact.cfg b/yql/essentials/tests/sql/suites/window/win_range_unbounded_non_compact.cfg new file mode 100644 index 00000000000..b01e0c8c619 --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_unbounded_non_compact.cfg @@ -0,0 +1 @@ +langver 2025.05 diff --git a/yql/essentials/tests/sql/suites/window/win_range_unbounded_non_compact.yql b/yql/essentials/tests/sql/suites/window/win_range_unbounded_non_compact.yql new file mode 100644 index 00000000000..d77896a087f --- /dev/null +++ b/yql/essentials/tests/sql/suites/window/win_range_unbounded_non_compact.yql @@ -0,0 +1,39 @@ +PRAGMA WindowNewPipeline; +PRAGMA config.flags('OptimizerFlags', 'ForbidConstantDependsOnFuse'); + +$data = [ + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: NULL, b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('100'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('200'), b: 1, sum: uint16('550'), count: 5|>, + <|a: uint16('250'), b: 1, sum: uint16('550'), count: 5|>, +]; + +$win_result = ( + SELECT + SUM(a) OVER w1 AS actual_sum, + COUNT(*) OVER w1 AS actual_count, + sum, + count, + FROM + AS_TABLE($data) + WINDOW + w1 AS ( + PARTITION BY + b + ORDER BY + a ASC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +); + +$str = ($x) -> { + return CAST($x as String) ?? "null"; +}; + +SELECT + Ensure(sum, sum IS NOT DISTINCT FROM actual_sum, $str(actual_sum)), + Ensure(count, count IS NOT DISTINCT FROM actual_count, $str(actual_count)) +FROM + $win_result +;
\ No newline at end of file diff --git a/yt/yql/providers/yt/gateway/native/yql_yt_lambda_builder.cpp b/yt/yql/providers/yt/gateway/native/yql_yt_lambda_builder.cpp index aaea8d4ecba..812b7fc57d7 100644 --- a/yt/yql/providers/yt/gateway/native/yql_yt_lambda_builder.cpp +++ b/yt/yql/providers/yt/gateway/native/yql_yt_lambda_builder.cpp @@ -61,7 +61,7 @@ NKikimr::NMiniKQL::TComputationNodeFactory GetGatewayNodeFactory(TCodecContext* YQL_ENSURE(fileInfo, "Unknown file path " << fullFileName); const auto path = fileInfo->Path->GetPath(); const auto content = callable.GetType()->GetName() == "FileContentJob" ? TFileInput(path).ReadAll() : path.GetPath(); - return ctx.NodeFactory.CreateImmutableNode(MakeString(content)); + return ctx.NodeFactory.CreateImmutableNode(NKikimr::NMiniKQL::MakeString(content)); } } |
