summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vadim Averin <[email protected]> 2024-08-19 17:06:41 +0300
committer: GitHub <[email protected]> 2024-08-19 17:06:41 +0300
commit: 47bd121575c210d4bbb2dddcc2131759a694df05 (patch)
tree: d8c929ba94e141d2f802c1f68844dcd4a34b005f
parent: 4b5d232d21769a6dc74f8edf4b343fe2c41c1b24 (diff)
Allow system sampling in DQ (#7836)
-rw-r--r-- ydb/library/yql/providers/yt/gateway/lib/yt_helpers.cpp | 5
-rw-r--r-- ydb/library/yql/providers/yt/provider/yql_yt_dq_hybrid.cpp | 4
-rw-r--r-- ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp | 5
-rw-r--r-- ydb/library/yql/providers/yt/provider/yql_yt_mkql_compiler.cpp | 4
4 files changed, 7 insertions, 11 deletions
diff --git a/ydb/library/yql/providers/yt/gateway/lib/yt_helpers.cpp b/ydb/library/yql/providers/yt/gateway/lib/yt_helpers.cpp
index c27c97e7df5..0efdcbf7424 100644
--- a/ydb/library/yql/providers/yt/gateway/lib/yt_helpers.cpp
+++ b/ydb/library/yql/providers/yt/gateway/lib/yt_helpers.cpp
@@ -340,12 +340,15 @@ static bool IterateRows(NYT::ITransactionPtr tx,
}
NYT::TTableReaderOptions readerOptions;
- if (sampling && sampling->Mode == EYtSampleMode::Bernoulli) {
+ if (sampling) {
NYT::TNode spec = NYT::TNode::CreateMap();
spec["sampling_rate"] = sampling->Percentage / 100.;
if (sampling->Repeat) {
spec["sampling_seed"] = static_cast<i64>(sampling->Repeat);
}
+ if (sampling->Mode == EYtSampleMode::System) {
+ spec["sampling_mode"] = "block";
+ }
readerOptions.Config(spec);
}
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_dq_hybrid.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_dq_hybrid.cpp
index 1828263a72d..89023c74aed 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_dq_hybrid.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_dq_hybrid.cpp
@@ -120,10 +120,6 @@ private:
PushSkipStat("NonEmptyKeyFilter", nodeName);
return false;
}
- auto sampleSetting = GetSetting(section.Settings().Ref(), EYtSettingType::Sample);
- if (sampleSetting && sampleSetting->Child(1)->Child(0)->Content() == "system") {
- return false;
- }
ui64 dataSize = 0ULL, dataChunks = 0ULL;
for (const auto& path : section.Paths()) {
const TYtPathInfo info(path);
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp
index 2de6abfcd38..db20e9d90a8 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp
@@ -426,11 +426,6 @@ public:
AddMessage(ctx, info, skipIssues, State_->PassiveExecution);
return false;
}
- auto sampleSetting = GetSetting(section.Settings().Ref(), EYtSettingType::Sample);
- if (sampleSetting && sampleSetting->Child(1)->Child(0)->Content() == "system") {
- AddMessage(ctx, "system sampling", skipIssues, State_->PassiveExecution);
- return false;
- }
for (auto path: section.Paths()) {
if (!path.Table().Maybe<TYtTable>()) {
AddMessage(ctx, "non-table path", skipIssues, State_->PassiveExecution);
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_mkql_compiler.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_mkql_compiler.cpp
index 3b722caca0d..fd2a7a66ffd 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_mkql_compiler.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_mkql_compiler.cpp
@@ -395,11 +395,13 @@ TRuntimeNode BuildDqYtInputCall(
// All sections have the same sampling settings
if (samplingSpec.IsUndefined()) {
if (auto sampling = GetSampleParams(section.Settings().Ref())) {
- YQL_ENSURE(sampling->Mode != EYtSampleMode::System);
samplingSpec["sampling_rate"] = sampling->Percentage / 100.;
if (sampling->Repeat) {
samplingSpec["sampling_seed"] = static_cast<i64>(sampling->Repeat);
}
+ if (sampling->Mode == EYtSampleMode::System) {
+ samplingSpec["sampling_mode"] = "block";
+ }
}
}
}