diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-04-25 20:24:24 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-04-25 20:24:24 +0300 |
commit | f3eadcc2cfd4bd08ff995216c88ab8d67e24f696 (patch) | |
tree | 7de36c161b40a328cf2f1af4ca0960b21f22e401 | |
parent | 8bb83e4a069bb0920c0371612d15d8b0c41b254d (diff) | |
download | ydb-f3eadcc2cfd4bd08ff995216c88ab8d67e24f696.tar.gz |
additional counters
-rw-r--r-- | ydb/core/tx/columnshard/counters.cpp | 9 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/counters.h | 10 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/indexed_read_data.h | 2 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/reader/batch.cpp | 2 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/reader/batch.h | 1 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/reader/filter_assembler.cpp | 38 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/reader/filter_assembler.h | 1 |
7 files changed, 50 insertions, 13 deletions
diff --git a/ydb/core/tx/columnshard/counters.cpp b/ydb/core/tx/columnshard/counters.cpp index dcf1476e52..a660582add 100644 --- a/ydb/core/tx/columnshard/counters.cpp +++ b/ydb/core/tx/columnshard/counters.cpp @@ -9,6 +9,15 @@ TScanCounters::TScanCounters(const TString& module) { PortionBytes = subGroup->GetCounter(module + "/PortionBytes", true); FilterBytes = subGroup->GetCounter(module + "/FilterBytes", true); PostFilterBytes = subGroup->GetCounter(module + "/PostFilterBytes", true); + PostFilterPortionsCount = subGroup->GetCounter(module + "/PostFilterPortionsCount", true); + FilterOnlyPortionsCount = subGroup->GetCounter(module + "/FilterOnlyPortionsCount", true); + FilterOnlyPortionsBytes = subGroup->GetCounter(module + "/FilterOnlyPortionsBytes", true); + EmptyFilterPortionsCount = subGroup->GetCounter(module + "/EmptyFilterPortionsCount", true); + EmptyFilterPortionsBytes = subGroup->GetCounter(module + "/EmptyFilterPortionsBytes", true); + FilteredRowsCount = subGroup->GetCounter(module + "/FilteredRowsCount", true); + UsefulFilterBytes = subGroup->GetCounter(module + "/UsefulFilterBytes", true); + UsefulPostFilterBytes = subGroup->GetCounter(module + "/UsefulPostFilterBytes", true); + OriginalRowsCount = subGroup->GetCounter(module + "/OriginalRowsCount", true); } } diff --git a/ydb/core/tx/columnshard/counters.h b/ydb/core/tx/columnshard/counters.h index 357def8e37..1801a18767 100644 --- a/ydb/core/tx/columnshard/counters.h +++ b/ydb/core/tx/columnshard/counters.h @@ -9,6 +9,16 @@ private: YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, PortionBytes); YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, FilterBytes); YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, PostFilterBytes); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, PostFilterPortionsCount); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, FilterOnlyPortionsCount); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, FilterOnlyPortionsBytes); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, EmptyFilterPortionsCount); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, EmptyFilterPortionsBytes); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, FilteredRowsCount); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, UsefulFilterBytes); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, UsefulPostFilterBytes); + YDB_READONLY_DEF(NMonitoring::TDynamicCounters::TCounterPtr, OriginalRowsCount); + public: TScanCounters(const TString& module = "Scan"); }; diff --git a/ydb/core/tx/columnshard/engines/indexed_read_data.h b/ydb/core/tx/columnshard/engines/indexed_read_data.h index cccbe5801d..172608cf2e 100644 --- a/ydb/core/tx/columnshard/engines/indexed_read_data.h +++ b/ydb/core/tx/columnshard/engines/indexed_read_data.h @@ -228,7 +228,7 @@ private: std::set<ui32> EarlyFilterColumns; YDB_READONLY_DEF(std::set<ui32>, PostFilterColumns); bool AbortedFlag = false; - NColumnShard::TScanCounters Counters; + YDB_READONLY_DEF(NColumnShard::TScanCounters, Counters); std::vector<NIndexedReader::TBatch*> Batches; TFetchBlobsQueue& FetchBlobsQueue; friend class NIndexedReader::TBatch; diff --git a/ydb/core/tx/columnshard/engines/reader/batch.cpp b/ydb/core/tx/columnshard/engines/reader/batch.cpp index a51115669d..b8aa8a4336 100644 --- a/ydb/core/tx/columnshard/engines/reader/batch.cpp +++ b/ydb/core/tx/columnshard/engines/reader/batch.cpp @@ -63,6 +63,7 @@ void TBatch::Reset(const std::set<ui32>* columnIds) { Y_VERIFY(WaitIndexed.empty()); Y_VERIFY(Data.empty()); WaitingBytes = 0; + FetchedBytes = 0; for (const NOlap::TColumnRecord& rec : PortionInfo->Records) { if (CurrentColumnIds && !CurrentColumnIds->contains(rec.ColumnId)) { continue; @@ -95,6 +96,7 @@ bool TBatch::AddIndexedReady(const TBlobRange& bRange, const TString& blobData) return false; } WaitingBytes -= bRange.Size; + FetchedBytes += bRange.Size; Data.emplace(bRange, blobData); return true; } diff --git a/ydb/core/tx/columnshard/engines/reader/batch.h b/ydb/core/tx/columnshard/engines/reader/batch.h index 7e30a7f26f..475ec5323d 100644 --- a/ydb/core/tx/columnshard/engines/reader/batch.h +++ b/ydb/core/tx/columnshard/engines/reader/batch.h @@ -23,6 +23,7 @@ private: YDB_READONLY(ui64, Portion, 0); YDB_READONLY(ui64, Granule, 0); YDB_READONLY(ui64, WaitingBytes, 0); + YDB_READONLY(ui64, FetchedBytes, 0); THashSet<TBlobRange> WaitIndexed; YDB_READONLY_DEF(std::shared_ptr<arrow::RecordBatch>, FilteredBatch); diff --git a/ydb/core/tx/columnshard/engines/reader/filter_assembler.cpp b/ydb/core/tx/columnshard/engines/reader/filter_assembler.cpp index 76448d7563..f11ddbe863 100644 --- a/ydb/core/tx/columnshard/engines/reader/filter_assembler.cpp +++ b/ydb/core/tx/columnshard/engines/reader/filter_assembler.cpp @@ -10,10 +10,10 @@ bool TAssembleFilter::DoExecuteImpl() { auto batch = BatchConstructor.Assemble(); Y_VERIFY(batch); Y_VERIFY(batch->num_rows()); - const ui32 originalCount = batch->num_rows(); + OriginalCount = batch->num_rows(); Filter = std::make_shared<NArrow::TColumnFilter>(NOlap::FilterPortion(batch, *ReadMetadata)); if (!Filter->Apply(batch)) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_data")("original_count", originalCount); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_data")("original_count", OriginalCount); FilteredBatch = nullptr; return true; } @@ -22,7 +22,7 @@ bool TAssembleFilter::DoExecuteImpl() { auto filter = NOlap::EarlyFilter(batch, ReadMetadata->Program); Filter->CombineSequential(filter); if (!filter.Apply(batch)) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_data")("original_count", originalCount); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_data")("original_count", OriginalCount); FilteredBatch = nullptr; return true; } @@ -30,7 +30,7 @@ bool TAssembleFilter::DoExecuteImpl() { #else Y_UNUSED(AllowEarlyFilter); #endif - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "not_skip_data")("original_count", originalCount)("filtered_count", batch->num_rows()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "not_skip_data")("original_count", OriginalCount)("filtered_count", batch->num_rows()); FilteredBatch = batch; return true; @@ -38,17 +38,31 @@ bool TAssembleFilter::DoExecuteImpl() { bool TAssembleFilter::DoApply(TIndexedReadData& owner) const { TBatch& batch = owner.GetBatchInfo(BatchNo); + Y_VERIFY(OriginalCount); + owner.GetCounters().GetOriginalRowsCount()->Add(OriginalCount); batch.InitFilter(Filter, FilteredBatch); - if (batch.AskedColumnsAlready(owner.GetPostFilterColumns()) || !FilteredBatch || FilteredBatch->num_rows() == 0) { + if (!FilteredBatch || FilteredBatch->num_rows() == 0) { + owner.GetCounters().GetEmptyFilterPortionsCount()->Add(1); + owner.GetCounters().GetEmptyFilterPortionsBytes()->Add(batch.GetFetchedBytes()); batch.InitBatch(FilteredBatch); } else { - batch.Reset(&owner.GetPostFilterColumns()); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "additional_data") - ("filtered_count", FilteredBatch->num_rows()) - ("blobs_count", batch.GetWaitingBlobs().size()) - ("columns_count", batch.GetCurrentColumnIds()->size()) - ("fetch_size", batch.GetWaitingBytes()) - ; + owner.GetCounters().GetFilteredRowsCount()->Add(FilteredBatch->num_rows()); + owner.GetCounters().GetUsefulFilterBytes()->Add(batch.GetFetchedBytes() * FilteredBatch->num_rows() / OriginalCount); + if (batch.AskedColumnsAlready(owner.GetPostFilterColumns())) { + owner.GetCounters().GetFilterOnlyPortionsCount()->Add(1); + owner.GetCounters().GetFilterOnlyPortionsBytes()->Add(batch.GetFetchedBytes()); + batch.InitBatch(FilteredBatch); + } else { + owner.GetCounters().GetPostFilterPortionsCount()->Add(1); + batch.Reset(&owner.GetPostFilterColumns()); + owner.GetCounters().GetUsefulPostFilterBytes()->Add(batch.GetWaitingBytes() * FilteredBatch->num_rows() / OriginalCount); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "additional_data") + ("filtered_count", FilteredBatch->num_rows()) + ("blobs_count", batch.GetWaitingBlobs().size()) + ("columns_count", batch.GetCurrentColumnIds()->size()) + ("fetch_size", batch.GetWaitingBytes()) + ; + } } return true; } diff --git a/ydb/core/tx/columnshard/engines/reader/filter_assembler.h b/ydb/core/tx/columnshard/engines/reader/filter_assembler.h index 7f6b3f7c9c..7f1e5f38e7 100644 --- a/ydb/core/tx/columnshard/engines/reader/filter_assembler.h +++ b/ydb/core/tx/columnshard/engines/reader/filter_assembler.h @@ -17,6 +17,7 @@ namespace NKikimr::NOlap::NIndexedReader { NOlap::TReadMetadata::TConstPtr ReadMetadata; std::shared_ptr<NArrow::TColumnFilter> Filter; const ui32 BatchNo; + ui32 OriginalCount = 0; bool AllowEarlyFilter = false; protected: virtual bool DoApply(TIndexedReadData& owner) const override; |