diff options
author | kardymon-d <kardymon-d@yandex-team.com> | 2023-10-30 17:18:30 +0300 |
---|---|---|
committer | kardymon-d <kardymon-d@yandex-team.com> | 2023-10-30 17:59:52 +0300 |
commit | 0c63c0cb52bb1b85bb80f72a5e09df49e3243a4d (patch) | |
tree | 5764cbd76416935f74e3ba3740a16bcb4c7f3bf6 | |
parent | d16d15ba3b08594fd1066d200c2d10b10cba1a1a (diff) | |
download | ydb-0c63c0cb52bb1b85bb80f72a5e09df49e3243a4d.tar.gz |
Добавление аллокатора в match_recognize
Add MKQLAllocator
6 files changed, 29 insertions, 25 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index af82da13d8f..b4f519c2381 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -17,7 +17,7 @@ namespace NKikimr::NMiniKQL { namespace NMatchRecognize { enum class EOutputColumnSource {PartitionKey, Measure}; -using TOutputColumnOrder = TVector<std::pair<EOutputColumnSource, size_t>>; +using TOutputColumnOrder = std::vector<std::pair<EOutputColumnSource, size_t>, TMKQLAllocator<std::pair<EOutputColumnSource, size_t>>>; using namespace NYql::NMatchRecognize; @@ -129,7 +129,7 @@ private: const TContainerCacheOnContext& Cache; TSimpleList Rows; TMatchedVars CurMatchedVars; - std::deque<TMatchedVars> Matches; + std::deque<TMatchedVars, TMKQLAllocator<TMatchedVars>> Matches; ui64 MatchNumber; }; @@ -292,7 +292,8 @@ private: class TStateForInterleavedPartitions : public TComputationValue<TStateForInterleavedPartitions> { - using TPartitionMap = std::unordered_map<TString, std::unique_ptr<TStreamingMatchRecognize>>; + using TPartitionMapValue = std::unique_ptr<TStreamingMatchRecognize>; + using TPartitionMap = std::unordered_map<TString, TPartitionMapValue, std::hash<TString>, std:: equal_to<TString>, TMKQLAllocator<std::pair<const TString, TPartitionMapValue>>>; public: TStateForInterleavedPartitions( TMemoryUsageInfo* memInfo, @@ -363,7 +364,7 @@ private: private: TPartitionMap Partitions; - std::stack<TPartitionMap::iterator> HasReadyOutput; + std::stack<TPartitionMap::iterator, std::deque<TPartitionMap::iterator, TMKQLAllocator<TPartitionMap::iterator>>> HasReadyOutput; bool Terminating = false; IComputationExternalNode* InputRowArg; @@ -449,7 +450,8 @@ private: }; TOutputColumnOrder GetOutputColumnOrder(TRuntimeNode partitionKyeColumnsIndexes, TRuntimeNode measureColumnsIndexes) { - std::unordered_map<size_t, std::pair<EOutputColumnSource, size_t>> temp; + using tempMapValue = std::pair<EOutputColumnSource, size_t>; + std::unordered_map<size_t, tempMapValue, std::hash<size_t>, std::equal_to<size_t>, TMKQLAllocator<std::pair<const size_t, tempMapValue>, EMemorySubPool::Temporary>> temp; { auto list = AS_VALUE(TListLiteral, partitionKyeColumnsIndexes); for (ui32 i = 0; i != list->GetItemsCount(); ++i) { diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h index 31d7cf5cd70..3a7dd959499 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h @@ -7,7 +7,7 @@ namespace NKikimr::NMiniKQL::NMatchRecognize { template<class R> -using TMatchedVar = std::vector<R>; +using TMatchedVar = std::vector<R, TMKQLAllocator<R>>; template<class R> void Extend(TMatchedVar<R>& var, const R& r) { if (var.empty()) { @@ -23,7 +23,7 @@ void Extend(TMatchedVar<R>& var, const R& r) { } template<class R> -using TMatchedVars = std::vector<TMatchedVar<R>>; +using TMatchedVars = std::vector<TMatchedVar<R>, TMKQLAllocator<TMatchedVar<R>>>; template<class R> NUdf::TUnboxedValue ToValue(const THolderFactory& holderFactory, const R& range) { @@ -58,7 +58,7 @@ class TMatchedVarsValue : public TComputationValue<TMatchedVarsValue<R>> { class TRangeList: public TComputationValue<TRangeList> { class TIterator : public TComputationValue<TIterator> { public: - TIterator(TMemoryUsageInfo* memInfo, const THolderFactory& holderFactory, const std::vector<R>& ranges) + TIterator(TMemoryUsageInfo* memInfo, const THolderFactory& holderFactory, const std::vector<R, TMKQLAllocator<R>>& ranges) : TComputationValue<TIterator>(memInfo) , HolderFactory(holderFactory) , Ranges(ranges) @@ -74,7 +74,7 @@ class TMatchedVarsValue : public TComputationValue<TMatchedVarsValue<R>> { return true; } const THolderFactory& HolderFactory; - const std::vector<R>& Ranges; + const std::vector<R, TMKQLAllocator<R>>& Ranges; size_t Index; }; @@ -106,7 +106,7 @@ class TMatchedVarsValue : public TComputationValue<TMatchedVarsValue<R>> { const TMatchedVar<R>& Var; }; public: - TMatchedVarsValue(TMemoryUsageInfo* memInfo, const THolderFactory& holderFactory, const std::vector<TMatchedVar<R>>& vars) + TMatchedVarsValue(TMemoryUsageInfo* memInfo, const THolderFactory& holderFactory, const std::vector<TMatchedVar<R>, TMKQLAllocator<TMatchedVar<R>>>& vars) : TComputationValue<TMatchedVarsValue>(memInfo) , HolderFactory(holderFactory) , Vars(vars) @@ -118,7 +118,7 @@ public: } private: const THolderFactory& HolderFactory; - const std::vector<TMatchedVar<R>>& Vars; + const std::vector<TMatchedVar<R>, TMKQLAllocator<TMatchedVar<R>>>& Vars; }; }//namespace NKikimr::NMiniKQL::NMatchRecognize diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_measure_arg.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_measure_arg.h index 54d328075d5..38ed16e21ee 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_measure_arg.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_measure_arg.h @@ -9,7 +9,7 @@ namespace NKikimr::NMiniKQL::NMatchRecognize { using NYql::NMatchRecognize::EMeasureInputDataSpecialColumns; -using TMeasureInputColumnOrder = TVector<std::pair<EMeasureInputDataSpecialColumns, size_t>>; +using TMeasureInputColumnOrder = std::vector<std::pair<EMeasureInputDataSpecialColumns, size_t>, TMKQLAllocator<std::pair<EMeasureInputDataSpecialColumns, size_t>>>; //Input row augmented with lightweight special columns for calculating MEASURE lambdas class TRowForMeasureValue: public TComputationValue<TRowForMeasureValue> diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h index 1de9d347648..ac899507294 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h @@ -13,7 +13,7 @@ using namespace NYql::NMatchRecognize; struct TVoidTransition{}; using TEpsilonTransition = size_t; //to -using TEpsilonTransitions = std::vector<TEpsilonTransition>; +using TEpsilonTransitions = std::vector<TEpsilonTransition, TMKQLAllocator<TEpsilonTransition>>; using TMatchedVarTransition = std::pair<ui32, size_t>; //{varIndex, to} using TQuantityEnterTransition = size_t; //to using TQuantityExitTransition = std::pair<std::pair<ui64, ui64>, std::pair<size_t, size_t>>; //{{min, max}, {foFindMore, toMatched}} @@ -26,7 +26,7 @@ using TNfaTransition = std::variant< >; struct TNfaTransitionGraph { - std::vector<TNfaTransition> Transitions; + std::vector<TNfaTransition, TMKQLAllocator<TNfaTransition>> Transitions; size_t Input; size_t Output; @@ -48,10 +48,10 @@ private: return Graph->Transitions.size() - 1; } - TNfaItem BuildTerms(const std::vector<TRowPatternTerm>& terms, const THashMap<TString, size_t>& varNameToIndex) { + TNfaItem BuildTerms(const TVector<TRowPatternTerm>& terms, const THashMap<TString, size_t>& varNameToIndex) { auto input = AddNode(); auto output = AddNode(); - std::vector<TEpsilonTransition> fromInput; + std::vector<TEpsilonTransition, TMKQLAllocator<TEpsilonTransition>> fromInput; for (const auto& t: terms) { auto a = BuildTerm(t, varNameToIndex); fromInput.push_back(a.Input); @@ -63,7 +63,7 @@ private: TNfaItem BuildTerm(const TRowPatternTerm& term, const THashMap<TString, size_t>& varNameToIndex) { auto input = AddNode(); auto output = AddNode(); - std::vector<TNfaItem> automata; + std::vector<TNfaItem, TMKQLAllocator<TNfaItem>> automata; for (const auto& f: term) { automata.push_back(BuildFactor(f, varNameToIndex)); } @@ -124,13 +124,13 @@ class TNfa { using TRange = TSparseList::TRange; using TMatchedVars = TMatchedVars<TRange>; struct TState { - TState(size_t index, const TMatchedVars& vars, std::stack<ui64>&& quantifiers) + TState(size_t index, const TMatchedVars& vars, std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>&& quantifiers) : Index(index) , Vars(vars) , Quantifiers(quantifiers) {} const size_t Index; TMatchedVars Vars; - std::stack<ui64> Quantifiers; //get rid of this + std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>> Quantifiers; //get rid of this friend inline bool operator<(const TState& lhs, const TState& rhs) { return std::tie(lhs.Index, lhs.Quantifiers, lhs.Vars) < std::tie(rhs.Index, rhs.Quantifiers, rhs.Vars); @@ -147,10 +147,10 @@ public: } void ProcessRow(TSparseList::TRange&& currentRowLock, TComputationContext& ctx) { - ActiveStates.emplace(TransitionGraph->Input, TMatchedVars(Defines.size()), std::stack<ui64>{}); + ActiveStates.emplace(TransitionGraph->Input, TMatchedVars(Defines.size()), std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>{}); MakeEpsilonTransitions(); - std::set<TState> newStates; - std::set<TState> deletedStates; + std::set<TState, std::less<TState>, TMKQLAllocator<TState>> newStates; + std::set<TState, std::less<TState>, TMKQLAllocator<TState>> deletedStates; for (const auto& s: ActiveStates) { //Here we handle only transitions of TMatchedVarTransition type, //all other transitions are handled in MakeEpsilonTransitions @@ -162,7 +162,7 @@ public: auto vars = s.Vars; //TODO get rid of this copy auto& matchedVar = vars[varIndex]; Extend(matchedVar, currentRowLock); - newStates.emplace(matchedVarTransition->second, std::move(vars), std::stack<ui64>(s.Quantifiers)); + newStates.emplace(matchedVarTransition->second, std::move(vars), std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>(s.Quantifiers)); } deletedStates.insert(s); } @@ -200,7 +200,7 @@ public: private: //TODO (zverevgeny): Consider to change to std::vector for the sake of perf - using TStateSet = std::set<TState>; + using TStateSet = std::set<TState, std::less<TState>, TMKQLAllocator<TState>>; struct TTransitionVisitor { TTransitionVisitor(const TState& state, TStateSet& newStates, TStateSet& deletedStates) : State(state) @@ -216,7 +216,7 @@ private: } void operator()(const TEpsilonTransitions& epsilonTransitions) { for (const auto& i: epsilonTransitions) { - NewStates.emplace(i, TMatchedVars(State.Vars), std::stack<ui64>(State.Quantifiers)); + NewStates.emplace(i, TMatchedVars(State.Vars), std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>(State.Quantifiers)); } DeletedStates.insert(State); } diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp index 92716e5faa1..d3cb38992e6 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp @@ -8,6 +8,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeMatchedVarExtend) { using TRange = TSimpleList::TRange; using TMatchedVar = TMatchedVar<TRange>; using TMatchedVars = TMatchedVars<TRange>; + TScopedAlloc alloc(__LOCATION__); Y_UNIT_TEST(MatchedRangeSingleton) { TRange r{10}; diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp index 7b541599dff..7c7d94714ae 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp @@ -97,6 +97,7 @@ struct TNfaSetup { Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { Y_UNIT_TEST(OutputStateHasNoOutputEdges) { + TScopedAlloc alloc(__LOCATION__); const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false}}}; const auto transitionGraph = TNfaTransitionGraphBuilder::Create(pattern, {{"A", 0}}); const auto& output = transitionGraph->Transitions.at(transitionGraph->Output); |