diff options
author | alex-sh <alex-sh@yandex-team.ru> | 2022-02-10 16:50:03 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:03 +0300 |
commit | 3196904c9f5bf7aff7374eeadcb0671589581f61 (patch) | |
tree | d13114a178799aeb203a4b3b43dd7fb0c4f6975f /library/cpp/linear_regression/benchmark/pool.cpp | |
parent | d154d11651ea533127249184148c3f023e2c6d0a (diff) | |
download | ydb-3196904c9f5bf7aff7374eeadcb0671589581f61.tar.gz |
Restoring authorship annotation for <alex-sh@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/linear_regression/benchmark/pool.cpp')
-rw-r--r-- | library/cpp/linear_regression/benchmark/pool.cpp | 198 |
1 files changed, 99 insertions, 99 deletions
diff --git a/library/cpp/linear_regression/benchmark/pool.cpp b/library/cpp/linear_regression/benchmark/pool.cpp index 7f2c6a7004..2460b177ca 100644 --- a/library/cpp/linear_regression/benchmark/pool.cpp +++ b/library/cpp/linear_regression/benchmark/pool.cpp @@ -1,109 +1,109 @@ -#include "pool.h" - -#include <util/string/cast.h> -#include <util/stream/file.h> - +#include "pool.h" + +#include <util/string/cast.h> +#include <util/stream/file.h> + TInstance TInstance::FromFeaturesString(const TString& featuresString) { - TInstance instance; - - TStringBuf featuresStringBuf(featuresString); - - featuresStringBuf.NextTok('\t'); // query id - instance.Goal = FromString(featuresStringBuf.NextTok('\t')); - featuresStringBuf.NextTok('\t'); // url - instance.Weight = FromString(featuresStringBuf.NextTok('\t')); - - while (featuresStringBuf) { - instance.Features.push_back(FromString(featuresStringBuf.NextTok('\t'))); - } - - return instance; -} - -TPool::TCVIterator::TCVIterator(const TPool& parentPool, const size_t foldsCount, const EIteratorType iteratorType) - : ParentPool(parentPool) - , FoldsCount(foldsCount) - , IteratorType(iteratorType) - , InstanceFoldNumbers(ParentPool.size()) -{ -} - -void TPool::TCVIterator::ResetShuffle() { + TInstance instance; + + TStringBuf featuresStringBuf(featuresString); + + featuresStringBuf.NextTok('\t'); // query id + instance.Goal = FromString(featuresStringBuf.NextTok('\t')); + featuresStringBuf.NextTok('\t'); // url + instance.Weight = FromString(featuresStringBuf.NextTok('\t')); + + while (featuresStringBuf) { + instance.Features.push_back(FromString(featuresStringBuf.NextTok('\t'))); + } + + return instance; +} + +TPool::TCVIterator::TCVIterator(const TPool& parentPool, const size_t foldsCount, const EIteratorType iteratorType) + : ParentPool(parentPool) + , FoldsCount(foldsCount) + , IteratorType(iteratorType) + , InstanceFoldNumbers(ParentPool.size()) +{ +} + +void TPool::TCVIterator::ResetShuffle() { TVector<size_t> instanceNumbers(ParentPool.size()); - for (size_t instanceNumber = 0; instanceNumber < ParentPool.size(); ++instanceNumber) { - instanceNumbers[instanceNumber] = instanceNumber; - } - Shuffle(instanceNumbers.begin(), instanceNumbers.end(), RandomGenerator); - - for (size_t instancePosition = 0; instancePosition < ParentPool.size(); ++instancePosition) { - InstanceFoldNumbers[instanceNumbers[instancePosition]] = instancePosition % FoldsCount; - } - Current = InstanceFoldNumbers.begin(); -} - -void TPool::TCVIterator::SetTestFold(const size_t testFoldNumber) { - TestFoldNumber = testFoldNumber; - Current = InstanceFoldNumbers.begin(); - Advance(); -} - -bool TPool::TCVIterator::IsValid() const { - return Current != InstanceFoldNumbers.end(); -} - + for (size_t instanceNumber = 0; instanceNumber < ParentPool.size(); ++instanceNumber) { + instanceNumbers[instanceNumber] = instanceNumber; + } + Shuffle(instanceNumbers.begin(), instanceNumbers.end(), RandomGenerator); + + for (size_t instancePosition = 0; instancePosition < ParentPool.size(); ++instancePosition) { + InstanceFoldNumbers[instanceNumbers[instancePosition]] = instancePosition % FoldsCount; + } + Current = InstanceFoldNumbers.begin(); +} + +void TPool::TCVIterator::SetTestFold(const size_t testFoldNumber) { + TestFoldNumber = testFoldNumber; + Current = InstanceFoldNumbers.begin(); + Advance(); +} + +bool TPool::TCVIterator::IsValid() const { + return Current != InstanceFoldNumbers.end(); +} + const TInstance& TPool::TCVIterator::operator*() const { - return ParentPool[Current - InstanceFoldNumbers.begin()]; -} - -const TInstance* TPool::TCVIterator::operator->() const { - return &ParentPool[Current - InstanceFoldNumbers.begin()]; -} - -TPool::TCVIterator& TPool::TCVIterator::operator++() { - Advance(); - return *this; -} - -void TPool::TCVIterator::Advance() { - while (IsValid()) { - ++Current; - if (IsValid() && TakeCurrent()) { - break; - } - } -} - -bool TPool::TCVIterator::TakeCurrent() const { - switch (IteratorType) { + return ParentPool[Current - InstanceFoldNumbers.begin()]; +} + +const TInstance* TPool::TCVIterator::operator->() const { + return &ParentPool[Current - InstanceFoldNumbers.begin()]; +} + +TPool::TCVIterator& TPool::TCVIterator::operator++() { + Advance(); + return *this; +} + +void TPool::TCVIterator::Advance() { + while (IsValid()) { + ++Current; + if (IsValid() && TakeCurrent()) { + break; + } + } +} + +bool TPool::TCVIterator::TakeCurrent() const { + switch (IteratorType) { case LearnIterator: return *Current != TestFoldNumber; case TestIterator: return *Current == TestFoldNumber; - } - return false; -} - + } + return false; +} + void TPool::ReadFromFeatures(const TString& featuresPath) { TFileInput featuresIn(featuresPath); TString featuresString; - while (featuresIn.ReadLine(featuresString)) { - this->push_back(TInstance::FromFeaturesString(featuresString)); - } -} - -TPool::TCVIterator TPool::CrossValidationIterator(const size_t foldsCount, const EIteratorType iteratorType) const { - return TPool::TCVIterator(*this, foldsCount, iteratorType); -} - -TPool TPool::InjurePool(const double injureFactor, const double injureOffset) const { - TPool injuredPool(*this); - - for (TInstance& instance : injuredPool) { - for (double& feature : instance.Features) { - feature = feature * injureFactor + injureOffset; - } - instance.Goal = instance.Goal * injureFactor + injureOffset; - } - - return injuredPool; -} + while (featuresIn.ReadLine(featuresString)) { + this->push_back(TInstance::FromFeaturesString(featuresString)); + } +} + +TPool::TCVIterator TPool::CrossValidationIterator(const size_t foldsCount, const EIteratorType iteratorType) const { + return TPool::TCVIterator(*this, foldsCount, iteratorType); +} + +TPool TPool::InjurePool(const double injureFactor, const double injureOffset) const { + TPool injuredPool(*this); + + for (TInstance& instance : injuredPool) { + for (double& feature : instance.Features) { + feature = feature * injureFactor + injureOffset; + } + instance.Goal = instance.Goal * injureFactor + injureOffset; + } + + return injuredPool; +} |