diff options
author | PashaSmirnov <[email protected]> | 2022-02-10 16:50:38 +0300 |
---|---|---|
committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:50:38 +0300 |
commit | fdec3824b69496e472972311be8d8157a8bb99db (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers/comptrie/comptrie_ut.cpp | |
parent | e440fc473cc191fe94becca370781e148709007a (diff) |
Restoring authorship annotation for PashaSmirnov <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/comptrie_ut.cpp')
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_ut.cpp | 460 |
1 files changed, 230 insertions, 230 deletions
diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 5b7b33e8c43..74bee09b5d6 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -15,7 +15,7 @@ #include <util/folder/dirut.h> #include <util/random/random.h> -#include <util/random/fast.h> +#include <util/random/fast.h> #include <util/string/hex.h> #include <util/string/cast.h> @@ -24,7 +24,7 @@ #include "set.h" #include "first_symbol_iterator.h" #include "search_iterator.h" -#include "pattern_searcher.h" +#include "pattern_searcher.h" #include <array> #include <iterator> @@ -109,10 +109,10 @@ private: UNIT_TEST(TestBuilderFindLongestPrefix); UNIT_TEST(TestBuilderFindLongestPrefixWithEmptyValue); - UNIT_TEST(TestPatternSearcherEmpty); - UNIT_TEST(TestPatternSearcherSimple); - UNIT_TEST(TestPatternSearcherRandom); - + UNIT_TEST(TestPatternSearcherEmpty); + UNIT_TEST(TestPatternSearcherSimple); + UNIT_TEST(TestPatternSearcherRandom); + UNIT_TEST_SUITE_END(); static const char* SampleData[]; @@ -246,21 +246,21 @@ public: void TestBuilderFindLongestPrefix(); void TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey); void TestBuilderFindLongestPrefixWithEmptyValue(); - - void TestPatternSearcherOnDataset( - const TVector<TString>& patterns, - const TVector<TString>& samples - ); - void TestPatternSearcherEmpty(); - void TestPatternSearcherSimple(); - void TestPatternSearcherRandom( - size_t patternsNum, - size_t patternMaxLength, - size_t strMaxLength, - int maxChar, - TFastRng<ui64>& rng - ); - void TestPatternSearcherRandom(); + + void TestPatternSearcherOnDataset( + const TVector<TString>& patterns, + const TVector<TString>& samples + ); + void TestPatternSearcherEmpty(); + void TestPatternSearcherSimple(); + void TestPatternSearcherRandom( + size_t patternsNum, + size_t patternMaxLength, + size_t strMaxLength, + int maxChar, + TFastRng<ui64>& rng + ); + void TestPatternSearcherRandom(); }; UNIT_TEST_SUITE_REGISTRATION(TCompactTrieTest); @@ -1580,212 +1580,212 @@ void TCompactTrieTest::TestBuilderFindLongestPrefixWithEmptyValue() { TBufferOutput buffer; builder.Save(buffer); } - -void TCompactTrieTest::TestPatternSearcherEmpty() { - TCompactPatternSearcherBuilder<char, ui32> builder; - - TBufferOutput searcherData; - builder.Save(searcherData); - - TCompactPatternSearcher<char, ui32> searcher( - searcherData.Buffer().Data(), - searcherData.Buffer().Size() - ); - - UNIT_ASSERT(searcher.SearchMatches("a").empty()); - UNIT_ASSERT(searcher.SearchMatches("").empty()); - UNIT_ASSERT(searcher.SearchMatches("abc").empty()); -} - -void TCompactTrieTest::TestPatternSearcherOnDataset( - const TVector<TString>& patterns, - const TVector<TString>& samples -) { - TCompactPatternSearcherBuilder<char, ui32> builder; - - for (size_t patternIdx = 0; patternIdx < patterns.size(); ++patternIdx) { - builder.Add(patterns[patternIdx], patternIdx); - } - - TBufferOutput searcherData; - builder.Save(searcherData); - - TCompactPatternSearcher<char, ui32> searcher( - searcherData.Buffer().Data(), - searcherData.Buffer().Size() - ); - - for (const auto& sample : samples) { - const auto matches = searcher.SearchMatches(sample); - - size_t matchesNum = 0; - THashSet<TString> processedPatterns; - for (const auto& pattern : patterns) { - if (pattern.Empty() || processedPatterns.contains(pattern)) { - continue; - } - for (size_t start = 0; start + pattern.Size() <= sample.Size(); ++start) { - matchesNum += (pattern == sample.substr(start, pattern.Size())); - } - processedPatterns.insert(pattern); - } - UNIT_ASSERT_VALUES_EQUAL(matchesNum, matches.size()); - - - TSet<std::pair<size_t, ui32>> foundMatches; - for (const auto& match : matches) { - std::pair<size_t, ui32> matchParams(match.End, match.Data); - UNIT_ASSERT(!foundMatches.contains(matchParams)); - foundMatches.insert(matchParams); - - const auto& pattern = patterns[match.Data]; - UNIT_ASSERT_VALUES_EQUAL( - sample.substr(match.End - pattern.size() + 1, pattern.size()), - pattern - ); - } - } -} - -void TCompactTrieTest::TestPatternSearcherSimple() { - TestPatternSearcherOnDataset( - { // patterns - "abcd", - "abc", - "ab", - "a", - "" - }, - { // samples - "abcde", - "abcd", - "abc", - "ab", - "a", - "" - } - ); - TestPatternSearcherOnDataset( - { // patterns - "a" - "ab", - "abcd", - }, - { // samples - "abcde", - "abcd", - "abc", - "ab", - "a", - "" - } - ); - TestPatternSearcherOnDataset( - { // patterns - "aaaa", - "aaa", - "aa", - "a", - }, - { // samples - "aaaaaaaaaaaa" - } - ); - TestPatternSearcherOnDataset( - { // patterns - "aa", "ab", "ac", "ad", "ae", "af", - "ba", "bb", "bc", "bd", "be", "bf", - "ca", "cb", "cc", "cd", "ce", "cf", - "da", "db", "dc", "dd", "de", "df", - "ea", "eb", "ec", "ed", "ee", "ef", - "fa", "fb", "fc", "fd", "fe", "ff" - }, - { // samples - "dcabafeebfdcbacddacadbaabecdbaeffecdbfabcdcabcfaefecdfebacfedacefbdcacfeb", - "abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefancdefancdef", - "fedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcba", - "", - "a", "b", "c", "d", "e", "f", - "aa", "ab", "ac", "ad", "ae", "af", - "ba", "bb", "bc", "bd", "be", "bf", - "ca", "cb", "cc", "cd", "ce", "cf", - "da", "db", "dc", "dd", "de", "df", - "ea", "eb", "ec", "ed", "ee", "ef", - "fa", "fb", "fc", "fd", "fe", "ff" - } - ); -} - -static char RandChar( - TFastRng<ui64>& rng, - int maxChar -) { - return static_cast<char>(rng.GenRand() % (maxChar + 1)); -} - -static TString RandStr( - TFastRng<ui64>& rng, - size_t maxLength, - int maxChar, - bool nonEmpty = false -) { - Y_ASSERT(maxLength > 0); - - size_t length; - if (nonEmpty) { - length = rng.GenRand() % maxLength + 1; - } else { - length = rng.GenRand() % (maxLength + 1); - } - - TString result; - while (result.size() < length) { - result += RandChar(rng, maxChar); - } - - return result; -} - -void TCompactTrieTest::TestPatternSearcherRandom( - size_t patternsNum, - size_t patternMaxLength, - size_t strMaxLength, - int maxChar, - TFastRng<ui64>& rng -) { - auto patternToSearch = RandStr(rng, patternMaxLength, maxChar, /*nonEmpty*/true); - - TVector<TString> patterns = {patternToSearch}; - while (patterns.size() < patternsNum) { - patterns.push_back(RandStr(rng, patternMaxLength, maxChar, /*nonEmpty*/true)); - } - - auto filler = RandStr(rng, strMaxLength - patternToSearch.Size() + 1, maxChar); - size_t leftFillerSize = rng.GenRand() % (filler.size() + 1); - auto leftFiller = filler.substr(0, leftFillerSize); - auto rightFiller = filler.substr(leftFillerSize, filler.size() - leftFillerSize); - auto sample = leftFiller + patternToSearch + rightFiller; - - TestPatternSearcherOnDataset(patterns, {sample}); -} - -void TCompactTrieTest::TestPatternSearcherRandom() { - TFastRng<ui64> rng(0); - for (size_t patternMaxLen : {1, 2, 10}) { - for (size_t strMaxLen : TVector<size_t>{patternMaxLen, 2 * patternMaxLen, 10}) { - for (int maxChar : {0, 1, 5, 255}) { - for (size_t patternsNum : {1, 10}) { - for (size_t testIdx = 0; testIdx < 3; ++testIdx) { - TestPatternSearcherRandom( - patternsNum, - patternMaxLen, - strMaxLen, - maxChar, - rng - ); - } - } - } - } - } -} + +void TCompactTrieTest::TestPatternSearcherEmpty() { + TCompactPatternSearcherBuilder<char, ui32> builder; + + TBufferOutput searcherData; + builder.Save(searcherData); + + TCompactPatternSearcher<char, ui32> searcher( + searcherData.Buffer().Data(), + searcherData.Buffer().Size() + ); + + UNIT_ASSERT(searcher.SearchMatches("a").empty()); + UNIT_ASSERT(searcher.SearchMatches("").empty()); + UNIT_ASSERT(searcher.SearchMatches("abc").empty()); +} + +void TCompactTrieTest::TestPatternSearcherOnDataset( + const TVector<TString>& patterns, + const TVector<TString>& samples +) { + TCompactPatternSearcherBuilder<char, ui32> builder; + + for (size_t patternIdx = 0; patternIdx < patterns.size(); ++patternIdx) { + builder.Add(patterns[patternIdx], patternIdx); + } + + TBufferOutput searcherData; + builder.Save(searcherData); + + TCompactPatternSearcher<char, ui32> searcher( + searcherData.Buffer().Data(), + searcherData.Buffer().Size() + ); + + for (const auto& sample : samples) { + const auto matches = searcher.SearchMatches(sample); + + size_t matchesNum = 0; + THashSet<TString> processedPatterns; + for (const auto& pattern : patterns) { + if (pattern.Empty() || processedPatterns.contains(pattern)) { + continue; + } + for (size_t start = 0; start + pattern.Size() <= sample.Size(); ++start) { + matchesNum += (pattern == sample.substr(start, pattern.Size())); + } + processedPatterns.insert(pattern); + } + UNIT_ASSERT_VALUES_EQUAL(matchesNum, matches.size()); + + + TSet<std::pair<size_t, ui32>> foundMatches; + for (const auto& match : matches) { + std::pair<size_t, ui32> matchParams(match.End, match.Data); + UNIT_ASSERT(!foundMatches.contains(matchParams)); + foundMatches.insert(matchParams); + + const auto& pattern = patterns[match.Data]; + UNIT_ASSERT_VALUES_EQUAL( + sample.substr(match.End - pattern.size() + 1, pattern.size()), + pattern + ); + } + } +} + +void TCompactTrieTest::TestPatternSearcherSimple() { + TestPatternSearcherOnDataset( + { // patterns + "abcd", + "abc", + "ab", + "a", + "" + }, + { // samples + "abcde", + "abcd", + "abc", + "ab", + "a", + "" + } + ); + TestPatternSearcherOnDataset( + { // patterns + "a" + "ab", + "abcd", + }, + { // samples + "abcde", + "abcd", + "abc", + "ab", + "a", + "" + } + ); + TestPatternSearcherOnDataset( + { // patterns + "aaaa", + "aaa", + "aa", + "a", + }, + { // samples + "aaaaaaaaaaaa" + } + ); + TestPatternSearcherOnDataset( + { // patterns + "aa", "ab", "ac", "ad", "ae", "af", + "ba", "bb", "bc", "bd", "be", "bf", + "ca", "cb", "cc", "cd", "ce", "cf", + "da", "db", "dc", "dd", "de", "df", + "ea", "eb", "ec", "ed", "ee", "ef", + "fa", "fb", "fc", "fd", "fe", "ff" + }, + { // samples + "dcabafeebfdcbacddacadbaabecdbaeffecdbfabcdcabcfaefecdfebacfedacefbdcacfeb", + "abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefancdefancdef", + "fedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcba", + "", + "a", "b", "c", "d", "e", "f", + "aa", "ab", "ac", "ad", "ae", "af", + "ba", "bb", "bc", "bd", "be", "bf", + "ca", "cb", "cc", "cd", "ce", "cf", + "da", "db", "dc", "dd", "de", "df", + "ea", "eb", "ec", "ed", "ee", "ef", + "fa", "fb", "fc", "fd", "fe", "ff" + } + ); +} + +static char RandChar( + TFastRng<ui64>& rng, + int maxChar +) { + return static_cast<char>(rng.GenRand() % (maxChar + 1)); +} + +static TString RandStr( + TFastRng<ui64>& rng, + size_t maxLength, + int maxChar, + bool nonEmpty = false +) { + Y_ASSERT(maxLength > 0); + + size_t length; + if (nonEmpty) { + length = rng.GenRand() % maxLength + 1; + } else { + length = rng.GenRand() % (maxLength + 1); + } + + TString result; + while (result.size() < length) { + result += RandChar(rng, maxChar); + } + + return result; +} + +void TCompactTrieTest::TestPatternSearcherRandom( + size_t patternsNum, + size_t patternMaxLength, + size_t strMaxLength, + int maxChar, + TFastRng<ui64>& rng +) { + auto patternToSearch = RandStr(rng, patternMaxLength, maxChar, /*nonEmpty*/true); + + TVector<TString> patterns = {patternToSearch}; + while (patterns.size() < patternsNum) { + patterns.push_back(RandStr(rng, patternMaxLength, maxChar, /*nonEmpty*/true)); + } + + auto filler = RandStr(rng, strMaxLength - patternToSearch.Size() + 1, maxChar); + size_t leftFillerSize = rng.GenRand() % (filler.size() + 1); + auto leftFiller = filler.substr(0, leftFillerSize); + auto rightFiller = filler.substr(leftFillerSize, filler.size() - leftFillerSize); + auto sample = leftFiller + patternToSearch + rightFiller; + + TestPatternSearcherOnDataset(patterns, {sample}); +} + +void TCompactTrieTest::TestPatternSearcherRandom() { + TFastRng<ui64> rng(0); + for (size_t patternMaxLen : {1, 2, 10}) { + for (size_t strMaxLen : TVector<size_t>{patternMaxLen, 2 * patternMaxLen, 10}) { + for (int maxChar : {0, 1, 5, 255}) { + for (size_t patternsNum : {1, 10}) { + for (size_t testIdx = 0; testIdx < 3; ++testIdx) { + TestPatternSearcherRandom( + patternsNum, + patternMaxLen, + strMaxLen, + maxChar, + rng + ); + } + } + } + } + } +} |