author     Devtools Arcadia <[email protected]>  2022-02-07 18:08:42 +0300
committer  Devtools Arcadia <[email protected]>  2022-02-07 18:08:42 +0300
commit     1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree       e26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/stream/tokenizer_ut.cpp
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/stream/tokenizer_ut.cpp')
-rw-r--r--   util/stream/tokenizer_ut.cpp   264
1 file changed, 264 insertions(+), 0 deletions(-)
diff --git a/util/stream/tokenizer_ut.cpp b/util/stream/tokenizer_ut.cpp
new file mode 100644
index 00000000000..afc566da86e
--- /dev/null
+++ b/util/stream/tokenizer_ut.cpp
@@ -0,0 +1,264 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/array_size.h>
+#include <util/generic/strbuf.h>
+
+#include "mem.h"
+#include "null.h"
+#include "tokenizer.h"
+
+static inline void CheckIfNullTerminated(const TStringBuf str) {
+    UNIT_ASSERT_VALUES_EQUAL('\0', *(str.data() + str.size()));
+}
+
+Y_UNIT_TEST_SUITE(TStreamTokenizerTests) {
+    Y_UNIT_TEST(EmptyStreamTest) {
+        auto&& input = TNullInput{};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(0, tokensCount);
+    }
+
+    Y_UNIT_TEST(EmptyTokensTest) {
+        const char data[] = "\n\n";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
+            UNIT_ASSERT_VALUES_EQUAL(0, it->Length());
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(2, tokensCount);
+    }
+
+    Y_UNIT_TEST(LastTokenendDoesntSatisfyPredicateTest) {
+        const char data[] = "abc\ndef\nxxxxxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            UNIT_ASSERT(tokensCount < tokensSize);
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(FirstTokenIsEmptyTest) {
+        const char data[] = "\ndef\nxxxxxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf(), TStringBuf("def"), TStringBuf("xxxxxx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            UNIT_ASSERT(tokensCount < tokensSize);
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(PredicateDoesntMatch) {
+        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(data, token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
+    }
+
+    Y_UNIT_TEST(SimpleTest) {
+        const char data[] = "qwerty\n1234567890\n";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("qwerty"), TStringBuf("1234567890")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            UNIT_ASSERT(tokensCount < tokensSize);
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(CustomPredicateTest) {
+        struct TIsVerticalBar {
+            inline bool operator()(const char ch) const noexcept {
+                return '|' == ch;
+            }
+        };
+
+        const char data[] = "abc|def|xxxxxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TIsVerticalBar>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            UNIT_ASSERT(tokensCount < tokensSize);
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(CustomPredicateSecondTest) {
+        struct TIsVerticalBar {
+            inline bool operator()(const char ch) const noexcept {
+                return '|' == ch || ',' == ch;
+            }
+        };
+
+        const char data[] = "abc|def|xxxxxx,abc|def|xxxxxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx"),
+                                     TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TIsVerticalBar>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            UNIT_ASSERT(tokensCount < tokensSize);
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(FalsePredicateTest) {
+        struct TAlwaysFalse {
+            inline bool operator()(const char) const noexcept {
+                return false;
+            }
+        };
+
+        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TAlwaysFalse>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(data, token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
+    }
+
+    Y_UNIT_TEST(TruePredicateTest) {
+        struct TAlwaysTrue {
+            inline bool operator()(const char) const noexcept {
+                return true;
+            }
+        };
+
+        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TAlwaysTrue>{&input};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
+            UNIT_ASSERT_VALUES_EQUAL(0, it->Length());
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(dataSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(FirstTokenHasSizeOfTheBufferTest) {
+        const char data[] = "xxxxx\nxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, tokens[0].size()};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(OnlyTokenHasSizeOfTheBufferTest) {
+        const char data[] = "xxxxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, dataSize};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(data, token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
+    }
+
+    Y_UNIT_TEST(BufferSizeInitialSizeSmallerThanTokenTest) {
+        const char data[] = "xxxxx\nxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, 1};
+        auto tokensCount = size_t{};
+        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
+            const auto token = TStringBuf{it->Data(), it->Length()};
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+
+    Y_UNIT_TEST(RangeBasedForTest) {
+        const char data[] = "abc\ndef\nxxxxxx";
+        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
+        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
+        const auto tokensSize = Y_ARRAY_SIZE(tokens);
+        auto&& input = TMemoryInput{data, dataSize};
+        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
+        auto tokensCount = size_t{};
+        for (const auto& token : tokenizer) {
+            UNIT_ASSERT(tokensCount < tokensSize);
+            CheckIfNullTerminated(token);
+            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
+            ++tokensCount;
+        }
+        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
+    }
+}
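
For context, a minimal usage sketch of the API these tests exercise. TStreamTokenizer, TEol, and TMemoryInput appear verbatim in the diff above; the absolute include paths under util/stream/ and the use of Cout/Endl from util/stream/output.h for printing are assumptions, not part of this commit.

#include <util/stream/mem.h>
#include <util/stream/output.h>
#include <util/stream/tokenizer.h>

#include <util/generic/strbuf.h>

// Split a '\n'-delimited buffer the same way RangeBasedForTest does.
// Each yielded token exposes Data()/Length() and is null-terminated in the
// tokenizer's internal buffer (the property CheckIfNullTerminated asserts).
int main() {
    const char data[] = "abc\ndef\nxxxxxx";
    TMemoryInput input{data, sizeof(data) - 1};
    TStreamTokenizer<TEol> tokenizer{&input};
    for (const auto& token : tokenizer) {
        const TStringBuf buf{token.Data(), token.Length()};
        Cout << buf << Endl; // prints "abc", "def", "xxxxxx"; delimiters excluded
    }
    return 0;
}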