#include <library/cpp/testing/unittest/registar.h>

#include <util/generic/array_size.h>
#include <util/generic/strbuf.h>

#include "mem.h"
#include "null.h"
#include "tokenizer.h"

static inline void CheckIfNullTerminated(const TStringBuf str) {
    UNIT_ASSERT_VALUES_EQUAL('\0', *(str.data() + str.size()));
}

Y_UNIT_TEST_SUITE(TStreamTokenizerTests) {
    Y_UNIT_TEST(EmptyStreamTest) {
        auto&& input = TNullInput{};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(0, tokensCount);
    }

    Y_UNIT_TEST(EmptyTokensTest) {
        const char data[] = "\n\n";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
            UNIT_ASSERT_VALUES_EQUAL(0, it->Length());
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(2, tokensCount);
    }

    Y_UNIT_TEST(LastTokenendDoesntSatisfyPredicateTest) {
        const char data[] = "abc\ndef\nxxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(FirstTokenIsEmptyTest) {
        const char data[] = "\ndef\nxxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf(), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(PredicateDoesntMatch) {
        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(data, token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
    }

    Y_UNIT_TEST(SimpleTest) {
        const char data[] = "qwerty\n1234567890\n";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("qwerty"), TStringBuf("1234567890")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(CustomPredicateTest) {
        struct TIsVerticalBar {
            inline bool operator()(const char ch) const noexcept {
                return '|' == ch;
            }
        };

        const char data[] = "abc|def|xxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TIsVerticalBar>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(CustomPredicateSecondTest) {
        struct TIsVerticalBar {
            inline bool operator()(const char ch) const noexcept {
                return '|' == ch || ',' == ch;
            }
        };

        const char data[] = "abc|def|xxxxxx,abc|def|xxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx"),
                                     TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TIsVerticalBar>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(FalsePredicateTest) {
        struct TAlwaysFalse {
            inline bool operator()(const char) const noexcept {
                return false;
            }
        };

        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TAlwaysFalse>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(data, token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
    }

    Y_UNIT_TEST(TruePredicateTest) {
        struct TAlwaysTrue {
            inline bool operator()(const char) const noexcept {
                return true;
            }
        };

        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TAlwaysTrue>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
            UNIT_ASSERT_VALUES_EQUAL(0, it->Length());
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(dataSize, tokensCount);
    }

    Y_UNIT_TEST(FirstTokenHasSizeOfTheBufferTest) {
        const char data[] = "xxxxx\nxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, tokens[0].size()};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(OnlyTokenHasSizeOfTheBufferTest) {
        const char data[] = "xxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, dataSize};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(data, token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
    }

    Y_UNIT_TEST(BufferSizeInitialSizeSmallerThanTokenTest) {
        const char data[] = "xxxxx\nxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, 1};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

    Y_UNIT_TEST(RangeBasedForTest) {
        const char data[] = "abc\ndef\nxxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (const auto& token : tokenizer) {
            UNIT_ASSERT(tokensCount < tokensSize);
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }
} // Y_UNIT_TEST_SUITE(TStreamTokenizerTests)