diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /util/stream/tokenizer.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'util/stream/tokenizer.h')
-rw-r--r-- | util/stream/tokenizer.h | 276 |
1 files changed, 138 insertions, 138 deletions
diff --git a/util/stream/tokenizer.h b/util/stream/tokenizer.h index b2398efdd1..a0ec9c3cb2 100644 --- a/util/stream/tokenizer.h +++ b/util/stream/tokenizer.h @@ -1,17 +1,17 @@ #pragma once - -#include "input.h" - + +#include "input.h" + #include <util/generic/buffer.h> #include <util/generic/mem_copy.h> #include <util/generic/strbuf.h> #include <util/system/compiler.h> -#include <util/system/yassert.h> - +#include <util/system/yassert.h> + /** * @addtogroup Streams * @{ - */ + */ /** * Simple stream tokenizer. Splits the stream into tokens that are available @@ -21,147 +21,147 @@ * @see TEol */ template <typename TEndOfToken> -class TStreamTokenizer { +class TStreamTokenizer { public: - class TIterator { - public: + class TIterator { + public: inline TIterator(TStreamTokenizer* const parent) - : Parent_(parent) - , AtEnd_(!Parent_->Next(Data_, Len_)) - { - } - + : Parent_(parent) + , AtEnd_(!Parent_->Next(Data_, Len_)) + { + } + inline TIterator() noexcept - : Parent_(nullptr) - , Data_(nullptr) - , Len_(0) - , AtEnd_(true) - { - } - + : Parent_(nullptr) + , Data_(nullptr) + , Len_(0) + , AtEnd_(true) + { + } + inline ~TIterator() = default; - - inline void operator++() { - Next(); - } - + + inline void operator++() { + Next(); + } + inline bool operator==(const TIterator& l) const noexcept { - return AtEnd_ == l.AtEnd_; - } - + return AtEnd_ == l.AtEnd_; + } + inline bool operator!=(const TIterator& l) const noexcept { - return !(*this == l); - } - + return !(*this == l); + } + /** * @return Return null-terminated character array with current token. * The pointer may be invalid after iterator increment. */ inline const char* Data() const noexcept { Y_ASSERT(!AtEnd_); - - return Data_; - } - + + return Data_; + } + /** * @return Length of current token. */ inline size_t Length() const noexcept { Y_ASSERT(!AtEnd_); - - return Len_; - } - + + return Len_; + } + inline TIterator* operator->() noexcept { - return this; - } - + return this; + } + inline TStringBuf operator*() noexcept { return TStringBuf{Data_, Len_}; - } - - private: - inline void Next() { + } + + private: + inline void Next() { Y_ASSERT(Parent_); - - AtEnd_ = !Parent_->Next(Data_, Len_); - } - - private: + + AtEnd_ = !Parent_->Next(Data_, Len_); + } + + private: TStreamTokenizer* const Parent_; - char* Data_; - size_t Len_; - bool AtEnd_; - }; - + char* Data_; + size_t Len_; + bool AtEnd_; + }; + inline TStreamTokenizer(IInputStream* const input, const TEndOfToken& eot = TEndOfToken(), const size_t initial = 1024) - : Input_(input) + : Input_(input) , Buf_(initial) - , Cur_(BufBegin()) - , End_(BufBegin()) - , Eot_(eot) - { - CheckBuf(); - } - - inline bool Next(char*& buf, size_t& len) { - char* it = Cur_; - - while (true) { - do { - while (it != End_) { - if (Eot_(*it)) { + , Cur_(BufBegin()) + , End_(BufBegin()) + , Eot_(eot) + { + CheckBuf(); + } + + inline bool Next(char*& buf, size_t& len) { + char* it = Cur_; + + while (true) { + do { + while (it != End_) { + if (Eot_(*it)) { *it = '\0'; - - buf = Cur_; - len = it - Cur_; - Cur_ = it + 1; - - return true; - } else { - ++it; - } - } - - if (Fill() == 0 && End_ != BufEnd()) { + + buf = Cur_; + len = it - Cur_; + Cur_ = it + 1; + + return true; + } else { + ++it; + } + } + + if (Fill() == 0 && End_ != BufEnd()) { *it = '\0'; - - buf = Cur_; - len = it - Cur_; - Cur_ = End_; - - return len; - } - } while (it != BufEnd()); - + + buf = Cur_; + len = it - Cur_; + Cur_ = End_; + + return len; + } + } while (it != BufEnd()); + Y_ASSERT(it == BufEnd()); Y_ASSERT(End_ == BufEnd()); - - const size_t blen = End_ - Cur_; - if (Cur_ == BufBegin()) { + + const size_t blen = End_ - Cur_; + if (Cur_ == BufBegin()) { Y_ASSERT(blen == Buf_.Capacity()); - - /* + + /* * do reallocate */ - + Buf_.Reserve(Buf_.Capacity() * 4); - CheckBuf(); - } else { - /* + CheckBuf(); + } else { + /* * do move */ - + MemMove(BufBegin(), Cur_, blen); - } - - Cur_ = BufBegin(); - End_ = Cur_ + blen; - it = End_; - } - } - + } + + Cur_ = BufBegin(); + End_ = Cur_ + blen; + it = End_; + } + } + inline TIterator begin() { return TIterator{this}; } @@ -170,45 +170,45 @@ public: return {}; } -private: - inline size_t Fill() { - const size_t avail = BufEnd() - End_; +private: + inline size_t Fill() { + const size_t avail = BufEnd() - End_; const size_t bytesRead = Input_->Read(End_, avail); - + End_ += bytesRead; - + return bytesRead; - } - + } + inline char* BufBegin() noexcept { return Buf_.Data(); - } - + } + inline char* BufEnd() noexcept { return Buf_.Data() + Buf_.Capacity(); - } - - inline void CheckBuf() const { + } + + inline void CheckBuf() const { if (!Buf_.Data()) { throw std::bad_alloc(); - } - } - -private: + } + } + +private: IInputStream* const Input_; TBuffer Buf_; - char* Cur_; - char* End_; - TEndOfToken Eot_; -}; - + char* Cur_; + char* End_; + TEndOfToken Eot_; +}; + /** * Predicate for `TStreamTokenizer` that uses '\\n' as a delimiter. - */ -struct TEol { + */ +struct TEol { inline bool operator()(char ch) const noexcept { - return ch == '\n'; - } -}; - + return ch == '\n'; + } +}; + /** @} */ |