about | summary | refs | log | tree | commit | diff | stats
path: root/util/stream/tokenizer.h
diff options
context:
space:
mode:
author Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:15 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:15 +0300
commit 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
tree da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /util/stream/tokenizer.h
parent 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
download ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'util/stream/tokenizer.h')
-rw-r--r-- util/stream/tokenizer.h 276
1 files changed, 138 insertions, 138 deletions
diff --git a/util/stream/tokenizer.h b/util/stream/tokenizer.h
index b2398efdd1..a0ec9c3cb2 100644
--- a/util/stream/tokenizer.h
+++ b/util/stream/tokenizer.h
@@ -1,17 +1,17 @@
#pragma once
-
-#include "input.h"
-
+
+#include "input.h"
+
#include <util/generic/buffer.h>
#include <util/generic/mem_copy.h>
#include <util/generic/strbuf.h>
#include <util/system/compiler.h>
-#include <util/system/yassert.h>
-
+#include <util/system/yassert.h>
+
/**
* @addtogroup Streams
* @{
- */
+ */
/**
* Simple stream tokenizer. Splits the stream into tokens that are available
@@ -21,147 +21,147 @@
* @see TEol
*/
template <typename TEndOfToken>
-class TStreamTokenizer {
+class TStreamTokenizer {
public:
- class TIterator {
- public:
+ class TIterator {
+ public:
inline TIterator(TStreamTokenizer* const parent)
- : Parent_(parent)
- , AtEnd_(!Parent_->Next(Data_, Len_))
- {
- }
-
+ : Parent_(parent)
+ , AtEnd_(!Parent_->Next(Data_, Len_))
+ {
+ }
+
inline TIterator() noexcept
- : Parent_(nullptr)
- , Data_(nullptr)
- , Len_(0)
- , AtEnd_(true)
- {
- }
-
+ : Parent_(nullptr)
+ , Data_(nullptr)
+ , Len_(0)
+ , AtEnd_(true)
+ {
+ }
+
inline ~TIterator() = default;
-
- inline void operator++() {
- Next();
- }
-
+
+ inline void operator++() {
+ Next();
+ }
+
inline bool operator==(const TIterator& l) const noexcept {
- return AtEnd_ == l.AtEnd_;
- }
-
+ return AtEnd_ == l.AtEnd_;
+ }
+
inline bool operator!=(const TIterator& l) const noexcept {
- return !(*this == l);
- }
-
+ return !(*this == l);
+ }
+
/**
* @return Return null-terminated character array with current token.
* The pointer may be invalid after iterator increment.
*/
inline const char* Data() const noexcept {
Y_ASSERT(!AtEnd_);
-
- return Data_;
- }
-
+
+ return Data_;
+ }
+
/**
* @return Length of current token.
*/
inline size_t Length() const noexcept {
Y_ASSERT(!AtEnd_);
-
- return Len_;
- }
-
+
+ return Len_;
+ }
+
inline TIterator* operator->() noexcept {
- return this;
- }
-
+ return this;
+ }
+
inline TStringBuf operator*() noexcept {
return TStringBuf{Data_, Len_};
- }
-
- private:
- inline void Next() {
+ }
+
+ private:
+ inline void Next() {
Y_ASSERT(Parent_);
-
- AtEnd_ = !Parent_->Next(Data_, Len_);
- }
-
- private:
+
+ AtEnd_ = !Parent_->Next(Data_, Len_);
+ }
+
+ private:
TStreamTokenizer* const Parent_;
- char* Data_;
- size_t Len_;
- bool AtEnd_;
- };
-
+ char* Data_;
+ size_t Len_;
+ bool AtEnd_;
+ };
+
inline TStreamTokenizer(IInputStream* const input, const TEndOfToken& eot = TEndOfToken(),
const size_t initial = 1024)
- : Input_(input)
+ : Input_(input)
, Buf_(initial)
- , Cur_(BufBegin())
- , End_(BufBegin())
- , Eot_(eot)
- {
- CheckBuf();
- }
-
- inline bool Next(char*& buf, size_t& len) {
- char* it = Cur_;
-
- while (true) {
- do {
- while (it != End_) {
- if (Eot_(*it)) {
+ , Cur_(BufBegin())
+ , End_(BufBegin())
+ , Eot_(eot)
+ {
+ CheckBuf();
+ }
+
+ inline bool Next(char*& buf, size_t& len) {
+ char* it = Cur_;
+
+ while (true) {
+ do {
+ while (it != End_) {
+ if (Eot_(*it)) {
*it = '\0';
-
- buf = Cur_;
- len = it - Cur_;
- Cur_ = it + 1;
-
- return true;
- } else {
- ++it;
- }
- }
-
- if (Fill() == 0 && End_ != BufEnd()) {
+
+ buf = Cur_;
+ len = it - Cur_;
+ Cur_ = it + 1;
+
+ return true;
+ } else {
+ ++it;
+ }
+ }
+
+ if (Fill() == 0 && End_ != BufEnd()) {
*it = '\0';
-
- buf = Cur_;
- len = it - Cur_;
- Cur_ = End_;
-
- return len;
- }
- } while (it != BufEnd());
-
+
+ buf = Cur_;
+ len = it - Cur_;
+ Cur_ = End_;
+
+ return len;
+ }
+ } while (it != BufEnd());
+
Y_ASSERT(it == BufEnd());
Y_ASSERT(End_ == BufEnd());
-
- const size_t blen = End_ - Cur_;
- if (Cur_ == BufBegin()) {
+
+ const size_t blen = End_ - Cur_;
+ if (Cur_ == BufBegin()) {
Y_ASSERT(blen == Buf_.Capacity());
-
- /*
+
+ /*
* do reallocate
*/
-
+
Buf_.Reserve(Buf_.Capacity() * 4);
- CheckBuf();
- } else {
- /*
+ CheckBuf();
+ } else {
+ /*
* do move
*/
-
+
MemMove(BufBegin(), Cur_, blen);
- }
-
- Cur_ = BufBegin();
- End_ = Cur_ + blen;
- it = End_;
- }
- }
-
+ }
+
+ Cur_ = BufBegin();
+ End_ = Cur_ + blen;
+ it = End_;
+ }
+ }
+
inline TIterator begin() {
return TIterator{this};
}
@@ -170,45 +170,45 @@ public:
return {};
}
-private:
- inline size_t Fill() {
- const size_t avail = BufEnd() - End_;
+private:
+ inline size_t Fill() {
+ const size_t avail = BufEnd() - End_;
const size_t bytesRead = Input_->Read(End_, avail);
-
+
End_ += bytesRead;
-
+
return bytesRead;
- }
-
+ }
+
inline char* BufBegin() noexcept {
return Buf_.Data();
- }
-
+ }
+
inline char* BufEnd() noexcept {
return Buf_.Data() + Buf_.Capacity();
- }
-
- inline void CheckBuf() const {
+ }
+
+ inline void CheckBuf() const {
if (!Buf_.Data()) {
throw std::bad_alloc();
- }
- }
-
-private:
+ }
+ }
+
+private:
IInputStream* const Input_;
TBuffer Buf_;
- char* Cur_;
- char* End_;
- TEndOfToken Eot_;
-};
-
+ char* Cur_;
+ char* End_;
+ TEndOfToken Eot_;
+};
+
/**
* Predicate for `TStreamTokenizer` that uses '\\n' as a delimiter.
- */
-struct TEol {
+ */
+struct TEol {
inline bool operator()(char ch) const noexcept {
- return ch == '\n';
- }
-};
-
+ return ch == '\n';
+ }
+};
+
/** @} */