diff options
author | denplusplus <denplusplus@yandex-team.ru> | 2022-02-10 16:47:34 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:34 +0300 |
commit | addb3626ed629a8c7d9c8c30e87365b478a8c266 (patch) | |
tree | c0748b5dcbade83af788c0abfa89c0383d6b779c /library/cpp/deprecated | |
parent | 57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a (diff) | |
download | ydb-addb3626ed629a8c7d9c8c30e87365b478a8c266.tar.gz |
Restoring authorship annotation for <denplusplus@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/deprecated')
-rw-r--r-- | library/cpp/deprecated/kmp/kmp.cpp | 32 | ||||
-rw-r--r-- | library/cpp/deprecated/kmp/kmp.h | 172 | ||||
-rw-r--r-- | library/cpp/deprecated/kmp/kmp_ut.cpp | 118 | ||||
-rw-r--r-- | library/cpp/deprecated/mapped_file/mapped_file.cpp | 8 | ||||
-rw-r--r-- | library/cpp/deprecated/mapped_file/mapped_file.h | 16 | ||||
-rw-r--r-- | library/cpp/deprecated/split/split_iterator.cpp | 458 | ||||
-rw-r--r-- | library/cpp/deprecated/split/split_iterator.h | 324 | ||||
-rw-r--r-- | library/cpp/deprecated/split/split_iterator_ut.cpp | 130 |
8 files changed, 629 insertions, 629 deletions
diff --git a/library/cpp/deprecated/kmp/kmp.cpp b/library/cpp/deprecated/kmp/kmp.cpp index d9bb37fb5b..d02074c94a 100644 --- a/library/cpp/deprecated/kmp/kmp.cpp +++ b/library/cpp/deprecated/kmp/kmp.cpp @@ -1,21 +1,21 @@ #include "kmp.h" #include <util/generic/yexception.h> - -TKMPMatcher::TKMPMatcher(const char* patternBegin, const char* patternEnd) - : Pattern(patternBegin, patternEnd) -{ - ComputePrefixFunction(); -} - + +TKMPMatcher::TKMPMatcher(const char* patternBegin, const char* patternEnd) + : Pattern(patternBegin, patternEnd) +{ + ComputePrefixFunction(); +} + TKMPMatcher::TKMPMatcher(const TString& pattern) - : Pattern(pattern) -{ - ComputePrefixFunction(); -} - -void TKMPMatcher::ComputePrefixFunction() { - ssize_t* pf; + : Pattern(pattern) +{ + ComputePrefixFunction(); +} + +void TKMPMatcher::ComputePrefixFunction() { + ssize_t* pf; ::ComputePrefixFunction(Pattern.data(), Pattern.data() + Pattern.size(), &pf); - PrefixFunction.Reset(pf); -} + PrefixFunction.Reset(pf); +} diff --git a/library/cpp/deprecated/kmp/kmp.h b/library/cpp/deprecated/kmp/kmp.h index 71b554516d..a7f72eece6 100644 --- a/library/cpp/deprecated/kmp/kmp.h +++ b/library/cpp/deprecated/kmp/kmp.h @@ -1,108 +1,108 @@ #pragma once - + #include <util/generic/ptr.h> #include <util/generic/string.h> #include <util/generic/vector.h> #include <util/generic/yexception.h> - + template <typename T> -void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) { +void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) { Y_ENSURE(begin != end, TStringBuf("empty pattern")); - ssize_t len = end - begin; - TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]); - ssize_t i = 0; - ssize_t j = -1; - resultHolder[0] = -1; - while (i < len) { - while ((j >= 0) && (begin[j] != begin[i])) - j = resultHolder[j]; - ++i; - ++j; + ssize_t len = end - begin; + TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]); + ssize_t i = 0; + ssize_t j = -1; + resultHolder[0] = -1; + while (i < len) { + while ((j >= 0) && (begin[j] != begin[i])) + j = resultHolder[j]; + ++i; + ++j; Y_ASSERT(i >= 0); Y_ASSERT(j >= 0); Y_ASSERT(j < len); - if ((i < len) && (begin[i] == begin[j])) - resultHolder[i] = resultHolder[j]; - else - resultHolder[i] = j; - } - *result = resultHolder.Release(); -} - -class TKMPMatcher { -private: - TArrayHolder<ssize_t> PrefixFunction; + if ((i < len) && (begin[i] == begin[j])) + resultHolder[i] = resultHolder[j]; + else + resultHolder[i] = j; + } + *result = resultHolder.Release(); +} + +class TKMPMatcher { +private: + TArrayHolder<ssize_t> PrefixFunction; TString Pattern; - - void ComputePrefixFunction(); - -public: - TKMPMatcher(const char* patternBegin, const char* patternEnd); + + void ComputePrefixFunction(); + +public: + TKMPMatcher(const char* patternBegin, const char* patternEnd); TKMPMatcher(const TString& pattern); - - bool SubStr(const char* begin, const char* end, const char*& result) const { + + bool SubStr(const char* begin, const char* end, const char*& result) const { Y_ASSERT(begin <= end); ssize_t m = Pattern.size(); - ssize_t n = end - begin; - ssize_t i, j; - for (i = 0, j = 0; (i < n) && (j < m); ++i, ++j) { - while ((j >= 0) && (Pattern[j] != begin[i])) - j = PrefixFunction[j]; - } - if (j == m) { - result = begin + i - m; - return true; - } else { - return false; - } - } -}; - + ssize_t n = end - begin; + ssize_t i, j; + for (i = 0, j = 0; (i < n) && (j < m); ++i, ++j) { + while ((j >= 0) && (Pattern[j] != begin[i])) + j = PrefixFunction[j]; + } + if (j == m) { + result = begin + i - m; + return true; + } else { + return false; + } + } +}; + template <typename T> -class TKMPStreamMatcher { -public: - class ICallback { - public: - virtual void OnMatch(const T* begin, const T* end) = 0; +class TKMPStreamMatcher { +public: + class ICallback { + public: + virtual void OnMatch(const T* begin, const T* end) = 0; virtual ~ICallback() = default; - }; - -private: - ICallback* Callback; - TArrayHolder<ssize_t> PrefixFunction; + }; + +private: + ICallback* Callback; + TArrayHolder<ssize_t> PrefixFunction; using TTVector = TVector<T>; - TTVector Pattern; - ssize_t State; - TTVector Candidate; - -public: - TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback) - : Callback(callback) - , Pattern(patternBegin, patternEnd) - , State(0) + TTVector Pattern; + ssize_t State; + TTVector Candidate; + +public: + TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback) + : Callback(callback) + , Pattern(patternBegin, patternEnd) + , State(0) , Candidate(Pattern.size()) - { - ssize_t* pf; - ComputePrefixFunction(patternBegin, patternEnd, &pf); - PrefixFunction.Reset(pf); - } + { + ssize_t* pf; + ComputePrefixFunction(patternBegin, patternEnd, &pf); + PrefixFunction.Reset(pf); + } - void Push(const T& symbol) { - while ((State >= 0) && (Pattern[State] != symbol)) { + void Push(const T& symbol) { + while ((State >= 0) && (Pattern[State] != symbol)) { Y_ASSERT(State <= (ssize_t) Pattern.size()); - State = PrefixFunction[State]; + State = PrefixFunction[State]; Y_ASSERT(State <= (ssize_t) Pattern.size()); - } - if (State >= 0) - Candidate[State] = symbol; - ++State; + } + if (State >= 0) + Candidate[State] = symbol; + ++State; if (State == (ssize_t) Pattern.size()) { - Callback->OnMatch(Candidate.begin(), Candidate.end()); - State = 0; - } - } - - void Clear() { - State = 0; - } -}; + Callback->OnMatch(Candidate.begin(), Candidate.end()); + State = 0; + } + } + + void Clear() { + State = 0; + } +}; diff --git a/library/cpp/deprecated/kmp/kmp_ut.cpp b/library/cpp/deprecated/kmp/kmp_ut.cpp index 98a73a91e2..c2eda83c57 100644 --- a/library/cpp/deprecated/kmp/kmp_ut.cpp +++ b/library/cpp/deprecated/kmp/kmp_ut.cpp @@ -1,80 +1,80 @@ #include "kmp.h" #include <library/cpp/testing/unittest/registar.h> - + #include <util/stream/output.h> - + static TVector<int> FindAll(const TString& pattern, const TString& string) { TVector<int> result; - TKMPMatcher kmp(pattern); - const char* pResult; - const char* begin = string.begin(); - const char* end = string.end(); - while (kmp.SubStr(begin, end, pResult)) { + TKMPMatcher kmp(pattern); + const char* pResult; + const char* begin = string.begin(); + const char* end = string.end(); + while (kmp.SubStr(begin, end, pResult)) { result.push_back(int(pResult - string.data())); begin = pResult + pattern.size(); - } - return result; -} - + } + return result; +} + class TTestKMP: public TTestBase { - UNIT_TEST_SUITE(TTestKMP); + UNIT_TEST_SUITE(TTestKMP); UNIT_TEST(Test); UNIT_TEST(TestStream); - UNIT_TEST_SUITE_END(); + UNIT_TEST_SUITE_END(); -public: - void Test() { +public: + void Test() { TVector<int> ans = {0, 2}; - UNIT_ASSERT_EQUAL(FindAll("a", "aba"), ans); + UNIT_ASSERT_EQUAL(FindAll("a", "aba"), ans); ans = {0}; - UNIT_ASSERT_EQUAL(FindAll("aba", "aba"), ans); + UNIT_ASSERT_EQUAL(FindAll("aba", "aba"), ans); ans.clear(); - UNIT_ASSERT_EQUAL(FindAll("abad", "aba"), ans); + UNIT_ASSERT_EQUAL(FindAll("abad", "aba"), ans); ans = {0, 2}; - UNIT_ASSERT_EQUAL(FindAll("ab", "abab"), ans); - } - + UNIT_ASSERT_EQUAL(FindAll("ab", "abab"), ans); + } + class TKMPSimpleCallback: public TKMPStreamMatcher<int>::ICallback { - private: - int* Begin; - int* End; - int Count; + private: + int* Begin; + int* End; + int Count; + + public: + TKMPSimpleCallback(int* begin, int* end) + : Begin(begin) + , End(end) + , Count(0) + { + } - public: - TKMPSimpleCallback(int* begin, int* end) - : Begin(begin) - , End(end) - , Count(0) - { - } - void OnMatch(const int* begin, const int* end) override { - UNIT_ASSERT_EQUAL(end - begin, End - Begin); - const int* p0 = Begin; - const int* p1 = begin; - while (p0 < End) { - UNIT_ASSERT_EQUAL(*p0, *p1); - ++p0; - ++p1; - } - ++Count; - } - - int GetCount() const { - return Count; - } - }; - - void TestStream() { - int pattern[] = {2, 3}; - int data[] = {1, 2, 3, 5, 2, 2, 3, 2, 4, 3, 2}; - TKMPSimpleCallback callback(pattern, pattern + 2); - TKMPStreamMatcher<int> matcher(pattern, pattern + 2, &callback); + UNIT_ASSERT_EQUAL(end - begin, End - Begin); + const int* p0 = Begin; + const int* p1 = begin; + while (p0 < End) { + UNIT_ASSERT_EQUAL(*p0, *p1); + ++p0; + ++p1; + } + ++Count; + } + + int GetCount() const { + return Count; + } + }; + + void TestStream() { + int pattern[] = {2, 3}; + int data[] = {1, 2, 3, 5, 2, 2, 3, 2, 4, 3, 2}; + TKMPSimpleCallback callback(pattern, pattern + 2); + TKMPStreamMatcher<int> matcher(pattern, pattern + 2, &callback); for (auto& i : data) matcher.Push(i); - UNIT_ASSERT_EQUAL(2, callback.GetCount()); - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TTestKMP); + UNIT_ASSERT_EQUAL(2, callback.GetCount()); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TTestKMP); diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp index 6e72eef5c4..b0e4511299 100644 --- a/library/cpp/deprecated/mapped_file/mapped_file.cpp +++ b/library/cpp/deprecated/mapped_file/mapped_file.cpp @@ -42,15 +42,15 @@ void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode o newFile.swap(*this); newFile.term(); } - + void TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) { THolder<TFileMap> map(new TFileMap(file, om)); TMappedFile newFile(map.Get(), dbgName); Y_UNUSED(map.Release()); newFile.swap(*this); newFile.term(); -} - +} + void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) { THolder<TFileMap> map(new TFileMap(name, om)); TMappedFile newFile(map.Get(), name.data()); @@ -61,4 +61,4 @@ void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) { void TMappedFile::flush() { Map_->Flush(); -} +} diff --git a/library/cpp/deprecated/mapped_file/mapped_file.h b/library/cpp/deprecated/mapped_file/mapped_file.h index 9edd84672a..45859ed65a 100644 --- a/library/cpp/deprecated/mapped_file/mapped_file.h +++ b/library/cpp/deprecated/mapped_file/mapped_file.h @@ -17,7 +17,7 @@ class TMappedFile { private: TFileMap* Map_; - + private: TMappedFile(TFileMap* map, const char* dbgName); @@ -34,19 +34,19 @@ public: Map_ = nullptr; init(name, TFileMap::oRdOnly); } - + TMappedFile(const TFile& file, TFileMap::EOpenMode om = TFileMap::oRdOnly, const char* dbgName = "unknown"); void init(const TString& name); - + void init(const TString& name, TFileMap::EOpenMode om); void init(const TString& name, size_t length, TFileMap::EOpenMode om); - + void init(const TFile&, TFileMap::EOpenMode om = TFileMap::oRdOnly, const char* dbgName = "unknown"); - void flush(); - + void flush(); + void term() { if (Map_) { Map_->Unmap(); @@ -57,12 +57,12 @@ public: size_t getSize() const { return (Map_ ? Map_->MappedSize() : 0); - } + } void* getData(size_t pos = 0) const { Y_ASSERT(!Map_ || (pos <= getSize())); return (Map_ ? (void*)((unsigned char*)Map_->Ptr() + pos) : nullptr); - } + } void precharge(size_t pos = 0, size_t size = (size_t)-1) const; diff --git a/library/cpp/deprecated/split/split_iterator.cpp b/library/cpp/deprecated/split/split_iterator.cpp index 88790475a6..32262d25bd 100644 --- a/library/cpp/deprecated/split/split_iterator.cpp +++ b/library/cpp/deprecated/split/split_iterator.cpp @@ -1,112 +1,112 @@ #include "split_iterator.h" #include <util/system/yassert.h> - + #include <cctype> #include <cstring> #include <cstdlib> -/****************** TSplitDelimiters2 ******************/ - +/****************** TSplitDelimiters2 ******************/ + TSplitDelimiters::TSplitDelimiters(const char* s) { - memset(Delims, 0, sizeof(Delims)); - while (*s) + memset(Delims, 0, sizeof(Delims)); + while (*s) Delims[(ui8) * (s++)] = true; -} - -/****************** TSplitBase ******************/ +} + +/****************** TSplitBase ******************/ TSplitBase::TSplitBase(const char* str, size_t length) : Str(str) , Len(length) -{ -} - +{ +} + TSplitBase::TSplitBase(const TString& s) : Str(s.data()) , Len(s.size()) -{ -} - -/****************** TDelimitersSplit ******************/ - +{ +} + +/****************** TDelimitersSplit ******************/ + TDelimitersSplit::TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) : TSplitBase(str, length) , Delimiters(delimiters) -{ -} - +{ +} + TDelimitersSplit::TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters) : TSplitBase(s) , Delimiters(delimiters) -{ -} - +{ +} + size_t TDelimitersSplit::Begin() const { - size_t pos = 0; + size_t pos = 0; while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) - ++pos; - return pos; -} - + ++pos; + return pos; +} + TSizeTRegion TDelimitersSplit::Next(size_t& pos) const { - size_t begin = pos; + size_t begin = pos; while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) - ++pos; - TSizeTRegion result(begin, pos); - + ++pos; + TSizeTRegion result(begin, pos); + while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) - ++pos; + ++pos; + + return result; +} - return result; -} - TDelimitersSplit::TIterator TDelimitersSplit::Iterator() const { - return TIterator(*this); -} - -/****************** TDelimitersStrictSplit ******************/ - + return TIterator(*this); +} + +/****************** TDelimitersStrictSplit ******************/ + TDelimitersStrictSplit::TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) : TSplitBase(str, length) , Delimiters(delimiters) -{ -} - +{ +} + TDelimitersStrictSplit::TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters) : TSplitBase(s) , Delimiters(delimiters) -{ -} - +{ +} + TDelimitersStrictSplit::TIterator TDelimitersStrictSplit::Iterator() const { - return TIterator(*this); -} - + return TIterator(*this); +} + TSizeTRegion TDelimitersStrictSplit::Next(size_t& pos) const { - size_t begin = pos; + size_t begin = pos; while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) - ++pos; - TSizeTRegion result(begin, pos); - - if (pos < Len) - ++pos; - - return result; -} - + ++pos; + TSizeTRegion result(begin, pos); + + if (pos < Len) + ++pos; + + return result; +} + size_t TDelimitersStrictSplit::Begin() const { - return 0; -} - -/****************** TScreenedDelimitersSplit ******************/ - + return 0; +} + +/****************** TScreenedDelimitersSplit ******************/ + TScreenedDelimitersSplit::TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) - : TSplitBase(s) - , Delimiters(delimiters) - , Screens(screens) -{ -} - + : TSplitBase(s) + , Delimiters(delimiters) + , Screens(screens) +{ +} + TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) : TSplitBase(str, length) , Delimiters(delimiters) @@ -114,174 +114,174 @@ TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t lengt { } -TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const { - return TIterator(*this); -} - -TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const { - size_t begin = pos; - bool screened = false; - while (pos < Len) { - if (Screens.IsDelimiter(Str[pos])) - screened = !screened; - if (Delimiters.IsDelimiter(Str[pos]) && !screened) - break; - ++pos; - } - TSizeTRegion result(begin, pos); - - if (pos < Len) - ++pos; - - return result; -} - -size_t TScreenedDelimitersSplit::Begin() const { - return 0; -} - -/****************** TDelimitersSplitWithoutTags ******************/ - +TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const { + return TIterator(*this); +} + +TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const { + size_t begin = pos; + bool screened = false; + while (pos < Len) { + if (Screens.IsDelimiter(Str[pos])) + screened = !screened; + if (Delimiters.IsDelimiter(Str[pos]) && !screened) + break; + ++pos; + } + TSizeTRegion result(begin, pos); + + if (pos < Len) + ++pos; + + return result; +} + +size_t TScreenedDelimitersSplit::Begin() const { + return 0; +} + +/****************** TDelimitersSplitWithoutTags ******************/ + TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters) : TSplitBase(str, length) , Delimiters(delimiters) -{ -} - +{ +} + TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters) : TSplitBase(s) , Delimiters(delimiters) -{ -} - +{ +} + size_t TDelimitersSplitWithoutTags::SkipTag(size_t pos) const { Y_ASSERT('<' == Str[pos]); - while ((pos < Len) && ('>' != Str[pos])) - ++pos; - return pos + 1; -} - + while ((pos < Len) && ('>' != Str[pos])) + ++pos; + return pos + 1; +} + size_t TDelimitersSplitWithoutTags::SkipDelimiters(size_t pos) const { while (true) { while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) - ++pos; + ++pos; if (pos < Len) { - if ('<' != Str[pos]) - break; - else - pos = SkipTag(pos); + if ('<' != Str[pos]) + break; + else + pos = SkipTag(pos); } else - break; - } - return pos; -} - + break; + } + return pos; +} + size_t TDelimitersSplitWithoutTags::Begin() const { - size_t pos = 0; - pos = SkipDelimiters(pos); - return pos; -} - + size_t pos = 0; + pos = SkipDelimiters(pos); + return pos; +} + TSizeTRegion TDelimitersSplitWithoutTags::Next(size_t& pos) const { - size_t begin = pos; + size_t begin = pos; while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) - ++pos; - TSizeTRegion result(begin, pos); - - pos = SkipDelimiters(pos); - - return result; -} - + ++pos; + TSizeTRegion result(begin, pos); + + pos = SkipDelimiters(pos); + + return result; +} + TDelimitersSplitWithoutTags::TIterator TDelimitersSplitWithoutTags::Iterator() const { - return TIterator(*this); -} - -/****************** TCharSplit ******************/ - + return TIterator(*this); +} + +/****************** TCharSplit ******************/ + TCharSplit::TCharSplit(const char* str, size_t length) : TSplitBase(str, length) -{ -} - +{ +} + TCharSplit::TCharSplit(const TString& s) : TSplitBase(s) -{ -} - +{ +} + TCharSplit::TIterator TCharSplit::Iterator() const { - return TIterator(*this); -} - + return TIterator(*this); +} + TSizeTRegion TCharSplit::Next(size_t& pos) const { - TSizeTRegion result(pos, pos + 1); - ++pos; - return result; -} - + TSizeTRegion result(pos, pos + 1); + ++pos; + return result; +} + size_t TCharSplit::Begin() const { - return 0; -} - -/****************** TCharSplitWithoutTags ******************/ - -TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length) - : TSplitBase(str, length) -{ -} - + return 0; +} + +/****************** TCharSplitWithoutTags ******************/ + +TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length) + : TSplitBase(str, length) +{ +} + TCharSplitWithoutTags::TCharSplitWithoutTags(const TString& s) - : TSplitBase(s) -{ -} - + : TSplitBase(s) +{ +} + size_t TCharSplitWithoutTags::SkipTag(size_t pos) const { Y_ASSERT('<' == Str[pos]); - while ((pos < Len) && ('>' != Str[pos])) - ++pos; - return pos + 1; -} - + while ((pos < Len) && ('>' != Str[pos])) + ++pos; + return pos + 1; +} + size_t TCharSplitWithoutTags::SkipDelimiters(size_t pos) const { while (true) { if (pos < Len) { - if ('<' != Str[pos]) - break; - else - pos = SkipTag(pos); + if ('<' != Str[pos]) + break; + else + pos = SkipTag(pos); } else - break; - } - return pos; -} - + break; + } + return pos; +} + size_t TCharSplitWithoutTags::Begin() const { - size_t pos = 0; - pos = SkipDelimiters(pos); - return pos; -} - + size_t pos = 0; + pos = SkipDelimiters(pos); + return pos; +} + TSizeTRegion TCharSplitWithoutTags::Next(size_t& pos) const { - size_t begin = pos++; - TSizeTRegion result(begin, pos); - - pos = SkipDelimiters(pos); - - return result; -} - + size_t begin = pos++; + TSizeTRegion result(begin, pos); + + pos = SkipDelimiters(pos); + + return result; +} + TCharSplitWithoutTags::TIterator TCharSplitWithoutTags::Iterator() const { - return TIterator(*this); -} - + return TIterator(*this); +} + TSubstringSplitDelimiter::TSubstringSplitDelimiter(const TString& s) - : Matcher(s) + : Matcher(s) , Len(s.size()) -{ -} - -/****************** TSubstringSplit ******************/ - +{ +} + +/****************** TSubstringSplit ******************/ + TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter) : TSplitBase(str, length) , Delimiter(delimiter) @@ -289,30 +289,30 @@ TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstrin } TSubstringSplit::TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter) - : TSplitBase(str) - , Delimiter(delimiter) -{ -} - -TSubstringSplit::TIterator TSubstringSplit::Iterator() const { - return TIterator(*this); -} - -TSizeTRegion TSubstringSplit::Next(size_t& pos) const { - const char* begin = Str + pos; - const char* end = Str + Len; - const char* delim; - if (Delimiter.Matcher.SubStr(begin, end, delim)) { - TSizeTRegion result(pos, delim - begin + pos); - pos += delim - begin + Delimiter.Len; - return result; - } else { - TSizeTRegion result(pos, end - begin + pos); - pos += end - begin; - return result; - } -} - -size_t TSubstringSplit::Begin() const { - return 0; -} + : TSplitBase(str) + , Delimiter(delimiter) +{ +} + +TSubstringSplit::TIterator TSubstringSplit::Iterator() const { + return TIterator(*this); +} + +TSizeTRegion TSubstringSplit::Next(size_t& pos) const { + const char* begin = Str + pos; + const char* end = Str + Len; + const char* delim; + if (Delimiter.Matcher.SubStr(begin, end, delim)) { + TSizeTRegion result(pos, delim - begin + pos); + pos += delim - begin + Delimiter.Len; + return result; + } else { + TSizeTRegion result(pos, end - begin + pos); + pos += end - begin; + return result; + } +} + +size_t TSubstringSplit::Begin() const { + return 0; +} diff --git a/library/cpp/deprecated/split/split_iterator.h b/library/cpp/deprecated/split/split_iterator.h index f6f73dae11..0eacc29228 100644 --- a/library/cpp/deprecated/split/split_iterator.h +++ b/library/cpp/deprecated/split/split_iterator.h @@ -1,5 +1,5 @@ #pragma once - + #include <library/cpp/deprecated/kmp/kmp.h> #include <util/string/cast.h> #include <util/string/util.h> @@ -11,7 +11,7 @@ #include <util/generic/string.h> #include <util/generic/vector.h> #include <util/generic/yexception.h> - + #include <cstdio> template <typename T> @@ -57,225 +57,225 @@ inline TUi32Region FromString(const TString& s) { } class TSplitDelimiters { -private: - bool Delims[256]; - -public: +private: + bool Delims[256]; + +public: explicit TSplitDelimiters(const char* s); Y_FORCE_INLINE bool IsDelimiter(ui8 ch) const { - return Delims[ch]; - } -}; - + return Delims[ch]; + } +}; + template <class Split> -class TSplitIterator; - +class TSplitIterator; + class TSplitBase { -protected: - const char* Str; - size_t Len; - -public: - TSplitBase(const char* str, size_t length); +protected: + const char* Str; + size_t Len; + +public: + TSplitBase(const char* str, size_t length); TSplitBase(const TString& s); - + Y_FORCE_INLINE const char* GetString() const { - return Str; - } - + return Str; + } + Y_FORCE_INLINE size_t GetLength() const { - return Len; - } + return Len; + } private: // we don't own Str, make sure that no one calls us with temporary object TSplitBase(TString&&) = delete; -}; - -#ifdef _MSC_VER +}; + +#ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4512) -#endif - +#endif + class TDelimitersSplit: public TSplitBase { -private: - const TSplitDelimiters& Delimiters; - -public: +private: + const TSplitDelimiters& Delimiters; + +public: using TIterator = TSplitIterator<TDelimitersSplit>; - friend class TSplitIterator<TDelimitersSplit>; - - TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); + friend class TSplitIterator<TDelimitersSplit>; + + TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Delimiters, make sure that no one calls us with temporary object TDelimitersSplit(const char*, size_t, TSplitDelimiters&&) = delete; TDelimitersSplit(const TString&, TSplitDelimiters&&) = delete; TDelimitersSplit(TString&&, const TSplitDelimiters&) = delete; -}; - +}; + class TDelimitersStrictSplit: public TSplitBase { -private: - const TSplitDelimiters& Delimiters; - -public: +private: + const TSplitDelimiters& Delimiters; + +public: using TIterator = TSplitIterator<TDelimitersStrictSplit>; - friend class TSplitIterator<TDelimitersStrictSplit>; - - TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); + friend class TSplitIterator<TDelimitersStrictSplit>; + + TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Delimiters, make sure that no one calls us with temporary object TDelimitersStrictSplit(const char*, size_t, TSplitDelimiters&&) = delete; TDelimitersStrictSplit(const TString&, TSplitDelimiters&&) = delete; TDelimitersStrictSplit(TString&&, const TSplitDelimiters&) = delete; -}; - +}; + class TScreenedDelimitersSplit: public TSplitBase { -private: - const TSplitDelimiters& Delimiters; - const TSplitDelimiters& Screens; - -public: +private: + const TSplitDelimiters& Delimiters; + const TSplitDelimiters& Screens; + +public: using TIterator = TSplitIterator<TScreenedDelimitersSplit>; - friend class TSplitIterator<TScreenedDelimitersSplit>; - + friend class TSplitIterator<TScreenedDelimitersSplit>; + TScreenedDelimitersSplit(const char*, size_t, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens); TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Delimiters and Screens, make sure that no one calls us with temporary object TScreenedDelimitersSplit(TString&&, const TSplitDelimiters&, const TSplitDelimiters&) = delete; TScreenedDelimitersSplit(const TString&, TSplitDelimiters&&, const TSplitDelimiters&) = delete; TScreenedDelimitersSplit(const TString&, const TSplitDelimiters&, TSplitDelimiters&&) = delete; -}; - +}; + class TDelimitersSplitWithoutTags: public TSplitBase { -private: - const TSplitDelimiters& Delimiters; - size_t SkipTag(size_t pos) const; - size_t SkipDelimiters(size_t pos) const; - -public: +private: + const TSplitDelimiters& Delimiters; + size_t SkipTag(size_t pos) const; + size_t SkipDelimiters(size_t pos) const; + +public: using TIterator = TSplitIterator<TDelimitersSplitWithoutTags>; - friend class TSplitIterator<TDelimitersSplitWithoutTags>; - - TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters); + friend class TSplitIterator<TDelimitersSplitWithoutTags>; + + TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters); TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Delimiters, make sure that no one calls us with temporary object TDelimitersSplitWithoutTags(const char*, size_t, TSplitDelimiters&&) = delete; TDelimitersSplitWithoutTags(const TString&, TSplitDelimiters&&) = delete; TDelimitersSplitWithoutTags(TString&&, const TSplitDelimiters&) = delete; -}; - +}; + class TCharSplit: public TSplitBase { -public: +public: using TIterator = TSplitIterator<TCharSplit>; - friend class TSplitIterator<TCharSplit>; - - TCharSplit(const char* str, size_t length); + friend class TSplitIterator<TCharSplit>; + + TCharSplit(const char* str, size_t length); TCharSplit(const TString& s); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Str, make sure that no one calls us with temporary object TCharSplit(TString&&) = delete; -}; - -#ifdef _MSC_VER +}; + +#ifdef _MSC_VER #pragma warning(pop) -#endif - +#endif + class TCharSplitWithoutTags: public TSplitBase { -private: - size_t SkipTag(size_t pos) const; - size_t SkipDelimiters(size_t pos) const; - -public: +private: + size_t SkipTag(size_t pos) const; + size_t SkipDelimiters(size_t pos) const; + +public: using TIterator = TSplitIterator<TCharSplitWithoutTags>; - friend class TSplitIterator<TCharSplitWithoutTags>; - - TCharSplitWithoutTags(const char* str, size_t length); + friend class TSplitIterator<TCharSplitWithoutTags>; + + TCharSplitWithoutTags(const char* str, size_t length); TCharSplitWithoutTags(const TString& s); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Str, make sure that no one calls us with temporary object TCharSplitWithoutTags(TString&&) = delete; -}; - +}; + class TSubstringSplitDelimiter { -public: - TKMPMatcher Matcher; - size_t Len; - +public: + TKMPMatcher Matcher; + size_t Len; + TSubstringSplitDelimiter(const TString& s); -}; - +}; + class TSubstringSplit: public TSplitBase { -private: - const TSubstringSplitDelimiter& Delimiter; - -public: +private: + const TSubstringSplitDelimiter& Delimiter; + +public: using TIterator = TSplitIterator<TSubstringSplit>; - friend class TSplitIterator<TSubstringSplit>; - + friend class TSplitIterator<TSubstringSplit>; + TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter); TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter); - TIterator Iterator() const; - TSizeTRegion Next(size_t& pos) const; - size_t Begin() const; + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; private: // we don't own Delimiters, make sure that no one calls us with temporary object TSubstringSplit(TString&&, const TSubstringSplitDelimiter&) = delete; TSubstringSplit(const TString&, TSubstringSplitDelimiter&&) = delete; -}; - +}; + template <class TSplit> class TSplitIterator { -protected: - const TSplit& Split; - size_t Pos; +protected: + const TSplit& Split; + size_t Pos; TString* CurrentStroka; - -public: - TSplitIterator(const TSplit& split) - : Split(split) - , Pos(Split.Begin()) + +public: + TSplitIterator(const TSplit& split) + : Split(split) + , Pos(Split.Begin()) , CurrentStroka(nullptr) - { - } - - virtual ~TSplitIterator() { - delete CurrentStroka; - } - - inline TSizeTRegion Next() { + { + } + + virtual ~TSplitIterator() { + delete CurrentStroka; + } + + inline TSizeTRegion Next() { Y_ENSURE(!Eof(), TStringBuf("eof reached")); - return Split.Next(Pos); - } + return Split.Next(Pos); + } TStringBuf NextTok() { if (Eof()) @@ -285,33 +285,33 @@ public: } const TString& NextString() { - if (!CurrentStroka) + if (!CurrentStroka) CurrentStroka = new TString(); - TSizeTRegion region = Next(); - CurrentStroka->assign(Split.Str, region.Begin, region.Length() - 1); - return *CurrentStroka; - } - - inline bool Eof() const { - return Pos >= Split.Len; - } - + TSizeTRegion region = Next(); + CurrentStroka->assign(Split.Str, region.Begin, region.Length() - 1); + return *CurrentStroka; + } + + inline bool Eof() const { + return Pos >= Split.Len; + } + TString GetTail() const { return TString(Split.Str + Pos); - } - - void Skip(size_t count) { - for (size_t i = 0; i < count; ++i) - Next(); - } -}; - + } + + void Skip(size_t count) { + for (size_t i = 0; i < count; ++i) + Next(); + } +}; + using TSplitTokens = TVector<TString>; - + template <typename TSplit> void Split(const TSplit& split, TSplitTokens* words) { - words->clear(); - TSplitIterator<TSplit> it(split); - while (!it.Eof()) + words->clear(); + TSplitIterator<TSplit> it(split); + while (!it.Eof()) words->push_back(it.NextString()); -} +} diff --git a/library/cpp/deprecated/split/split_iterator_ut.cpp b/library/cpp/deprecated/split/split_iterator_ut.cpp index 42951b6ab4..be5069c4be 100644 --- a/library/cpp/deprecated/split/split_iterator_ut.cpp +++ b/library/cpp/deprecated/split/split_iterator_ut.cpp @@ -1,93 +1,93 @@ #include "split_iterator.h" - + #include <library/cpp/testing/unittest/registar.h> - + class TSplitIteratorTest: public TTestBase { - UNIT_TEST_SUITE(TSplitIteratorTest); + UNIT_TEST_SUITE(TSplitIteratorTest); UNIT_TEST(TestDelimiters); UNIT_TEST(TestDelimitersSplit); UNIT_TEST(TestDelimitersStrictSplit); UNIT_TEST(TestTail); UNIT_TEST(TestScreenedDelimitersSplit); UNIT_TEST(TestSubstringDelimiter); - UNIT_TEST_SUITE_END(); - -public: - void TestDelimiters(); - void TestDelimitersSplit(); - void TestDelimitersStrictSplit(); - void TestTail(); - void TestScreenedDelimitersSplit(); - void TestSubstringDelimiter(); -}; - + UNIT_TEST_SUITE_END(); + +public: + void TestDelimiters(); + void TestDelimitersSplit(); + void TestDelimitersStrictSplit(); + void TestTail(); + void TestScreenedDelimitersSplit(); + void TestSubstringDelimiter(); +}; + void TSplitIteratorTest::TestDelimiters() { - TSplitDelimiters delims("@"); - for (int i = 0; i < 256; ++i) + TSplitDelimiters delims("@"); + for (int i = 0; i < 256; ++i) if ('@' != i) { - UNIT_ASSERT(!delims.IsDelimiter((ui8)i)); + UNIT_ASSERT(!delims.IsDelimiter((ui8)i)); } else { - UNIT_ASSERT(delims.IsDelimiter((ui8)i)); - } -} - + UNIT_ASSERT(delims.IsDelimiter((ui8)i)); + } +} + void TSplitIteratorTest::TestDelimitersSplit() { - { + { TString s = "1a3b45cd"; TSplitDelimiters delims("abcd"); TDelimitersSplit split(s, delims); - TSplitTokens tokens; - Split(split, &tokens); + TSplitTokens tokens; + Split(split, &tokens); TSplitTokens pattern = {"1", "3", "45"}; - UNIT_ASSERT(tokens == pattern); - } - { + UNIT_ASSERT(tokens == pattern); + } + { TString s = "aaaaaa"; TSplitDelimiters delims("abcd"); TDelimitersSplit split(s, delims); - TSplitTokens tokens; - Split(split, &tokens); + TSplitTokens tokens; + Split(split, &tokens); TSplitTokens pattern = {}; - UNIT_ASSERT(tokens == pattern); - } -} - + UNIT_ASSERT(tokens == pattern); + } +} + void TSplitIteratorTest::TestDelimitersStrictSplit() { - { + { TString s = "grp@2"; TSplitDelimiters delims("@"); TDelimitersStrictSplit split(s, delims); - TSplitTokens tokens; - Split(split, &tokens); + TSplitTokens tokens; + Split(split, &tokens); TSplitTokens pattern = {"grp", "2"}; - UNIT_ASSERT(tokens == pattern); - } - - { + UNIT_ASSERT(tokens == pattern); + } + + { TString s = "@grp@2@@"; TSplitDelimiters delims("@"); TDelimitersStrictSplit split(s, delims); - TSplitTokens tokens; - Split(split, &tokens); + TSplitTokens tokens; + Split(split, &tokens); TSplitTokens pattern = {"", "grp", "2", ""}; - UNIT_ASSERT(tokens == pattern); - } -} - + UNIT_ASSERT(tokens == pattern); + } +} + void TSplitIteratorTest::TestTail() { TString s = "grp@2@4"; TSplitDelimiters delims("@"); TDelimitersSplit split(s, delims); - TDelimitersSplit::TIterator it = split.Iterator(); - UNIT_ASSERT_EQUAL(it.GetTail(), "grp@2@4"); - it.Next(); - UNIT_ASSERT_EQUAL(it.GetTail(), "2@4"); - it.Next(); - UNIT_ASSERT_EQUAL(it.GetTail(), "4"); - it.Next(); - UNIT_ASSERT_EQUAL(it.GetTail(), ""); -} - + TDelimitersSplit::TIterator it = split.Iterator(); + UNIT_ASSERT_EQUAL(it.GetTail(), "grp@2@4"); + it.Next(); + UNIT_ASSERT_EQUAL(it.GetTail(), "2@4"); + it.Next(); + UNIT_ASSERT_EQUAL(it.GetTail(), "4"); + it.Next(); + UNIT_ASSERT_EQUAL(it.GetTail(), ""); +} + void TSplitIteratorTest::TestScreenedDelimitersSplit() { { const TString s = "77.88.58.91 - - [28/Aug/2008:00:08:07 +0400] \"GET /export/mordashka.tgz HTTP/1.1\" 304 - \"-\" \"libwww-perl/5.805\" \"news.yandex.ru,80\" \"-\" \"-\" 1219867687 \"0\" 3283 2"; @@ -135,18 +135,18 @@ void TSplitIteratorTest::TestScreenedDelimitersSplit() { UNIT_ASSERT_EQUAL(it.NextString(), "3283"); UNIT_ASSERT_EQUAL(it.NextString(), "2"); } -} - +} + void TSplitIteratorTest::TestSubstringDelimiter() { const TString s = "a@@bb@@cc@c.d@@r"; - static const TSubstringSplitDelimiter delimiter("@@"); - const TSubstringSplit splitter(s, delimiter); - TSubstringSplit::TIterator it = splitter.Iterator(); + static const TSubstringSplitDelimiter delimiter("@@"); + const TSubstringSplit splitter(s, delimiter); + TSubstringSplit::TIterator it = splitter.Iterator(); UNIT_ASSERT_EQUAL(it.NextString(), "a"); UNIT_ASSERT_EQUAL(it.NextString(), "bb"); UNIT_ASSERT_EQUAL(it.NextString(), "cc@c.d"); UNIT_ASSERT_EQUAL(it.NextString(), "r"); - UNIT_ASSERT(it.Eof()); -} - -UNIT_TEST_SUITE_REGISTRATION(TSplitIteratorTest); + UNIT_ASSERT(it.Eof()); +} + +UNIT_TEST_SUITE_REGISTRATION(TSplitIteratorTest); |