diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/deprecated | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/deprecated')
33 files changed, 2462 insertions, 0 deletions
diff --git a/library/cpp/deprecated/accessors/README.md b/library/cpp/deprecated/accessors/README.md new file mode 100644 index 0000000000..498f1203e0 --- /dev/null +++ b/library/cpp/deprecated/accessors/README.md @@ -0,0 +1,5 @@ +Unified accessors for Arcadia containers and user types. + +Accessors implemented here mix different kinds of access at the wrong abstraction level, so they shouldn't be used. + +If you want begin/end/size for your containers, use std::begin, std::end, std::size. If you need generic reserve / resize / clear / insert, just use appropriate container methods or do your own overloads in place. diff --git a/library/cpp/deprecated/accessors/accessors.cpp b/library/cpp/deprecated/accessors/accessors.cpp new file mode 100644 index 0000000000..7d37e586fa --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors.cpp @@ -0,0 +1 @@ +#include "accessors.h" diff --git a/library/cpp/deprecated/accessors/accessors.h b/library/cpp/deprecated/accessors/accessors.h new file mode 100644 index 0000000000..6d4b1da3ad --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors.h @@ -0,0 +1,83 @@ +#pragma once + +#include "accessors_impl.h" + +namespace NAccessors { + /* + * Adds API compatibility between different types representing memory regions. + * + * i.e. this will work: + * + * TString t; + * const char* beg = NAccessors::Begin(t); // t.begin() + * const char* end = NAccessors::End(t); // t.end() + * size_t sz = NAccessors::Size(t); // t.size() + * + * as well as this: + * + * ui64 t; + * const ui64* beg = NAccessors::Begin(t); // &t + * const ui64* end = NAccessors::End(t); // &t + 1 + * size_t sz = NAccessors::Size(t); // 1 + * + * Both will give you begin, end and size of the underlying memory region. 
+ */ + + template <typename T> + inline const typename TMemoryTraits<T>::TElementType* Begin(const T& t) { + return NPrivate::TBegin<T>::Get(t); + } + + template <typename T> + inline const typename TMemoryTraits<T>::TElementType* End(const T& t) { + return NPrivate::TEnd<T>::Get(t); + } + + template <typename T> + inline size_t Size(const T& t) { + return End(t) - Begin(t); + } + + /** + * This gives some unification in terms of memory manipulation. + */ + + template <typename T> + inline void Reserve(T& t, size_t sz) { + NPrivate::TReserve<T>::Do(t, sz); + } + + template <typename T> + inline void Resize(T& t, size_t sz) { + NPrivate::TResize<T>::Do(t, sz); + } + + template <typename T> + inline void Clear(T& t) { + NPrivate::TClear<T, false>::Do(t); + } + + template <typename T> + inline void Init(T& t) { + NPrivate::TClear<T, true>::Do(t); + } + + template <typename T> + inline void Append(T& t, const typename TMemoryTraits<T>::TElementType& v) { + NPrivate::TAppend<T>::Do(t, v); + } + + template <typename T> + inline void Append(T& t, + const typename TMemoryTraits<T>::TElementType* beg, + const typename TMemoryTraits<T>::TElementType* end) { + NPrivate::TAppendRegion<T>::Do(t, beg, end); + } + + template <typename T> + inline void Assign(T& t, + const typename TMemoryTraits<T>::TElementType* beg, + const typename TMemoryTraits<T>::TElementType* end) { + NPrivate::TAssign<T>::Do(t, beg, end); + } +} diff --git a/library/cpp/deprecated/accessors/accessors_impl.cpp b/library/cpp/deprecated/accessors/accessors_impl.cpp new file mode 100644 index 0000000000..0bf74cab7b --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors_impl.cpp @@ -0,0 +1 @@ +#include "accessors_impl.h" diff --git a/library/cpp/deprecated/accessors/accessors_impl.h b/library/cpp/deprecated/accessors/accessors_impl.h new file mode 100644 index 0000000000..6b2b987351 --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors_impl.h @@ -0,0 +1,420 @@ +#pragma once + +#include 
"memory_traits.h" + +namespace NAccessors { + namespace NPrivate { + template <typename Ta> + struct TMemoryAccessorBase { + enum { + SimpleMemory = TMemoryTraits<Ta>::SimpleMemory, + ContinuousMemory = TMemoryTraits<Ta>::ContinuousMemory, + }; + + struct TBadAccessor; + }; + + template <typename Ta> + struct TBegin: public TMemoryAccessorBase<Ta> { + using TElementType = typename TMemoryTraits<Ta>::TElementType; + + template <typename Tb> + struct TNoMemoryIndirectionBegin { + static const TElementType* Get(const Tb& b) { + return (const TElementType*)&b; + } + }; + + template <typename Tb> + struct TIndirectMemoryRegionBegin { + Y_HAS_MEMBER(Begin); + Y_HAS_MEMBER(begin); + + template <typename Tc> + struct TByBegin { + static const TElementType* Get(const Tc& b) { + return (const TElementType*)b.Begin(); + } + }; + + template <typename Tc> + struct TBybegin { + static const TElementType* Get(const Tc& b) { + return (const TElementType*)b.begin(); + } + }; + + using TGet = std::conditional_t<THasBegin<Tb>::value, TByBegin<Tb>, TBybegin<Tb>>; + + static const TElementType* Get(const Tb& b) { + return TGet::Get(b); + } + }; + + using TGet = std::conditional_t< + TMemoryAccessorBase<Ta>::SimpleMemory, + TNoMemoryIndirectionBegin<Ta>, + std::conditional_t< + TMemoryAccessorBase<Ta>::ContinuousMemory, + TIndirectMemoryRegionBegin<Ta>, + typename TMemoryAccessorBase<Ta>::TBadAccessor>>; + + static const TElementType* Get(const Ta& b) { + return TGet::Get(b); + } + }; + + template <typename Ta> + struct TEnd: public TMemoryAccessorBase<Ta> { + using TElementType = typename TMemoryTraits<Ta>::TElementType; + + template <typename Tb> + struct TNoMemoryIndirectionEnd { + static const TElementType* Get(const Tb& b) { + return (const TElementType*)(&b + 1); + } + }; + + template <typename Tb> + struct TIndirectMemoryRegionEnd { + Y_HAS_MEMBER(End); + Y_HAS_MEMBER(end); + + template <typename Tc> + struct TByEnd { + static const TElementType* Get(const Tc& b) { + return 
(const TElementType*)b.End(); + } + }; + + template <typename Tc> + struct TByend { + static const TElementType* Get(const Tc& b) { + return (const TElementType*)b.end(); + } + }; + + using TGet = std::conditional_t<THasEnd<Tb>::value, TByEnd<Tb>, TByend<Tb>>; + + static const TElementType* Get(const Tb& b) { + return TGet::Get(b); + } + }; + + using TGet = std::conditional_t< + TMemoryAccessorBase<Ta>::SimpleMemory, + TNoMemoryIndirectionEnd<Ta>, + std::conditional_t< + TMemoryAccessorBase<Ta>::ContinuousMemory, + TIndirectMemoryRegionEnd<Ta>, + typename TMemoryAccessorBase<Ta>::TBadAccessor>>; + + static const TElementType* Get(const Ta& b) { + return TGet::Get(b); + } + }; + + template <typename Ta, bool Init> + struct TClear: public TMemoryAccessorBase<Ta> { + template <typename Tb> + struct TNoMemoryIndirectionClear { + static void Do(Tb& b) { + Zero(b); + } + }; + + template <typename Tb> + struct TIndirectMemoryRegionClear { + Y_HAS_MEMBER(Clear); + Y_HAS_MEMBER(clear); + + template <typename Tc> + struct TByClear { + static void Do(Tc& b) { + b.Clear(); + } + }; + + template <typename Tc> + struct TByclear { + static void Do(Tc& b) { + b.clear(); + } + }; + + template <typename Tc> + struct TByNone { + static void Do(Tc& b) { + if (!Init) + b = Tc(); + } + }; + + using TDo = std::conditional_t< + THasClear<Tb>::value, + TByClear<Tb>, + std::conditional_t< + THasclear<Tb>::value, + TByclear<Tb>, + TByNone<Tb>>>; + + static void Do(Tb& b) { + TDo::Do(b); + } + }; + + using TDo = std::conditional_t<TMemoryAccessorBase<Ta>::SimpleMemory, TNoMemoryIndirectionClear<Ta>, TIndirectMemoryRegionClear<Ta>>; + + static void Do(Ta& b) { + TDo::Do(b); + } + }; + + template <typename Tb> + struct TReserve { + Y_HAS_MEMBER(Reserve); + Y_HAS_MEMBER(reserve); + + template <typename Tc> + struct TByReserve { + static void Do(Tc& b, size_t sz) { + b.Reserve(sz); + } + }; + + template <typename Tc> + struct TByreserve { + static void Do(Tc& b, size_t sz) { + b.reserve(sz); + } 
+ }; + + template <typename Tc> + struct TByNone { + static void Do(Tc&, size_t) { + } + }; + + using TDo = std::conditional_t< + THasReserve<Tb>::value, + TByReserve<Tb>, + std::conditional_t< + THasreserve<Tb>::value, + TByreserve<Tb>, + TByNone<Tb>>>; + + static void Do(Tb& b, size_t sz) { + TDo::Do(b, sz); + } + }; + + template <typename Tb> + struct TResize { + Y_HAS_MEMBER(Resize); + Y_HAS_MEMBER(resize); + + template <typename Tc> + struct TByResize { + static void Do(Tc& b, size_t sz) { + b.Resize(sz); + } + }; + + template <typename Tc> + struct TByresize { + static void Do(Tc& b, size_t sz) { + b.resize(sz); + } + }; + + using TDo = std::conditional_t<THasResize<Tb>::value, TByResize<Tb>, TByresize<Tb>>; + + static void Do(Tb& b, size_t sz) { + TDo::Do(b, sz); + } + }; + + template <typename Tb> + struct TAppend { + Y_HAS_MEMBER(Append); + Y_HAS_MEMBER(append); + Y_HAS_MEMBER(push_back); + + template <typename Tc> + struct TByAppend { + using TElementType = typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType& val) { + b.Append(val); + } + }; + + template <typename Tc> + struct TByappend { + using TElementType = typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType& val) { + b.append(val); + } + }; + + template <typename Tc> + struct TBypush_back { + using TElementType = typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType& val) { + b.push_back(val); + } + }; + + using TDo = std::conditional_t< + THasAppend<Tb>::value, + TByAppend<Tb>, + std::conditional_t< + THasappend<Tb>::value, + TByappend<Tb>, + TBypush_back<Tb>>>; + + using TElementType = typename TMemoryTraits<Tb>::TElementType; + + static void Do(Tb& b, const TElementType& val) { + TDo::Do(b, val); + } + }; + + template <typename Tb> + struct TAppendRegion { + Y_HAS_MEMBER(Append); + Y_HAS_MEMBER(append); + Y_HAS_MEMBER(insert); + + template <typename Tc> + struct TByAppend { + using TElementType = 
typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + b.Append(beg, end); + } + }; + + template <typename Tc> + struct TByappend { + using TElementType = typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + b.append(beg, end); + } + }; + + template <typename Tc> + struct TByinsert { + using TElementType = typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + b.insert(b.end(), beg, end); + } + }; + + template <typename Tc> + struct TByNone { + using TElementType = typename TMemoryTraits<Tc>::TElementType; + + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + for (const TElementType* it = beg; it != end; ++it) + TAppend<Tc>::Do(b, *it); + } + }; + + using TDo = std::conditional_t< + THasAppend<Tb>::value, + TByAppend<Tb>, + std::conditional_t< + THasappend<Tb>::value, + TByappend<Tb>, + std::conditional_t< + THasinsert<Tb>::value, + TByinsert<Tb>, + TByNone<Tb>>>>; + + using TElementType = typename TMemoryTraits<Tb>::TElementType; + + static void Do(Tb& b, const TElementType* beg, const TElementType* end) { + TDo::Do(b, beg, end); + } + }; + + template <typename Ta> + struct TAssign: public TMemoryAccessorBase<Ta> { + using TElementType = typename TMemoryTraits<Ta>::TElementType; + + template <typename Tb> + struct TNoMemoryIndirectionAssign { + static void Do(Tb& b, const TElementType* beg, const TElementType* end) { + if (sizeof(Tb) == sizeof(TElementType) && end - beg > 0) { + memcpy(&b, beg, sizeof(Tb)); + } else if (end - beg > 0) { + memcpy(&b, beg, Min<size_t>((end - beg) * sizeof(TElementType), sizeof(Tb))); + } else { + Zero(b); + } + } + }; + + template <typename Tb> + struct TIndirectMemoryRegionAssign { + Y_HAS_MEMBER(Assign); + Y_HAS_MEMBER(assign); + + template <typename Tc> + struct TByAssign { + static void Do(Tc& b, const 
TElementType* beg, const TElementType* end) { + b.Assign(beg, end); + } + }; + + template <typename Tc> + struct TByassign { + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + b.assign(beg, end); + } + }; + + template <typename Tc> + struct TByClearAppend { + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + TClear<Tc, false>::Do(b); + TAppendRegion<Tc>::Do(b, beg, end); + } + }; + + template <typename Tc> + struct TByConstruction { + static void Do(Tc& b, const TElementType* beg, const TElementType* end) { + b = Tc(beg, end); + } + }; + + using TDo = std::conditional_t< + THasAssign<Tb>::value, + TByAssign<Tb>, + std::conditional_t< + THasassign<Tb>::value, + TByassign<Tb>, + std::conditional_t< + TMemoryTraits<Tb>::OwnsMemory, + TByClearAppend<Tb>, + TByConstruction<Tb>>>>; + + static void Do(Tb& b, const TElementType* beg, const TElementType* end) { + TDo::Do(b, beg, end); + } + }; + + using TDo = std::conditional_t<TMemoryAccessorBase<Ta>::SimpleMemory, TNoMemoryIndirectionAssign<Ta>, TIndirectMemoryRegionAssign<Ta>>; + + static void Do(Ta& b, const TElementType* beg, const TElementType* end) { + TDo::Do(b, beg, end); + } + }; + } +} diff --git a/library/cpp/deprecated/accessors/accessors_ut.cpp b/library/cpp/deprecated/accessors/accessors_ut.cpp new file mode 100644 index 0000000000..a9bdc9fcc4 --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors_ut.cpp @@ -0,0 +1,92 @@ +#include "accessors.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/buffer.h> +#include <util/generic/vector.h> + +#include <array> + +class TAccessorsTest: public TTestBase { + UNIT_TEST_SUITE(TAccessorsTest); + UNIT_TEST(TestAccessors); + UNIT_TEST_SUITE_END(); + +private: + template <typename T> + void TestRead(const T& t, const char* comm) { + const char* beg = (const char*)NAccessors::Begin(t); + const char* end = (const char*)NAccessors::End(t); + long sz = NAccessors::Size(t) * 
sizeof(typename TMemoryTraits<T>::TElementType); + + UNIT_ASSERT_VALUES_EQUAL_C(end - beg, sz, comm); + } + + template <typename T> + void TestWrite(const char* comm) { + typename TMemoryTraits<T>::TElementType val[4] = {'t', 'e', 's', 't'}; + T t; + NAccessors::Init(t); + NAccessors::Reserve(t, 6); + + size_t sz = NAccessors::Size(t); + UNIT_ASSERT_VALUES_EQUAL_C(0u, sz, comm); + + NAccessors::Append(t, 'a'); + sz = NAccessors::Size(t); + UNIT_ASSERT_VALUES_EQUAL_C(1u, sz, comm); + + NAccessors::Append(t, val, val + 4); + sz = NAccessors::Size(t); + UNIT_ASSERT_VALUES_EQUAL_C(5u, sz, comm); + + NAccessors::Clear(t); + + sz = NAccessors::Size(t); + UNIT_ASSERT_VALUES_EQUAL_C(0u, sz, comm); + } + + void TestAccessors() { + TestRead('a', "char"); + TestRead(1, "int"); + + int t[4] = {0, 1, 2, 3}; + + TestRead(t, "int[4]"); + + TStringBuf sbuf = "test"; + + TestRead(sbuf, "TStringBuf"); + + TUtf16String wtr; + wtr.resize(10, 1024); + + TestRead(wtr, "TUtf16String"); + + TBuffer buf; + buf.Resize(30); + + TestRead(buf, "TBuffer"); + + TVector<ui64> vec(10, 100); + + TestRead(vec, "TVector<ui64>"); + + TestWrite<TString>("TString"); + TestWrite<TVector<char>>("TVector<char>"); + TestWrite<TBuffer>("TBuffer"); + TestWrite<TVector<ui64>>("TVector<ui64>"); + TestWrite<TUtf16String>("TUtf16String"); + + std::array<TString, 10> sarr; + NAccessors::Init(sarr); + NAccessors::Clear(sarr); + + std::array<char, 10> carr; + NAccessors::Init(carr); + NAccessors::Clear(carr); + TestRead(carr, "std::array<char, 10>"); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TAccessorsTest) diff --git a/library/cpp/deprecated/accessors/memory_traits.cpp b/library/cpp/deprecated/accessors/memory_traits.cpp new file mode 100644 index 0000000000..df53026cf4 --- /dev/null +++ b/library/cpp/deprecated/accessors/memory_traits.cpp @@ -0,0 +1 @@ +#include "memory_traits.h" diff --git a/library/cpp/deprecated/accessors/memory_traits.h b/library/cpp/deprecated/accessors/memory_traits.h new file mode 100644 
index 0000000000..aa837705d3 --- /dev/null +++ b/library/cpp/deprecated/accessors/memory_traits.h @@ -0,0 +1,168 @@ +#pragma once + +#include <util/generic/array_ref.h> +#include <util/memory/blob.h> +#include <util/memory/tempbuf.h> +#include <util/generic/buffer.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/typetraits.h> + +#include <array> +#include <string> +#include <utility> + +template <typename T> +struct TMemoryTraits { + enum { + SimpleMemory = std::is_arithmetic<T>::value, + ContinuousMemory = SimpleMemory, + OwnsMemory = SimpleMemory, + }; + + using TElementType = T; +}; + +template <typename T, size_t n> +struct TMemoryTraits<T[n]> { + enum { + SimpleMemory = TMemoryTraits<T>::SimpleMemory, + ContinuousMemory = SimpleMemory, + OwnsMemory = SimpleMemory, + }; + + using TElementType = T; +}; + +template <typename T, size_t n> +struct TMemoryTraits<std::array<T, n>> { + enum { + SimpleMemory = TMemoryTraits<T>::SimpleMemory, + ContinuousMemory = SimpleMemory, + OwnsMemory = SimpleMemory, + }; + + using TElementType = T; +}; + +template <typename A, typename B> +struct TMemoryTraits<std::pair<A, B>> { + enum { + SimpleMemory = TMemoryTraits<A>::SimpleMemory && TMemoryTraits<B>::SimpleMemory, + ContinuousMemory = SimpleMemory, + OwnsMemory = SimpleMemory, + }; + + using TElementType = std::pair<A, B>; +}; + +template <> +struct TMemoryTraits<TBuffer> { + enum { + SimpleMemory = false, + ContinuousMemory = true, + OwnsMemory = true, + }; + + using TElementType = char; +}; + +template <> +struct TMemoryTraits<TTempBuf> { + enum { + SimpleMemory = false, + ContinuousMemory = true, + OwnsMemory = true, + }; + + using TElementType = char; +}; + +template <> +struct TMemoryTraits< ::TBlob> { + enum { + SimpleMemory = false, + ContinuousMemory = true, + OwnsMemory = true, + }; + + using TElementType = char; +}; + +template <typename T> +struct TElementDependentMemoryTraits { + enum 
{ + SimpleMemory = false, + ContinuousMemory = TMemoryTraits<T>::SimpleMemory, + }; + + using TElementType = T; +}; + +template <typename T, typename TAlloc> +struct TMemoryTraits<std::vector<T, TAlloc>>: public TElementDependentMemoryTraits<T> { + enum { + OwnsMemory = TMemoryTraits<T>::OwnsMemory + }; +}; + +template <typename T, typename TAlloc> +struct TMemoryTraits<TVector<T, TAlloc>>: public TMemoryTraits<std::vector<T, TAlloc>> { +}; + +template <typename T> +struct TMemoryTraits<TTempArray<T>>: public TElementDependentMemoryTraits<T> { + enum { + OwnsMemory = TMemoryTraits<T>::OwnsMemory + }; +}; + +template <typename T, typename TCharTraits, typename TAlloc> +struct TMemoryTraits<std::basic_string<T, TCharTraits, TAlloc>>: public TElementDependentMemoryTraits<T> { + enum { + OwnsMemory = TMemoryTraits<T>::OwnsMemory + }; +}; + +template <> +struct TMemoryTraits<TString>: public TElementDependentMemoryTraits<char> { + enum { + OwnsMemory = true + }; +}; + +template <> +struct TMemoryTraits<TUtf16String>: public TElementDependentMemoryTraits<wchar16> { + enum { + OwnsMemory = true + }; +}; + +template <typename T> +struct TMemoryTraits<TArrayRef<T>>: public TElementDependentMemoryTraits<T> { + enum { + OwnsMemory = false + }; +}; + +template <typename TCharType, typename TCharTraits> +struct TMemoryTraits<TBasicStringBuf<TCharType, TCharTraits>>: public TElementDependentMemoryTraits<TCharType> { + enum { + OwnsMemory = false + }; +}; + +template <> +struct TMemoryTraits<TStringBuf>: public TElementDependentMemoryTraits<char> { + enum { + OwnsMemory = false + }; +}; + +template <> +struct TMemoryTraits<TWtringBuf>: public TElementDependentMemoryTraits<wchar16> { + enum { + OwnsMemory = false + }; +}; diff --git a/library/cpp/deprecated/accessors/ut/ya.make b/library/cpp/deprecated/accessors/ut/ya.make new file mode 100644 index 0000000000..5ea976566f --- /dev/null +++ b/library/cpp/deprecated/accessors/ut/ya.make @@ -0,0 +1,9 @@ 
+UNITTEST_FOR(library/cpp/deprecated/accessors) + +OWNER(velavokr) + +SRCS( + accessors_ut.cpp +) + +END() diff --git a/library/cpp/deprecated/accessors/ya.make b/library/cpp/deprecated/accessors/ya.make new file mode 100644 index 0000000000..e322026a1c --- /dev/null +++ b/library/cpp/deprecated/accessors/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +OWNER(elric) + +SRCS( + accessors.cpp + accessors_impl.cpp + memory_traits.cpp +) + +END() diff --git a/library/cpp/deprecated/enum_codegen/README.md b/library/cpp/deprecated/enum_codegen/README.md new file mode 100644 index 0000000000..3bdac29af1 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/README.md @@ -0,0 +1,3 @@ +Some macros for generating enum <-> string conversions. + +Just use GENERATE_ENUM_SERIALIZATION. See https://wiki.yandex-team.ru/yatool/HowToWriteYaMakeFiles/#generate-enum diff --git a/library/cpp/deprecated/enum_codegen/enum_codegen.cpp b/library/cpp/deprecated/enum_codegen/enum_codegen.cpp new file mode 100644 index 0000000000..3931b05924 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/enum_codegen.cpp @@ -0,0 +1 @@ +#include "enum_codegen.h" diff --git a/library/cpp/deprecated/enum_codegen/enum_codegen.h b/library/cpp/deprecated/enum_codegen/enum_codegen.h new file mode 100644 index 0000000000..dfb04ecac2 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/enum_codegen.h @@ -0,0 +1,33 @@ +#pragma once + +/// see enum_codegen_ut.cpp for examples + +#define ENUM_VALUE_GEN(name, value, ...) name = value, +#define ENUM_VALUE_GEN_NO_VALUE(name, ...) name, + +#define ENUM_TO_STRING_IMPL_ITEM(name, ...) \ + case name: \ + return #name; +#define ENUM_LTLT_IMPL_ITEM(name, ...) 
\ + case name: \ + os << #name; \ + break; + +#define ENUM_TO_STRING(type, MAP) \ + static inline const char* ToCString(type value) { \ + switch (value) { \ + MAP(ENUM_TO_STRING_IMPL_ITEM) \ + default: \ + return "UNKNOWN"; \ + } \ + } \ + \ + static inline IOutputStream& operator<<(IOutputStream& os, type value) { \ + switch (value) { \ + MAP(ENUM_LTLT_IMPL_ITEM) \ + default: \ + os << int(value); \ + break; \ + } \ + return os; \ + } diff --git a/library/cpp/deprecated/enum_codegen/enum_codegen_ut.cpp b/library/cpp/deprecated/enum_codegen/enum_codegen_ut.cpp new file mode 100644 index 0000000000..f8f1c9b6df --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/enum_codegen_ut.cpp @@ -0,0 +1,40 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "enum_codegen.h" + +#include <util/string/builder.h> + +#define COLOR_MAP(XX) \ + XX(RED) \ + XX(GREEN) \ + XX(BLUE) + +enum EColor { + COLOR_MAP(ENUM_VALUE_GEN_NO_VALUE) +}; + +ENUM_TO_STRING(EColor, COLOR_MAP) + +#define MULTIPLIER_MAP(XX) \ + XX(GB, 9) \ + XX(MB, 6) \ + XX(KB, 3) + +enum EMultiplier { + MULTIPLIER_MAP(ENUM_VALUE_GEN) +}; + +ENUM_TO_STRING(EMultiplier, MULTIPLIER_MAP) + +Y_UNIT_TEST_SUITE(EnumCodegen) { + Y_UNIT_TEST(GenWithValue) { + UNIT_ASSERT_VALUES_EQUAL(6, MB); + } + + Y_UNIT_TEST(ToCString) { + UNIT_ASSERT_VALUES_EQUAL("RED", ToCString(RED)); + UNIT_ASSERT_VALUES_EQUAL("BLUE", ToCString(BLUE)); + UNIT_ASSERT_VALUES_EQUAL("GREEN", (TStringBuilder() << GREEN)); + UNIT_ASSERT_VALUES_EQUAL("GB", ToCString(GB)); + } +} diff --git a/library/cpp/deprecated/enum_codegen/ut/ya.make b/library/cpp/deprecated/enum_codegen/ut/ya.make new file mode 100644 index 0000000000..32e7ad77a2 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/ut/ya.make @@ -0,0 +1,15 @@ +UNITTEST() + +OWNER(g:util) + +SRCDIR(library/cpp/deprecated/enum_codegen) + +PEERDIR( + library/cpp/deprecated/enum_codegen +) + +SRCS( + enum_codegen_ut.cpp +) + +END() diff --git a/library/cpp/deprecated/enum_codegen/ya.make 
b/library/cpp/deprecated/enum_codegen/ya.make new file mode 100644 index 0000000000..1df07d2192 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(elric) + +SRCS( + enum_codegen.cpp +) + +END() diff --git a/library/cpp/deprecated/kmp/kmp.cpp b/library/cpp/deprecated/kmp/kmp.cpp new file mode 100644 index 0000000000..d02074c94a --- /dev/null +++ b/library/cpp/deprecated/kmp/kmp.cpp @@ -0,0 +1,21 @@ +#include "kmp.h" + +#include <util/generic/yexception.h> + +TKMPMatcher::TKMPMatcher(const char* patternBegin, const char* patternEnd) + : Pattern(patternBegin, patternEnd) +{ + ComputePrefixFunction(); +} + +TKMPMatcher::TKMPMatcher(const TString& pattern) + : Pattern(pattern) +{ + ComputePrefixFunction(); +} + +void TKMPMatcher::ComputePrefixFunction() { + ssize_t* pf; + ::ComputePrefixFunction(Pattern.data(), Pattern.data() + Pattern.size(), &pf); + PrefixFunction.Reset(pf); +} diff --git a/library/cpp/deprecated/kmp/kmp.h b/library/cpp/deprecated/kmp/kmp.h new file mode 100644 index 0000000000..a7f72eece6 --- /dev/null +++ b/library/cpp/deprecated/kmp/kmp.h @@ -0,0 +1,108 @@ +#pragma once + +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +template <typename T> +void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) { + Y_ENSURE(begin != end, TStringBuf("empty pattern")); + ssize_t len = end - begin; + TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]); + ssize_t i = 0; + ssize_t j = -1; + resultHolder[0] = -1; + while (i < len) { + while ((j >= 0) && (begin[j] != begin[i])) + j = resultHolder[j]; + ++i; + ++j; + Y_ASSERT(i >= 0); + Y_ASSERT(j >= 0); + Y_ASSERT(j < len); + if ((i < len) && (begin[i] == begin[j])) + resultHolder[i] = resultHolder[j]; + else + resultHolder[i] = j; + } + *result = resultHolder.Release(); +} + +class TKMPMatcher { +private: + TArrayHolder<ssize_t> PrefixFunction; + TString 
Pattern; + + void ComputePrefixFunction(); + +public: + TKMPMatcher(const char* patternBegin, const char* patternEnd); + TKMPMatcher(const TString& pattern); + + bool SubStr(const char* begin, const char* end, const char*& result) const { + Y_ASSERT(begin <= end); + ssize_t m = Pattern.size(); + ssize_t n = end - begin; + ssize_t i, j; + for (i = 0, j = 0; (i < n) && (j < m); ++i, ++j) { + while ((j >= 0) && (Pattern[j] != begin[i])) + j = PrefixFunction[j]; + } + if (j == m) { + result = begin + i - m; + return true; + } else { + return false; + } + } +}; + +template <typename T> +class TKMPStreamMatcher { +public: + class ICallback { + public: + virtual void OnMatch(const T* begin, const T* end) = 0; + virtual ~ICallback() = default; + }; + +private: + ICallback* Callback; + TArrayHolder<ssize_t> PrefixFunction; + using TTVector = TVector<T>; + TTVector Pattern; + ssize_t State; + TTVector Candidate; + +public: + TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback) + : Callback(callback) + , Pattern(patternBegin, patternEnd) + , State(0) + , Candidate(Pattern.size()) + { + ssize_t* pf; + ComputePrefixFunction(patternBegin, patternEnd, &pf); + PrefixFunction.Reset(pf); + } + + void Push(const T& symbol) { + while ((State >= 0) && (Pattern[State] != symbol)) { + Y_ASSERT(State <= (ssize_t) Pattern.size()); + State = PrefixFunction[State]; + Y_ASSERT(State <= (ssize_t) Pattern.size()); + } + if (State >= 0) + Candidate[State] = symbol; + ++State; + if (State == (ssize_t) Pattern.size()) { + Callback->OnMatch(Candidate.begin(), Candidate.end()); + State = 0; + } + } + + void Clear() { + State = 0; + } +}; diff --git a/library/cpp/deprecated/kmp/kmp_ut.cpp b/library/cpp/deprecated/kmp/kmp_ut.cpp new file mode 100644 index 0000000000..c2eda83c57 --- /dev/null +++ b/library/cpp/deprecated/kmp/kmp_ut.cpp @@ -0,0 +1,80 @@ +#include "kmp.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/output.h> + +static 
TVector<int> FindAll(const TString& pattern, const TString& string) { + TVector<int> result; + TKMPMatcher kmp(pattern); + const char* pResult; + const char* begin = string.begin(); + const char* end = string.end(); + while (kmp.SubStr(begin, end, pResult)) { + result.push_back(int(pResult - string.data())); + begin = pResult + pattern.size(); + } + return result; +} + +class TTestKMP: public TTestBase { + UNIT_TEST_SUITE(TTestKMP); + UNIT_TEST(Test); + UNIT_TEST(TestStream); + UNIT_TEST_SUITE_END(); + +public: + void Test() { + TVector<int> ans = {0, 2}; + UNIT_ASSERT_EQUAL(FindAll("a", "aba"), ans); + ans = {0}; + UNIT_ASSERT_EQUAL(FindAll("aba", "aba"), ans); + ans.clear(); + UNIT_ASSERT_EQUAL(FindAll("abad", "aba"), ans); + ans = {0, 2}; + UNIT_ASSERT_EQUAL(FindAll("ab", "abab"), ans); + } + + class TKMPSimpleCallback: public TKMPStreamMatcher<int>::ICallback { + private: + int* Begin; + int* End; + int Count; + + public: + TKMPSimpleCallback(int* begin, int* end) + : Begin(begin) + , End(end) + , Count(0) + { + } + + void OnMatch(const int* begin, const int* end) override { + UNIT_ASSERT_EQUAL(end - begin, End - Begin); + const int* p0 = Begin; + const int* p1 = begin; + while (p0 < End) { + UNIT_ASSERT_EQUAL(*p0, *p1); + ++p0; + ++p1; + } + ++Count; + } + + int GetCount() const { + return Count; + } + }; + + void TestStream() { + int pattern[] = {2, 3}; + int data[] = {1, 2, 3, 5, 2, 2, 3, 2, 4, 3, 2}; + TKMPSimpleCallback callback(pattern, pattern + 2); + TKMPStreamMatcher<int> matcher(pattern, pattern + 2, &callback); + for (auto& i : data) + matcher.Push(i); + UNIT_ASSERT_EQUAL(2, callback.GetCount()); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TTestKMP); diff --git a/library/cpp/deprecated/kmp/ut/ya.make b/library/cpp/deprecated/kmp/ut/ya.make new file mode 100644 index 0000000000..9c54ee2715 --- /dev/null +++ b/library/cpp/deprecated/kmp/ut/ya.make @@ -0,0 +1,9 @@ +UNITTEST_FOR(library/cpp/deprecated/kmp) + +OWNER(g:util) + +SRCS( + kmp_ut.cpp +) + +END() 
diff --git a/library/cpp/deprecated/kmp/ya.make b/library/cpp/deprecated/kmp/ya.make new file mode 100644 index 0000000000..7c1c557934 --- /dev/null +++ b/library/cpp/deprecated/kmp/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +OWNER(g:util) + +SRCS( + kmp.cpp + kmp.h +) + +END() diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp new file mode 100644 index 0000000000..b0e4511299 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/mapped_file.cpp @@ -0,0 +1,64 @@ +#include "mapped_file.h" + +#include <util/generic/yexception.h> +#include <util/system/defaults.h> +#include <util/system/hi_lo.h> +#include <util/system/filemap.h> + +TMappedFile::TMappedFile(TFileMap* map, const char* dbgName) { + Map_ = map; + i64 len = Map_->Length(); + if (Hi32(len) != 0 && sizeof(size_t) <= sizeof(ui32)) + ythrow yexception() << "File '" << dbgName << "' mapping error: " << len << " too large"; + + Map_->Map(0, static_cast<size_t>(len)); +} + +TMappedFile::TMappedFile(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) + : Map_(nullptr) +{ + init(file, om, dbgName); +} + +void TMappedFile::precharge(size_t off, size_t size) const { + if (!Map_) + return; + + Map_->Precharge(off, size); +} + +void TMappedFile::init(const TString& name) { + THolder<TFileMap> map(new TFileMap(name)); + TMappedFile newFile(map.Get(), name.data()); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode om) { + THolder<TFileMap> map(new TFileMap(name, length, om)); + TMappedFile newFile(map.Get(), name.data()); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) { + THolder<TFileMap> map(new TFileMap(file, om)); + TMappedFile newFile(map.Get(), dbgName); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + 
+void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) { + THolder<TFileMap> map(new TFileMap(name, om)); + TMappedFile newFile(map.Get(), name.data()); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void TMappedFile::flush() { + Map_->Flush(); +} diff --git a/library/cpp/deprecated/mapped_file/mapped_file.h b/library/cpp/deprecated/mapped_file/mapped_file.h new file mode 100644 index 0000000000..45859ed65a --- /dev/null +++ b/library/cpp/deprecated/mapped_file/mapped_file.h @@ -0,0 +1,72 @@ +#pragma once + +#include <util/generic/flags.h> +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/utility.h> +#include <util/generic/yexception.h> +#include <util/system/align.h> +#include <util/system/file.h> +#include <util/system/filemap.h> +#include <util/system/yassert.h> + +#include <cstdio> +#include <new> + +/// Deprecated (by pg@), use TFileMap or TMemoryMap instead +class TMappedFile { +private: + TFileMap* Map_; + +private: + TMappedFile(TFileMap* map, const char* dbgName); + +public: + TMappedFile() { + Map_ = nullptr; + } + + ~TMappedFile() { + term(); + } + + explicit TMappedFile(const TString& name) { + Map_ = nullptr; + init(name, TFileMap::oRdOnly); + } + + TMappedFile(const TFile& file, TFileMap::EOpenMode om = TFileMap::oRdOnly, const char* dbgName = "unknown"); + + void init(const TString& name); + + void init(const TString& name, TFileMap::EOpenMode om); + + void init(const TString& name, size_t length, TFileMap::EOpenMode om); + + void init(const TFile&, TFileMap::EOpenMode om = TFileMap::oRdOnly, const char* dbgName = "unknown"); + + void flush(); + + void term() { + if (Map_) { + Map_->Unmap(); + delete Map_; + Map_ = nullptr; + } + } + + size_t getSize() const { + return (Map_ ? Map_->MappedSize() : 0); + } + + void* getData(size_t pos = 0) const { + Y_ASSERT(!Map_ || (pos <= getSize())); + return (Map_ ? 
(void*)((unsigned char*)Map_->Ptr() + pos) : nullptr); + } + + void precharge(size_t pos = 0, size_t size = (size_t)-1) const; + + void swap(TMappedFile& file) noexcept { + DoSwap(Map_, file.Map_); + } +}; diff --git a/library/cpp/deprecated/mapped_file/ut/mapped_file_ut.cpp b/library/cpp/deprecated/mapped_file/ut/mapped_file_ut.cpp new file mode 100644 index 0000000000..afbd5b3358 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/ut/mapped_file_ut.cpp @@ -0,0 +1,18 @@ +#include <library/cpp/deprecated/mapped_file/mapped_file.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/fs.h> + +Y_UNIT_TEST_SUITE(TMappedFileTest) { + static const char* FileName_("./mappped_file"); + Y_UNIT_TEST(TestFileMapEmpty) { + TFile file(FileName_, CreateAlways | WrOnly); + file.Close(); + + TMappedFile map; + map.init(FileName_); + map.getData(0); + + NFs::Remove(FileName_); + } +}; diff --git a/library/cpp/deprecated/mapped_file/ya.make b/library/cpp/deprecated/mapped_file/ya.make new file mode 100644 index 0000000000..415c438382 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(g:util) + +SRCS( + mapped_file.cpp +) + +END() diff --git a/library/cpp/deprecated/split/delim_string_iter.cpp b/library/cpp/deprecated/split/delim_string_iter.cpp new file mode 100644 index 0000000000..af418c5bfb --- /dev/null +++ b/library/cpp/deprecated/split/delim_string_iter.cpp @@ -0,0 +1,45 @@ +#include "delim_string_iter.h" + +// +// TKeyValueDelimStringIter +// + +void TKeyValueDelimStringIter::ReadKeyAndValue() { + TStringBuf currentToken(*DelimIter); + + size_t pos = currentToken.find('='); + if (pos == TString::npos) { + ChunkValue.Clear(); + ChunkKey = currentToken; + } else { + ChunkKey = currentToken.SubStr(0, pos); + ChunkValue = currentToken.SubStr(pos + 1); + } +} + +TKeyValueDelimStringIter::TKeyValueDelimStringIter(const TStringBuf str, const TStringBuf delim) + : DelimIter(str, delim) +{ + if 
(DelimIter.Valid()) + ReadKeyAndValue(); +} + +bool TKeyValueDelimStringIter::Valid() const { + return DelimIter.Valid(); +} + +TKeyValueDelimStringIter& TKeyValueDelimStringIter::operator++() { + ++DelimIter; + if (DelimIter.Valid()) + ReadKeyAndValue(); + + return *this; +} + +const TStringBuf& TKeyValueDelimStringIter::Key() const { + return ChunkKey; +} + +const TStringBuf& TKeyValueDelimStringIter::Value() const { + return ChunkValue; +} diff --git a/library/cpp/deprecated/split/delim_string_iter.h b/library/cpp/deprecated/split/delim_string_iter.h new file mode 100644 index 0000000000..8e4ca171a0 --- /dev/null +++ b/library/cpp/deprecated/split/delim_string_iter.h @@ -0,0 +1,185 @@ +#pragma once + +#include <util/generic/algorithm.h> +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> + +#include <iterator> + +class TDelimStringIter { +public: + using value_type = TStringBuf; + using difference_type = ptrdiff_t; + using pointer = const TStringBuf*; + using reference = const TStringBuf&; + using iterator_category = std::forward_iterator_tag; + + inline TDelimStringIter(const char* begin, const char* strEnd, TStringBuf delim) + : TDelimStringIter(TStringBuf(begin, strEnd), delim) + { + } + + inline TDelimStringIter(TStringBuf str, TStringBuf delim) + : IsValid(true) + , Str(str) + , Delim(delim) + { + UpdateCurrent(); + } + + inline TDelimStringIter() + : IsValid(false) + { + } + + inline explicit operator bool() const { + return IsValid; + } + + // NOTE: this is a potentially unsafe operation (no overrun check) + inline TDelimStringIter& operator++() { + if (Current.end() != Str.end()) { + Str.Skip(Current.length() + Delim.length()); + UpdateCurrent(); + } else { + Str.Clear(); + Current.Clear(); + IsValid = false; + } + return *this; + } + + inline void operator+=(size_t n) { + for (; n > 0; --n) { + ++(*this); + } + } + + inline bool operator==(const TDelimStringIter& rhs) 
const { + return (IsValid == rhs.IsValid) && (!IsValid || (Current.begin() == rhs.Current.begin())); + } + + inline bool operator!=(const TDelimStringIter& rhs) const { + return !(*this == rhs); + } + + inline TStringBuf operator*() const { + return Current; + } + + inline const TStringBuf* operator->() const { + return &Current; + } + + // Get & advance + template <class T> + inline bool TryNext(T& t) { + if (IsValid) { + t = FromString<T>(Current); + operator++(); + return true; + } else { + return false; + } + } + + template <class T> + inline TDelimStringIter& Next(T& t) // Get & advance + { + if (!TryNext(t)) + ythrow yexception() << "No valid field"; + return *this; + } + + template <class T> + inline T GetNext() { + T res; + Next(res); + return res; + } + + inline const char* GetBegin() const { + return Current.begin(); + } + + inline const char* GetEnd() const { + return Current.end(); + } + + inline bool Valid() const { + return IsValid; + } + + // contents from next token to the end of string + inline TStringBuf Cdr() const { + return Str.SubStr(Current.length() + Delim.length()); + } + + inline TDelimStringIter IterEnd() const { + return TDelimStringIter(); + } + +private: + inline void UpdateCurrent() { + // it is much faster than TStringBuf::find + size_t pos = std::search(Str.begin(), Str.end(), Delim.begin(), Delim.end()) - Str.begin(); + Current = Str.Head(pos); + } + +private: + bool IsValid; + + TStringBuf Str; + TStringBuf Current; + TStringBuf Delim; +}; + +//example: for (TStringBuf field: TDelimStroka(line, "@@")) { ... 
/// Iterates over "key=value" chunks of a delimiter-separated string.
///
/// Each chunk produced by the underlying TDelimStringIter is split on its
/// first '=': the part before it is the key, the part after it is the value.
/// A chunk without '=' yields the whole chunk as the key and an empty value.
/// Key() and Value() return views into the original string, which is not
/// copied and must outlive the iterator.
class TKeyValueDelimStringIter {
public:
    /// Positions the iterator on the first chunk of `str` split by `delim`.
    TKeyValueDelimStringIter(const TStringBuf str, const TStringBuf delim);
    /// True while the iterator points at a chunk.
    bool Valid() const;
    /// Moves to the next chunk; key and value are refreshed only if one exists.
    TKeyValueDelimStringIter& operator++();
    /// Key of the current chunk.
    const TStringBuf& Key() const;
    /// Value of the current chunk (empty when the chunk has no '=').
    const TStringBuf& Value() const;

private:
    TDelimStringIter DelimIter;       // walks the delimiter-separated chunks
    TStringBuf ChunkKey, ChunkValue;  // parsed parts of the current chunk

private:
    /// Parses the current chunk of DelimIter into ChunkKey / ChunkValue.
    void ReadKeyAndValue();
};
Y_UNIT_TEST(SingleCharacterAsDelimiter) { + AssertStringSplit( + "Hello words!", " ", {"Hello", "words!"}); + } + + Y_UNIT_TEST(MultipleCharactersAsDelimiter) { + AssertStringSplit( + "0, 1, 1, 2, 3, 5, 8, 13, 21, 34", "1, ", {"0, ", "", "2, 3, 5, 8, 13, 2", "34"}); + } + + Y_UNIT_TEST(NoDelimitersPresent) { + AssertStringSplit("This string could be yours", "\t", {"This string could be yours"}); + } + + Y_UNIT_TEST(Cdr) { + TDelimStringIter it("a\tc\t", "\t"); + UNIT_ASSERT_STRINGS_EQUAL(*it, "a"); + UNIT_ASSERT_STRINGS_EQUAL(it.Cdr(), "c\t"); + ++it; + UNIT_ASSERT_STRINGS_EQUAL(it.Cdr(), ""); + } + + Y_UNIT_TEST(ForIter) { + TVector<TStringBuf> expected = {"1", "", "3@4", ""}; + TVector<TStringBuf> got; + + for (TStringBuf x : TDelimStroka("1@@@@3@4@@", "@@")) { + got.push_back(x); + } + + UNIT_ASSERT_EQUAL(got, expected); + } +} + +static void AssertKeyValueStringSplit( + const TStringBuf str, + const TStringBuf delim, + const TVector<std::pair<TStringBuf, TStringBuf>>& expected) { + TKeyValueDelimStringIter it(str, delim); + + for (const auto& expectedKeyValue : expected) { + UNIT_ASSERT(it.Valid()); + UNIT_ASSERT_STRINGS_EQUAL(it.Key(), expectedKeyValue.first); + UNIT_ASSERT_STRINGS_EQUAL(it.Value(), expectedKeyValue.second); + ++it; + } + UNIT_ASSERT(!it.Valid()); +} + +Y_UNIT_TEST_SUITE(TKeyValueDelimStringIterTestSuite) { + Y_UNIT_TEST(SingleCharacterAsDelimiter) { + AssertKeyValueStringSplit( + "abc=123,cde=qwer", ",", + {{"abc", "123"}, + {"cde", "qwer"}}); + } + + Y_UNIT_TEST(MultipleCharactersAsDelimiter) { + AssertKeyValueStringSplit( + "abc=xyz@@qwerty=zxcv", "@@", + {{"abc", "xyz"}, + {"qwerty", "zxcv"}}); + } + + Y_UNIT_TEST(NoDelimiters) { + AssertKeyValueStringSplit( + "abc=zz", ",", + {{"abc", "zz"}}); + } + + Y_UNIT_TEST(EmptyElements) { + AssertKeyValueStringSplit( + "@@abc=zxy@@@@qwerty=y@@", "@@", + {{"", ""}, + {"abc", "zxy"}, + {"", ""}, + {"qwerty", "y"}, + {"", ""}}); + } +} diff --git a/library/cpp/deprecated/split/split_iterator.cpp 
b/library/cpp/deprecated/split/split_iterator.cpp new file mode 100644 index 0000000000..32262d25bd --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator.cpp @@ -0,0 +1,318 @@ +#include "split_iterator.h" + +#include <util/system/yassert.h> + +#include <cctype> +#include <cstring> +#include <cstdlib> + +/****************** TSplitDelimiters2 ******************/ + +TSplitDelimiters::TSplitDelimiters(const char* s) { + memset(Delims, 0, sizeof(Delims)); + while (*s) + Delims[(ui8) * (s++)] = true; +} + +/****************** TSplitBase ******************/ +TSplitBase::TSplitBase(const char* str, size_t length) + : Str(str) + , Len(length) +{ +} + +TSplitBase::TSplitBase(const TString& s) + : Str(s.data()) + , Len(s.size()) +{ +} + +/****************** TDelimitersSplit ******************/ + +TDelimitersSplit::TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) + : TSplitBase(str, length) + , Delimiters(delimiters) +{ +} + +TDelimitersSplit::TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters) + : TSplitBase(s) + , Delimiters(delimiters) +{ +} + +size_t TDelimitersSplit::Begin() const { + size_t pos = 0; + while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) + ++pos; + return pos; +} + +TSizeTRegion TDelimitersSplit::Next(size_t& pos) const { + size_t begin = pos; + while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) + ++pos; + TSizeTRegion result(begin, pos); + + while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) + ++pos; + + return result; +} + +TDelimitersSplit::TIterator TDelimitersSplit::Iterator() const { + return TIterator(*this); +} + +/****************** TDelimitersStrictSplit ******************/ + +TDelimitersStrictSplit::TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) + : TSplitBase(str, length) + , Delimiters(delimiters) +{ +} + +TDelimitersStrictSplit::TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters) + : 
TSplitBase(s) + , Delimiters(delimiters) +{ +} + +TDelimitersStrictSplit::TIterator TDelimitersStrictSplit::Iterator() const { + return TIterator(*this); +} + +TSizeTRegion TDelimitersStrictSplit::Next(size_t& pos) const { + size_t begin = pos; + while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) + ++pos; + TSizeTRegion result(begin, pos); + + if (pos < Len) + ++pos; + + return result; +} + +size_t TDelimitersStrictSplit::Begin() const { + return 0; +} + +/****************** TScreenedDelimitersSplit ******************/ + +TScreenedDelimitersSplit::TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) + : TSplitBase(s) + , Delimiters(delimiters) + , Screens(screens) +{ +} + +TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) + : TSplitBase(str, length) + , Delimiters(delimiters) + , Screens(screens) +{ +} + +TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const { + return TIterator(*this); +} + +TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const { + size_t begin = pos; + bool screened = false; + while (pos < Len) { + if (Screens.IsDelimiter(Str[pos])) + screened = !screened; + if (Delimiters.IsDelimiter(Str[pos]) && !screened) + break; + ++pos; + } + TSizeTRegion result(begin, pos); + + if (pos < Len) + ++pos; + + return result; +} + +size_t TScreenedDelimitersSplit::Begin() const { + return 0; +} + +/****************** TDelimitersSplitWithoutTags ******************/ + +TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters) + : TSplitBase(str, length) + , Delimiters(delimiters) +{ +} + +TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters) + : TSplitBase(s) + , Delimiters(delimiters) +{ +} + +size_t TDelimitersSplitWithoutTags::SkipTag(size_t 
pos) const { + Y_ASSERT('<' == Str[pos]); + while ((pos < Len) && ('>' != Str[pos])) + ++pos; + return pos + 1; +} + +size_t TDelimitersSplitWithoutTags::SkipDelimiters(size_t pos) const { + while (true) { + while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) + ++pos; + if (pos < Len) { + if ('<' != Str[pos]) + break; + else + pos = SkipTag(pos); + } else + break; + } + return pos; +} + +size_t TDelimitersSplitWithoutTags::Begin() const { + size_t pos = 0; + pos = SkipDelimiters(pos); + return pos; +} + +TSizeTRegion TDelimitersSplitWithoutTags::Next(size_t& pos) const { + size_t begin = pos; + while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) + ++pos; + TSizeTRegion result(begin, pos); + + pos = SkipDelimiters(pos); + + return result; +} + +TDelimitersSplitWithoutTags::TIterator TDelimitersSplitWithoutTags::Iterator() const { + return TIterator(*this); +} + +/****************** TCharSplit ******************/ + +TCharSplit::TCharSplit(const char* str, size_t length) + : TSplitBase(str, length) +{ +} + +TCharSplit::TCharSplit(const TString& s) + : TSplitBase(s) +{ +} + +TCharSplit::TIterator TCharSplit::Iterator() const { + return TIterator(*this); +} + +TSizeTRegion TCharSplit::Next(size_t& pos) const { + TSizeTRegion result(pos, pos + 1); + ++pos; + return result; +} + +size_t TCharSplit::Begin() const { + return 0; +} + +/****************** TCharSplitWithoutTags ******************/ + +TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length) + : TSplitBase(str, length) +{ +} + +TCharSplitWithoutTags::TCharSplitWithoutTags(const TString& s) + : TSplitBase(s) +{ +} + +size_t TCharSplitWithoutTags::SkipTag(size_t pos) const { + Y_ASSERT('<' == Str[pos]); + while ((pos < Len) && ('>' != Str[pos])) + ++pos; + return pos + 1; +} + +size_t TCharSplitWithoutTags::SkipDelimiters(size_t pos) const { + while (true) { + if (pos < Len) { + if ('<' != Str[pos]) + break; + else + pos = SkipTag(pos); + } 
else + break; + } + return pos; +} + +size_t TCharSplitWithoutTags::Begin() const { + size_t pos = 0; + pos = SkipDelimiters(pos); + return pos; +} + +TSizeTRegion TCharSplitWithoutTags::Next(size_t& pos) const { + size_t begin = pos++; + TSizeTRegion result(begin, pos); + + pos = SkipDelimiters(pos); + + return result; +} + +TCharSplitWithoutTags::TIterator TCharSplitWithoutTags::Iterator() const { + return TIterator(*this); +} + +TSubstringSplitDelimiter::TSubstringSplitDelimiter(const TString& s) + : Matcher(s) + , Len(s.size()) +{ +} + +/****************** TSubstringSplit ******************/ + +TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter) + : TSplitBase(str, length) + , Delimiter(delimiter) +{ +} + +TSubstringSplit::TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter) + : TSplitBase(str) + , Delimiter(delimiter) +{ +} + +TSubstringSplit::TIterator TSubstringSplit::Iterator() const { + return TIterator(*this); +} + +TSizeTRegion TSubstringSplit::Next(size_t& pos) const { + const char* begin = Str + pos; + const char* end = Str + Len; + const char* delim; + if (Delimiter.Matcher.SubStr(begin, end, delim)) { + TSizeTRegion result(pos, delim - begin + pos); + pos += delim - begin + Delimiter.Len; + return result; + } else { + TSizeTRegion result(pos, end - begin + pos); + pos += end - begin; + return result; + } +} + +size_t TSubstringSplit::Begin() const { + return 0; +} diff --git a/library/cpp/deprecated/split/split_iterator.h b/library/cpp/deprecated/split/split_iterator.h new file mode 100644 index 0000000000..0eacc29228 --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator.h @@ -0,0 +1,317 @@ +#pragma once + +#include <library/cpp/deprecated/kmp/kmp.h> +#include <util/string/cast.h> +#include <util/string/util.h> +#include <util/string/builder.h> + +#include <util/system/yassert.h> +#include <util/system/defaults.h> +#include <util/generic/strbuf.h> +#include 
<util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +#include <cstdio> + +template <typename T> +struct TNumPair { + T Begin; + T End; + + TNumPair() = default; + + TNumPair(T begin, T end) + : Begin(begin) + , End(end) + { + Y_ASSERT(begin <= end); + } + + T Length() const { + return End - Begin + 1; + } + + bool operator==(const TNumPair& r) const { + return (Begin == r.Begin) && (End == r.End); + } + + bool operator!=(const TNumPair& r) const { + return (Begin != r.Begin) || (End != r.End); + } +}; + +using TSizeTRegion = TNumPair<size_t>; +using TUi32Region = TNumPair<ui32>; + +template <> +inline TString ToString(const TUi32Region& r) { + return TStringBuilder() << "(" << r.Begin << ", " << r.End << ")"; +} + +template <> +inline TUi32Region FromString(const TString& s) { + TUi32Region result; + sscanf(s.data(), "(%" PRIu32 ", %" PRIu32 ")", &result.Begin, &result.End); + return result; +} + +class TSplitDelimiters { +private: + bool Delims[256]; + +public: + explicit TSplitDelimiters(const char* s); + + Y_FORCE_INLINE bool IsDelimiter(ui8 ch) const { + return Delims[ch]; + } +}; + +template <class Split> +class TSplitIterator; + +class TSplitBase { +protected: + const char* Str; + size_t Len; + +public: + TSplitBase(const char* str, size_t length); + TSplitBase(const TString& s); + + Y_FORCE_INLINE const char* GetString() const { + return Str; + } + + Y_FORCE_INLINE size_t GetLength() const { + return Len; + } + +private: + // we don't own Str, make sure that no one calls us with temporary object + TSplitBase(TString&&) = delete; +}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4512) +#endif + +class TDelimitersSplit: public TSplitBase { +private: + const TSplitDelimiters& Delimiters; + +public: + using TIterator = TSplitIterator<TDelimitersSplit>; + friend class TSplitIterator<TDelimitersSplit>; + + TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); + 
/// Splits a string on single-character delimiters without merging adjacent
/// ones: each token is terminated by exactly one delimiter, so consecutive
/// delimiters produce empty tokens. Iteration starts at offset 0.
///
/// Neither the string nor the delimiter set is copied; both must outlive
/// the splitter and any iterator obtained from it.
class TDelimitersStrictSplit: public TSplitBase {
private:
    const TSplitDelimiters& Delimiters;

public:
    using TIterator = TSplitIterator<TDelimitersStrictSplit>;
    friend class TSplitIterator<TDelimitersStrictSplit>;

    TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters);
    TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters);
    /// Returns an iterator positioned at Begin().
    TIterator Iterator() const;
    /// Returns the [begin, end) region of the token at `pos` and moves `pos`
    /// past the one delimiter that follows it, if any.
    TSizeTRegion Next(size_t& pos) const;
    /// Offset of the first token (always 0 for this splitter).
    size_t Begin() const;

private:
    // we don't own Delimiters, make sure that no one calls us with temporary object
    TDelimitersStrictSplit(const char*, size_t, TSplitDelimiters&&) = delete;
    TDelimitersStrictSplit(const TString&, TSplitDelimiters&&) = delete;
    TDelimitersStrictSplit(TString&&, const TSplitDelimiters&) = delete;
};
/// Splits a string on single-character delimiters while skipping `<...>` tags.
///
/// A token ends at a delimiter or at '<'. Runs of delimiters and whole tags
/// (everything from '<' through the next '>') between tokens are skipped
/// together, so they never produce empty tokens.
///
/// Neither the string nor the delimiter set is copied; both must outlive
/// the splitter and any iterator obtained from it.
class TDelimitersSplitWithoutTags: public TSplitBase {
private:
    const TSplitDelimiters& Delimiters;
    /// Returns the position just past the tag starting at `pos` (must be '<').
    size_t SkipTag(size_t pos) const;
    /// Skips any mix of delimiters and tags starting at `pos`.
    size_t SkipDelimiters(size_t pos) const;

public:
    using TIterator = TSplitIterator<TDelimitersSplitWithoutTags>;
    friend class TSplitIterator<TDelimitersSplitWithoutTags>;

    TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters);
    TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters);
    /// Returns an iterator positioned at Begin().
    TIterator Iterator() const;
    /// Returns the [begin, end) region of the token at `pos` and moves `pos`
    /// to the start of the next token.
    TSizeTRegion Next(size_t& pos) const;
    /// Offset of the first token, after leading delimiters and tags.
    size_t Begin() const;

private:
    // we don't own Delimiters, make sure that no one calls us with temporary object
    TDelimitersSplitWithoutTags(const char*, size_t, TSplitDelimiters&&) = delete;
    TDelimitersSplitWithoutTags(const TString&, TSplitDelimiters&&) = delete;
    TDelimitersSplitWithoutTags(TString&&, const TSplitDelimiters&) = delete;
};
TCharSplitWithoutTags(const char* str, size_t length); + TCharSplitWithoutTags(const TString& s); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Str, make sure that no one calls us with temporary object + TCharSplitWithoutTags(TString&&) = delete; +}; + +class TSubstringSplitDelimiter { +public: + TKMPMatcher Matcher; + size_t Len; + + TSubstringSplitDelimiter(const TString& s); +}; + +class TSubstringSplit: public TSplitBase { +private: + const TSubstringSplitDelimiter& Delimiter; + +public: + using TIterator = TSplitIterator<TSubstringSplit>; + friend class TSplitIterator<TSubstringSplit>; + + TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter); + TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Delimiters, make sure that no one calls us with temporary object + TSubstringSplit(TString&&, const TSubstringSplitDelimiter&) = delete; + TSubstringSplit(const TString&, TSubstringSplitDelimiter&&) = delete; +}; + +template <class TSplit> +class TSplitIterator { +protected: + const TSplit& Split; + size_t Pos; + TString* CurrentStroka; + +public: + TSplitIterator(const TSplit& split) + : Split(split) + , Pos(Split.Begin()) + , CurrentStroka(nullptr) + { + } + + virtual ~TSplitIterator() { + delete CurrentStroka; + } + + inline TSizeTRegion Next() { + Y_ENSURE(!Eof(), TStringBuf("eof reached")); + return Split.Next(Pos); + } + + TStringBuf NextTok() { + if (Eof()) + return TStringBuf(); + TSizeTRegion region = Next(); + return TStringBuf(Split.Str + region.Begin, region.End - region.Begin); + } + + const TString& NextString() { + if (!CurrentStroka) + CurrentStroka = new TString(); + TSizeTRegion region = Next(); + CurrentStroka->assign(Split.Str, region.Begin, region.Length() - 1); + return 
*CurrentStroka; + } + + inline bool Eof() const { + return Pos >= Split.Len; + } + + TString GetTail() const { + return TString(Split.Str + Pos); + } + + void Skip(size_t count) { + for (size_t i = 0; i < count; ++i) + Next(); + } +}; + +using TSplitTokens = TVector<TString>; + +template <typename TSplit> +void Split(const TSplit& split, TSplitTokens* words) { + words->clear(); + TSplitIterator<TSplit> it(split); + while (!it.Eof()) + words->push_back(it.NextString()); +} diff --git a/library/cpp/deprecated/split/split_iterator_ut.cpp b/library/cpp/deprecated/split/split_iterator_ut.cpp new file mode 100644 index 0000000000..be5069c4be --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator_ut.cpp @@ -0,0 +1,152 @@ +#include "split_iterator.h" + +#include <library/cpp/testing/unittest/registar.h> + +class TSplitIteratorTest: public TTestBase { + UNIT_TEST_SUITE(TSplitIteratorTest); + UNIT_TEST(TestDelimiters); + UNIT_TEST(TestDelimitersSplit); + UNIT_TEST(TestDelimitersStrictSplit); + UNIT_TEST(TestTail); + UNIT_TEST(TestScreenedDelimitersSplit); + UNIT_TEST(TestSubstringDelimiter); + UNIT_TEST_SUITE_END(); + +public: + void TestDelimiters(); + void TestDelimitersSplit(); + void TestDelimitersStrictSplit(); + void TestTail(); + void TestScreenedDelimitersSplit(); + void TestSubstringDelimiter(); +}; + +void TSplitIteratorTest::TestDelimiters() { + TSplitDelimiters delims("@"); + for (int i = 0; i < 256; ++i) + if ('@' != i) { + UNIT_ASSERT(!delims.IsDelimiter((ui8)i)); + } else { + UNIT_ASSERT(delims.IsDelimiter((ui8)i)); + } +} + +void TSplitIteratorTest::TestDelimitersSplit() { + { + TString s = "1a3b45cd"; + TSplitDelimiters delims("abcd"); + TDelimitersSplit split(s, delims); + TSplitTokens tokens; + Split(split, &tokens); + TSplitTokens pattern = {"1", "3", "45"}; + UNIT_ASSERT(tokens == pattern); + } + { + TString s = "aaaaaa"; + TSplitDelimiters delims("abcd"); + TDelimitersSplit split(s, delims); + TSplitTokens tokens; + Split(split, &tokens); + 
/// Checks that GetTail() returns the unconsumed remainder of the input after
/// each Next() call, ending with an empty string once all tokens are read.
void TSplitIteratorTest::TestTail() {
    TString s = "grp@2@4";
    TSplitDelimiters delims("@");
    TDelimitersSplit split(s, delims);
    TDelimitersSplit::TIterator it = split.Iterator();
    // Nothing consumed yet: the tail is the whole input.
    UNIT_ASSERT_EQUAL(it.GetTail(), "grp@2@4");
    it.Next();
    // Each Next() consumes a token together with the delimiter after it.
    UNIT_ASSERT_EQUAL(it.GetTail(), "2@4");
    it.Next();
    UNIT_ASSERT_EQUAL(it.GetTail(), "4");
    it.Next();
    // All tokens consumed.
    UNIT_ASSERT_EQUAL(it.GetTail(), "");
}
/// Checks splitting on a multi-character delimiter: only the full "@@"
/// sequence separates tokens, a lone '@' stays inside the token.
void TSplitIteratorTest::TestSubstringDelimiter() {
    const TString s = "a@@bb@@cc@c.d@@r";
    // The splitter stores a reference to the delimiter, so it must outlive it.
    static const TSubstringSplitDelimiter delimiter("@@");
    const TSubstringSplit splitter(s, delimiter);
    TSubstringSplit::TIterator it = splitter.Iterator();
    UNIT_ASSERT_EQUAL(it.NextString(), "a");
    UNIT_ASSERT_EQUAL(it.NextString(), "bb");
    // Single '@' is not a delimiter here.
    UNIT_ASSERT_EQUAL(it.NextString(), "cc@c.d");
    UNIT_ASSERT_EQUAL(it.NextString(), "r");
    UNIT_ASSERT(it.Eof());
}