diff options
| author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 | 
|---|---|---|
| committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 | 
| commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
| tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/deprecated | |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/deprecated')
33 files changed, 2462 insertions, 0 deletions
| diff --git a/library/cpp/deprecated/accessors/README.md b/library/cpp/deprecated/accessors/README.md new file mode 100644 index 00000000000..498f1203e00 --- /dev/null +++ b/library/cpp/deprecated/accessors/README.md @@ -0,0 +1,5 @@ +Unified accessors for Arcadia containers and user types. + +Accessors implemented here mix different kinds of access at the wrong abstraction level, so they shouldn't be used. + +If you want begin/end/size for your containers, use std::begin, std::end, std::size. If you need generic reserve / resize / clear / insert, just use appropriate container methods or do your own overloads in place. diff --git a/library/cpp/deprecated/accessors/accessors.cpp b/library/cpp/deprecated/accessors/accessors.cpp new file mode 100644 index 00000000000..7d37e586fa6 --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors.cpp @@ -0,0 +1 @@ +#include "accessors.h" diff --git a/library/cpp/deprecated/accessors/accessors.h b/library/cpp/deprecated/accessors/accessors.h new file mode 100644 index 00000000000..6d4b1da3ad7 --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors.h @@ -0,0 +1,83 @@ +#pragma once + +#include "accessors_impl.h" + +namespace NAccessors { +    /* + * Adds API compatibility between different types representing memory regions. + * + * i.e. this will work: + * + * TString t; + * const char* beg = NAccessors::Begin(t); // t.begin() + * const char* end = NAccessors::End(t); // t.end() + * size_t sz = NAccessors::Size(t); // t.size() + * + * as well as this: + * + * ui64 t; + * const ui64* beg = NAccessors::Begin(t); // &t + * const ui64* end = NAccessors::End(t); // &t + 1 + * size_t sz = NAccessors::Size(t); // 1 + * + * Both will give you begin, end and size of the underlying memory region. 
+ */ + +    template <typename T> +    inline const typename TMemoryTraits<T>::TElementType* Begin(const T& t) { +        return NPrivate::TBegin<T>::Get(t); +    } + +    template <typename T> +    inline const typename TMemoryTraits<T>::TElementType* End(const T& t) { +        return NPrivate::TEnd<T>::Get(t); +    } + +    template <typename T> +    inline size_t Size(const T& t) { +        return End(t) - Begin(t); +    } + +    /** + * This gives some unification in terms of memory manipulation. + */ + +    template <typename T> +    inline void Reserve(T& t, size_t sz) { +        NPrivate::TReserve<T>::Do(t, sz); +    } + +    template <typename T> +    inline void Resize(T& t, size_t sz) { +        NPrivate::TResize<T>::Do(t, sz); +    } + +    template <typename T> +    inline void Clear(T& t) { +        NPrivate::TClear<T, false>::Do(t); +    } + +    template <typename T> +    inline void Init(T& t) { +        NPrivate::TClear<T, true>::Do(t); +    } + +    template <typename T> +    inline void Append(T& t, const typename TMemoryTraits<T>::TElementType& v) { +        NPrivate::TAppend<T>::Do(t, v); +    } + +    template <typename T> +    inline void Append(T& t, +                       const typename TMemoryTraits<T>::TElementType* beg, +                       const typename TMemoryTraits<T>::TElementType* end) { +        NPrivate::TAppendRegion<T>::Do(t, beg, end); +    } + +    template <typename T> +    inline void Assign(T& t, +                       const typename TMemoryTraits<T>::TElementType* beg, +                       const typename TMemoryTraits<T>::TElementType* end) { +        NPrivate::TAssign<T>::Do(t, beg, end); +    } +} diff --git a/library/cpp/deprecated/accessors/accessors_impl.cpp b/library/cpp/deprecated/accessors/accessors_impl.cpp new file mode 100644 index 00000000000..0bf74cab7b2 --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors_impl.cpp @@ -0,0 +1 @@ +#include "accessors_impl.h" diff --git 
a/library/cpp/deprecated/accessors/accessors_impl.h b/library/cpp/deprecated/accessors/accessors_impl.h new file mode 100644 index 00000000000..6b2b987351f --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors_impl.h @@ -0,0 +1,420 @@ +#pragma once + +#include "memory_traits.h" + +namespace NAccessors { +    namespace NPrivate { +        template <typename Ta> +        struct TMemoryAccessorBase { +            enum { +                SimpleMemory = TMemoryTraits<Ta>::SimpleMemory, +                ContinuousMemory = TMemoryTraits<Ta>::ContinuousMemory, +            }; + +            struct TBadAccessor; +        }; + +        template <typename Ta> +        struct TBegin: public TMemoryAccessorBase<Ta> { +            using TElementType = typename TMemoryTraits<Ta>::TElementType; + +            template <typename Tb> +            struct TNoMemoryIndirectionBegin { +                static const TElementType* Get(const Tb& b) { +                    return (const TElementType*)&b; +                } +            }; + +            template <typename Tb> +            struct TIndirectMemoryRegionBegin { +                Y_HAS_MEMBER(Begin); +                Y_HAS_MEMBER(begin); + +                template <typename Tc> +                struct TByBegin { +                    static const TElementType* Get(const Tc& b) { +                        return (const TElementType*)b.Begin(); +                    } +                }; + +                template <typename Tc> +                struct TBybegin { +                    static const TElementType* Get(const Tc& b) { +                        return (const TElementType*)b.begin(); +                    } +                }; + +                using TGet = std::conditional_t<THasBegin<Tb>::value, TByBegin<Tb>, TBybegin<Tb>>; + +                static const TElementType* Get(const Tb& b) { +                    return TGet::Get(b); +                } +            }; + +            using TGet = std::conditional_t< +      
          TMemoryAccessorBase<Ta>::SimpleMemory, +                TNoMemoryIndirectionBegin<Ta>, +                std::conditional_t< +                    TMemoryAccessorBase<Ta>::ContinuousMemory, +                    TIndirectMemoryRegionBegin<Ta>, +                    typename TMemoryAccessorBase<Ta>::TBadAccessor>>; + +            static const TElementType* Get(const Ta& b) { +                return TGet::Get(b); +            } +        }; + +        template <typename Ta> +        struct TEnd: public TMemoryAccessorBase<Ta> { +            using TElementType = typename TMemoryTraits<Ta>::TElementType; + +            template <typename Tb> +            struct TNoMemoryIndirectionEnd { +                static const TElementType* Get(const Tb& b) { +                    return (const TElementType*)(&b + 1); +                } +            }; + +            template <typename Tb> +            struct TIndirectMemoryRegionEnd { +                Y_HAS_MEMBER(End); +                Y_HAS_MEMBER(end); + +                template <typename Tc> +                struct TByEnd { +                    static const TElementType* Get(const Tc& b) { +                        return (const TElementType*)b.End(); +                    } +                }; + +                template <typename Tc> +                struct TByend { +                    static const TElementType* Get(const Tc& b) { +                        return (const TElementType*)b.end(); +                    } +                }; + +                using TGet = std::conditional_t<THasEnd<Tb>::value, TByEnd<Tb>, TByend<Tb>>; + +                static const TElementType* Get(const Tb& b) { +                    return TGet::Get(b); +                } +            }; + +            using TGet = std::conditional_t< +                TMemoryAccessorBase<Ta>::SimpleMemory, +                TNoMemoryIndirectionEnd<Ta>, +                std::conditional_t< +                    TMemoryAccessorBase<Ta>::ContinuousMemory, +     
               TIndirectMemoryRegionEnd<Ta>, +                    typename TMemoryAccessorBase<Ta>::TBadAccessor>>; + +            static const TElementType* Get(const Ta& b) { +                return TGet::Get(b); +            } +        }; + +        template <typename Ta, bool Init> +        struct TClear: public TMemoryAccessorBase<Ta> { +            template <typename Tb> +            struct TNoMemoryIndirectionClear { +                static void Do(Tb& b) { +                    Zero(b); +                } +            }; + +            template <typename Tb> +            struct TIndirectMemoryRegionClear { +                Y_HAS_MEMBER(Clear); +                Y_HAS_MEMBER(clear); + +                template <typename Tc> +                struct TByClear { +                    static void Do(Tc& b) { +                        b.Clear(); +                    } +                }; + +                template <typename Tc> +                struct TByclear { +                    static void Do(Tc& b) { +                        b.clear(); +                    } +                }; + +                template <typename Tc> +                struct TByNone { +                    static void Do(Tc& b) { +                        if (!Init) +                            b = Tc(); +                    } +                }; + +                using TDo = std::conditional_t< +                    THasClear<Tb>::value, +                    TByClear<Tb>, +                    std::conditional_t< +                        THasclear<Tb>::value, +                        TByclear<Tb>, +                        TByNone<Tb>>>; + +                static void Do(Tb& b) { +                    TDo::Do(b); +                } +            }; + +            using TDo = std::conditional_t<TMemoryAccessorBase<Ta>::SimpleMemory, TNoMemoryIndirectionClear<Ta>, TIndirectMemoryRegionClear<Ta>>; + +            static void Do(Ta& b) { +                TDo::Do(b); +            } +        }; + +        
template <typename Tb> +        struct TReserve { +            Y_HAS_MEMBER(Reserve); +            Y_HAS_MEMBER(reserve); + +            template <typename Tc> +            struct TByReserve { +                static void Do(Tc& b, size_t sz) { +                    b.Reserve(sz); +                } +            }; + +            template <typename Tc> +            struct TByreserve { +                static void Do(Tc& b, size_t sz) { +                    b.reserve(sz); +                } +            }; + +            template <typename Tc> +            struct TByNone { +                static void Do(Tc&, size_t) { +                } +            }; + +            using TDo = std::conditional_t< +                THasReserve<Tb>::value, +                TByReserve<Tb>, +                std::conditional_t< +                    THasreserve<Tb>::value, +                    TByreserve<Tb>, +                    TByNone<Tb>>>; + +            static void Do(Tb& b, size_t sz) { +                TDo::Do(b, sz); +            } +        }; + +        template <typename Tb> +        struct TResize { +            Y_HAS_MEMBER(Resize); +            Y_HAS_MEMBER(resize); + +            template <typename Tc> +            struct TByResize { +                static void Do(Tc& b, size_t sz) { +                    b.Resize(sz); +                } +            }; + +            template <typename Tc> +            struct TByresize { +                static void Do(Tc& b, size_t sz) { +                    b.resize(sz); +                } +            }; + +            using TDo = std::conditional_t<THasResize<Tb>::value, TByResize<Tb>, TByresize<Tb>>; + +            static void Do(Tb& b, size_t sz) { +                TDo::Do(b, sz); +            } +        }; + +        template <typename Tb> +        struct TAppend { +            Y_HAS_MEMBER(Append); +            Y_HAS_MEMBER(append); +            Y_HAS_MEMBER(push_back); + +            template <typename Tc> +            struct 
TByAppend { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType& val) { +                    b.Append(val); +                } +            }; + +            template <typename Tc> +            struct TByappend { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType& val) { +                    b.append(val); +                } +            }; + +            template <typename Tc> +            struct TBypush_back { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType& val) { +                    b.push_back(val); +                } +            }; + +            using TDo = std::conditional_t< +                THasAppend<Tb>::value, +                TByAppend<Tb>, +                std::conditional_t< +                    THasappend<Tb>::value, +                    TByappend<Tb>, +                    TBypush_back<Tb>>>; + +            using TElementType = typename TMemoryTraits<Tb>::TElementType; + +            static void Do(Tb& b, const TElementType& val) { +                TDo::Do(b, val); +            } +        }; + +        template <typename Tb> +        struct TAppendRegion { +            Y_HAS_MEMBER(Append); +            Y_HAS_MEMBER(append); +            Y_HAS_MEMBER(insert); + +            template <typename Tc> +            struct TByAppend { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                    b.Append(beg, end); +                } +            }; + +            template <typename Tc> +            struct TByappend { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType* 
beg, const TElementType* end) { +                    b.append(beg, end); +                } +            }; + +            template <typename Tc> +            struct TByinsert { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                    b.insert(b.end(), beg, end); +                } +            }; + +            template <typename Tc> +            struct TByNone { +                using TElementType = typename TMemoryTraits<Tc>::TElementType; + +                static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                    for (const TElementType* it = beg; it != end; ++it) +                        TAppend<Tc>::Do(b, *it); +                } +            }; + +            using TDo = std::conditional_t< +                THasAppend<Tb>::value, +                TByAppend<Tb>, +                std::conditional_t< +                    THasappend<Tb>::value, +                    TByappend<Tb>, +                    std::conditional_t< +                        THasinsert<Tb>::value, +                        TByinsert<Tb>, +                        TByNone<Tb>>>>; + +            using TElementType = typename TMemoryTraits<Tb>::TElementType; + +            static void Do(Tb& b, const TElementType* beg, const TElementType* end) { +                TDo::Do(b, beg, end); +            } +        }; + +        template <typename Ta> +        struct TAssign: public TMemoryAccessorBase<Ta> { +            using TElementType = typename TMemoryTraits<Ta>::TElementType; + +            template <typename Tb> +            struct TNoMemoryIndirectionAssign { +                static void Do(Tb& b, const TElementType* beg, const TElementType* end) { +                    if (sizeof(Tb) == sizeof(TElementType) && end - beg > 0) { +                        memcpy(&b, beg, sizeof(Tb)); +                    } else if (end - beg > 
0) { +                        memcpy(&b, beg, Min<size_t>((end - beg) * sizeof(TElementType), sizeof(Tb))); +                    } else { +                        Zero(b); +                    } +                } +            }; + +            template <typename Tb> +            struct TIndirectMemoryRegionAssign { +                Y_HAS_MEMBER(Assign); +                Y_HAS_MEMBER(assign); + +                template <typename Tc> +                struct TByAssign { +                    static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                        b.Assign(beg, end); +                    } +                }; + +                template <typename Tc> +                struct TByassign { +                    static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                        b.assign(beg, end); +                    } +                }; + +                template <typename Tc> +                struct TByClearAppend { +                    static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                        TClear<Tc, false>::Do(b); +                        TAppendRegion<Tc>::Do(b, beg, end); +                    } +                }; + +                template <typename Tc> +                struct TByConstruction { +                    static void Do(Tc& b, const TElementType* beg, const TElementType* end) { +                        b = Tc(beg, end); +                    } +                }; + +                using TDo = std::conditional_t< +                    THasAssign<Tb>::value, +                    TByAssign<Tb>, +                    std::conditional_t< +                        THasassign<Tb>::value, +                        TByassign<Tb>, +                        std::conditional_t< +                            TMemoryTraits<Tb>::OwnsMemory, +                            TByClearAppend<Tb>, +                            TByConstruction<Tb>>>>; + +              
  static void Do(Tb& b, const TElementType* beg, const TElementType* end) { +                    TDo::Do(b, beg, end); +                } +            }; + +            using TDo = std::conditional_t<TMemoryAccessorBase<Ta>::SimpleMemory, TNoMemoryIndirectionAssign<Ta>, TIndirectMemoryRegionAssign<Ta>>; + +            static void Do(Ta& b, const TElementType* beg, const TElementType* end) { +                TDo::Do(b, beg, end); +            } +        }; +    } +} diff --git a/library/cpp/deprecated/accessors/accessors_ut.cpp b/library/cpp/deprecated/accessors/accessors_ut.cpp new file mode 100644 index 00000000000..a9bdc9fcc42 --- /dev/null +++ b/library/cpp/deprecated/accessors/accessors_ut.cpp @@ -0,0 +1,92 @@ +#include "accessors.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/buffer.h> +#include <util/generic/vector.h> + +#include <array> + +class TAccessorsTest: public TTestBase { +    UNIT_TEST_SUITE(TAccessorsTest); +    UNIT_TEST(TestAccessors); +    UNIT_TEST_SUITE_END(); + +private: +    template <typename T> +    void TestRead(const T& t, const char* comm) { +        const char* beg = (const char*)NAccessors::Begin(t); +        const char* end = (const char*)NAccessors::End(t); +        long sz = NAccessors::Size(t) * sizeof(typename TMemoryTraits<T>::TElementType); + +        UNIT_ASSERT_VALUES_EQUAL_C(end - beg, sz, comm); +    } + +    template <typename T> +    void TestWrite(const char* comm) { +        typename TMemoryTraits<T>::TElementType val[4] = {'t', 'e', 's', 't'}; +        T t; +        NAccessors::Init(t); +        NAccessors::Reserve(t, 6); + +        size_t sz = NAccessors::Size(t); +        UNIT_ASSERT_VALUES_EQUAL_C(0u, sz, comm); + +        NAccessors::Append(t, 'a'); +        sz = NAccessors::Size(t); +        UNIT_ASSERT_VALUES_EQUAL_C(1u, sz, comm); + +        NAccessors::Append(t, val, val + 4); +        sz = NAccessors::Size(t); +        UNIT_ASSERT_VALUES_EQUAL_C(5u, sz, comm); + +        
NAccessors::Clear(t); + +        sz = NAccessors::Size(t); +        UNIT_ASSERT_VALUES_EQUAL_C(0u, sz, comm); +    } + +    void TestAccessors() { +        TestRead('a', "char"); +        TestRead(1, "int"); + +        int t[4] = {0, 1, 2, 3}; + +        TestRead(t, "int[4]"); + +        TStringBuf sbuf = "test"; + +        TestRead(sbuf, "TStringBuf"); + +        TUtf16String wtr; +        wtr.resize(10, 1024); + +        TestRead(wtr, "TUtf16String"); + +        TBuffer buf; +        buf.Resize(30); + +        TestRead(buf, "TBuffer"); + +        TVector<ui64> vec(10, 100); + +        TestRead(vec, "TVector<ui64>"); + +        TestWrite<TString>("TString"); +        TestWrite<TVector<char>>("TVector<char>"); +        TestWrite<TBuffer>("TBuffer"); +        TestWrite<TVector<ui64>>("TVector<ui64>"); +        TestWrite<TUtf16String>("TUtf16String"); + +        std::array<TString, 10> sarr; +        NAccessors::Init(sarr); +        NAccessors::Clear(sarr); + +        std::array<char, 10> carr; +        NAccessors::Init(carr); +        NAccessors::Clear(carr); +        TestRead(carr, "std::array<char, 10>"); +    } +}; + +UNIT_TEST_SUITE_REGISTRATION(TAccessorsTest) diff --git a/library/cpp/deprecated/accessors/memory_traits.cpp b/library/cpp/deprecated/accessors/memory_traits.cpp new file mode 100644 index 00000000000..df53026cf4f --- /dev/null +++ b/library/cpp/deprecated/accessors/memory_traits.cpp @@ -0,0 +1 @@ +#include "memory_traits.h" diff --git a/library/cpp/deprecated/accessors/memory_traits.h b/library/cpp/deprecated/accessors/memory_traits.h new file mode 100644 index 00000000000..aa837705d3d --- /dev/null +++ b/library/cpp/deprecated/accessors/memory_traits.h @@ -0,0 +1,168 @@ +#pragma once + +#include <util/generic/array_ref.h> +#include <util/memory/blob.h> +#include <util/memory/tempbuf.h> +#include <util/generic/buffer.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include 
<util/generic/typetraits.h> + +#include <array> +#include <string> +#include <utility> + +template <typename T> +struct TMemoryTraits { +    enum { +        SimpleMemory = std::is_arithmetic<T>::value, +        ContinuousMemory = SimpleMemory, +        OwnsMemory = SimpleMemory, +    }; + +    using TElementType = T; +}; + +template <typename T, size_t n> +struct TMemoryTraits<T[n]> { +    enum { +        SimpleMemory = TMemoryTraits<T>::SimpleMemory, +        ContinuousMemory = SimpleMemory, +        OwnsMemory = SimpleMemory, +    }; + +    using TElementType = T; +}; + +template <typename T, size_t n> +struct TMemoryTraits<std::array<T, n>> { +    enum { +        SimpleMemory = TMemoryTraits<T>::SimpleMemory, +        ContinuousMemory = SimpleMemory, +        OwnsMemory = SimpleMemory, +    }; + +    using TElementType = T; +}; + +template <typename A, typename B> +struct TMemoryTraits<std::pair<A, B>> { +    enum { +        SimpleMemory = TMemoryTraits<A>::SimpleMemory && TMemoryTraits<B>::SimpleMemory, +        ContinuousMemory = SimpleMemory, +        OwnsMemory = SimpleMemory, +    }; + +    using TElementType = std::pair<A, B>; +}; + +template <> +struct TMemoryTraits<TBuffer> { +    enum { +        SimpleMemory = false, +        ContinuousMemory = true, +        OwnsMemory = true, +    }; + +    using TElementType = char; +}; + +template <> +struct TMemoryTraits<TTempBuf> { +    enum { +        SimpleMemory = false, +        ContinuousMemory = true, +        OwnsMemory = true, +    }; + +    using TElementType = char; +}; + +template <> +struct TMemoryTraits< ::TBlob> { +    enum { +        SimpleMemory = false, +        ContinuousMemory = true, +        OwnsMemory = true, +    }; + +    using TElementType = char; +}; + +template <typename T> +struct TElementDependentMemoryTraits { +    enum { +        SimpleMemory = false, +        ContinuousMemory = TMemoryTraits<T>::SimpleMemory, +    }; + +    using TElementType = T; +}; + +template <typename T, 
typename TAlloc> +struct TMemoryTraits<std::vector<T, TAlloc>>: public TElementDependentMemoryTraits<T> { +    enum { +        OwnsMemory = TMemoryTraits<T>::OwnsMemory +    }; +}; + +template <typename T, typename TAlloc> +struct TMemoryTraits<TVector<T, TAlloc>>: public TMemoryTraits<std::vector<T, TAlloc>> { +}; + +template <typename T> +struct TMemoryTraits<TTempArray<T>>: public TElementDependentMemoryTraits<T> { +    enum { +        OwnsMemory = TMemoryTraits<T>::OwnsMemory +    }; +}; + +template <typename T, typename TCharTraits, typename TAlloc> +struct TMemoryTraits<std::basic_string<T, TCharTraits, TAlloc>>: public TElementDependentMemoryTraits<T> { +    enum { +        OwnsMemory = TMemoryTraits<T>::OwnsMemory +    }; +}; + +template <> +struct TMemoryTraits<TString>: public TElementDependentMemoryTraits<char> { +    enum { +        OwnsMemory = true +    }; +}; + +template <> +struct TMemoryTraits<TUtf16String>: public TElementDependentMemoryTraits<wchar16> { +    enum { +        OwnsMemory = true +    }; +}; + +template <typename T> +struct TMemoryTraits<TArrayRef<T>>: public TElementDependentMemoryTraits<T> { +    enum { +        OwnsMemory = false +    }; +}; + +template <typename TCharType, typename TCharTraits> +struct TMemoryTraits<TBasicStringBuf<TCharType, TCharTraits>>: public TElementDependentMemoryTraits<TCharType> { +    enum { +        OwnsMemory = false +    }; +}; + +template <> +struct TMemoryTraits<TStringBuf>: public TElementDependentMemoryTraits<char> { +    enum { +        OwnsMemory = false +    }; +}; + +template <> +struct TMemoryTraits<TWtringBuf>: public TElementDependentMemoryTraits<wchar16> { +    enum { +        OwnsMemory = false +    }; +}; diff --git a/library/cpp/deprecated/accessors/ut/ya.make b/library/cpp/deprecated/accessors/ut/ya.make new file mode 100644 index 00000000000..5ea976566f6 --- /dev/null +++ b/library/cpp/deprecated/accessors/ut/ya.make @@ -0,0 +1,9 @@ +UNITTEST_FOR(library/cpp/deprecated/accessors) + 
+OWNER(velavokr) + +SRCS( +    accessors_ut.cpp +) + +END() diff --git a/library/cpp/deprecated/accessors/ya.make b/library/cpp/deprecated/accessors/ya.make new file mode 100644 index 00000000000..e322026a1cb --- /dev/null +++ b/library/cpp/deprecated/accessors/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +OWNER(elric) + +SRCS( +    accessors.cpp +    accessors_impl.cpp +    memory_traits.cpp +) + +END() diff --git a/library/cpp/deprecated/enum_codegen/README.md b/library/cpp/deprecated/enum_codegen/README.md new file mode 100644 index 00000000000..3bdac29af19 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/README.md @@ -0,0 +1,3 @@ +Some macros for generating enum <-> string conversions. + +Just use GENERATE_ENUM_SERIALIZATION. See https://wiki.yandex-team.ru/yatool/HowToWriteYaMakeFiles/#generate-enum diff --git a/library/cpp/deprecated/enum_codegen/enum_codegen.cpp b/library/cpp/deprecated/enum_codegen/enum_codegen.cpp new file mode 100644 index 00000000000..3931b05924a --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/enum_codegen.cpp @@ -0,0 +1 @@ +#include "enum_codegen.h" diff --git a/library/cpp/deprecated/enum_codegen/enum_codegen.h b/library/cpp/deprecated/enum_codegen/enum_codegen.h new file mode 100644 index 00000000000..dfb04ecac29 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/enum_codegen.h @@ -0,0 +1,33 @@ +#pragma once + +/// see enum_codegen_ut.cpp for examples + +#define ENUM_VALUE_GEN(name, value, ...) name = value, +#define ENUM_VALUE_GEN_NO_VALUE(name, ...) name, + +#define ENUM_TO_STRING_IMPL_ITEM(name, ...) \ +    case name:                              \ +        return #name; +#define ENUM_LTLT_IMPL_ITEM(name, ...) 
\ +    case name:                         \ +        os << #name;                   \ +        break; + +#define ENUM_TO_STRING(type, MAP)                                            \ +    static inline const char* ToCString(type value) {                        \ +        switch (value) {                                                     \ +            MAP(ENUM_TO_STRING_IMPL_ITEM)                                    \ +            default:                                                         \ +                return "UNKNOWN";                                            \ +        }                                                                    \ +    }                                                                        \ +                                                                             \ +    static inline IOutputStream& operator<<(IOutputStream& os, type value) { \ +        switch (value) {                                                     \ +            MAP(ENUM_LTLT_IMPL_ITEM)                                         \ +            default:                                                         \ +                os << int(value);                                            \ +                break;                                                       \ +        }                                                                    \ +        return os;                                                           \ +    } diff --git a/library/cpp/deprecated/enum_codegen/enum_codegen_ut.cpp b/library/cpp/deprecated/enum_codegen/enum_codegen_ut.cpp new file mode 100644 index 00000000000..f8f1c9b6dff --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/enum_codegen_ut.cpp @@ -0,0 +1,40 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "enum_codegen.h" + +#include <util/string/builder.h> + +#define COLOR_MAP(XX) \ +    XX(RED)           \ +    XX(GREEN)         \ +    XX(BLUE) + +enum EColor { +    
COLOR_MAP(ENUM_VALUE_GEN_NO_VALUE) +}; + +ENUM_TO_STRING(EColor, COLOR_MAP) + +#define MULTIPLIER_MAP(XX) \ +    XX(GB, 9)              \ +    XX(MB, 6)              \ +    XX(KB, 3) + +enum EMultiplier { +    MULTIPLIER_MAP(ENUM_VALUE_GEN) +}; + +ENUM_TO_STRING(EMultiplier, MULTIPLIER_MAP) + +Y_UNIT_TEST_SUITE(EnumCodegen) { +    Y_UNIT_TEST(GenWithValue) { +        UNIT_ASSERT_VALUES_EQUAL(6, MB); +    } + +    Y_UNIT_TEST(ToCString) { +        UNIT_ASSERT_VALUES_EQUAL("RED", ToCString(RED)); +        UNIT_ASSERT_VALUES_EQUAL("BLUE", ToCString(BLUE)); +        UNIT_ASSERT_VALUES_EQUAL("GREEN", (TStringBuilder() << GREEN)); +        UNIT_ASSERT_VALUES_EQUAL("GB", ToCString(GB)); +    } +} diff --git a/library/cpp/deprecated/enum_codegen/ut/ya.make b/library/cpp/deprecated/enum_codegen/ut/ya.make new file mode 100644 index 00000000000..32e7ad77a25 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/ut/ya.make @@ -0,0 +1,15 @@ +UNITTEST() + +OWNER(g:util) + +SRCDIR(library/cpp/deprecated/enum_codegen) + +PEERDIR( +    library/cpp/deprecated/enum_codegen +) + +SRCS( +    enum_codegen_ut.cpp +) + +END() diff --git a/library/cpp/deprecated/enum_codegen/ya.make b/library/cpp/deprecated/enum_codegen/ya.make new file mode 100644 index 00000000000..1df07d21929 --- /dev/null +++ b/library/cpp/deprecated/enum_codegen/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(elric) + +SRCS( +    enum_codegen.cpp +) + +END() diff --git a/library/cpp/deprecated/kmp/kmp.cpp b/library/cpp/deprecated/kmp/kmp.cpp new file mode 100644 index 00000000000..d02074c94ae --- /dev/null +++ b/library/cpp/deprecated/kmp/kmp.cpp @@ -0,0 +1,21 @@ +#include "kmp.h" + +#include <util/generic/yexception.h> + +TKMPMatcher::TKMPMatcher(const char* patternBegin, const char* patternEnd) +    : Pattern(patternBegin, patternEnd) +{ +    ComputePrefixFunction(); +} + +TKMPMatcher::TKMPMatcher(const TString& pattern) +    : Pattern(pattern) +{ +    ComputePrefixFunction(); +} + +void 
TKMPMatcher::ComputePrefixFunction() { +    ssize_t* pf; +    ::ComputePrefixFunction(Pattern.data(), Pattern.data() + Pattern.size(), &pf); +    PrefixFunction.Reset(pf); +} diff --git a/library/cpp/deprecated/kmp/kmp.h b/library/cpp/deprecated/kmp/kmp.h new file mode 100644 index 00000000000..a7f72eece6a --- /dev/null +++ b/library/cpp/deprecated/kmp/kmp.h @@ -0,0 +1,108 @@ +#pragma once + +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +template <typename T> +void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) { +    Y_ENSURE(begin != end, TStringBuf("empty pattern")); +    ssize_t len = end - begin; +    TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]); +    ssize_t i = 0; +    ssize_t j = -1; +    resultHolder[0] = -1; +    while (i < len) { +        while ((j >= 0) && (begin[j] != begin[i])) +            j = resultHolder[j]; +        ++i; +        ++j; +        Y_ASSERT(i >= 0); +        Y_ASSERT(j >= 0); +        Y_ASSERT(j < len); +        if ((i < len) && (begin[i] == begin[j])) +            resultHolder[i] = resultHolder[j]; +        else +            resultHolder[i] = j; +    } +    *result = resultHolder.Release(); +} + +class TKMPMatcher { +private: +    TArrayHolder<ssize_t> PrefixFunction; +    TString Pattern; + +    void ComputePrefixFunction(); + +public: +    TKMPMatcher(const char* patternBegin, const char* patternEnd); +    TKMPMatcher(const TString& pattern); + +    bool SubStr(const char* begin, const char* end, const char*& result) const { +        Y_ASSERT(begin <= end); +        ssize_t m = Pattern.size(); +        ssize_t n = end - begin; +        ssize_t i, j; +        for (i = 0, j = 0; (i < n) && (j < m); ++i, ++j) { +            while ((j >= 0) && (Pattern[j] != begin[i])) +                j = PrefixFunction[j]; +        } +        if (j == m) { +            result = begin + i - m; +            return true; +       
 } else { +            return false; +        } +    } +}; + +template <typename T> +class TKMPStreamMatcher { +public: +    class ICallback { +    public: +        virtual void OnMatch(const T* begin, const T* end) = 0; +        virtual ~ICallback() = default; +    }; + +private: +    ICallback* Callback; +    TArrayHolder<ssize_t> PrefixFunction; +    using TTVector = TVector<T>; +    TTVector Pattern; +    ssize_t State; +    TTVector Candidate; + +public: +    TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback) +        : Callback(callback) +        , Pattern(patternBegin, patternEnd) +        , State(0) +        , Candidate(Pattern.size()) +    { +        ssize_t* pf; +        ComputePrefixFunction(patternBegin, patternEnd, &pf); +        PrefixFunction.Reset(pf); +    } + +    void Push(const T& symbol) { +        while ((State >= 0) && (Pattern[State] != symbol)) { +            Y_ASSERT(State <= (ssize_t) Pattern.size()); +            State = PrefixFunction[State]; +            Y_ASSERT(State <= (ssize_t) Pattern.size()); +        } +        if (State >= 0) +            Candidate[State] = symbol; +        ++State; +        if (State == (ssize_t) Pattern.size()) { +            Callback->OnMatch(Candidate.begin(), Candidate.end()); +            State = 0; +        } +    } + +    void Clear() { +        State = 0; +    } +}; diff --git a/library/cpp/deprecated/kmp/kmp_ut.cpp b/library/cpp/deprecated/kmp/kmp_ut.cpp new file mode 100644 index 00000000000..c2eda83c57a --- /dev/null +++ b/library/cpp/deprecated/kmp/kmp_ut.cpp @@ -0,0 +1,80 @@ +#include "kmp.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/output.h> + +static TVector<int> FindAll(const TString& pattern, const TString& string) { +    TVector<int> result; +    TKMPMatcher kmp(pattern); +    const char* pResult; +    const char* begin = string.begin(); +    const char* end = string.end(); +    while (kmp.SubStr(begin, end, pResult)) { +   
     result.push_back(int(pResult - string.data())); +        begin = pResult + pattern.size(); +    } +    return result; +} + +class TTestKMP: public TTestBase { +    UNIT_TEST_SUITE(TTestKMP); +    UNIT_TEST(Test); +    UNIT_TEST(TestStream); +    UNIT_TEST_SUITE_END(); + +public: +    void Test() { +        TVector<int> ans = {0, 2}; +        UNIT_ASSERT_EQUAL(FindAll("a", "aba"), ans); +        ans = {0}; +        UNIT_ASSERT_EQUAL(FindAll("aba", "aba"), ans); +        ans.clear(); +        UNIT_ASSERT_EQUAL(FindAll("abad", "aba"), ans); +        ans = {0, 2}; +        UNIT_ASSERT_EQUAL(FindAll("ab", "abab"), ans); +    } + +    class TKMPSimpleCallback: public TKMPStreamMatcher<int>::ICallback { +    private: +        int* Begin; +        int* End; +        int Count; + +    public: +        TKMPSimpleCallback(int* begin, int* end) +            : Begin(begin) +            , End(end) +            , Count(0) +        { +        } + +        void OnMatch(const int* begin, const int* end) override { +            UNIT_ASSERT_EQUAL(end - begin, End - Begin); +            const int* p0 = Begin; +            const int* p1 = begin; +            while (p0 < End) { +                UNIT_ASSERT_EQUAL(*p0, *p1); +                ++p0; +                ++p1; +            } +            ++Count; +        } + +        int GetCount() const { +            return Count; +        } +    }; + +    void TestStream() { +        int pattern[] = {2, 3}; +        int data[] = {1, 2, 3, 5, 2, 2, 3, 2, 4, 3, 2}; +        TKMPSimpleCallback callback(pattern, pattern + 2); +        TKMPStreamMatcher<int> matcher(pattern, pattern + 2, &callback); +        for (auto& i : data) +            matcher.Push(i); +        UNIT_ASSERT_EQUAL(2, callback.GetCount()); +    } +}; + +UNIT_TEST_SUITE_REGISTRATION(TTestKMP); diff --git a/library/cpp/deprecated/kmp/ut/ya.make b/library/cpp/deprecated/kmp/ut/ya.make new file mode 100644 index 00000000000..9c54ee27154 --- /dev/null +++ 
b/library/cpp/deprecated/kmp/ut/ya.make @@ -0,0 +1,9 @@ +UNITTEST_FOR(library/cpp/deprecated/kmp) + +OWNER(g:util) + +SRCS( +    kmp_ut.cpp +) + +END() diff --git a/library/cpp/deprecated/kmp/ya.make b/library/cpp/deprecated/kmp/ya.make new file mode 100644 index 00000000000..7c1c5579344 --- /dev/null +++ b/library/cpp/deprecated/kmp/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +OWNER(g:util) + +SRCS( +    kmp.cpp +    kmp.h +) + +END() diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp new file mode 100644 index 00000000000..b0e4511299b --- /dev/null +++ b/library/cpp/deprecated/mapped_file/mapped_file.cpp @@ -0,0 +1,64 @@ +#include "mapped_file.h" + +#include <util/generic/yexception.h> +#include <util/system/defaults.h> +#include <util/system/hi_lo.h> +#include <util/system/filemap.h> + +TMappedFile::TMappedFile(TFileMap* map, const char* dbgName) { +    Map_ = map; +    i64 len = Map_->Length(); +    if (Hi32(len) != 0 && sizeof(size_t) <= sizeof(ui32)) +        ythrow yexception() << "File '" << dbgName << "' mapping error: " << len << " too large"; + +    Map_->Map(0, static_cast<size_t>(len)); +} + +TMappedFile::TMappedFile(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) +    : Map_(nullptr) +{ +    init(file, om, dbgName); +} + +void TMappedFile::precharge(size_t off, size_t size) const { +    if (!Map_) +        return; + +    Map_->Precharge(off, size); +} + +void TMappedFile::init(const TString& name) { +    THolder<TFileMap> map(new TFileMap(name)); +    TMappedFile newFile(map.Get(), name.data()); +    Y_UNUSED(map.Release()); +    newFile.swap(*this); +    newFile.term(); +} + +void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode om) { +    THolder<TFileMap> map(new TFileMap(name, length, om)); +    TMappedFile newFile(map.Get(), name.data()); +    Y_UNUSED(map.Release()); +    newFile.swap(*this); +    newFile.term(); +} + +void 
TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) { +    THolder<TFileMap> map(new TFileMap(file, om)); +    TMappedFile newFile(map.Get(), dbgName); +    Y_UNUSED(map.Release()); +    newFile.swap(*this); +    newFile.term(); +} + +void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) { +    THolder<TFileMap> map(new TFileMap(name, om)); +    TMappedFile newFile(map.Get(), name.data()); +    Y_UNUSED(map.Release()); +    newFile.swap(*this); +    newFile.term(); +} + +void TMappedFile::flush() { +    Map_->Flush(); +} diff --git a/library/cpp/deprecated/mapped_file/mapped_file.h b/library/cpp/deprecated/mapped_file/mapped_file.h new file mode 100644 index 00000000000..45859ed65a0 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/mapped_file.h @@ -0,0 +1,72 @@ +#pragma once + +#include <util/generic/flags.h> +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/utility.h> +#include <util/generic/yexception.h> +#include <util/system/align.h> +#include <util/system/file.h> +#include <util/system/filemap.h> +#include <util/system/yassert.h> + +#include <cstdio> +#include <new> + +/// Deprecated (by pg@), use TFileMap or TMemoryMap instead +class TMappedFile { +private: +    TFileMap* Map_; + +private: +    TMappedFile(TFileMap* map, const char* dbgName); + +public: +    TMappedFile() { +        Map_ = nullptr; +    } + +    ~TMappedFile() { +        term(); +    } + +    explicit TMappedFile(const TString& name) { +        Map_ = nullptr; +        init(name, TFileMap::oRdOnly); +    } + +    TMappedFile(const TFile& file, TFileMap::EOpenMode om = TFileMap::oRdOnly, const char* dbgName = "unknown"); + +    void init(const TString& name); + +    void init(const TString& name, TFileMap::EOpenMode om); + +    void init(const TString& name, size_t length, TFileMap::EOpenMode om); + +    void init(const TFile&, TFileMap::EOpenMode om = TFileMap::oRdOnly, const char* dbgName = "unknown"); + 
+    void flush(); + +    void term() { +        if (Map_) { +            Map_->Unmap(); +            delete Map_; +            Map_ = nullptr; +        } +    } + +    size_t getSize() const { +        return (Map_ ? Map_->MappedSize() : 0); +    } + +    void* getData(size_t pos = 0) const { +        Y_ASSERT(!Map_ || (pos <= getSize())); +        return (Map_ ? (void*)((unsigned char*)Map_->Ptr() + pos) : nullptr); +    } + +    void precharge(size_t pos = 0, size_t size = (size_t)-1) const; + +    void swap(TMappedFile& file) noexcept { +        DoSwap(Map_, file.Map_); +    } +}; diff --git a/library/cpp/deprecated/mapped_file/ut/mapped_file_ut.cpp b/library/cpp/deprecated/mapped_file/ut/mapped_file_ut.cpp new file mode 100644 index 00000000000..afbd5b33582 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/ut/mapped_file_ut.cpp @@ -0,0 +1,18 @@ +#include <library/cpp/deprecated/mapped_file/mapped_file.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/fs.h> + +Y_UNIT_TEST_SUITE(TMappedFileTest) { +    static const char* FileName_("./mappped_file"); +    Y_UNIT_TEST(TestFileMapEmpty) { +        TFile file(FileName_, CreateAlways | WrOnly); +        file.Close(); + +        TMappedFile map; +        map.init(FileName_); +        map.getData(0); + +        NFs::Remove(FileName_); +    } +}; diff --git a/library/cpp/deprecated/mapped_file/ya.make b/library/cpp/deprecated/mapped_file/ya.make new file mode 100644 index 00000000000..415c4383823 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(g:util) + +SRCS( +    mapped_file.cpp +) + +END() diff --git a/library/cpp/deprecated/split/delim_string_iter.cpp b/library/cpp/deprecated/split/delim_string_iter.cpp new file mode 100644 index 00000000000..af418c5bfb9 --- /dev/null +++ b/library/cpp/deprecated/split/delim_string_iter.cpp @@ -0,0 +1,45 @@ +#include "delim_string_iter.h" + +// +// TKeyValueDelimStringIter +// + +void 
TKeyValueDelimStringIter::ReadKeyAndValue() { +    TStringBuf currentToken(*DelimIter); + +    size_t pos = currentToken.find('='); +    if (pos == TString::npos) { +        ChunkValue.Clear(); +        ChunkKey = currentToken; +    } else { +        ChunkKey = currentToken.SubStr(0, pos); +        ChunkValue = currentToken.SubStr(pos + 1); +    } +} + +TKeyValueDelimStringIter::TKeyValueDelimStringIter(const TStringBuf str, const TStringBuf delim) +    : DelimIter(str, delim) +{ +    if (DelimIter.Valid()) +        ReadKeyAndValue(); +} + +bool TKeyValueDelimStringIter::Valid() const { +    return DelimIter.Valid(); +} + +TKeyValueDelimStringIter& TKeyValueDelimStringIter::operator++() { +    ++DelimIter; +    if (DelimIter.Valid()) +        ReadKeyAndValue(); + +    return *this; +} + +const TStringBuf& TKeyValueDelimStringIter::Key() const { +    return ChunkKey; +} + +const TStringBuf& TKeyValueDelimStringIter::Value() const { +    return ChunkValue; +} diff --git a/library/cpp/deprecated/split/delim_string_iter.h b/library/cpp/deprecated/split/delim_string_iter.h new file mode 100644 index 00000000000..8e4ca171a09 --- /dev/null +++ b/library/cpp/deprecated/split/delim_string_iter.h @@ -0,0 +1,185 @@ +#pragma once + +#include <util/generic/algorithm.h> +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> + +#include <iterator> + +class TDelimStringIter { +public: +    using value_type = TStringBuf; +    using difference_type = ptrdiff_t; +    using pointer = const TStringBuf*; +    using reference = const TStringBuf&; +    using iterator_category = std::forward_iterator_tag; + +    inline TDelimStringIter(const char* begin, const char* strEnd, TStringBuf delim) +        : TDelimStringIter(TStringBuf(begin, strEnd), delim) +    { +    } + +    inline TDelimStringIter(TStringBuf str, TStringBuf delim) +        : IsValid(true) +        , Str(str) +        , Delim(delim) +    { +    
    UpdateCurrent(); +    } + +    inline TDelimStringIter() +        : IsValid(false) +    { +    } + +    inline explicit operator bool() const { +        return IsValid; +    } + +    // NOTE: this is a potentially unsafe operation (no overrun check) +    inline TDelimStringIter& operator++() { +        if (Current.end() != Str.end()) { +            Str.Skip(Current.length() + Delim.length()); +            UpdateCurrent(); +        } else { +            Str.Clear(); +            Current.Clear(); +            IsValid = false; +        } +        return *this; +    } + +    inline void operator+=(size_t n) { +        for (; n > 0; --n) { +            ++(*this); +        } +    } + +    inline bool operator==(const TDelimStringIter& rhs) const { +        return (IsValid == rhs.IsValid) && (!IsValid || (Current.begin() == rhs.Current.begin())); +    } + +    inline bool operator!=(const TDelimStringIter& rhs) const { +        return !(*this == rhs); +    } + +    inline TStringBuf operator*() const { +        return Current; +    } + +    inline const TStringBuf* operator->() const { +        return &Current; +    } + +    // Get & advance +    template <class T> +    inline bool TryNext(T& t) { +        if (IsValid) { +            t = FromString<T>(Current); +            operator++(); +            return true; +        } else { +            return false; +        } +    } + +    template <class T> +    inline TDelimStringIter& Next(T& t) // Get & advance +    { +        if (!TryNext(t)) +            ythrow yexception() << "No valid field"; +        return *this; +    } + +    template <class T> +    inline T GetNext() { +        T res; +        Next(res); +        return res; +    } + +    inline const char* GetBegin() const { +        return Current.begin(); +    } + +    inline const char* GetEnd() const { +        return Current.end(); +    } + +    inline bool Valid() const { +        return IsValid; +    } + +    // contents from next token to the end of 
string +    inline TStringBuf Cdr() const { +        return Str.SubStr(Current.length() + Delim.length()); +    } + +    inline TDelimStringIter IterEnd() const { +        return TDelimStringIter(); +    } + +private: +    inline void UpdateCurrent() { +        // it is much faster than TStringBuf::find +        size_t pos = std::search(Str.begin(), Str.end(), Delim.begin(), Delim.end()) - Str.begin(); +        Current = Str.Head(pos); +    } + +private: +    bool IsValid; + +    TStringBuf Str; +    TStringBuf Current; +    TStringBuf Delim; +}; + +//example: for (TStringBuf field: TDelimStroka(line, "@@")) { ... } +struct TDelimStroka { +    TStringBuf S; +    TStringBuf Delim; + +    inline TDelimStroka(TStringBuf s, TStringBuf delim) +        : S(s) +        , Delim(delim) +    { +    } + +    inline TDelimStringIter begin() const { +        return TDelimStringIter(S, Delim); +    } + +    inline TDelimStringIter end() const { +        return TDelimStringIter(); +    } +}; + +inline TDelimStringIter begin_delim(const TString& str, TStringBuf delim) { +    return TDelimStringIter(str, delim); +} + +inline TDelimStringIter begin_delim(TStringBuf str, TStringBuf delim) { +    return TDelimStringIter(str.begin(), str.end(), delim); +} + +inline TDelimStringIter end_delim(const TString& /*str*/, TStringBuf /*delim*/) { +    return TDelimStringIter(); +} + +class TKeyValueDelimStringIter { +public: +    TKeyValueDelimStringIter(const TStringBuf str, const TStringBuf delim); +    bool Valid() const; +    TKeyValueDelimStringIter& operator++(); +    const TStringBuf& Key() const; +    const TStringBuf& Value() const; + +private: +    TDelimStringIter DelimIter; +    TStringBuf ChunkKey, ChunkValue; + +private: +    void ReadKeyAndValue(); +}; diff --git a/library/cpp/deprecated/split/delim_string_iter_ut.cpp b/library/cpp/deprecated/split/delim_string_iter_ut.cpp new file mode 100644 index 00000000000..18a8b2a1604 --- /dev/null +++ 
b/library/cpp/deprecated/split/delim_string_iter_ut.cpp @@ -0,0 +1,99 @@ +#include "delim_string_iter.h" +#include <util/generic/vector.h> +#include <library/cpp/testing/unittest/registar.h> + +/// Test that TDelimStringIter build on top of given string and delimeter will produce expected sequence +static void AssertStringSplit(const TString& str, const TString& delim, const TVector<TString>& expected) { +    TDelimStringIter it(str, delim); + +    // test iterator invariants +    for (const auto& expectedString : expected) { +        UNIT_ASSERT(it.Valid()); +        UNIT_ASSERT(bool(it)); +        UNIT_ASSERT_STRINGS_EQUAL(it->ToString(), expectedString); +        ++it; +    } +    UNIT_ASSERT(!it.Valid()); +}; + +Y_UNIT_TEST_SUITE(TDelimStrokaIterTestSuite) { +    Y_UNIT_TEST(SingleCharacterAsDelimiter) { +        AssertStringSplit( +            "Hello words!", " ", {"Hello", "words!"}); +    } + +    Y_UNIT_TEST(MultipleCharactersAsDelimiter) { +        AssertStringSplit( +            "0, 1, 1, 2, 3, 5, 8, 13, 21, 34", "1, ", {"0, ", "", "2, 3, 5, 8, 13, 2", "34"}); +    } + +    Y_UNIT_TEST(NoDelimitersPresent) { +        AssertStringSplit("This string could be yours", "\t", {"This string could be yours"}); +    } + +    Y_UNIT_TEST(Cdr) { +        TDelimStringIter it("a\tc\t", "\t"); +        UNIT_ASSERT_STRINGS_EQUAL(*it, "a"); +        UNIT_ASSERT_STRINGS_EQUAL(it.Cdr(), "c\t"); +        ++it; +        UNIT_ASSERT_STRINGS_EQUAL(it.Cdr(), ""); +    } + +    Y_UNIT_TEST(ForIter) { +        TVector<TStringBuf> expected = {"1", "", "3@4", ""}; +        TVector<TStringBuf> got; + +        for (TStringBuf x : TDelimStroka("1@@@@3@4@@", "@@")) { +            got.push_back(x); +        } + +        UNIT_ASSERT_EQUAL(got, expected); +    } +} + +static void AssertKeyValueStringSplit( +    const TStringBuf str, +    const TStringBuf delim, +    const TVector<std::pair<TStringBuf, TStringBuf>>& expected) { +    TKeyValueDelimStringIter it(str, delim); + +    for 
(const auto& expectedKeyValue : expected) { +        UNIT_ASSERT(it.Valid()); +        UNIT_ASSERT_STRINGS_EQUAL(it.Key(), expectedKeyValue.first); +        UNIT_ASSERT_STRINGS_EQUAL(it.Value(), expectedKeyValue.second); +        ++it; +    } +    UNIT_ASSERT(!it.Valid()); +} + +Y_UNIT_TEST_SUITE(TKeyValueDelimStringIterTestSuite) { +    Y_UNIT_TEST(SingleCharacterAsDelimiter) { +        AssertKeyValueStringSplit( +            "abc=123,cde=qwer", ",", +            {{"abc", "123"}, +             {"cde", "qwer"}}); +    } + +    Y_UNIT_TEST(MultipleCharactersAsDelimiter) { +        AssertKeyValueStringSplit( +            "abc=xyz@@qwerty=zxcv", "@@", +            {{"abc", "xyz"}, +             {"qwerty", "zxcv"}}); +    } + +    Y_UNIT_TEST(NoDelimiters) { +        AssertKeyValueStringSplit( +            "abc=zz", ",", +            {{"abc", "zz"}}); +    } + +    Y_UNIT_TEST(EmptyElements) { +        AssertKeyValueStringSplit( +            "@@abc=zxy@@@@qwerty=y@@", "@@", +            {{"", ""}, +             {"abc", "zxy"}, +             {"", ""}, +             {"qwerty", "y"}, +             {"", ""}}); +    } +} diff --git a/library/cpp/deprecated/split/split_iterator.cpp b/library/cpp/deprecated/split/split_iterator.cpp new file mode 100644 index 00000000000..32262d25bd1 --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator.cpp @@ -0,0 +1,318 @@ +#include "split_iterator.h" + +#include <util/system/yassert.h> + +#include <cctype> +#include <cstring> +#include <cstdlib> + +/****************** TSplitDelimiters2 ******************/ + +TSplitDelimiters::TSplitDelimiters(const char* s) { +    memset(Delims, 0, sizeof(Delims)); +    while (*s) +        Delims[(ui8) * (s++)] = true; +} + +/****************** TSplitBase ******************/ +TSplitBase::TSplitBase(const char* str, size_t length) +    : Str(str) +    , Len(length) +{ +} + +TSplitBase::TSplitBase(const TString& s) +    : Str(s.data()) +    , Len(s.size()) +{ +} + +/****************** 
TDelimitersSplit ******************/ + +TDelimitersSplit::TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) +    : TSplitBase(str, length) +    , Delimiters(delimiters) +{ +} + +TDelimitersSplit::TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters) +    : TSplitBase(s) +    , Delimiters(delimiters) +{ +} + +size_t TDelimitersSplit::Begin() const { +    size_t pos = 0; +    while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) +        ++pos; +    return pos; +} + +TSizeTRegion TDelimitersSplit::Next(size_t& pos) const { +    size_t begin = pos; +    while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) +        ++pos; +    TSizeTRegion result(begin, pos); + +    while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) +        ++pos; + +    return result; +} + +TDelimitersSplit::TIterator TDelimitersSplit::Iterator() const { +    return TIterator(*this); +} + +/****************** TDelimitersStrictSplit ******************/ + +TDelimitersStrictSplit::TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) +    : TSplitBase(str, length) +    , Delimiters(delimiters) +{ +} + +TDelimitersStrictSplit::TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters) +    : TSplitBase(s) +    , Delimiters(delimiters) +{ +} + +TDelimitersStrictSplit::TIterator TDelimitersStrictSplit::Iterator() const { +    return TIterator(*this); +} + +TSizeTRegion TDelimitersStrictSplit::Next(size_t& pos) const { +    size_t begin = pos; +    while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) +        ++pos; +    TSizeTRegion result(begin, pos); + +    if (pos < Len) +        ++pos; + +    return result; +} + +size_t TDelimitersStrictSplit::Begin() const { +    return 0; +} + +/****************** TScreenedDelimitersSplit ******************/ + +TScreenedDelimitersSplit::TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) 
+    : TSplitBase(s) +    , Delimiters(delimiters) +    , Screens(screens) +{ +} + +TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) +    : TSplitBase(str, length) +    , Delimiters(delimiters) +    , Screens(screens) +{ +} + +TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const { +    return TIterator(*this); +} + +TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const { +    size_t begin = pos; +    bool screened = false; +    while (pos < Len) { +        if (Screens.IsDelimiter(Str[pos])) +            screened = !screened; +        if (Delimiters.IsDelimiter(Str[pos]) && !screened) +            break; +        ++pos; +    } +    TSizeTRegion result(begin, pos); + +    if (pos < Len) +        ++pos; + +    return result; +} + +size_t TScreenedDelimitersSplit::Begin() const { +    return 0; +} + +/****************** TDelimitersSplitWithoutTags ******************/ + +TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters) +    : TSplitBase(str, length) +    , Delimiters(delimiters) +{ +} + +TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters) +    : TSplitBase(s) +    , Delimiters(delimiters) +{ +} + +size_t TDelimitersSplitWithoutTags::SkipTag(size_t pos) const { +    Y_ASSERT('<' == Str[pos]); +    while ((pos < Len) && ('>' != Str[pos])) +        ++pos; +    return pos + 1; +} + +size_t TDelimitersSplitWithoutTags::SkipDelimiters(size_t pos) const { +    while (true) { +        while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) +            ++pos; +        if (pos < Len) { +            if ('<' != Str[pos]) +                break; +            else +                pos = SkipTag(pos); +        } else +            break; +    } +    return pos; +} + +size_t 
TDelimitersSplitWithoutTags::Begin() const { +    size_t pos = 0; +    pos = SkipDelimiters(pos); +    return pos; +} + +TSizeTRegion TDelimitersSplitWithoutTags::Next(size_t& pos) const { +    size_t begin = pos; +    while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) +        ++pos; +    TSizeTRegion result(begin, pos); + +    pos = SkipDelimiters(pos); + +    return result; +} + +TDelimitersSplitWithoutTags::TIterator TDelimitersSplitWithoutTags::Iterator() const { +    return TIterator(*this); +} + +/****************** TCharSplit ******************/ + +TCharSplit::TCharSplit(const char* str, size_t length) +    : TSplitBase(str, length) +{ +} + +TCharSplit::TCharSplit(const TString& s) +    : TSplitBase(s) +{ +} + +TCharSplit::TIterator TCharSplit::Iterator() const { +    return TIterator(*this); +} + +TSizeTRegion TCharSplit::Next(size_t& pos) const { +    TSizeTRegion result(pos, pos + 1); +    ++pos; +    return result; +} + +size_t TCharSplit::Begin() const { +    return 0; +} + +/****************** TCharSplitWithoutTags ******************/ + +TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length) +    : TSplitBase(str, length) +{ +} + +TCharSplitWithoutTags::TCharSplitWithoutTags(const TString& s) +    : TSplitBase(s) +{ +} + +size_t TCharSplitWithoutTags::SkipTag(size_t pos) const { +    Y_ASSERT('<' == Str[pos]); +    while ((pos < Len) && ('>' != Str[pos])) +        ++pos; +    return pos + 1; +} + +size_t TCharSplitWithoutTags::SkipDelimiters(size_t pos) const { +    while (true) { +        if (pos < Len) { +            if ('<' != Str[pos]) +                break; +            else +                pos = SkipTag(pos); +        } else +            break; +    } +    return pos; +} + +size_t TCharSplitWithoutTags::Begin() const { +    size_t pos = 0; +    pos = SkipDelimiters(pos); +    return pos; +} + +TSizeTRegion TCharSplitWithoutTags::Next(size_t& pos) const { +    size_t begin = pos++; +    
TSizeTRegion result(begin, pos); + +    pos = SkipDelimiters(pos); + +    return result; +} + +TCharSplitWithoutTags::TIterator TCharSplitWithoutTags::Iterator() const { +    return TIterator(*this); +} + +TSubstringSplitDelimiter::TSubstringSplitDelimiter(const TString& s) +    : Matcher(s) +    , Len(s.size()) +{ +} + +/****************** TSubstringSplit ******************/ + +TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter) +    : TSplitBase(str, length) +    , Delimiter(delimiter) +{ +} + +TSubstringSplit::TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter) +    : TSplitBase(str) +    , Delimiter(delimiter) +{ +} + +TSubstringSplit::TIterator TSubstringSplit::Iterator() const { +    return TIterator(*this); +} + +TSizeTRegion TSubstringSplit::Next(size_t& pos) const { +    const char* begin = Str + pos; +    const char* end = Str + Len; +    const char* delim; +    if (Delimiter.Matcher.SubStr(begin, end, delim)) { +        TSizeTRegion result(pos, delim - begin + pos); +        pos += delim - begin + Delimiter.Len; +        return result; +    } else { +        TSizeTRegion result(pos, end - begin + pos); +        pos += end - begin; +        return result; +    } +} + +size_t TSubstringSplit::Begin() const { +    return 0; +} diff --git a/library/cpp/deprecated/split/split_iterator.h b/library/cpp/deprecated/split/split_iterator.h new file mode 100644 index 00000000000..0eacc29228e --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator.h @@ -0,0 +1,317 @@ +#pragma once + +#include <library/cpp/deprecated/kmp/kmp.h> +#include <util/string/cast.h> +#include <util/string/util.h> +#include <util/string/builder.h> + +#include <util/system/yassert.h> +#include <util/system/defaults.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +#include <cstdio> + +template <typename T> 
+struct TNumPair { +    T Begin; +    T End; + +    TNumPair() = default; + +    TNumPair(T begin, T end) +        : Begin(begin) +        , End(end) +    { +        Y_ASSERT(begin <= end); +    } + +    T Length() const { +        return End - Begin + 1; +    } + +    bool operator==(const TNumPair& r) const { +        return (Begin == r.Begin) && (End == r.End); +    } + +    bool operator!=(const TNumPair& r) const { +        return (Begin != r.Begin) || (End != r.End); +    } +}; + +using TSizeTRegion = TNumPair<size_t>; +using TUi32Region = TNumPair<ui32>; + +template <> +inline TString ToString(const TUi32Region& r) { +    return TStringBuilder() << "(" << r.Begin << ", " << r.End << ")"; +} + +template <> +inline TUi32Region FromString(const TString& s) { +    TUi32Region result; +    sscanf(s.data(), "(%" PRIu32 ", %" PRIu32 ")", &result.Begin, &result.End); +    return result; +} + +class TSplitDelimiters { +private: +    bool Delims[256]; + +public: +    explicit TSplitDelimiters(const char* s); + +    Y_FORCE_INLINE bool IsDelimiter(ui8 ch) const { +        return Delims[ch]; +    } +}; + +template <class Split> +class TSplitIterator; + +class TSplitBase { +protected: +    const char* Str; +    size_t Len; + +public: +    TSplitBase(const char* str, size_t length); +    TSplitBase(const TString& s); + +    Y_FORCE_INLINE const char* GetString() const { +        return Str; +    } + +    Y_FORCE_INLINE size_t GetLength() const { +        return Len; +    } + +private: +    // we don't own Str, make sure that no one calls us with temporary object +    TSplitBase(TString&&) = delete; +}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4512) +#endif + +class TDelimitersSplit: public TSplitBase { +private: +    const TSplitDelimiters& Delimiters; + +public: +    using TIterator = TSplitIterator<TDelimitersSplit>; +    friend class TSplitIterator<TDelimitersSplit>; + +    TDelimitersSplit(const char* str, size_t length, const 
TSplitDelimiters& delimiters); +    TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + +private: +    // we don't own Delimiters, make sure that no one calls us with temporary object +    TDelimitersSplit(const char*, size_t, TSplitDelimiters&&) = delete; +    TDelimitersSplit(const TString&, TSplitDelimiters&&) = delete; +    TDelimitersSplit(TString&&, const TSplitDelimiters&) = delete; +}; + +class TDelimitersStrictSplit: public TSplitBase { +private: +    const TSplitDelimiters& Delimiters; + +public: +    using TIterator = TSplitIterator<TDelimitersStrictSplit>; +    friend class TSplitIterator<TDelimitersStrictSplit>; + +    TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); +    TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + +private: +    // we don't own Delimiters, make sure that no one calls us with temporary object +    TDelimitersStrictSplit(const char*, size_t, TSplitDelimiters&&) = delete; +    TDelimitersStrictSplit(const TString&, TSplitDelimiters&&) = delete; +    TDelimitersStrictSplit(TString&&, const TSplitDelimiters&) = delete; +}; + +class TScreenedDelimitersSplit: public TSplitBase { +private: +    const TSplitDelimiters& Delimiters; +    const TSplitDelimiters& Screens; + +public: +    using TIterator = TSplitIterator<TScreenedDelimitersSplit>; +    friend class TSplitIterator<TScreenedDelimitersSplit>; + +    TScreenedDelimitersSplit(const char*, size_t, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens); +    TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + 
+private: +    // we don't own Delimiters and Screens, make sure that no one calls us with temporary object +    TScreenedDelimitersSplit(TString&&, const TSplitDelimiters&, const TSplitDelimiters&) = delete; +    TScreenedDelimitersSplit(const TString&, TSplitDelimiters&&, const TSplitDelimiters&) = delete; +    TScreenedDelimitersSplit(const TString&, const TSplitDelimiters&, TSplitDelimiters&&) = delete; +}; + +class TDelimitersSplitWithoutTags: public TSplitBase { +private: +    const TSplitDelimiters& Delimiters; +    size_t SkipTag(size_t pos) const; +    size_t SkipDelimiters(size_t pos) const; + +public: +    using TIterator = TSplitIterator<TDelimitersSplitWithoutTags>; +    friend class TSplitIterator<TDelimitersSplitWithoutTags>; + +    TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters); +    TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + +private: +    // we don't own Delimiters, make sure that no one calls us with temporary object +    TDelimitersSplitWithoutTags(const char*, size_t, TSplitDelimiters&&) = delete; +    TDelimitersSplitWithoutTags(const TString&, TSplitDelimiters&&) = delete; +    TDelimitersSplitWithoutTags(TString&&, const TSplitDelimiters&) = delete; +}; + +class TCharSplit: public TSplitBase { +public: +    using TIterator = TSplitIterator<TCharSplit>; +    friend class TSplitIterator<TCharSplit>; + +    TCharSplit(const char* str, size_t length); +    TCharSplit(const TString& s); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + +private: +    // we don't own Str, make sure that no one calls us with temporary object +    TCharSplit(TString&&) = delete; +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +class TCharSplitWithoutTags: public TSplitBase { +private: +    size_t SkipTag(size_t 
pos) const; +    size_t SkipDelimiters(size_t pos) const; + +public: +    using TIterator = TSplitIterator<TCharSplitWithoutTags>; +    friend class TSplitIterator<TCharSplitWithoutTags>; + +    TCharSplitWithoutTags(const char* str, size_t length); +    TCharSplitWithoutTags(const TString& s); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + +private: +    // we don't own Str, make sure that no one calls us with temporary object +    TCharSplitWithoutTags(TString&&) = delete; +}; + +class TSubstringSplitDelimiter { +public: +    TKMPMatcher Matcher; +    size_t Len; + +    TSubstringSplitDelimiter(const TString& s); +}; + +class TSubstringSplit: public TSplitBase { +private: +    const TSubstringSplitDelimiter& Delimiter; + +public: +    using TIterator = TSplitIterator<TSubstringSplit>; +    friend class TSplitIterator<TSubstringSplit>; + +    TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter); +    TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter); +    TIterator Iterator() const; +    TSizeTRegion Next(size_t& pos) const; +    size_t Begin() const; + +private: +    // we don't own Delimiters, make sure that no one calls us with temporary object +    TSubstringSplit(TString&&, const TSubstringSplitDelimiter&) = delete; +    TSubstringSplit(const TString&, TSubstringSplitDelimiter&&) = delete; +}; + +template <class TSplit> +class TSplitIterator { +protected: +    const TSplit& Split; +    size_t Pos; +    TString* CurrentStroka; + +public: +    TSplitIterator(const TSplit& split) +        : Split(split) +        , Pos(Split.Begin()) +        , CurrentStroka(nullptr) +    { +    } + +    virtual ~TSplitIterator() { +        delete CurrentStroka; +    } + +    inline TSizeTRegion Next() { +        Y_ENSURE(!Eof(), TStringBuf("eof reached")); +        return Split.Next(Pos); +    } + +    TStringBuf NextTok() { +        if (Eof()) +           
 return TStringBuf(); +        TSizeTRegion region = Next(); +        return TStringBuf(Split.Str + region.Begin, region.End - region.Begin); +    } + +    const TString& NextString() { +        if (!CurrentStroka) +            CurrentStroka = new TString(); +        TSizeTRegion region = Next(); +        CurrentStroka->assign(Split.Str, region.Begin, region.Length() - 1); +        return *CurrentStroka; +    } + +    inline bool Eof() const { +        return Pos >= Split.Len; +    } + +    TString GetTail() const { +        return TString(Split.Str + Pos); +    } + +    void Skip(size_t count) { +        for (size_t i = 0; i < count; ++i) +            Next(); +    } +}; + +using TSplitTokens = TVector<TString>; + +template <typename TSplit> +void Split(const TSplit& split, TSplitTokens* words) { +    words->clear(); +    TSplitIterator<TSplit> it(split); +    while (!it.Eof()) +        words->push_back(it.NextString()); +} diff --git a/library/cpp/deprecated/split/split_iterator_ut.cpp b/library/cpp/deprecated/split/split_iterator_ut.cpp new file mode 100644 index 00000000000..be5069c4be4 --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator_ut.cpp @@ -0,0 +1,152 @@ +#include "split_iterator.h" + +#include <library/cpp/testing/unittest/registar.h> + +class TSplitIteratorTest: public TTestBase { +    UNIT_TEST_SUITE(TSplitIteratorTest); +    UNIT_TEST(TestDelimiters); +    UNIT_TEST(TestDelimitersSplit); +    UNIT_TEST(TestDelimitersStrictSplit); +    UNIT_TEST(TestTail); +    UNIT_TEST(TestScreenedDelimitersSplit); +    UNIT_TEST(TestSubstringDelimiter); +    UNIT_TEST_SUITE_END(); + +public: +    void TestDelimiters(); +    void TestDelimitersSplit(); +    void TestDelimitersStrictSplit(); +    void TestTail(); +    void TestScreenedDelimitersSplit(); +    void TestSubstringDelimiter(); +}; + +void TSplitIteratorTest::TestDelimiters() { +    TSplitDelimiters delims("@"); +    for (int i = 0; i < 256; ++i) +        if ('@' != i) { +            
UNIT_ASSERT(!delims.IsDelimiter((ui8)i)); +        } else { +            UNIT_ASSERT(delims.IsDelimiter((ui8)i)); +        } +} + +void TSplitIteratorTest::TestDelimitersSplit() { +    { +        TString s = "1a3b45cd"; +        TSplitDelimiters delims("abcd"); +        TDelimitersSplit split(s, delims); +        TSplitTokens tokens; +        Split(split, &tokens); +        TSplitTokens pattern = {"1", "3", "45"}; +        UNIT_ASSERT(tokens == pattern); +    } +    { +        TString s = "aaaaaa"; +        TSplitDelimiters delims("abcd"); +        TDelimitersSplit split(s, delims); +        TSplitTokens tokens; +        Split(split, &tokens); +        TSplitTokens pattern = {}; +        UNIT_ASSERT(tokens == pattern); +    } +} + +void TSplitIteratorTest::TestDelimitersStrictSplit() { +    { +        TString s = "grp@2"; +        TSplitDelimiters delims("@"); +        TDelimitersStrictSplit split(s, delims); +        TSplitTokens tokens; +        Split(split, &tokens); +        TSplitTokens pattern = {"grp", "2"}; +        UNIT_ASSERT(tokens == pattern); +    } + +    { +        TString s = "@grp@2@@"; +        TSplitDelimiters delims("@"); +        TDelimitersStrictSplit split(s, delims); +        TSplitTokens tokens; +        Split(split, &tokens); +        TSplitTokens pattern = {"", "grp", "2", ""}; +        UNIT_ASSERT(tokens == pattern); +    } +} + +void TSplitIteratorTest::TestTail() { +    TString s = "grp@2@4"; +    TSplitDelimiters delims("@"); +    TDelimitersSplit split(s, delims); +    TDelimitersSplit::TIterator it = split.Iterator(); +    UNIT_ASSERT_EQUAL(it.GetTail(), "grp@2@4"); +    it.Next(); +    UNIT_ASSERT_EQUAL(it.GetTail(), "2@4"); +    it.Next(); +    UNIT_ASSERT_EQUAL(it.GetTail(), "4"); +    it.Next(); +    UNIT_ASSERT_EQUAL(it.GetTail(), ""); +} + +void TSplitIteratorTest::TestScreenedDelimitersSplit() { +    { +        const TString s = "77.88.58.91 - - [28/Aug/2008:00:08:07 +0400] \"GET /export/mordashka.tgz HTTP/1.1\" 304 - \"-\" 
\"libwww-perl/5.805\" \"news.yandex.ru,80\" \"-\" \"-\" 1219867687 \"0\" 3283 2"; +        const TSplitDelimiters delims(" "); +        const TSplitDelimiters screens("\"[]"); +        const TScreenedDelimitersSplit splitter(s, delims, screens); +        TScreenedDelimitersSplit::TIterator it = splitter.Iterator(); +        UNIT_ASSERT_EQUAL(it.NextString(), "77.88.58.91"); +        UNIT_ASSERT_EQUAL(it.NextString(), "-"); +        UNIT_ASSERT_EQUAL(it.NextString(), "-"); +        UNIT_ASSERT_EQUAL(it.NextString(), "[28/Aug/2008:00:08:07 +0400]"); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"GET /export/mordashka.tgz HTTP/1.1\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "304"); +        UNIT_ASSERT_EQUAL(it.NextString(), "-"); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"-\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"libwww-perl/5.805\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"news.yandex.ru,80\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"-\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"-\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "1219867687"); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"0\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "3283"); +        UNIT_ASSERT_EQUAL(it.NextString(), "2"); +    } +    { +        const TString s = "77.88.58.91 - - [28/Aug/2008:00:08:07 +0400] \"GET /export/mordashka.tgz HTTP/1.1\" 304 - \"-\" \"libwww-perl/5.805\" \"news.yandex.ru,80\" \"-\" \"-\" 1219867687 \"0\" 3283 2"; +        const TSplitDelimiters delims(" "); +        const TSplitDelimiters screens("\"[]"); +        const TScreenedDelimitersSplit splitter(s.Data(), s.Size(), delims, screens); +        TScreenedDelimitersSplit::TIterator it = splitter.Iterator(); +        UNIT_ASSERT_EQUAL(it.NextString(), "77.88.58.91"); +        UNIT_ASSERT_EQUAL(it.NextString(), "-"); +        UNIT_ASSERT_EQUAL(it.NextString(), "-"); +        UNIT_ASSERT_EQUAL(it.NextString(), "[28/Aug/2008:00:08:07 +0400]"); +        
UNIT_ASSERT_EQUAL(it.NextString(), "\"GET /export/mordashka.tgz HTTP/1.1\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "304"); +        UNIT_ASSERT_EQUAL(it.NextString(), "-"); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"-\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"libwww-perl/5.805\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"news.yandex.ru,80\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"-\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"-\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "1219867687"); +        UNIT_ASSERT_EQUAL(it.NextString(), "\"0\""); +        UNIT_ASSERT_EQUAL(it.NextString(), "3283"); +        UNIT_ASSERT_EQUAL(it.NextString(), "2"); +    } +} + +void TSplitIteratorTest::TestSubstringDelimiter() { +    const TString s = "a@@bb@@cc@c.c@@r"; +    static const TSubstringSplitDelimiter delimiter("@@"); +    const TSubstringSplit splitter(s, delimiter); +    TSubstringSplit::TIterator it = splitter.Iterator(); +    UNIT_ASSERT_EQUAL(it.NextString(), "a"); +    UNIT_ASSERT_EQUAL(it.NextString(), "bb"); +    UNIT_ASSERT_EQUAL(it.NextString(), "cc@c.c"); +    UNIT_ASSERT_EQUAL(it.NextString(), "r"); +    UNIT_ASSERT(it.Eof()); +} + +UNIT_TEST_SUITE_REGISTRATION(TSplitIteratorTest); diff --git a/library/cpp/deprecated/split/ya.make b/library/cpp/deprecated/split/ya.make new file mode 100644 index 00000000000..946e685ac82 --- /dev/null +++ b/library/cpp/deprecated/split/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +OWNER(wrg0ababd) + +SRCS( +    delim_string_iter.cpp +    split_iterator.cpp +) + +PEERDIR( +    library/cpp/deprecated/kmp +) + +END() diff --git a/library/cpp/deprecated/ya.make b/library/cpp/deprecated/ya.make new file mode 100644 index 00000000000..6c753f68a9b --- /dev/null +++ b/library/cpp/deprecated/ya.make @@ -0,0 +1,49 @@ +RECURSE( +    abstract_iterator +    abstract_iterator/ut +    accessors +    accessors/ut +    autoarray +    base64 +    datafile +    dater_old +    dater_old/ut 
+    enum_codegen +    enum_codegen/ut +    fgood +    fgood/ut +    histogram +    ipreg1 +    ipreg1/ut +    ipreg1/ut_full +    ipreg1/util +    iter +    kmp +    kmp/ut +    mapped_file +    mapped_file/ut +    mbitmap +    omni +    text_norm +    omni/print_omni +    omni/usage +    omni/ut +    prog_options +    prog_options/ut +    sgi_hash +    threadable +    transgene +    datawork +    datawork/conf +    calc_module +    iterators_heap +    parse_utils +    parse_utils/ut +    small_array +    solartrie +    solartrie/indexed_region/ut +    solartrie/test +    solartrie/test/tests +    solartrie/ut +    split +) | 
