diff options
author | smikler <smikler@yandex-team.ru> | 2022-02-10 16:49:33 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:33 +0300 |
commit | 0e68ae909d3b76a5a001a07880eb0010dec6b2ea (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library | |
parent | e4f0fd4ab53ca40eb91e750cf3e7f76c21e930db (diff) | |
download | ydb-0e68ae909d3b76a5a001a07880eb0010dec6b2ea.tar.gz |
Restoring authorship annotation for <smikler@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library')
-rw-r--r-- | library/cpp/compproto/bit.h | 2 | ||||
-rw-r--r-- | library/cpp/compproto/metainfo.h | 2 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_trie.h | 16 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_ut.cpp | 4 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/leaf_skipper.h | 34 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/opaque_trie_iterator.cpp | 2 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/protopacker.h | 12 | ||||
-rw-r--r-- | library/cpp/packedtypes/packedfloat.h | 12 | ||||
-rw-r--r-- | library/cpp/packers/packers.h | 174 | ||||
-rw-r--r-- | library/cpp/packers/ut/packers_ut.cpp | 6 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.cpp | 210 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.h | 30 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 20 | ||||
-rw-r--r-- | library/cpp/regex/pire/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/string_utils/url/url.cpp | 68 | ||||
-rw-r--r-- | library/cpp/string_utils/url/url.h | 4 | ||||
-rw-r--r-- | library/cpp/string_utils/url/url_ut.cpp | 34 |
17 files changed, 316 insertions, 316 deletions
diff --git a/library/cpp/compproto/bit.h b/library/cpp/compproto/bit.h index 58c74e7f74..6a421b65f7 100644 --- a/library/cpp/compproto/bit.h +++ b/library/cpp/compproto/bit.h @@ -5,7 +5,7 @@ #include <util/stream/output.h> #include <util/stream/input.h> -#include "huff.h" +#include "huff.h" #include "compressor.h" #include "metainfo.h" diff --git a/library/cpp/compproto/metainfo.h b/library/cpp/compproto/metainfo.h index 04ccb714aa..6e68f86e12 100644 --- a/library/cpp/compproto/metainfo.h +++ b/library/cpp/compproto/metainfo.h @@ -5,7 +5,7 @@ #include <util/generic/ptr.h> #include <util/generic/refcount.h> #include <util/stream/input.h> -#include <util/stream/str.h> +#include <util/stream/str.h> #include "compressor.h" diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 8be73307e2..40ec1e52b3 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -331,7 +331,7 @@ bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value if (!LookupLongestPrefix(key, keylen, prefixLen, valuepos, hasNext) || prefixLen != keylen) return false; if (value) - Packer.UnpackLeaf(valuepos, *value); + Packer.UnpackLeaf(valuepos, *value); return true; } @@ -468,7 +468,7 @@ bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, if (prefixLen) *prefixLen = tempPrefixLen; if (found && value) - Packer.UnpackLeaf(valuepos, *value); + Packer.UnpackLeaf(valuepos, *value); if (hasNext) *hasNext = tempHasNext; return found; @@ -647,16 +647,16 @@ const char* TCompactTrie<T, D, S>::TConstIterator::GetValuePtr() const { template <class T, class D, class S> typename TCompactTrie<T, D, S>::TData TCompactTrie<T, D, S>::TConstIterator::GetValue() const { - D data; - GetValue(data); - return data; -} - + D data; + GetValue(data); + return data; +} + template <class T, class D, class S> void TCompactTrie<T, D, S>::TConstIterator::GetValue(typename TCompactTrie<T, D, S>::TData& data) const { const char* ptr = GetValuePtr(); if (ptr) { - Packer.UnpackLeaf(ptr, data); + Packer.UnpackLeaf(ptr, data); } else { data = typename TCompactTrie<T, D, S>::TData(); } diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 7e773d4b06..74bee09b5d 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -1016,9 +1016,9 @@ template <class T> class TCompactTrieTest::TDummyPacker: public TNullPacker<T> { public: static T Data(const TString&) { - T data; + T data; TNullPacker<T>().UnpackLeaf(nullptr, data); - return data; + return data; } typedef T TData; diff --git a/library/cpp/containers/comptrie/leaf_skipper.h b/library/cpp/containers/comptrie/leaf_skipper.h index bd599ceaa2..3959258948 100644 --- a/library/cpp/containers/comptrie/leaf_skipper.h +++ b/library/cpp/containers/comptrie/leaf_skipper.h @@ -3,32 +3,32 @@ #include <cstddef> namespace NCompactTrie { - class ILeafSkipper { - public: - virtual size_t SkipLeaf(const char* p) const = 0; + class ILeafSkipper { + public: + virtual size_t SkipLeaf(const char* p) const = 0; virtual ~ILeafSkipper() = default; - }; - + }; + template <class TPacker> class TPackerLeafSkipper: public ILeafSkipper { - private: - const TPacker* Packer; - - public: - TPackerLeafSkipper(const TPacker* packer) - : Packer(packer) - { - } - + private: + const TPacker* Packer; + + public: + TPackerLeafSkipper(const TPacker* packer) + : Packer(packer) + { + } + size_t SkipLeaf(const char* p) const override { - return Packer->SkipLeaf(p); - } + return Packer->SkipLeaf(p); + } // For test purposes. const TPacker* GetPacker() const { return Packer; } - }; + }; // The data you need to traverse the trie without unpacking the values. struct TOpaqueTrie { diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp index 0f09b17f8e..5fd3914be6 100644 --- a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp +++ b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp @@ -168,7 +168,7 @@ namespace NCompactTrie { } //------------------------------------------------------------------------- - + TFork::TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper) : Node(data, offset, skipper) , Data(data) diff --git a/library/cpp/containers/comptrie/protopacker.h b/library/cpp/containers/comptrie/protopacker.h index 8349f83582..3e15866dc5 100644 --- a/library/cpp/containers/comptrie/protopacker.h +++ b/library/cpp/containers/comptrie/protopacker.h @@ -4,22 +4,22 @@ #include <util/ysaveload.h> template <class Proto> -class TProtoPacker { +class TProtoPacker { public: TProtoPacker() = default; - void UnpackLeaf(const char* p, Proto& entry) const { + void UnpackLeaf(const char* p, Proto& entry) const { TMemoryInput in(p + sizeof(ui32), SkipLeaf(p) - sizeof(ui32)); entry.ParseFromArcadiaStream(&in); } - void PackLeaf(char* p, const Proto& entry, size_t size) const { + void PackLeaf(char* p, const Proto& entry, size_t size) const { TMemoryOutput out(p, size + sizeof(ui32)); Save<ui32>(&out, size); entry.SerializeToArcadiaStream(&out); } - size_t MeasureLeaf(const Proto& entry) const { - return entry.ByteSize() + sizeof(ui32); - } + size_t MeasureLeaf(const Proto& entry) const { + return entry.ByteSize() + sizeof(ui32); + } size_t SkipLeaf(const char* p) const { TMemoryInput in(p, sizeof(ui32)); ui32 size; diff --git a/library/cpp/packedtypes/packedfloat.h b/library/cpp/packedtypes/packedfloat.h index 51d8821b13..f178912ed3 100644 --- a/library/cpp/packedtypes/packedfloat.h +++ b/library/cpp/packedtypes/packedfloat.h @@ -178,7 +178,7 @@ inline constexpr float Frac2Float(T pf) { constexpr float multiplier = float(1.0 / Max<T>()); return pf * multiplier; } - + class TUi82FloatMapping { private: float Mapping[Max<ui8>() + 1] = {}; @@ -208,10 +208,10 @@ inline float Frac2Float(ui32 pf) = delete; template <class T> inline float FracOrFloatToFloat(T t) { - return Frac2Float(t); -} - + return Frac2Float(t); +} + template <> inline float FracOrFloatToFloat<float>(float t) { - return t; -} + return t; +} diff --git a/library/cpp/packers/packers.h b/library/cpp/packers/packers.h index 1590f96ca5..1bde1b59aa 100644 --- a/library/cpp/packers/packers.h +++ b/library/cpp/packers/packers.h @@ -12,10 +12,10 @@ // Default realization can pack only limited range of types, but you can pack any data other using your own strategy class. template <class T> -class TNullPacker { // Very effective package class - pack any data into zero bytes :) +class TNullPacker { // Very effective package class - pack any data into zero bytes :) public: - void UnpackLeaf(const char*, T& t) const { - t = T(); + void UnpackLeaf(const char*, T& t) const { + t = T(); } void PackLeaf(char*, const T&, size_t) const { @@ -33,7 +33,7 @@ public: template <typename T> class TAsIsPacker { // this packer is not really a packer... public: - void UnpackLeaf(const char* p, T& t) const { + void UnpackLeaf(const char* p, T& t) const { memcpy(&t, p, sizeof(T)); } void PackLeaf(char* buffer, const T& data, size_t computedSize) const { @@ -95,9 +95,9 @@ namespace NPackers { // TIntegralPacker --- for integral types. template <class T> - class TIntegralPacker { // can pack only integral types <= ui64 + class TIntegralPacker { // can pack only integral types <= ui64 public: - void UnpackLeaf(const char* p, T& t) const; + void UnpackLeaf(const char* p, T& t) const; void PackLeaf(char* buffer, const T& data, size_t size) const; size_t MeasureLeaf(const T& data) const; size_t SkipLeaf(const char* p) const; @@ -152,32 +152,32 @@ namespace NPackers { namespace NImpl { template <class T, bool isSigned> struct TUnpackLeafImpl { - inline void UnpackLeaf(const char* p, T& t) const; + inline void UnpackLeaf(const char* p, T& t) const; }; template <class T> struct TUnpackLeafImpl<T, true> { - inline void UnpackLeaf(const char* p, T& t) const { - ui64 val; - TIntegralPacker<ui64>().UnpackLeaf(p, val); + inline void UnpackLeaf(const char* p, T& t) const { + ui64 val; + TIntegralPacker<ui64>().UnpackLeaf(p, val); if (val & 1) { t = -1 * static_cast<i64>(val >> 1); } else { - t = static_cast<T>(val >> 1); + t = static_cast<T>(val >> 1); } } }; template <class T> struct TUnpackLeafImpl<T, false> { - inline void UnpackLeaf(const char* p, T& t) const { - ui64 tmp; - TIntegralPacker<ui64>().UnpackLeaf(p, tmp); - t = static_cast<T>(tmp); + inline void UnpackLeaf(const char* p, T& t) const { + ui64 tmp; + TIntegralPacker<ui64>().UnpackLeaf(p, tmp); + t = static_cast<T>(tmp); } }; } template <class T> - inline void TIntegralPacker<T>::UnpackLeaf(const char* p, T& t) const { + inline void TIntegralPacker<T>::UnpackLeaf(const char* p, T& t) const { NImpl::TUnpackLeafImpl<T, std::is_signed<T>::value>().UnpackLeaf(p, t); } @@ -252,7 +252,7 @@ namespace NPackers { // TStringPacker --- for TString/TUtf16String and TStringBuf. template <class TStringType> - class TStringPacker { + class TStringPacker { public: void UnpackLeaf(const char* p, TStringType& t) const; void PackLeaf(char* buffer, const TStringType& data, size_t size) const; @@ -262,8 +262,8 @@ namespace NPackers { template <class TStringType> inline void TStringPacker<TStringType>::UnpackLeaf(const char* buf, TStringType& t) const { - size_t len; - TIntegralPacker<size_t>().UnpackLeaf(buf, len); + size_t len; + TIntegralPacker<size_t>().UnpackLeaf(buf, len); size_t start = TIntegralPacker<size_t>().SkipLeaf(buf); t = TStringType((const typename TStringType::char_type*)(buf + start), len); } @@ -285,13 +285,13 @@ namespace NPackers { template <class TStringType> inline size_t TStringPacker<TStringType>::SkipLeaf(const char* buf) const { - size_t result = TIntegralPacker<size_t>().SkipLeaf(buf); - { - size_t len; - TIntegralPacker<size_t>().UnpackLeaf(buf, len); + size_t result = TIntegralPacker<size_t>().SkipLeaf(buf); + { + size_t len; + TIntegralPacker<size_t>().UnpackLeaf(buf, len); result += len * sizeof(typename TStringType::char_type); - } - return result; + } + return result; } template <class T> @@ -309,64 +309,64 @@ namespace NPackers { // are applicable to C::value_type template <typename T> - struct TContainerInfo { - enum { - IsVector = 0 - }; - }; - + struct TContainerInfo { + enum { + IsVector = 0 + }; + }; + template <typename T> struct TContainerInfo<std::vector<T>> { - enum { - IsVector = 1 - }; - }; - + enum { + IsVector = 1 + }; + }; + template <typename T> struct TContainerInfo<TVector<T>> { - enum { - IsVector = 1 - }; - }; - + enum { + IsVector = 1 + }; + }; + template <bool IsVector> - class TContainerPackerHelper { - }; - + class TContainerPackerHelper { + }; + template <> - class TContainerPackerHelper<false> { - public: + class TContainerPackerHelper<false> { + public: template <class Packer, class Container> static void UnpackLeaf(Packer& p, const char* buffer, Container& c) { p.UnpackLeafSimple(buffer, c); - } - }; - + } + }; + template <> - class TContainerPackerHelper<true> { - public: + class TContainerPackerHelper<true> { + public: template <class Packer, class Container> static void UnpackLeaf(Packer& p, const char* buffer, Container& c) { p.UnpackLeafVector(buffer, c); - } - }; - + } + }; + template <class C, class EP = TPacker<typename C::value_type>> - class TContainerPacker { + class TContainerPacker { private: typedef C TContainer; typedef EP TElementPacker; typedef typename TContainer::const_iterator TElementIterator; - - void UnpackLeafSimple(const char* buffer, TContainer& c) const; - void UnpackLeafVector(const char* buffer, TContainer& c) const; - - friend class TContainerPackerHelper<TContainerInfo<C>::IsVector>; + + void UnpackLeafSimple(const char* buffer, TContainer& c) const; + void UnpackLeafVector(const char* buffer, TContainer& c) const; + + friend class TContainerPackerHelper<TContainerInfo<C>::IsVector>; public: - void UnpackLeaf(const char* buffer, TContainer& c) const { - TContainerPackerHelper<TContainerInfo<C>::IsVector>::UnpackLeaf(*this, buffer, c); - } + void UnpackLeaf(const char* buffer, TContainer& c) const { + TContainerPackerHelper<TContainerInfo<C>::IsVector>::UnpackLeaf(*this, buffer, c); + } void PackLeaf(char* buffer, const TContainer& data, size_t size) const; size_t MeasureLeaf(const TContainer& data) const; size_t SkipLeaf(const char* buffer) const; @@ -375,16 +375,16 @@ namespace NPackers { template <class C, class EP> inline void TContainerPacker<C, EP>::UnpackLeafSimple(const char* buffer, C& result) const { size_t offset = TIntegralPacker<size_t>().SkipLeaf(buffer); // first value is the total size (not needed here) - size_t len; + size_t len; TIntegralPacker<size_t>().UnpackLeaf(buffer + offset, len); offset += TIntegralPacker<size_t>().SkipLeaf(buffer + offset); - result.clear(); - - typename C::value_type value; + result.clear(); + + typename C::value_type value; for (size_t i = 0; i < len; i++) { TElementPacker().UnpackLeaf(buffer + offset, value); - result.insert(result.end(), value); + result.insert(result.end(), value); offset += TElementPacker().SkipLeaf(buffer + offset); } } @@ -392,17 +392,17 @@ namespace NPackers { template <class C, class EP> inline void TContainerPacker<C, EP>::UnpackLeafVector(const char* buffer, C& result) const { size_t offset = TIntegralPacker<size_t>().SkipLeaf(buffer); // first value is the total size (not needed here) - size_t len; + size_t len; TIntegralPacker<size_t>().UnpackLeaf(buffer + offset, len); offset += TIntegralPacker<size_t>().SkipLeaf(buffer + offset); - result.resize(len); + result.resize(len); - for (size_t i = 0; i < len; i++) { + for (size_t i = 0; i < len; i++) { TElementPacker().UnpackLeaf(buffer + offset, result[i]); offset += TElementPacker().SkipLeaf(buffer + offset); - } - } - + } + } + template <class C, class EP> inline void TContainerPacker<C, EP>::PackLeaf(char* buffer, const C& data, size_t size) const { size_t sizeOfSize = TIntegralPacker<size_t>().MeasureLeaf(size); @@ -445,12 +445,12 @@ namespace NPackers { // TPacker<T1> and TPacker<T2> should be valid classes template <class T1, class T2, class TPacker1 = TPacker<T1>, class TPacker2 = TPacker<T2>> - class TPairPacker { + class TPairPacker { private: typedef std::pair<T1, T2> TMyPair; public: - void UnpackLeaf(const char* buffer, TMyPair& pair) const; + void UnpackLeaf(const char* buffer, TMyPair& pair) const; void PackLeaf(char* buffer, const TMyPair& data, size_t size) const; size_t MeasureLeaf(const TMyPair& data) const; size_t SkipLeaf(const char* buffer) const; @@ -458,9 +458,9 @@ namespace NPackers { template <class T1, class T2, class TPacker1, class TPacker2> inline void TPairPacker<T1, T2, TPacker1, TPacker2>::UnpackLeaf(const char* buffer, std::pair<T1, T2>& pair) const { - TPacker1().UnpackLeaf(buffer, pair.first); + TPacker1().UnpackLeaf(buffer, pair.first); size_t size = TPacker1().SkipLeaf(buffer); - TPacker2().UnpackLeaf(buffer + size, pair.second); + TPacker2().UnpackLeaf(buffer + size, pair.second); } template <class T1, class T2, class TPacker1, class TPacker2> @@ -578,25 +578,25 @@ namespace NPackers { template <class T> class TPacker<std::vector<T>>: public TContainerPacker<std::vector<T>> { - }; - - template <class T> + }; + + template <class T> class TPacker<TVector<T>>: public TContainerPacker<TVector<T>> { }; template <class T> class TPacker<std::list<T>>: public TContainerPacker<std::list<T>> { - }; - - template <class T> + }; + + template <class T> class TPacker<TList<T>>: public TContainerPacker<TList<T>> { }; template <class T> class TPacker<std::set<T>>: public TContainerPacker<std::set<T>> { - }; - - template <class T> + }; + + template <class T> class TPacker<TSet<T>>: public TContainerPacker<TSet<T>> { }; diff --git a/library/cpp/packers/ut/packers_ut.cpp b/library/cpp/packers/ut/packers_ut.cpp index 6ccc581a9d..18ce2150d1 100644 --- a/library/cpp/packers/ut/packers_ut.cpp +++ b/library/cpp/packers/ut/packers_ut.cpp @@ -51,9 +51,9 @@ void TPackersTest::TestPacker(const TData& data) { TPacker().PackLeaf(buf.Get(), data, len); UNIT_ASSERT(TPacker().SkipLeaf(buf.Get()) == len); - - TData dataTmp; - TPacker().UnpackLeaf(buf.Get(), dataTmp); + + TData dataTmp; + TPacker().UnpackLeaf(buf.Get(), dataTmp); UNIT_ASSERT(data == dataTmp); } diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp index bb5c79634d..f788beb85f 100644 --- a/library/cpp/regex/pire/pcre2pire.cpp +++ b/library/cpp/regex/pire/pcre2pire.cpp @@ -1,110 +1,110 @@ -#include "pcre2pire.h" -#include <util/generic/vector.h> -#include <util/generic/yexception.h> - +#include "pcre2pire.h" +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + TString Pcre2Pire(const TString& src) { TVector<char> result; result.reserve(src.size() + 1); - - enum EState { - S_SIMPLE, - S_SLASH, - S_BRACE, - S_EXPECT_Q, - S_QUESTION, - S_P, - S_COMMA, - S_IN, - }; - - EState state = S_SIMPLE; - + + enum EState { + S_SIMPLE, + S_SLASH, + S_BRACE, + S_EXPECT_Q, + S_QUESTION, + S_P, + S_COMMA, + S_IN, + }; + + EState state = S_SIMPLE; + for (ui32 i = 0; i < src.size(); ++i) { - const char c = src[i]; - - switch (state) { - case S_SIMPLE: - if (c == '\\') { - state = S_SLASH; - } else if (c == '(') { - state = S_BRACE; - } else if (c == '*' || c == '?') { - state = S_EXPECT_Q; - result.push_back(c); - } else { + const char c = src[i]; + + switch (state) { + case S_SIMPLE: + if (c == '\\') { + state = S_SLASH; + } else if (c == '(') { + state = S_BRACE; + } else if (c == '*' || c == '?') { + state = S_EXPECT_Q; + result.push_back(c); + } else { if (c == ')' && result.size() > 0 && result.back() == '(') { - // eliminating "()" - result.pop_back(); - } else { - result.push_back(c); - } - } - break; - case S_SLASH: - state = S_SIMPLE; - if (c == ':' || c == '=' || c == '#' || c == '&') { - result.push_back(c); - } else { - result.push_back('\\'); - --i; - } - break; - case S_BRACE: - if (c == '?') { - state = S_QUESTION; - } else { - state = S_COMMA; - --i; - } - break; - case S_EXPECT_Q: - state = S_SIMPLE; - if (c != '?') { - --i; - } - break; - case S_QUESTION: - if (c == 'P') { - state = S_P; - } else if (c == ':' || c == '=') { - state = S_COMMA; - } else { - ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; - } - break; - case S_P: - if (c == '<') { - state = S_IN; - } else { - ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; - } - break; - case S_IN: - if (c == '>') { - state = S_COMMA; - } else { - // nothing to do - } - break; - case S_COMMA: - state = S_SIMPLE; - if (c == ')') { - // nothing to do - } else { - result.push_back('('); - --i; - } - break; - default: - ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!"; - } - } - - if (state != S_SIMPLE && state != S_EXPECT_Q) { - ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!"; - } - - result.push_back('\0'); - - return &result[0]; -} + // eliminating "()" + result.pop_back(); + } else { + result.push_back(c); + } + } + break; + case S_SLASH: + state = S_SIMPLE; + if (c == ':' || c == '=' || c == '#' || c == '&') { + result.push_back(c); + } else { + result.push_back('\\'); + --i; + } + break; + case S_BRACE: + if (c == '?') { + state = S_QUESTION; + } else { + state = S_COMMA; + --i; + } + break; + case S_EXPECT_Q: + state = S_SIMPLE; + if (c != '?') { + --i; + } + break; + case S_QUESTION: + if (c == 'P') { + state = S_P; + } else if (c == ':' || c == '=') { + state = S_COMMA; + } else { + ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; + } + break; + case S_P: + if (c == '<') { + state = S_IN; + } else { + ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; + } + break; + case S_IN: + if (c == '>') { + state = S_COMMA; + } else { + // nothing to do + } + break; + case S_COMMA: + state = S_SIMPLE; + if (c == ')') { + // nothing to do + } else { + result.push_back('('); + --i; + } + break; + default: + ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!"; + } + } + + if (state != S_SIMPLE && state != S_EXPECT_Q) { + ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!"; + } + + result.push_back('\0'); + + return &result[0]; +} diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h index 9a52e1c70f..46e45b9193 100644 --- a/library/cpp/regex/pire/pcre2pire.h +++ b/library/cpp/regex/pire/pcre2pire.h @@ -1,19 +1,19 @@ #pragma once -// Author: smikler@yandex-team.ru - +// Author: smikler@yandex-team.ru + #include <util/generic/string.h> - -/* Converts pcre regular expression to pire compatible format: - * - replaces "\\#" with "#" - * - replaces "\\=" with "=" - * - replaces "\\:" with ":" - * - removes "?P<...>" - * - removes "?:" - * - removes "()" recursively - * - replaces "??" with "?" - * - replaces "*?" with "*" - * NOTE: - * - Not fully tested! - */ + +/* Converts pcre regular expression to pire compatible format: + * - replaces "\\#" with "#" + * - replaces "\\=" with "=" + * - replaces "\\:" with ":" + * - removes "?P<...>" + * - removes "?:" + * - removes "()" recursively + * - replaces "??" with "?" + * - replaces "*?" with "*" + * NOTE: + * - Not fully tested! + */ TString Pcre2Pire(const TString& src); diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index 00211a6f3c..e7206de9ad 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -131,7 +131,7 @@ Y_UNIT_TEST_SUITE(TRegExp) { UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)")); } - + Y_UNIT_TEST(Capture5) { TCapturingFsm fsm("away\\.php\\?to=http:([^\"])+\""); TSearcher searcher(fsm); @@ -306,13 +306,13 @@ Y_UNIT_TEST_SUITE(TRegExp) { } Y_UNIT_TEST(Pcre2PireTest) { - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)*?fake"), "(fake)*fake"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>fake)"), "(fake)"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("fake\\#"), "fake#"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>)fake"), "fake"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), ""); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))"); - } + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)*?fake"), "(fake)*fake"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>fake)"), "(fake)"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("fake\\#"), "fake#"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>)fake"), "fake"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), ""); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))"); + } } diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make index cc42ecc7f9..c857e6d18b 100644 --- a/library/cpp/regex/pire/ya.make +++ b/library/cpp/regex/pire/ya.make @@ -12,7 +12,7 @@ CFLAGS(-DPIRE_NO_CONFIG) SRCDIR(contrib/libs/pire/pire) SRCS( - pcre2pire.cpp + pcre2pire.cpp classes.cpp encoding.cpp fsm.cpp diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp index 579d1e25be..85f4ac5d69 100644 --- a/library/cpp/string_utils/url/url.cpp +++ b/library/cpp/string_utils/url/url.cpp @@ -147,41 +147,41 @@ static inline TStringBuf GetHostAndPortImpl(const TStringBuf url) { } TStringBuf GetHost(const TStringBuf url) noexcept { - return GetHostAndPortImpl<false>(url); -} - + return GetHostAndPortImpl<false>(url); +} + TStringBuf GetHostAndPort(const TStringBuf url) noexcept { - return GetHostAndPortImpl<true>(url); -} - + return GetHostAndPortImpl<true>(url); +} + TStringBuf GetSchemeHostAndPort(const TStringBuf url, bool trimHttp, bool trimDefaultPort) noexcept { - const size_t schemeSize = GetSchemePrefixSize(url); - const TStringBuf scheme = url.Head(schemeSize); - + const size_t schemeSize = GetSchemePrefixSize(url); + const TStringBuf scheme = url.Head(schemeSize); + const bool isHttp = (schemeSize == 0 || scheme == TStringBuf("http://")); - - TStringBuf hostAndPort = GetHostAndPort(url.Tail(schemeSize)); - - if (trimDefaultPort) { - const size_t pos = hostAndPort.find(':'); - if (pos != TStringBuf::npos) { + + TStringBuf hostAndPort = GetHostAndPort(url.Tail(schemeSize)); + + if (trimDefaultPort) { + const size_t pos = hostAndPort.find(':'); + if (pos != TStringBuf::npos) { const bool isHttps = (scheme == TStringBuf("https://")); - - const TStringBuf port = hostAndPort.Tail(pos + 1); + + const TStringBuf port = hostAndPort.Tail(pos + 1); if ((isHttp && port == TStringBuf("80")) || (isHttps && port == TStringBuf("443"))) { - // trimming default port - hostAndPort = hostAndPort.Head(pos); - } - } - } - - if (isHttp && trimHttp) { - return hostAndPort; - } else { - return TStringBuf(scheme.begin(), hostAndPort.end()); - } -} - + // trimming default port + hostAndPort = hostAndPort.Head(pos); + } + } + } + + if (isHttp && trimHttp) { + return hostAndPort; + } else { + return TStringBuf(scheme.begin(), hostAndPort.end()); + } +} + void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path) { auto [hostBuf, pathBuf] = NUrl::SplitUrlToHostAndPath(url); host = hostBuf; @@ -240,14 +240,14 @@ TStringBuf GetOnlyHost(const TStringBuf url) noexcept { } TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment) noexcept { - const size_t off = url.find('/', GetHttpPrefixSize(url)); + const size_t off = url.find('/', GetHttpPrefixSize(url)); TStringBuf hostUnused, path; if (!url.TrySplitAt(off, hostUnused, path)) - return "/"; + return "/"; return trimFragment ? path.Before('#') : path; -} - +} + // this strange creature returns 2nd level domain, possibly with port TStringBuf GetDomain(const TStringBuf host) noexcept { const char* c = !host ? host.data() : host.end() - 1; diff --git a/library/cpp/string_utils/url/url.h b/library/cpp/string_utils/url/url.h index cf90abb8ff..84137ccc57 100644 --- a/library/cpp/string_utils/url/url.h +++ b/library/cpp/string_utils/url/url.h @@ -41,7 +41,7 @@ size_t GetSchemePrefixSize(const TStringBuf url) noexcept; Y_PURE_FUNCTION TStringBuf GetSchemePrefix(const TStringBuf url) noexcept; - + //! removes protocol prefixes 'http://' and 'https://' from given URL //! @note if URL has no prefix or some other prefix the function does nothing //! @param url URL from which the prefix should be removed @@ -122,7 +122,7 @@ bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBu * @throws yexception if present port number cannot be parsed into ui16. */ void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port); - + Y_PURE_FUNCTION TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment = true) noexcept; /** diff --git a/library/cpp/string_utils/url/url_ut.cpp b/library/cpp/string_utils/url/url_ut.cpp index 64988471ea..1588013893 100644 --- a/library/cpp/string_utils/url/url_ut.cpp +++ b/library/cpp/string_utils/url/url_ut.cpp @@ -7,38 +7,38 @@ Y_UNIT_TEST_SUITE(TUtilUrlTest) { Y_UNIT_TEST(TestGetHostAndGetHostAndPort) { UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe")); - UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe")); UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru")); - UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru")); UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080")); - UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080")); UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe:8080")); - UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe:8080")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe:8080")); UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080/bebe")); UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("https://ya.ru:8080/bebe")); UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("www.ya.ru:8080/bebe")); UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("https://www.ya.ru:8080/bebe")); - UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080/bebe")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080/bebe")); // irl RFC3986 sometimes gets ignored UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHost("pravda-kmv.ru?page=news&id=6973")); - UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHostAndPort("pravda-kmv.ru?page=news&id=6973")); + UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHostAndPort("pravda-kmv.ru?page=news&id=6973")); // check simple string UNIT_ASSERT_VALUES_EQUAL("some_blender_url", GetHost("some_blender_url")); UNIT_ASSERT_VALUES_EQUAL("", GetHost("")); } Y_UNIT_TEST(TestGetPathAndQuery) { - UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org")); - UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/")); - UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org:8080")); - UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("ru.wikipedia.org/index.php?123/")); - UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("http://ru.wikipedia.org:8080")); - UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("https://ru.wikipedia.org/index.php?123/")); - UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/#comment")); - UNIT_ASSERT_VALUES_EQUAL("/?1", GetPathAndQuery("ru.wikipedia.org/?1#comment")); - UNIT_ASSERT_VALUES_EQUAL("/?1#comment", GetPathAndQuery("ru.wikipedia.org/?1#comment", false)); - } - + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org:8080")); + UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("ru.wikipedia.org/index.php?123/")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("http://ru.wikipedia.org:8080")); + UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("https://ru.wikipedia.org/index.php?123/")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/#comment")); + UNIT_ASSERT_VALUES_EQUAL("/?1", GetPathAndQuery("ru.wikipedia.org/?1#comment")); + UNIT_ASSERT_VALUES_EQUAL("/?1#comment", GetPathAndQuery("ru.wikipedia.org/?1#comment", false)); + } + Y_UNIT_TEST(TestGetDomain) { UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("www.ya.ru")); UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru")); |