diff options
author | Dmitry Potapov <potapov.d@gmail.com> | 2022-02-10 16:46:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:39 +0300 |
commit | 7aa4cf700385ff96999c5cc301171ff157974773 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/regex/pire/regexp.h | |
parent | 536101ea75c9ff5df10d01c2f460b1f6e12311b3 (diff) | |
download | ydb-7aa4cf700385ff96999c5cc301171ff157974773.tar.gz |
Restoring authorship annotation for Dmitry Potapov <potapov.d@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex/pire/regexp.h')
-rw-r--r-- | library/cpp/regex/pire/regexp.h | 308 |
1 files changed, 154 insertions, 154 deletions
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index a517b47b71..94bba4064b 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -5,36 +5,36 @@ #include <library/cpp/charset/doccodes.h> #include <library/cpp/charset/recyr.hh> #include <util/generic/maybe.h> -#include <util/generic/strbuf.h> +#include <util/generic/strbuf.h> #include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/generic/yexception.h> - +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + namespace NRegExp { struct TMatcher; - struct TFsmBase { - struct TOptions { + struct TFsmBase { + struct TOptions { inline TOptions& SetCaseInsensitive(bool v) noexcept { - CaseInsensitive = v; - return *this; - } + CaseInsensitive = v; + return *this; + } inline TOptions& SetSurround(bool v) noexcept { - Surround = v; - return *this; - } + Surround = v; + return *this; + } inline TOptions& SetCapture(size_t pos) noexcept { - CapturePos = pos; - return *this; + CapturePos = pos; + return *this; } inline TOptions& SetCharset(ECharset charset) noexcept { - Charset = charset; - return *this; - } - + Charset = charset; + return *this; + } + inline TOptions& SetAndNotSupport(bool andNotSupport) noexcept { AndNotSupport = andNotSupport; return *this; @@ -45,14 +45,14 @@ namespace NRegExp { TMaybe<size_t> CapturePos; ECharset Charset = CODES_UNKNOWN; bool AndNotSupport = false; - }; - + }; + static inline NPire::TFsm Parse(const TStringBuf& regexp, const TOptions& opts, const bool needDetermine = true) { - NPire::TLexer lexer; - if (opts.Charset == CODES_UNKNOWN) { + NPire::TLexer lexer; + if (opts.Charset == CODES_UNKNOWN) { lexer.Assign(regexp.data(), regexp.data() + regexp.size()); - } else { + } else { TVector<wchar32> ucs4(regexp.size() + 1); size_t inRead = 0; size_t outWritten = 0; @@ -61,13 +61,13 @@ namespace NRegExp { Y_ASSERT(recodeRes == RECODE_OK); Y_ASSERT(outWritten < ucs4.size()); ucs4[outWritten] = 0; - - lexer.Assign(ucs4.begin(), + + lexer.Assign(ucs4.begin(), ucs4.begin() + std::char_traits<wchar32>::length(ucs4.data())); - } - - if (opts.CaseInsensitive) { - lexer.AddFeature(NPire::NFeatures::CaseInsensitive()); + } + + if (opts.CaseInsensitive) { + lexer.AddFeature(NPire::NFeatures::CaseInsensitive()); } if (opts.CapturePos) { @@ -78,7 +78,7 @@ namespace NRegExp { lexer.AddFeature(NPire::NFeatures::AndNotSupport()); } - switch (opts.Charset) { + switch (opts.Charset) { case CODES_UNKNOWN: break; case CODES_UTF8: @@ -90,76 +90,76 @@ namespace NRegExp { default: lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset)); break; - } - - NPire::TFsm ret = lexer.Parse(); - - if (opts.Surround) { - ret.Surround(); + } + + NPire::TFsm ret = lexer.Parse(); + + if (opts.Surround) { + ret.Surround(); } if (needDetermine) { ret.Determine(); } - return ret; - } - }; + return ret; + } + }; - template <class TScannerType> - class TFsmParser: public TFsmBase { - public: - typedef TScannerType TScanner; + template <class TScannerType> + class TFsmParser: public TFsmBase { + public: + typedef TScannerType TScanner; - public: + public: inline explicit TFsmParser(const TStringBuf& regexp, const TOptions& opts = TOptions(), bool needDetermine = true) : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) - { - } + { + } inline const TScanner& GetScanner() const noexcept { - return Scanner; - } - - static inline TFsmParser False() { - return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>()); - } - - inline explicit TFsmParser(const TScanner& compiled) - : Scanner(compiled) - { - if (Scanner.Empty()) - ythrow yexception() << "Can't create fsm with empty scanner"; - } - - private: - TScanner Scanner; + return Scanner; + } + + static inline TFsmParser False() { + return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>()); + } + + inline explicit TFsmParser(const TScanner& compiled) + : Scanner(compiled) + { + if (Scanner.Empty()) + ythrow yexception() << "Can't create fsm with empty scanner"; + } + + private: + TScanner Scanner; }; - class TFsm: public TFsmParser<NPire::TNonrelocScanner> { - public: + class TFsm: public TFsmParser<NPire::TNonrelocScanner> { + public: inline explicit TFsm(const TStringBuf& regexp, const TOptions& opts = TOptions()) - : TFsmParser<TScanner>(regexp, opts) - { - } - - inline TFsm(const TFsmParser<TScanner>& fsm) - : TFsmParser<TScanner>(fsm) - { - } - + : TFsmParser<TScanner>(regexp, opts) + { + } + + inline TFsm(const TFsmParser<TScanner>& fsm) + : TFsmParser<TScanner>(fsm) + { + } + static inline TFsm Glue(const TFsm& l, const TFsm& r) { - return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner())); - } - - inline explicit TFsm(const TScanner& compiled) - : TFsmParser<TScanner>(compiled) - { - } - }; - + return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner())); + } + + inline explicit TFsm(const TScanner& compiled) + : TFsmParser<TScanner>(compiled) + { + } + }; + static inline TFsm operator|(const TFsm& l, const TFsm& r) { return TFsm::Glue(l, r); } @@ -167,15 +167,15 @@ namespace NRegExp { struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> { inline explicit TCapturingFsm(const TStringBuf& regexp, TOptions opts = TOptions()) - : TFsmParser<TScanner>(regexp, + : TFsmParser<TScanner>(regexp, opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) { - } + } - inline TCapturingFsm(const TFsmParser<TScanner>& fsm) - : TFsmParser<TScanner>(fsm) - { - } - }; + inline TCapturingFsm(const TFsmParser<TScanner>& fsm) + : TFsmParser<TScanner>(fsm) + { + } + }; struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { inline explicit TSlowCapturingFsm(const TStringBuf& regexp, @@ -190,101 +190,101 @@ namespace NRegExp { } }; - template <class TFsm> - class TMatcherBase { - public: - typedef typename TFsm::TScanner::State TState; + template <class TFsm> + class TMatcherBase { + public: + typedef typename TFsm::TScanner::State TState; - public: - inline explicit TMatcherBase(const TFsm& fsm) - : Fsm(fsm) - { - Fsm.GetScanner().Initialize(State); - } + public: + inline explicit TMatcherBase(const TFsm& fsm) + : Fsm(fsm) + { + Fsm.GetScanner().Initialize(State); + } inline bool Final() const noexcept { - return GetScanner().Final(GetState()); - } + return GetScanner().Final(GetState()); + } - protected: + protected: inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept { if (addBegin) { - NPire::Step(GetScanner(), State, NPire::BeginMark); + NPire::Step(GetScanner(), State, NPire::BeginMark); } - NPire::Run(GetScanner(), State, data, data + len); + NPire::Run(GetScanner(), State, data, data + len); if (addEnd) { - NPire::Step(GetScanner(), State, NPire::EndMark); - } - } + NPire::Step(GetScanner(), State, NPire::EndMark); + } + } inline const typename TFsm::TScanner& GetScanner() const noexcept { - return Fsm.GetScanner(); - } + return Fsm.GetScanner(); + } inline const TState& GetState() const noexcept { - return State; - } + return State; + } - private: - const TFsm& Fsm; - TState State; + private: + const TFsm& Fsm; + TState State; }; - + struct TMatcher : TMatcherBase<TFsm> { - inline explicit TMatcher(const TFsm& fsm) - : TMatcherBase<TFsm>(fsm) - { - } - + inline explicit TMatcher(const TFsm& fsm) + : TMatcherBase<TFsm>(fsm) + { + } + inline TMatcher& Match(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { Run(data, len, addBegin, addEnd); - return *this; - } - + return *this; + } + inline TMatcher& Match(const TStringBuf& s, bool addBegin = false, bool addEnd = false) noexcept { return Match(s.data(), s.size(), addBegin, addEnd); - } - + } + inline const char* Find(const char* b, const char* e) noexcept { - return NPire::ShortestPrefix(GetScanner(), b, e); - } - + return NPire::ShortestPrefix(GetScanner(), b, e); + } + typedef std::pair<const size_t*, const size_t*> TMatchedRegexps; - + inline TMatchedRegexps MatchedRegexps() const noexcept { - return GetScanner().AcceptedRegexps(GetState()); - } - }; - - class TSearcher: public TMatcherBase<TCapturingFsm> { - public: - inline explicit TSearcher(const TCapturingFsm& fsm) - : TMatcherBase<TCapturingFsm>(fsm) - { - } - + return GetScanner().AcceptedRegexps(GetState()); + } + }; + + class TSearcher: public TMatcherBase<TCapturingFsm> { + public: + inline explicit TSearcher(const TCapturingFsm& fsm) + : TMatcherBase<TCapturingFsm>(fsm) + { + } + inline bool Captured() const noexcept { - return GetState().Captured(); - } - + return GetState().Captured(); + } + inline TSearcher& Search(const char* data, size_t len, bool addBegin = true, bool addEnd = true) noexcept { - Data = TStringBuf(data, len); + Data = TStringBuf(data, len); Run(data, len, addBegin, addEnd); - return *this; - } - + return *this; + } + inline TSearcher& Search(const TStringBuf& s) noexcept { return Search(s.data(), s.size()); - } - + } + inline TStringBuf GetCaptured() const noexcept { return TStringBuf(Data.data() + GetState().Begin() - 1, Data.data() + GetState().End() - 1); - } - - private: - TStringBuf Data; - }; + } + + private: + TStringBuf Data; + }; class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{ public: |