aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/pire/regexp.h
diff options
context:
space:
mode:
authorDmitry Potapov <potapov.d@gmail.com>2022-02-10 16:46:39 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:39 +0300
commit7aa4cf700385ff96999c5cc301171ff157974773 (patch)
tree49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/regex/pire/regexp.h
parent536101ea75c9ff5df10d01c2f460b1f6e12311b3 (diff)
downloadydb-7aa4cf700385ff96999c5cc301171ff157974773.tar.gz
Restoring authorship annotation for Dmitry Potapov <potapov.d@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex/pire/regexp.h')
-rw-r--r--library/cpp/regex/pire/regexp.h308
1 files changed, 154 insertions, 154 deletions
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h
index a517b47b71..94bba4064b 100644
--- a/library/cpp/regex/pire/regexp.h
+++ b/library/cpp/regex/pire/regexp.h
@@ -5,36 +5,36 @@
#include <library/cpp/charset/doccodes.h>
#include <library/cpp/charset/recyr.hh>
#include <util/generic/maybe.h>
-#include <util/generic/strbuf.h>
+#include <util/generic/strbuf.h>
#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/generic/yexception.h>
-
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+
namespace NRegExp {
struct TMatcher;
- struct TFsmBase {
- struct TOptions {
+ struct TFsmBase {
+ struct TOptions {
inline TOptions& SetCaseInsensitive(bool v) noexcept {
- CaseInsensitive = v;
- return *this;
- }
+ CaseInsensitive = v;
+ return *this;
+ }
inline TOptions& SetSurround(bool v) noexcept {
- Surround = v;
- return *this;
- }
+ Surround = v;
+ return *this;
+ }
inline TOptions& SetCapture(size_t pos) noexcept {
- CapturePos = pos;
- return *this;
+ CapturePos = pos;
+ return *this;
}
inline TOptions& SetCharset(ECharset charset) noexcept {
- Charset = charset;
- return *this;
- }
-
+ Charset = charset;
+ return *this;
+ }
+
inline TOptions& SetAndNotSupport(bool andNotSupport) noexcept {
AndNotSupport = andNotSupport;
return *this;
@@ -45,14 +45,14 @@ namespace NRegExp {
TMaybe<size_t> CapturePos;
ECharset Charset = CODES_UNKNOWN;
bool AndNotSupport = false;
- };
-
+ };
+
static inline NPire::TFsm Parse(const TStringBuf& regexp,
const TOptions& opts, const bool needDetermine = true) {
- NPire::TLexer lexer;
- if (opts.Charset == CODES_UNKNOWN) {
+ NPire::TLexer lexer;
+ if (opts.Charset == CODES_UNKNOWN) {
lexer.Assign(regexp.data(), regexp.data() + regexp.size());
- } else {
+ } else {
TVector<wchar32> ucs4(regexp.size() + 1);
size_t inRead = 0;
size_t outWritten = 0;
@@ -61,13 +61,13 @@ namespace NRegExp {
Y_ASSERT(recodeRes == RECODE_OK);
Y_ASSERT(outWritten < ucs4.size());
ucs4[outWritten] = 0;
-
- lexer.Assign(ucs4.begin(),
+
+ lexer.Assign(ucs4.begin(),
ucs4.begin() + std::char_traits<wchar32>::length(ucs4.data()));
- }
-
- if (opts.CaseInsensitive) {
- lexer.AddFeature(NPire::NFeatures::CaseInsensitive());
+ }
+
+ if (opts.CaseInsensitive) {
+ lexer.AddFeature(NPire::NFeatures::CaseInsensitive());
}
if (opts.CapturePos) {
@@ -78,7 +78,7 @@ namespace NRegExp {
lexer.AddFeature(NPire::NFeatures::AndNotSupport());
}
- switch (opts.Charset) {
+ switch (opts.Charset) {
case CODES_UNKNOWN:
break;
case CODES_UTF8:
@@ -90,76 +90,76 @@ namespace NRegExp {
default:
lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset));
break;
- }
-
- NPire::TFsm ret = lexer.Parse();
-
- if (opts.Surround) {
- ret.Surround();
+ }
+
+ NPire::TFsm ret = lexer.Parse();
+
+ if (opts.Surround) {
+ ret.Surround();
}
if (needDetermine) {
ret.Determine();
}
- return ret;
- }
- };
+ return ret;
+ }
+ };
- template <class TScannerType>
- class TFsmParser: public TFsmBase {
- public:
- typedef TScannerType TScanner;
+ template <class TScannerType>
+ class TFsmParser: public TFsmBase {
+ public:
+ typedef TScannerType TScanner;
- public:
+ public:
inline explicit TFsmParser(const TStringBuf& regexp,
const TOptions& opts = TOptions(), bool needDetermine = true)
: Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
- {
- }
+ {
+ }
inline const TScanner& GetScanner() const noexcept {
- return Scanner;
- }
-
- static inline TFsmParser False() {
- return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>());
- }
-
- inline explicit TFsmParser(const TScanner& compiled)
- : Scanner(compiled)
- {
- if (Scanner.Empty())
- ythrow yexception() << "Can't create fsm with empty scanner";
- }
-
- private:
- TScanner Scanner;
+ return Scanner;
+ }
+
+ static inline TFsmParser False() {
+ return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>());
+ }
+
+ inline explicit TFsmParser(const TScanner& compiled)
+ : Scanner(compiled)
+ {
+ if (Scanner.Empty())
+ ythrow yexception() << "Can't create fsm with empty scanner";
+ }
+
+ private:
+ TScanner Scanner;
};
- class TFsm: public TFsmParser<NPire::TNonrelocScanner> {
- public:
+ class TFsm: public TFsmParser<NPire::TNonrelocScanner> {
+ public:
inline explicit TFsm(const TStringBuf& regexp,
const TOptions& opts = TOptions())
- : TFsmParser<TScanner>(regexp, opts)
- {
- }
-
- inline TFsm(const TFsmParser<TScanner>& fsm)
- : TFsmParser<TScanner>(fsm)
- {
- }
-
+ : TFsmParser<TScanner>(regexp, opts)
+ {
+ }
+
+ inline TFsm(const TFsmParser<TScanner>& fsm)
+ : TFsmParser<TScanner>(fsm)
+ {
+ }
+
static inline TFsm Glue(const TFsm& l, const TFsm& r) {
- return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner()));
- }
-
- inline explicit TFsm(const TScanner& compiled)
- : TFsmParser<TScanner>(compiled)
- {
- }
- };
-
+ return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner()));
+ }
+
+ inline explicit TFsm(const TScanner& compiled)
+ : TFsmParser<TScanner>(compiled)
+ {
+ }
+ };
+
static inline TFsm operator|(const TFsm& l, const TFsm& r) {
return TFsm::Glue(l, r);
}
@@ -167,15 +167,15 @@ namespace NRegExp {
struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> {
inline explicit TCapturingFsm(const TStringBuf& regexp,
TOptions opts = TOptions())
- : TFsmParser<TScanner>(regexp,
+ : TFsmParser<TScanner>(regexp,
opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) {
- }
+ }
- inline TCapturingFsm(const TFsmParser<TScanner>& fsm)
- : TFsmParser<TScanner>(fsm)
- {
- }
- };
+ inline TCapturingFsm(const TFsmParser<TScanner>& fsm)
+ : TFsmParser<TScanner>(fsm)
+ {
+ }
+ };
struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
inline explicit TSlowCapturingFsm(const TStringBuf& regexp,
@@ -190,101 +190,101 @@ namespace NRegExp {
}
};
- template <class TFsm>
- class TMatcherBase {
- public:
- typedef typename TFsm::TScanner::State TState;
+ template <class TFsm>
+ class TMatcherBase {
+ public:
+ typedef typename TFsm::TScanner::State TState;
- public:
- inline explicit TMatcherBase(const TFsm& fsm)
- : Fsm(fsm)
- {
- Fsm.GetScanner().Initialize(State);
- }
+ public:
+ inline explicit TMatcherBase(const TFsm& fsm)
+ : Fsm(fsm)
+ {
+ Fsm.GetScanner().Initialize(State);
+ }
inline bool Final() const noexcept {
- return GetScanner().Final(GetState());
- }
+ return GetScanner().Final(GetState());
+ }
- protected:
+ protected:
inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept {
if (addBegin) {
- NPire::Step(GetScanner(), State, NPire::BeginMark);
+ NPire::Step(GetScanner(), State, NPire::BeginMark);
}
- NPire::Run(GetScanner(), State, data, data + len);
+ NPire::Run(GetScanner(), State, data, data + len);
if (addEnd) {
- NPire::Step(GetScanner(), State, NPire::EndMark);
- }
- }
+ NPire::Step(GetScanner(), State, NPire::EndMark);
+ }
+ }
inline const typename TFsm::TScanner& GetScanner() const noexcept {
- return Fsm.GetScanner();
- }
+ return Fsm.GetScanner();
+ }
inline const TState& GetState() const noexcept {
- return State;
- }
+ return State;
+ }
- private:
- const TFsm& Fsm;
- TState State;
+ private:
+ const TFsm& Fsm;
+ TState State;
};
-
+
struct TMatcher : TMatcherBase<TFsm> {
- inline explicit TMatcher(const TFsm& fsm)
- : TMatcherBase<TFsm>(fsm)
- {
- }
-
+ inline explicit TMatcher(const TFsm& fsm)
+ : TMatcherBase<TFsm>(fsm)
+ {
+ }
+
inline TMatcher& Match(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
Run(data, len, addBegin, addEnd);
- return *this;
- }
-
+ return *this;
+ }
+
inline TMatcher& Match(const TStringBuf& s, bool addBegin = false, bool addEnd = false) noexcept {
return Match(s.data(), s.size(), addBegin, addEnd);
- }
-
+ }
+
inline const char* Find(const char* b, const char* e) noexcept {
- return NPire::ShortestPrefix(GetScanner(), b, e);
- }
-
+ return NPire::ShortestPrefix(GetScanner(), b, e);
+ }
+
typedef std::pair<const size_t*, const size_t*> TMatchedRegexps;
-
+
inline TMatchedRegexps MatchedRegexps() const noexcept {
- return GetScanner().AcceptedRegexps(GetState());
- }
- };
-
- class TSearcher: public TMatcherBase<TCapturingFsm> {
- public:
- inline explicit TSearcher(const TCapturingFsm& fsm)
- : TMatcherBase<TCapturingFsm>(fsm)
- {
- }
-
+ return GetScanner().AcceptedRegexps(GetState());
+ }
+ };
+
+ class TSearcher: public TMatcherBase<TCapturingFsm> {
+ public:
+ inline explicit TSearcher(const TCapturingFsm& fsm)
+ : TMatcherBase<TCapturingFsm>(fsm)
+ {
+ }
+
inline bool Captured() const noexcept {
- return GetState().Captured();
- }
-
+ return GetState().Captured();
+ }
+
inline TSearcher& Search(const char* data, size_t len, bool addBegin = true, bool addEnd = true) noexcept {
- Data = TStringBuf(data, len);
+ Data = TStringBuf(data, len);
Run(data, len, addBegin, addEnd);
- return *this;
- }
-
+ return *this;
+ }
+
inline TSearcher& Search(const TStringBuf& s) noexcept {
return Search(s.data(), s.size());
- }
-
+ }
+
inline TStringBuf GetCaptured() const noexcept {
return TStringBuf(Data.data() + GetState().Begin() - 1,
Data.data() + GetState().End() - 1);
- }
-
- private:
- TStringBuf Data;
- };
+ }
+
+ private:
+ TStringBuf Data;
+ };
class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{
public: