aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/pire
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:17 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:17 +0300
commitd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
treedd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/regex/pire
parent72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
downloadydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex/pire')
-rw-r--r--library/cpp/regex/pire/extraencodings.cpp116
-rw-r--r--library/cpp/regex/pire/inline/ya.make4
-rw-r--r--library/cpp/regex/pire/pcre2pire.cpp2
-rw-r--r--library/cpp/regex/pire/pcre2pire.h4
-rw-r--r--library/cpp/regex/pire/pire.h14
-rw-r--r--library/cpp/regex/pire/regexp.h110
-rw-r--r--library/cpp/regex/pire/ut/regexp_ut.cpp36
-rw-r--r--library/cpp/regex/pire/ut/ya.make14
-rw-r--r--library/cpp/regex/pire/ya.make2
9 files changed, 151 insertions, 151 deletions
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp
index 8645d6cd4f..2e507e4b67 100644
--- a/library/cpp/regex/pire/extraencodings.cpp
+++ b/library/cpp/regex/pire/extraencodings.cpp
@@ -8,73 +8,73 @@
#include "pire.h"
namespace NPire {
- namespace {
- // A one-byte encoding which is capable of transforming upper half of the character
- // table to/from Unicode chars.
- class TOneByte: public TEncoding {
- public:
- TOneByte(ECharset doccode) {
- Table_ = CodePageByCharset(doccode)->unicode;
- for (size_t i = 0; i < 256; ++i)
- Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
- }
+ namespace {
+ // A one-byte encoding which is capable of transforming upper half of the character
+ // table to/from Unicode chars.
+ class TOneByte: public TEncoding {
+ public:
+ TOneByte(ECharset doccode) {
+ Table_ = CodePageByCharset(doccode)->unicode;
+ for (size_t i = 0; i < 256; ++i)
+ Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
+ }
- wchar32 FromLocal(const char*& begin, const char* end) const override {
- if (begin != end)
- return Table_[static_cast<unsigned char>(*begin++)];
- else
- ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
- }
+ wchar32 FromLocal(const char*& begin, const char* end) const override {
+ if (begin != end)
+ return Table_[static_cast<unsigned char>(*begin++)];
+ else
+ ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
+ }
- TString ToLocal(wchar32 c) const override {
- THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
- if (i != Reverse_.end())
- return TString(1, i->second);
- else
- return TString();
- }
+ TString ToLocal(wchar32 c) const override {
+ THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
+ if (i != Reverse_.end())
+ return TString(1, i->second);
+ else
+ return TString();
+ }
- void AppendDot(TFsm& fsm) const override {
- fsm.AppendDot();
- }
+ void AppendDot(TFsm& fsm) const override {
+ fsm.AppendDot();
+ }
- private:
- const wchar32* Table_;
- THashMap<wchar32, char> Reverse_;
- };
+ private:
+ const wchar32* Table_;
+ THashMap<wchar32, char> Reverse_;
+ };
- template <unsigned N>
- struct TOneByteHelper: public TOneByte {
- inline TOneByteHelper()
- : TOneByte((ECharset)N)
- {
- }
- };
- }
+ template <unsigned N>
+ struct TOneByteHelper: public TOneByte {
+ inline TOneByteHelper()
+ : TOneByte((ECharset)N)
+ {
+ }
+ };
+ }
- namespace NEncodings {
- const NPire::TEncoding& Koi8r() {
- return *Singleton<TOneByteHelper<CODES_KOI8>>();
- }
+ namespace NEncodings {
+ const NPire::TEncoding& Koi8r() {
+ return *Singleton<TOneByteHelper<CODES_KOI8>>();
+ }
- const NPire::TEncoding& Cp1251() {
- return *Singleton<TOneByteHelper<CODES_WIN>>();
+ const NPire::TEncoding& Cp1251() {
+ return *Singleton<TOneByteHelper<CODES_WIN>>();
}
- const NPire::TEncoding& Get(ECharset encoding) {
- switch (encoding) {
- case CODES_WIN:
- return Cp1251();
- case CODES_KOI8:
- return Koi8r();
- case CODES_ASCII:
- return NPire::NEncodings::Latin1();
- case CODES_UTF8:
- return NPire::NEncodings::Utf8();
- default:
- ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
- }
- }
+ const NPire::TEncoding& Get(ECharset encoding) {
+ switch (encoding) {
+ case CODES_WIN:
+ return Cp1251();
+ case CODES_KOI8:
+ return Koi8r();
+ case CODES_ASCII:
+ return NPire::NEncodings::Latin1();
+ case CODES_UTF8:
+ return NPire::NEncodings::Utf8();
+ default:
+ ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
+ }
+ }
}
diff --git a/library/cpp/regex/pire/inline/ya.make b/library/cpp/regex/pire/inline/ya.make
index 5a83468746..d4850f7b45 100644
--- a/library/cpp/regex/pire/inline/ya.make
+++ b/library/cpp/regex/pire/inline/ya.make
@@ -6,8 +6,8 @@ OWNER(
g:util
davenger
)
-
-PEERDIR(
+
+PEERDIR(
ADDINCL library/cpp/regex/pire
)
diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp
index 498a8abc25..f788beb85f 100644
--- a/library/cpp/regex/pire/pcre2pire.cpp
+++ b/library/cpp/regex/pire/pcre2pire.cpp
@@ -2,7 +2,7 @@
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
-TString Pcre2Pire(const TString& src) {
+TString Pcre2Pire(const TString& src) {
TVector<char> result;
result.reserve(src.size() + 1);
diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h
index b4d3b34205..46e45b9193 100644
--- a/library/cpp/regex/pire/pcre2pire.h
+++ b/library/cpp/regex/pire/pcre2pire.h
@@ -1,5 +1,5 @@
-#pragma once
-
+#pragma once
+
// Author: smikler@yandex-team.ru
#include <util/generic/string.h>
diff --git a/library/cpp/regex/pire/pire.h b/library/cpp/regex/pire/pire.h
index 148301f39d..286fecd693 100644
--- a/library/cpp/regex/pire/pire.h
+++ b/library/cpp/regex/pire/pire.h
@@ -41,9 +41,9 @@ namespace NPire {
using TError = Pire::Error;
// Helper functions
- using Pire::LongestPrefix;
- using Pire::LongestSuffix;
- using Pire::Matches;
+ using Pire::LongestPrefix;
+ using Pire::LongestSuffix;
+ using Pire::Matches;
using Pire::MmappedScanner;
using Pire::Run;
using Pire::Runner;
@@ -55,8 +55,8 @@ namespace NPire {
using namespace Pire::Consts;
namespace NFeatures {
- using Pire::Features::AndNotSupport;
- using Pire::Features::Capture;
+ using Pire::Features::AndNotSupport;
+ using Pire::Features::Capture;
using Pire::Features::CaseInsensitive;
using Pire::Features::GlueSimilarGlyphs;
}
@@ -65,8 +65,8 @@ namespace NPire {
using Pire::Encodings::Latin1;
using Pire::Encodings::Utf8;
- const NPire::TEncoding& Koi8r();
- const NPire::TEncoding& Cp1251();
+ const NPire::TEncoding& Koi8r();
+ const NPire::TEncoding& Cp1251();
const NPire::TEncoding& Get(ECharset encoding);
}
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h
index d5424e359a..94bba4064b 100644
--- a/library/cpp/regex/pire/regexp.h
+++ b/library/cpp/regex/pire/regexp.h
@@ -1,7 +1,7 @@
#pragma once
-
-#include "pire.h"
-
+
+#include "pire.h"
+
#include <library/cpp/charset/doccodes.h>
#include <library/cpp/charset/recyr.hh>
#include <util/generic/maybe.h>
@@ -10,26 +10,26 @@
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
-namespace NRegExp {
+namespace NRegExp {
struct TMatcher;
-
+
struct TFsmBase {
struct TOptions {
inline TOptions& SetCaseInsensitive(bool v) noexcept {
CaseInsensitive = v;
return *this;
}
-
+
inline TOptions& SetSurround(bool v) noexcept {
Surround = v;
return *this;
}
-
+
inline TOptions& SetCapture(size_t pos) noexcept {
CapturePos = pos;
return *this;
- }
-
+ }
+
inline TOptions& SetCharset(ECharset charset) noexcept {
Charset = charset;
return *this;
@@ -68,64 +68,64 @@ namespace NRegExp {
if (opts.CaseInsensitive) {
lexer.AddFeature(NPire::NFeatures::CaseInsensitive());
- }
-
+ }
+
if (opts.CapturePos) {
lexer.AddFeature(NPire::NFeatures::Capture(*opts.CapturePos));
- }
-
+ }
+
if (opts.AndNotSupport) {
lexer.AddFeature(NPire::NFeatures::AndNotSupport());
}
switch (opts.Charset) {
- case CODES_UNKNOWN:
- break;
- case CODES_UTF8:
- lexer.SetEncoding(NPire::NEncodings::Utf8());
- break;
- case CODES_KOI8:
- lexer.SetEncoding(NPire::NEncodings::Koi8r());
- break;
- default:
- lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset));
- break;
+ case CODES_UNKNOWN:
+ break;
+ case CODES_UTF8:
+ lexer.SetEncoding(NPire::NEncodings::Utf8());
+ break;
+ case CODES_KOI8:
+ lexer.SetEncoding(NPire::NEncodings::Koi8r());
+ break;
+ default:
+ lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset));
+ break;
}
NPire::TFsm ret = lexer.Parse();
if (opts.Surround) {
ret.Surround();
- }
-
+ }
+
if (needDetermine) {
ret.Determine();
}
-
+
return ret;
}
};
-
+
template <class TScannerType>
class TFsmParser: public TFsmBase {
public:
typedef TScannerType TScanner;
-
+
public:
inline explicit TFsmParser(const TStringBuf& regexp,
const TOptions& opts = TOptions(), bool needDetermine = true)
: Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
{
}
-
+
inline const TScanner& GetScanner() const noexcept {
return Scanner;
}
-
+
static inline TFsmParser False() {
return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>());
}
-
+
inline explicit TFsmParser(const TScanner& compiled)
: Scanner(compiled)
{
@@ -135,12 +135,12 @@ namespace NRegExp {
private:
TScanner Scanner;
- };
-
+ };
+
class TFsm: public TFsmParser<NPire::TNonrelocScanner> {
public:
inline explicit TFsm(const TStringBuf& regexp,
- const TOptions& opts = TOptions())
+ const TOptions& opts = TOptions())
: TFsmParser<TScanner>(regexp, opts)
{
}
@@ -150,7 +150,7 @@ namespace NRegExp {
{
}
- static inline TFsm Glue(const TFsm& l, const TFsm& r) {
+ static inline TFsm Glue(const TFsm& l, const TFsm& r) {
return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner()));
}
@@ -160,23 +160,23 @@ namespace NRegExp {
}
};
- static inline TFsm operator|(const TFsm& l, const TFsm& r) {
- return TFsm::Glue(l, r);
- }
-
- struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> {
+ static inline TFsm operator|(const TFsm& l, const TFsm& r) {
+ return TFsm::Glue(l, r);
+ }
+
+ struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> {
inline explicit TCapturingFsm(const TStringBuf& regexp,
- TOptions opts = TOptions())
+ TOptions opts = TOptions())
: TFsmParser<TScanner>(regexp,
- opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) {
+ opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) {
}
-
+
inline TCapturingFsm(const TFsmParser<TScanner>& fsm)
: TFsmParser<TScanner>(fsm)
{
}
};
-
+
struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
inline explicit TSlowCapturingFsm(const TStringBuf& regexp,
TOptions opts = TOptions())
@@ -194,43 +194,43 @@ namespace NRegExp {
class TMatcherBase {
public:
typedef typename TFsm::TScanner::State TState;
-
+
public:
inline explicit TMatcherBase(const TFsm& fsm)
: Fsm(fsm)
{
Fsm.GetScanner().Initialize(State);
}
-
+
inline bool Final() const noexcept {
return GetScanner().Final(GetState());
}
-
+
protected:
inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept {
if (addBegin) {
NPire::Step(GetScanner(), State, NPire::BeginMark);
- }
+ }
NPire::Run(GetScanner(), State, data, data + len);
if (addEnd) {
NPire::Step(GetScanner(), State, NPire::EndMark);
}
}
-
+
inline const typename TFsm::TScanner& GetScanner() const noexcept {
return Fsm.GetScanner();
}
-
+
inline const TState& GetState() const noexcept {
return State;
}
-
+
private:
const TFsm& Fsm;
TState State;
- };
+ };
- struct TMatcher : TMatcherBase<TFsm> {
+ struct TMatcher : TMatcherBase<TFsm> {
inline explicit TMatcher(const TFsm& fsm)
: TMatcherBase<TFsm>(fsm)
{
@@ -334,4 +334,4 @@ namespace NRegExp {
return *this;
}
};
-}
+}
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index 294bc65fa7..e7206de9ad 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -1,21 +1,21 @@
#include <library/cpp/testing/unittest/registar.h>
-
+
#include <library/cpp/regex/pire/regexp.h>
#include <library/cpp/regex/pire/pcre2pire.h>
-
+
Y_UNIT_TEST_SUITE(TRegExp) {
- using namespace NRegExp;
-
+ using namespace NRegExp;
+
Y_UNIT_TEST(False) {
- UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final());
+ UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final());
UNIT_ASSERT(!TMatcher(TFsm::False()).Match(TStringBuf{}).Final());
- }
-
+ }
+
Y_UNIT_TEST(Surround) {
- UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
- UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final());
- }
-
+ UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final());
+ }
+
Y_UNIT_TEST(Boundaries) {
UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
@@ -29,13 +29,13 @@ Y_UNIT_TEST_SUITE(TRegExp) {
.Match(TStringBuf("q"), false, false)
.Match(TStringBuf("w"), false, false)
.Match(TStringBuf("b"), false, true)
- .Final());
+ .Final());
}
Y_UNIT_TEST(Case) {
- UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final());
- UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final());
- }
+ UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final());
+ }
Y_UNIT_TEST(UnicodeCase) {
UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final());
@@ -114,7 +114,7 @@ Y_UNIT_TEST_SUITE(TRegExp) {
Y_UNIT_TEST(Capture3) {
TCapturingFsm fsm("http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",
- TFsm::TOptions().SetCapture(2));
+ TFsm::TOptions().SetCapture(2));
TSearcher searcher(fsm);
searcher.Search("http://vkontakte.ru/id100500");
@@ -124,7 +124,7 @@ Y_UNIT_TEST_SUITE(TRegExp) {
Y_UNIT_TEST(Capture4) {
TCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",
- TFsm::TOptions().SetCharset(CODES_UTF8));
+ TFsm::TOptions().SetCharset(CODES_UTF8));
TSearcher searcher(fsm);
searcher.Search(" Здравствуйте, Уважаемый (-ая)! ");
@@ -315,4 +315,4 @@ Y_UNIT_TEST_SUITE(TRegExp) {
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), "");
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))");
}
-}
+}
diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make
index 0277d88f8c..8776695f40 100644
--- a/library/cpp/regex/pire/ut/ya.make
+++ b/library/cpp/regex/pire/ut/ya.make
@@ -6,10 +6,10 @@ OWNER(
g:util
davenger
)
-
+
SET(PIRETESTSDIR contrib/libs/pire/ut)
-CFLAGS(-DPIRE_NO_CONFIG)
+CFLAGS(-DPIRE_NO_CONFIG)
PEERDIR(
library/cpp/regex/pire
@@ -18,11 +18,11 @@ PEERDIR(
SRCDIR(
${PIRETESTSDIR}
)
-
-ADDINCL(
- contrib/libs/pire/pire
- contrib/libs/pire/ut
-)
+
+ADDINCL(
+ contrib/libs/pire/pire
+ contrib/libs/pire/ut
+)
SRCS(
pire_ut.cpp
diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make
index 7d14c3b043..c857e6d18b 100644
--- a/library/cpp/regex/pire/ya.make
+++ b/library/cpp/regex/pire/ya.make
@@ -6,7 +6,7 @@ OWNER(
davenger
pg
)
-
+
CFLAGS(-DPIRE_NO_CONFIG)
SRCDIR(contrib/libs/pire/pire)