aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:17 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:17 +0300
commitd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
treedd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/regex
parent72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
downloadydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r--library/cpp/regex/hyperscan/hyperscan.cpp4
-rw-r--r--library/cpp/regex/hyperscan/hyperscan.h4
-rw-r--r--library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp18
-rw-r--r--library/cpp/regex/pcre/regexp.cpp122
-rw-r--r--library/cpp/regex/pcre/regexp.h28
-rw-r--r--library/cpp/regex/pcre/regexp_ut.cpp30
-rw-r--r--library/cpp/regex/pcre/ya.make2
-rw-r--r--library/cpp/regex/pire/extraencodings.cpp116
-rw-r--r--library/cpp/regex/pire/inline/ya.make4
-rw-r--r--library/cpp/regex/pire/pcre2pire.cpp2
-rw-r--r--library/cpp/regex/pire/pcre2pire.h4
-rw-r--r--library/cpp/regex/pire/pire.h14
-rw-r--r--library/cpp/regex/pire/regexp.h110
-rw-r--r--library/cpp/regex/pire/ut/regexp_ut.cpp36
-rw-r--r--library/cpp/regex/pire/ut/ya.make14
-rw-r--r--library/cpp/regex/pire/ya.make2
-rw-r--r--library/cpp/regex/ya.make16
17 files changed, 263 insertions, 263 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp
index ba025c72b1..ba321f9c29 100644
--- a/library/cpp/regex/hyperscan/hyperscan.cpp
+++ b/library/cpp/regex/hyperscan/hyperscan.cpp
@@ -255,7 +255,7 @@ namespace NHyperscan {
hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase(
db.Get(),
&databaseBytes,
- &databaseLength);
+ &databaseLength);
TSerializedDatabase serialization(databaseBytes);
if (status != HS_SUCCESS) {
ythrow yexception() << "Failed to serialize hyperscan database";
@@ -268,7 +268,7 @@ namespace NHyperscan {
hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase(
serialization.begin(),
serialization.size(),
- &rawDb);
+ &rawDb);
TDatabase db(rawDb);
if (status != HS_SUCCESS) {
if (status == HS_DB_PLATFORM_ERROR) {
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h
index 608bc87300..1c8f404389 100644
--- a/library/cpp/regex/hyperscan/hyperscan.h
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -144,7 +144,7 @@ namespace NHyperscan {
const TDatabase& db,
const TScratch& scratch,
const TStringBuf& text,
- TCallback& callback // applied to index of matched regex
+ TCallback& callback // applied to index of matched regex
) {
NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
}
@@ -152,7 +152,7 @@ namespace NHyperscan {
bool Matches(
const TDatabase& db,
const TScratch& scratch,
- const TStringBuf& text);
+ const TStringBuf& text);
TString Serialize(const TDatabase& db);
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
index 9410c8d6ba..9caa53f2e7 100644
--- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
+++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
@@ -23,14 +23,14 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
db,
scratch,
"abc",
- callback);
+ callback);
UNIT_ASSERT_EQUAL(foundId, 0);
}
Y_UNIT_TEST(Matches) {
NHyperscan::TDatabase db = NHyperscan::Compile(
"a.c",
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc"));
UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo"));
@@ -49,7 +49,7 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
{
42,
241,
- });
+ });
NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo"));
@@ -65,7 +65,7 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
db,
scratch,
"fooBaR",
- callback);
+ callback);
UNIT_ASSERT_EQUAL(foundIds.size(), 2);
UNIT_ASSERT(foundIds.contains(42));
UNIT_ASSERT(foundIds.contains(241));
@@ -82,13 +82,13 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
},
{
0,
- });
+ });
}
Y_UNIT_TEST(Serialize) {
NHyperscan::TDatabase db = NHyperscan::Compile(
"foo",
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
TString serialization = Serialize(db);
db.Reset();
TDatabase db2 = Deserialize(serialization);
@@ -101,10 +101,10 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
Y_UNIT_TEST(GrowScratch) {
NHyperscan::TDatabase db1 = NHyperscan::Compile(
"foo",
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
NHyperscan::TDatabase db2 = NHyperscan::Compile(
"longer\\w\\w\\wpattern",
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1);
NHyperscan::GrowScratch(scratch, db2);
UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo"));
@@ -114,7 +114,7 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
Y_UNIT_TEST(CloneScratch) {
NHyperscan::TDatabase db = NHyperscan::Compile(
"foo",
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db);
NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1);
scratch1.Reset();
diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp
index e7108ae5e9..575c09cee4 100644
--- a/library/cpp/regex/pcre/regexp.cpp
+++ b/library/cpp/regex/pcre/regexp.cpp
@@ -1,21 +1,21 @@
-#include "regexp.h"
-
+#include "regexp.h"
+
#include <util/generic/string.h>
#include <util/string/ascii.h>
#include <util/system/defaults.h>
-
+
#include <cstdlib>
#include <util/generic/noncopyable.h>
-
+
class TGlobalImpl : TNonCopyable {
private:
- const char* Str;
- regmatch_t* Pmatch;
+ const char* Str;
+ regmatch_t* Pmatch;
int Options;
int StrLen;
int StartOffset, NotEmptyOpts, MatchPos;
int MatchBuf[NMATCHES * 3];
- pcre* PregComp;
+ pcre* PregComp;
enum StateCode {
TGI_EXIT,
@@ -26,25 +26,25 @@ private:
private:
void CopyResults(int count) {
for (int i = 0; i < count; i++) {
- Pmatch[MatchPos].rm_so = MatchBuf[2 * i];
- Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1];
+ Pmatch[MatchPos].rm_so = MatchBuf[2 * i];
+ Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1];
MatchPos++;
if (MatchPos >= NMATCHES) {
ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
}
- }
+ }
}
int DoPcreExec(int opts) {
int rc = pcre_exec(
- PregComp, /* the compiled pattern */
- nullptr, /* no extra data - we didn't study the pattern */
- Str, /* the subject string */
- StrLen, /* the length of the subject */
- StartOffset, /* start at offset 0 in the subject */
- opts, /* default options */
- MatchBuf, /* output vector for substring information */
- NMATCHES); /* number of elements in the output vector */
+ PregComp, /* the compiled pattern */
+ nullptr, /* no extra data - we didn't study the pattern */
+ Str, /* the subject string */
+ StrLen, /* the length of the subject */
+ StartOffset, /* start at offset 0 in the subject */
+ opts, /* default options */
+ MatchBuf, /* output vector for substring information */
+ NMATCHES); /* number of elements in the output vector */
if (rc == 0) {
ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
@@ -55,7 +55,7 @@ private:
StateCode CheckEmptyCase() {
if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string
- if (MatchBuf[0] == StrLen) { // at the end
+ if (MatchBuf[0] == StrLen) { // at the end
return TGI_EXIT;
}
NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string
@@ -65,25 +65,25 @@ private:
StateCode CheckNoMatch(int rc) {
if (rc == PCRE_ERROR_NOMATCH) {
- if (NotEmptyOpts == 0) {
+ if (NotEmptyOpts == 0) {
return TGI_EXIT;
}
-
- MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset"
+
+ MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset"
return TGI_CONTINUE;
}
return TGI_WALKTHROUGH;
}
public:
- TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re)
- : Str(st)
- , Pmatch(&pma)
- , Options(opts)
- , StartOffset(0)
- , NotEmptyOpts(0)
- , MatchPos(0)
- , PregComp(pc_re)
+ TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re)
+ : Str(st)
+ , Pmatch(&pma)
+ , Options(opts)
+ , StartOffset(0)
+ , NotEmptyOpts(0)
+ , MatchPos(0)
+ , PregComp(pc_re)
{
memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES);
StrLen = strlen(Str);
@@ -114,29 +114,29 @@ public:
return 0;
case TGI_WALKTHROUGH:
default:
- break;
- }
+ break;
+ }
if (rc < 0) {
return rc;
}
CopyResults(rc);
- } while (true);
+ } while (true);
- return 0;
+ return 0;
}
-
+
private:
};
-class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> {
+class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> {
friend class TRegExBase;
protected:
- int CompileOptions;
+ int CompileOptions;
TString RegExpr;
- regex_t Preg;
+ regex_t Preg;
public:
TRegExBaseImpl()
@@ -159,7 +159,7 @@ public:
}
}
- int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const {
+ int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const {
if (!RegExpr) {
ythrow yexception() << "Regular expression is not compiled";
}
@@ -170,12 +170,12 @@ public:
return regexec(&Preg, str, nmatches, pmatch, eflags);
} else {
int options = 0;
- if ((eflags & REG_NOTBOL) != 0)
- options |= PCRE_NOTBOL;
- if ((eflags & REG_NOTEOL) != 0)
- options |= PCRE_NOTEOL;
+ if ((eflags & REG_NOTBOL) != 0)
+ options |= PCRE_NOTBOL;
+ if ((eflags & REG_NOTEOL) != 0)
+ options |= PCRE_NOTEOL;
- return TGlobalImpl(str, pmatch[0], options, (pcre*)Preg.re_pcre).ExecGlobal();
+ return TGlobalImpl(str, pmatch[0], options, (pcre*)Preg.re_pcre).ExecGlobal();
}
}
@@ -195,12 +195,12 @@ bool TRegExBase::IsCompiled() const {
return Impl && Impl->IsCompiled();
}
-TRegExBase::TRegExBase(const char* re, int cflags) {
+TRegExBase::TRegExBase(const char* re, int cflags) {
if (re) {
Compile(re, cflags);
}
}
-
+
TRegExBase::TRegExBase(const TString& re, int cflags) {
Compile(re, cflags);
}
@@ -211,8 +211,8 @@ TRegExBase::~TRegExBase() {
void TRegExBase::Compile(const TString& re, int cflags) {
Impl = new TRegExBaseImpl(re, cflags);
}
-
-int TRegExBase::Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const {
+
+int TRegExBase::Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const {
if (!Impl)
ythrow yexception() << "!Regular expression is not compiled";
return Impl->Exec(str, pmatch, eflags, nmatches);
@@ -230,22 +230,22 @@ TString TRegExBase::GetRegExpr() const {
return Impl->RegExpr;
}
-TRegExMatch::TRegExMatch(const char* re, int cflags)
- : TRegExBase(re, cflags)
-{
-}
+TRegExMatch::TRegExMatch(const char* re, int cflags)
+ : TRegExBase(re, cflags)
+{
+}
TRegExMatch::TRegExMatch(const TString& re, int cflags)
: TRegExBase(re, cflags)
{
}
-bool TRegExMatch::Match(const char* str) const {
+bool TRegExMatch::Match(const char* str) const {
return Exec(str, nullptr, 0, 0) == 0;
}
-TRegExSubst::TRegExSubst(const char* re, int cflags)
- : TRegExBase(re, cflags)
+TRegExSubst::TRegExSubst(const char* re, int cflags)
+ : TRegExBase(re, cflags)
, Replacement(nullptr)
{
memset(Brfs, 0, sizeof(TBackReferences) * NMATCHES);
@@ -256,7 +256,7 @@ TString TRegExSubst::Replace(const char* str, int eflags) {
if (BrfsCount) {
if (Exec(str, PMatch, eflags) == 0) {
int i;
- for (i = 0; i < BrfsCount; i++) {
+ for (i = 0; i < BrfsCount; i++) {
s += TString(Replacement, Brfs[i].Beg, Brfs[i].End - Brfs[i].Beg);
if (Brfs[i].Refer >= 0 && Brfs[i].Refer < NMATCHES)
s += TString(str, PMatch[Brfs[i].Refer].rm_so, int(PMatch[Brfs[i].Refer].rm_eo - PMatch[Brfs[i].Refer].rm_so));
@@ -280,15 +280,15 @@ TString TRegExSubst::Replace(const char* str, int eflags) {
// {beg = 22, end = 25, Refer = -1} => "ccc"
// {beg = 0, end = 0, Refer = 0}
//***
-int TRegExSubst::ParseReplacement(const char* repl) {
+int TRegExSubst::ParseReplacement(const char* repl) {
Replacement = repl;
if (!Replacement || *Replacement == 0)
return 0;
- char* pos = (char*)Replacement;
+ char* pos = (char*)Replacement;
char* pos1 = nullptr;
char* pos2 = nullptr;
int i = 0;
- while (pos && *pos && i < NMATCHES) {
+ while (pos && *pos && i < NMATCHES) {
pos1 = strchr(pos, '$');
Brfs[i].Refer = -1;
pos2 = pos1;
@@ -296,11 +296,11 @@ int TRegExSubst::ParseReplacement(const char* repl) {
pos2 = pos1 + 1;
while (IsAsciiDigit(*pos2))
pos2++;
- if (pos2 > pos1 + 1) {
+ if (pos2 > pos1 + 1) {
Brfs[i].Refer = atol(TString(Replacement, pos1 + 1 - Replacement, pos2 - (pos1 + 1)).data());
} else {
pos1++;
- if (*pos2 == '$')
+ if (*pos2 == '$')
pos2++;
Brfs[i].Refer = -1;
}
diff --git a/library/cpp/regex/pcre/regexp.h b/library/cpp/regex/pcre/regexp.h
index c74d20b3ad..bc610bd2f3 100644
--- a/library/cpp/regex/pcre/regexp.h
+++ b/library/cpp/regex/pcre/regexp.h
@@ -1,16 +1,16 @@
#pragma once
#include <sys/types.h>
-
+
#include <util/system/defaults.h>
#include <util/generic/string.h>
#include <util/generic/yexception.h>
-
-#include <contrib/libs/pcre/pcre.h>
-#include <contrib/libs/pcre/pcreposix.h>
-
+
+#include <contrib/libs/pcre/pcre.h>
+#include <contrib/libs/pcre/pcreposix.h>
+
//THIS CODE LOOKS LIKE A TRASH, BUT WORKS.
-
+
#define NMATCHES 100
#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences
@@ -19,38 +19,38 @@ class TRegExBaseImpl;
class TRegExBase {
protected:
TSimpleIntrusivePtr<TRegExBaseImpl> Impl;
-
+
public:
TRegExBase(const char* regExpr = nullptr, int cflags = REG_EXTENDED);
TRegExBase(const TString& regExpr, int cflags = REG_EXTENDED);
virtual ~TRegExBase();
- int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const;
+ int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const;
void Compile(const TString& regExpr, int cflags = REG_EXTENDED);
bool IsCompiled() const;
int GetCompileOptions() const;
TString GetRegExpr() const;
};
-class TRegExMatch: public TRegExBase {
+class TRegExMatch: public TRegExBase {
public:
TRegExMatch(const char* regExpr = nullptr, int cflags = REG_NOSUB | REG_EXTENDED);
TRegExMatch(const TString& regExpr, int cflags = REG_NOSUB | REG_EXTENDED);
- bool Match(const char* str) const;
+ bool Match(const char* str) const;
};
-struct TBackReferences {
+struct TBackReferences {
int Beg;
int End;
int Refer;
};
-class TRegExSubst: public TRegExBase {
+class TRegExSubst: public TRegExBase {
private:
const char* Replacement;
- regmatch_t PMatch[NMATCHES];
+ regmatch_t PMatch[NMATCHES];
TBackReferences Brfs[NMATCHES];
int BrfsCount;
@@ -59,5 +59,5 @@ public:
TRegExSubst(const char* regExpr = nullptr, int cflags = REG_EXTENDED);
TString Replace(const char* str, int eflags = 0);
- int ParseReplacement(const char* replacement);
+ int ParseReplacement(const char* replacement);
};
diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp
index 6ace430a16..5184e801cc 100644
--- a/library/cpp/regex/pcre/regexp_ut.cpp
+++ b/library/cpp/regex/pcre/regexp_ut.cpp
@@ -11,17 +11,17 @@ struct TRegTest {
int CompileOptions;
int RunOptions;
- TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0)
- : Regexp(re)
- , Data(text)
- , Result(res)
- , CompileOptions(copts)
- , RunOptions(ropts)
- {
- }
+ TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0)
+ : Regexp(re)
+ , Data(text)
+ , Result(res)
+ , CompileOptions(copts)
+ , RunOptions(ropts)
+ {
+ }
};
-struct TSubstTest: public TRegTest {
+struct TSubstTest: public TRegTest {
const char* Replacement;
const char* Replacement2;
@@ -29,15 +29,15 @@ struct TSubstTest: public TRegTest {
: TRegTest(re, text, res, REG_EXTENDED, REGEXP_GLOBAL)
, Replacement(repl)
, Replacement2(repl2)
- {
- }
+ {
+ }
};
-const TRegTest REGTEST_DATA[] = {
- TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0),
+const TRegTest REGTEST_DATA[] = {
+ TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0),
TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL),
TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL),
- TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)};
+ TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)};
const TSubstTest SUBSTTEST_DATA[] = {
TSubstTest("([a-zA-Z]*[0-9]+) (_[a-z]+)", "Xxx123 534 ___124 bsd _A ZXC _L 141 _sd dsfg QWE123 _bbb", "141 XXX/_sd", "$1 XXX/$2", "$2$2$2 YY$1Y/$2")};
@@ -48,7 +48,7 @@ private:
private:
UNIT_TEST_SUITE(TRegexpTest);
- UNIT_TEST(TestRe)
+ UNIT_TEST(TestRe)
UNIT_TEST(TestSubst)
UNIT_TEST(TestOffEndOfBuffer);
UNIT_TEST_SUITE_END();
diff --git a/library/cpp/regex/pcre/ya.make b/library/cpp/regex/pcre/ya.make
index 4971c6f35a..d34911f103 100644
--- a/library/cpp/regex/pcre/ya.make
+++ b/library/cpp/regex/pcre/ya.make
@@ -1,4 +1,4 @@
-LIBRARY()
+LIBRARY()
OWNER(g:util)
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp
index 8645d6cd4f..2e507e4b67 100644
--- a/library/cpp/regex/pire/extraencodings.cpp
+++ b/library/cpp/regex/pire/extraencodings.cpp
@@ -8,73 +8,73 @@
#include "pire.h"
namespace NPire {
- namespace {
- // A one-byte encoding which is capable of transforming upper half of the character
- // table to/from Unicode chars.
- class TOneByte: public TEncoding {
- public:
- TOneByte(ECharset doccode) {
- Table_ = CodePageByCharset(doccode)->unicode;
- for (size_t i = 0; i < 256; ++i)
- Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
- }
+ namespace {
+ // A one-byte encoding which is capable of transforming upper half of the character
+ // table to/from Unicode chars.
+ class TOneByte: public TEncoding {
+ public:
+ TOneByte(ECharset doccode) {
+ Table_ = CodePageByCharset(doccode)->unicode;
+ for (size_t i = 0; i < 256; ++i)
+ Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
+ }
- wchar32 FromLocal(const char*& begin, const char* end) const override {
- if (begin != end)
- return Table_[static_cast<unsigned char>(*begin++)];
- else
- ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
- }
+ wchar32 FromLocal(const char*& begin, const char* end) const override {
+ if (begin != end)
+ return Table_[static_cast<unsigned char>(*begin++)];
+ else
+ ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
+ }
- TString ToLocal(wchar32 c) const override {
- THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
- if (i != Reverse_.end())
- return TString(1, i->second);
- else
- return TString();
- }
+ TString ToLocal(wchar32 c) const override {
+ THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
+ if (i != Reverse_.end())
+ return TString(1, i->second);
+ else
+ return TString();
+ }
- void AppendDot(TFsm& fsm) const override {
- fsm.AppendDot();
- }
+ void AppendDot(TFsm& fsm) const override {
+ fsm.AppendDot();
+ }
- private:
- const wchar32* Table_;
- THashMap<wchar32, char> Reverse_;
- };
+ private:
+ const wchar32* Table_;
+ THashMap<wchar32, char> Reverse_;
+ };
- template <unsigned N>
- struct TOneByteHelper: public TOneByte {
- inline TOneByteHelper()
- : TOneByte((ECharset)N)
- {
- }
- };
- }
+ template <unsigned N>
+ struct TOneByteHelper: public TOneByte {
+ inline TOneByteHelper()
+ : TOneByte((ECharset)N)
+ {
+ }
+ };
+ }
- namespace NEncodings {
- const NPire::TEncoding& Koi8r() {
- return *Singleton<TOneByteHelper<CODES_KOI8>>();
- }
+ namespace NEncodings {
+ const NPire::TEncoding& Koi8r() {
+ return *Singleton<TOneByteHelper<CODES_KOI8>>();
+ }
- const NPire::TEncoding& Cp1251() {
- return *Singleton<TOneByteHelper<CODES_WIN>>();
+ const NPire::TEncoding& Cp1251() {
+ return *Singleton<TOneByteHelper<CODES_WIN>>();
}
- const NPire::TEncoding& Get(ECharset encoding) {
- switch (encoding) {
- case CODES_WIN:
- return Cp1251();
- case CODES_KOI8:
- return Koi8r();
- case CODES_ASCII:
- return NPire::NEncodings::Latin1();
- case CODES_UTF8:
- return NPire::NEncodings::Utf8();
- default:
- ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
- }
- }
+ const NPire::TEncoding& Get(ECharset encoding) {
+ switch (encoding) {
+ case CODES_WIN:
+ return Cp1251();
+ case CODES_KOI8:
+ return Koi8r();
+ case CODES_ASCII:
+ return NPire::NEncodings::Latin1();
+ case CODES_UTF8:
+ return NPire::NEncodings::Utf8();
+ default:
+ ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
+ }
+ }
}
diff --git a/library/cpp/regex/pire/inline/ya.make b/library/cpp/regex/pire/inline/ya.make
index 5a83468746..d4850f7b45 100644
--- a/library/cpp/regex/pire/inline/ya.make
+++ b/library/cpp/regex/pire/inline/ya.make
@@ -6,8 +6,8 @@ OWNER(
g:util
davenger
)
-
-PEERDIR(
+
+PEERDIR(
ADDINCL library/cpp/regex/pire
)
diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp
index 498a8abc25..f788beb85f 100644
--- a/library/cpp/regex/pire/pcre2pire.cpp
+++ b/library/cpp/regex/pire/pcre2pire.cpp
@@ -2,7 +2,7 @@
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
-TString Pcre2Pire(const TString& src) {
+TString Pcre2Pire(const TString& src) {
TVector<char> result;
result.reserve(src.size() + 1);
diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h
index b4d3b34205..46e45b9193 100644
--- a/library/cpp/regex/pire/pcre2pire.h
+++ b/library/cpp/regex/pire/pcre2pire.h
@@ -1,5 +1,5 @@
-#pragma once
-
+#pragma once
+
// Author: smikler@yandex-team.ru
#include <util/generic/string.h>
diff --git a/library/cpp/regex/pire/pire.h b/library/cpp/regex/pire/pire.h
index 148301f39d..286fecd693 100644
--- a/library/cpp/regex/pire/pire.h
+++ b/library/cpp/regex/pire/pire.h
@@ -41,9 +41,9 @@ namespace NPire {
using TError = Pire::Error;
// Helper functions
- using Pire::LongestPrefix;
- using Pire::LongestSuffix;
- using Pire::Matches;
+ using Pire::LongestPrefix;
+ using Pire::LongestSuffix;
+ using Pire::Matches;
using Pire::MmappedScanner;
using Pire::Run;
using Pire::Runner;
@@ -55,8 +55,8 @@ namespace NPire {
using namespace Pire::Consts;
namespace NFeatures {
- using Pire::Features::AndNotSupport;
- using Pire::Features::Capture;
+ using Pire::Features::AndNotSupport;
+ using Pire::Features::Capture;
using Pire::Features::CaseInsensitive;
using Pire::Features::GlueSimilarGlyphs;
}
@@ -65,8 +65,8 @@ namespace NPire {
using Pire::Encodings::Latin1;
using Pire::Encodings::Utf8;
- const NPire::TEncoding& Koi8r();
- const NPire::TEncoding& Cp1251();
+ const NPire::TEncoding& Koi8r();
+ const NPire::TEncoding& Cp1251();
const NPire::TEncoding& Get(ECharset encoding);
}
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h
index d5424e359a..94bba4064b 100644
--- a/library/cpp/regex/pire/regexp.h
+++ b/library/cpp/regex/pire/regexp.h
@@ -1,7 +1,7 @@
#pragma once
-
-#include "pire.h"
-
+
+#include "pire.h"
+
#include <library/cpp/charset/doccodes.h>
#include <library/cpp/charset/recyr.hh>
#include <util/generic/maybe.h>
@@ -10,26 +10,26 @@
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
-namespace NRegExp {
+namespace NRegExp {
struct TMatcher;
-
+
struct TFsmBase {
struct TOptions {
inline TOptions& SetCaseInsensitive(bool v) noexcept {
CaseInsensitive = v;
return *this;
}
-
+
inline TOptions& SetSurround(bool v) noexcept {
Surround = v;
return *this;
}
-
+
inline TOptions& SetCapture(size_t pos) noexcept {
CapturePos = pos;
return *this;
- }
-
+ }
+
inline TOptions& SetCharset(ECharset charset) noexcept {
Charset = charset;
return *this;
@@ -68,64 +68,64 @@ namespace NRegExp {
if (opts.CaseInsensitive) {
lexer.AddFeature(NPire::NFeatures::CaseInsensitive());
- }
-
+ }
+
if (opts.CapturePos) {
lexer.AddFeature(NPire::NFeatures::Capture(*opts.CapturePos));
- }
-
+ }
+
if (opts.AndNotSupport) {
lexer.AddFeature(NPire::NFeatures::AndNotSupport());
}
switch (opts.Charset) {
- case CODES_UNKNOWN:
- break;
- case CODES_UTF8:
- lexer.SetEncoding(NPire::NEncodings::Utf8());
- break;
- case CODES_KOI8:
- lexer.SetEncoding(NPire::NEncodings::Koi8r());
- break;
- default:
- lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset));
- break;
+ case CODES_UNKNOWN:
+ break;
+ case CODES_UTF8:
+ lexer.SetEncoding(NPire::NEncodings::Utf8());
+ break;
+ case CODES_KOI8:
+ lexer.SetEncoding(NPire::NEncodings::Koi8r());
+ break;
+ default:
+ lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset));
+ break;
}
NPire::TFsm ret = lexer.Parse();
if (opts.Surround) {
ret.Surround();
- }
-
+ }
+
if (needDetermine) {
ret.Determine();
}
-
+
return ret;
}
};
-
+
template <class TScannerType>
class TFsmParser: public TFsmBase {
public:
typedef TScannerType TScanner;
-
+
public:
inline explicit TFsmParser(const TStringBuf& regexp,
const TOptions& opts = TOptions(), bool needDetermine = true)
: Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
{
}
-
+
inline const TScanner& GetScanner() const noexcept {
return Scanner;
}
-
+
static inline TFsmParser False() {
return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>());
}
-
+
inline explicit TFsmParser(const TScanner& compiled)
: Scanner(compiled)
{
@@ -135,12 +135,12 @@ namespace NRegExp {
private:
TScanner Scanner;
- };
-
+ };
+
class TFsm: public TFsmParser<NPire::TNonrelocScanner> {
public:
inline explicit TFsm(const TStringBuf& regexp,
- const TOptions& opts = TOptions())
+ const TOptions& opts = TOptions())
: TFsmParser<TScanner>(regexp, opts)
{
}
@@ -150,7 +150,7 @@ namespace NRegExp {
{
}
- static inline TFsm Glue(const TFsm& l, const TFsm& r) {
+ static inline TFsm Glue(const TFsm& l, const TFsm& r) {
return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner()));
}
@@ -160,23 +160,23 @@ namespace NRegExp {
}
};
- static inline TFsm operator|(const TFsm& l, const TFsm& r) {
- return TFsm::Glue(l, r);
- }
-
- struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> {
+ static inline TFsm operator|(const TFsm& l, const TFsm& r) {
+ return TFsm::Glue(l, r);
+ }
+
+ struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> {
inline explicit TCapturingFsm(const TStringBuf& regexp,
- TOptions opts = TOptions())
+ TOptions opts = TOptions())
: TFsmParser<TScanner>(regexp,
- opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) {
+ opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) {
}
-
+
inline TCapturingFsm(const TFsmParser<TScanner>& fsm)
: TFsmParser<TScanner>(fsm)
{
}
};
-
+
struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
inline explicit TSlowCapturingFsm(const TStringBuf& regexp,
TOptions opts = TOptions())
@@ -194,43 +194,43 @@ namespace NRegExp {
class TMatcherBase {
public:
typedef typename TFsm::TScanner::State TState;
-
+
public:
inline explicit TMatcherBase(const TFsm& fsm)
: Fsm(fsm)
{
Fsm.GetScanner().Initialize(State);
}
-
+
inline bool Final() const noexcept {
return GetScanner().Final(GetState());
}
-
+
protected:
inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept {
if (addBegin) {
NPire::Step(GetScanner(), State, NPire::BeginMark);
- }
+ }
NPire::Run(GetScanner(), State, data, data + len);
if (addEnd) {
NPire::Step(GetScanner(), State, NPire::EndMark);
}
}
-
+
inline const typename TFsm::TScanner& GetScanner() const noexcept {
return Fsm.GetScanner();
}
-
+
inline const TState& GetState() const noexcept {
return State;
}
-
+
private:
const TFsm& Fsm;
TState State;
- };
+ };
- struct TMatcher : TMatcherBase<TFsm> {
+ struct TMatcher : TMatcherBase<TFsm> {
inline explicit TMatcher(const TFsm& fsm)
: TMatcherBase<TFsm>(fsm)
{
@@ -334,4 +334,4 @@ namespace NRegExp {
return *this;
}
};
-}
+}
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index 294bc65fa7..e7206de9ad 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -1,21 +1,21 @@
#include <library/cpp/testing/unittest/registar.h>
-
+
#include <library/cpp/regex/pire/regexp.h>
#include <library/cpp/regex/pire/pcre2pire.h>
-
+
Y_UNIT_TEST_SUITE(TRegExp) {
- using namespace NRegExp;
-
+ using namespace NRegExp;
+
Y_UNIT_TEST(False) {
- UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final());
+ UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final());
UNIT_ASSERT(!TMatcher(TFsm::False()).Match(TStringBuf{}).Final());
- }
-
+ }
+
Y_UNIT_TEST(Surround) {
- UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
- UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final());
- }
-
+ UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final());
+ }
+
Y_UNIT_TEST(Boundaries) {
UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
@@ -29,13 +29,13 @@ Y_UNIT_TEST_SUITE(TRegExp) {
.Match(TStringBuf("q"), false, false)
.Match(TStringBuf("w"), false, false)
.Match(TStringBuf("b"), false, true)
- .Final());
+ .Final());
}
Y_UNIT_TEST(Case) {
- UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final());
- UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final());
- }
+ UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final());
+ }
Y_UNIT_TEST(UnicodeCase) {
UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final());
@@ -114,7 +114,7 @@ Y_UNIT_TEST_SUITE(TRegExp) {
Y_UNIT_TEST(Capture3) {
TCapturingFsm fsm("http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",
- TFsm::TOptions().SetCapture(2));
+ TFsm::TOptions().SetCapture(2));
TSearcher searcher(fsm);
searcher.Search("http://vkontakte.ru/id100500");
@@ -124,7 +124,7 @@ Y_UNIT_TEST_SUITE(TRegExp) {
Y_UNIT_TEST(Capture4) {
TCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",
- TFsm::TOptions().SetCharset(CODES_UTF8));
+ TFsm::TOptions().SetCharset(CODES_UTF8));
TSearcher searcher(fsm);
searcher.Search(" Здравствуйте, Уважаемый (-ая)! ");
@@ -315,4 +315,4 @@ Y_UNIT_TEST_SUITE(TRegExp) {
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), "");
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))");
}
-}
+}
diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make
index 0277d88f8c..8776695f40 100644
--- a/library/cpp/regex/pire/ut/ya.make
+++ b/library/cpp/regex/pire/ut/ya.make
@@ -6,10 +6,10 @@ OWNER(
g:util
davenger
)
-
+
SET(PIRETESTSDIR contrib/libs/pire/ut)
-CFLAGS(-DPIRE_NO_CONFIG)
+CFLAGS(-DPIRE_NO_CONFIG)
PEERDIR(
library/cpp/regex/pire
@@ -18,11 +18,11 @@ PEERDIR(
SRCDIR(
${PIRETESTSDIR}
)
-
-ADDINCL(
- contrib/libs/pire/pire
- contrib/libs/pire/ut
-)
+
+ADDINCL(
+ contrib/libs/pire/pire
+ contrib/libs/pire/ut
+)
SRCS(
pire_ut.cpp
diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make
index 7d14c3b043..c857e6d18b 100644
--- a/library/cpp/regex/pire/ya.make
+++ b/library/cpp/regex/pire/ya.make
@@ -6,7 +6,7 @@ OWNER(
davenger
pg
)
-
+
CFLAGS(-DPIRE_NO_CONFIG)
SRCDIR(contrib/libs/pire/pire)
diff --git a/library/cpp/regex/ya.make b/library/cpp/regex/ya.make
index 71fc9a6a43..15b0d1aeda 100644
--- a/library/cpp/regex/ya.make
+++ b/library/cpp/regex/ya.make
@@ -1,14 +1,14 @@
-RECURSE(
- glob
+RECURSE(
+ glob
hyperscan
hyperscan/ut
- libregex
- pcre
- pire
- pire/inline
- pire/ut
+ libregex
+ pcre
+ pire
+ pire/inline
+ pire/ut
pire2hyperscan
pire2hyperscan/ut
regexp_classifier
regexp_classifier/ut
-)
+)