diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/regex | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 4 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 4 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 18 | ||||
-rw-r--r-- | library/cpp/regex/pcre/regexp.cpp | 122 | ||||
-rw-r--r-- | library/cpp/regex/pcre/regexp.h | 28 | ||||
-rw-r--r-- | library/cpp/regex/pcre/regexp_ut.cpp | 30 | ||||
-rw-r--r-- | library/cpp/regex/pcre/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/regex/pire/extraencodings.cpp | 116 | ||||
-rw-r--r-- | library/cpp/regex/pire/inline/ya.make | 4 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.cpp | 2 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.h | 4 | ||||
-rw-r--r-- | library/cpp/regex/pire/pire.h | 14 | ||||
-rw-r--r-- | library/cpp/regex/pire/regexp.h | 110 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 36 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/ya.make | 14 | ||||
-rw-r--r-- | library/cpp/regex/pire/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/regex/ya.make | 16 |
17 files changed, 263 insertions, 263 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index ba321f9c29..ba025c72b1 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -255,7 +255,7 @@ namespace NHyperscan { hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase( db.Get(), &databaseBytes, - &databaseLength); + &databaseLength); TSerializedDatabase serialization(databaseBytes); if (status != HS_SUCCESS) { ythrow yexception() << "Failed to serialize hyperscan database"; @@ -268,7 +268,7 @@ namespace NHyperscan { hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase( serialization.begin(), serialization.size(), - &rawDb); + &rawDb); TDatabase db(rawDb); if (status != HS_SUCCESS) { if (status == HS_DB_PLATFORM_ERROR) { diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index 1c8f404389..608bc87300 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -144,7 +144,7 @@ namespace NHyperscan { const TDatabase& db, const TScratch& scratch, const TStringBuf& text, - TCallback& callback // applied to index of matched regex + TCallback& callback // applied to index of matched regex ) { NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); } @@ -152,7 +152,7 @@ namespace NHyperscan { bool Matches( const TDatabase& db, const TScratch& scratch, - const TStringBuf& text); + const TStringBuf& text); TString Serialize(const TDatabase& db); diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 9caa53f2e7..9410c8d6ba 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -23,14 +23,14 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { db, scratch, "abc", - callback); + callback); UNIT_ASSERT_EQUAL(foundId, 0); } Y_UNIT_TEST(Matches) { NHyperscan::TDatabase db = NHyperscan::Compile( "a.c", - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc")); UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo")); @@ -49,7 +49,7 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { { 42, 241, - }); + }); NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo")); @@ -65,7 +65,7 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { db, scratch, "fooBaR", - callback); + callback); UNIT_ASSERT_EQUAL(foundIds.size(), 2); UNIT_ASSERT(foundIds.contains(42)); UNIT_ASSERT(foundIds.contains(241)); @@ -82,13 +82,13 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { }, { 0, - }); + }); } Y_UNIT_TEST(Serialize) { NHyperscan::TDatabase db = NHyperscan::Compile( "foo", - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); TString serialization = Serialize(db); db.Reset(); TDatabase db2 = Deserialize(serialization); @@ -101,10 +101,10 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { Y_UNIT_TEST(GrowScratch) { NHyperscan::TDatabase db1 = NHyperscan::Compile( "foo", - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); NHyperscan::TDatabase db2 = NHyperscan::Compile( "longer\\w\\w\\wpattern", - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8); + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8); NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1); NHyperscan::GrowScratch(scratch, db2); UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo")); @@ -114,7 +114,7 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { Y_UNIT_TEST(CloneScratch) { NHyperscan::TDatabase db = NHyperscan::Compile( "foo", - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db); NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1); scratch1.Reset(); diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp index 575c09cee4..e7108ae5e9 100644 --- a/library/cpp/regex/pcre/regexp.cpp +++ b/library/cpp/regex/pcre/regexp.cpp @@ -1,21 +1,21 @@ -#include "regexp.h" - +#include "regexp.h" + #include <util/generic/string.h> #include <util/string/ascii.h> #include <util/system/defaults.h> - + #include <cstdlib> #include <util/generic/noncopyable.h> - + class TGlobalImpl : TNonCopyable { private: - const char* Str; - regmatch_t* Pmatch; + const char* Str; + regmatch_t* Pmatch; int Options; int StrLen; int StartOffset, NotEmptyOpts, MatchPos; int MatchBuf[NMATCHES * 3]; - pcre* PregComp; + pcre* PregComp; enum StateCode { TGI_EXIT, @@ -26,25 +26,25 @@ private: private: void CopyResults(int count) { for (int i = 0; i < count; i++) { - Pmatch[MatchPos].rm_so = MatchBuf[2 * i]; - Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1]; + Pmatch[MatchPos].rm_so = MatchBuf[2 * i]; + Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1]; MatchPos++; if (MatchPos >= NMATCHES) { ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer."; } - } + } } int DoPcreExec(int opts) { int rc = pcre_exec( - PregComp, /* the compiled pattern */ - nullptr, /* no extra data - we didn't study the pattern */ - Str, /* the subject string */ - StrLen, /* the length of the subject */ - StartOffset, /* start at offset 0 in the subject */ - opts, /* default options */ - MatchBuf, /* output vector for substring information */ - NMATCHES); /* number of elements in the output vector */ + PregComp, /* the compiled pattern */ + nullptr, /* no extra data - we didn't study the pattern */ + Str, /* the subject string */ + StrLen, /* the length of the subject */ + StartOffset, /* start at offset 0 in the subject */ + opts, /* default options */ + MatchBuf, /* output vector for substring information */ + NMATCHES); /* number of elements in the output vector */ if (rc == 0) { ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer."; @@ -55,7 +55,7 @@ private: StateCode CheckEmptyCase() { if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string - if (MatchBuf[0] == StrLen) { // at the end + if (MatchBuf[0] == StrLen) { // at the end return TGI_EXIT; } NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string @@ -65,25 +65,25 @@ private: StateCode CheckNoMatch(int rc) { if (rc == PCRE_ERROR_NOMATCH) { - if (NotEmptyOpts == 0) { + if (NotEmptyOpts == 0) { return TGI_EXIT; } - - MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset" + + MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset" return TGI_CONTINUE; } return TGI_WALKTHROUGH; } public: - TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re) - : Str(st) - , Pmatch(&pma) - , Options(opts) - , StartOffset(0) - , NotEmptyOpts(0) - , MatchPos(0) - , PregComp(pc_re) + TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re) + : Str(st) + , Pmatch(&pma) + , Options(opts) + , StartOffset(0) + , NotEmptyOpts(0) + , MatchPos(0) + , PregComp(pc_re) { memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES); StrLen = strlen(Str); @@ -114,29 +114,29 @@ public: return 0; case TGI_WALKTHROUGH: default: - break; - } + break; + } if (rc < 0) { return rc; } CopyResults(rc); - } while (true); + } while (true); - return 0; + return 0; } - + private: }; -class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> { +class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> { friend class TRegExBase; protected: - int CompileOptions; + int CompileOptions; TString RegExpr; - regex_t Preg; + regex_t Preg; public: TRegExBaseImpl() @@ -159,7 +159,7 @@ public: } } - int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const { + int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const { if (!RegExpr) { ythrow yexception() << "Regular expression is not compiled"; } @@ -170,12 +170,12 @@ public: return regexec(&Preg, str, nmatches, pmatch, eflags); } else { int options = 0; - if ((eflags & REG_NOTBOL) != 0) - options |= PCRE_NOTBOL; - if ((eflags & REG_NOTEOL) != 0) - options |= PCRE_NOTEOL; + if ((eflags & REG_NOTBOL) != 0) + options |= PCRE_NOTBOL; + if ((eflags & REG_NOTEOL) != 0) + options |= PCRE_NOTEOL; - return TGlobalImpl(str, pmatch[0], options, (pcre*)Preg.re_pcre).ExecGlobal(); + return TGlobalImpl(str, pmatch[0], options, (pcre*)Preg.re_pcre).ExecGlobal(); } } @@ -195,12 +195,12 @@ bool TRegExBase::IsCompiled() const { return Impl && Impl->IsCompiled(); } -TRegExBase::TRegExBase(const char* re, int cflags) { +TRegExBase::TRegExBase(const char* re, int cflags) { if (re) { Compile(re, cflags); } } - + TRegExBase::TRegExBase(const TString& re, int cflags) { Compile(re, cflags); } @@ -211,8 +211,8 @@ TRegExBase::~TRegExBase() { void TRegExBase::Compile(const TString& re, int cflags) { Impl = new TRegExBaseImpl(re, cflags); } - -int TRegExBase::Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const { + +int TRegExBase::Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches) const { if (!Impl) ythrow yexception() << "!Regular expression is not compiled"; return Impl->Exec(str, pmatch, eflags, nmatches); @@ -230,22 +230,22 @@ TString TRegExBase::GetRegExpr() const { return Impl->RegExpr; } -TRegExMatch::TRegExMatch(const char* re, int cflags) - : TRegExBase(re, cflags) -{ -} +TRegExMatch::TRegExMatch(const char* re, int cflags) + : TRegExBase(re, cflags) +{ +} TRegExMatch::TRegExMatch(const TString& re, int cflags) : TRegExBase(re, cflags) { } -bool TRegExMatch::Match(const char* str) const { +bool TRegExMatch::Match(const char* str) const { return Exec(str, nullptr, 0, 0) == 0; } -TRegExSubst::TRegExSubst(const char* re, int cflags) - : TRegExBase(re, cflags) +TRegExSubst::TRegExSubst(const char* re, int cflags) + : TRegExBase(re, cflags) , Replacement(nullptr) { memset(Brfs, 0, sizeof(TBackReferences) * NMATCHES); @@ -256,7 +256,7 @@ TString TRegExSubst::Replace(const char* str, int eflags) { if (BrfsCount) { if (Exec(str, PMatch, eflags) == 0) { int i; - for (i = 0; i < BrfsCount; i++) { + for (i = 0; i < BrfsCount; i++) { s += TString(Replacement, Brfs[i].Beg, Brfs[i].End - Brfs[i].Beg); if (Brfs[i].Refer >= 0 && Brfs[i].Refer < NMATCHES) s += TString(str, PMatch[Brfs[i].Refer].rm_so, int(PMatch[Brfs[i].Refer].rm_eo - PMatch[Brfs[i].Refer].rm_so)); @@ -280,15 +280,15 @@ TString TRegExSubst::Replace(const char* str, int eflags) { // {beg = 22, end = 25, Refer = -1} => "ccc" // {beg = 0, end = 0, Refer = 0} //*** -int TRegExSubst::ParseReplacement(const char* repl) { +int TRegExSubst::ParseReplacement(const char* repl) { Replacement = repl; if (!Replacement || *Replacement == 0) return 0; - char* pos = (char*)Replacement; + char* pos = (char*)Replacement; char* pos1 = nullptr; char* pos2 = nullptr; int i = 0; - while (pos && *pos && i < NMATCHES) { + while (pos && *pos && i < NMATCHES) { pos1 = strchr(pos, '$'); Brfs[i].Refer = -1; pos2 = pos1; @@ -296,11 +296,11 @@ int TRegExSubst::ParseReplacement(const char* repl) { pos2 = pos1 + 1; while (IsAsciiDigit(*pos2)) pos2++; - if (pos2 > pos1 + 1) { + if (pos2 > pos1 + 1) { Brfs[i].Refer = atol(TString(Replacement, pos1 + 1 - Replacement, pos2 - (pos1 + 1)).data()); } else { pos1++; - if (*pos2 == '$') + if (*pos2 == '$') pos2++; Brfs[i].Refer = -1; } diff --git a/library/cpp/regex/pcre/regexp.h b/library/cpp/regex/pcre/regexp.h index bc610bd2f3..c74d20b3ad 100644 --- a/library/cpp/regex/pcre/regexp.h +++ b/library/cpp/regex/pcre/regexp.h @@ -1,16 +1,16 @@ #pragma once #include <sys/types.h> - + #include <util/system/defaults.h> #include <util/generic/string.h> #include <util/generic/yexception.h> - -#include <contrib/libs/pcre/pcre.h> -#include <contrib/libs/pcre/pcreposix.h> - + +#include <contrib/libs/pcre/pcre.h> +#include <contrib/libs/pcre/pcreposix.h> + //THIS CODE LOOKS LIKE A TRASH, BUT WORKS. - + #define NMATCHES 100 #define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences @@ -19,38 +19,38 @@ class TRegExBaseImpl; class TRegExBase { protected: TSimpleIntrusivePtr<TRegExBaseImpl> Impl; - + public: TRegExBase(const char* regExpr = nullptr, int cflags = REG_EXTENDED); TRegExBase(const TString& regExpr, int cflags = REG_EXTENDED); virtual ~TRegExBase(); - int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const; + int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const; void Compile(const TString& regExpr, int cflags = REG_EXTENDED); bool IsCompiled() const; int GetCompileOptions() const; TString GetRegExpr() const; }; -class TRegExMatch: public TRegExBase { +class TRegExMatch: public TRegExBase { public: TRegExMatch(const char* regExpr = nullptr, int cflags = REG_NOSUB | REG_EXTENDED); TRegExMatch(const TString& regExpr, int cflags = REG_NOSUB | REG_EXTENDED); - bool Match(const char* str) const; + bool Match(const char* str) const; }; -struct TBackReferences { +struct TBackReferences { int Beg; int End; int Refer; }; -class TRegExSubst: public TRegExBase { +class TRegExSubst: public TRegExBase { private: const char* Replacement; - regmatch_t PMatch[NMATCHES]; + regmatch_t PMatch[NMATCHES]; TBackReferences Brfs[NMATCHES]; int BrfsCount; @@ -59,5 +59,5 @@ public: TRegExSubst(const char* regExpr = nullptr, int cflags = REG_EXTENDED); TString Replace(const char* str, int eflags = 0); - int ParseReplacement(const char* replacement); + int ParseReplacement(const char* replacement); }; diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp index 5184e801cc..6ace430a16 100644 --- a/library/cpp/regex/pcre/regexp_ut.cpp +++ b/library/cpp/regex/pcre/regexp_ut.cpp @@ -11,17 +11,17 @@ struct TRegTest { int CompileOptions; int RunOptions; - TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0) - : Regexp(re) - , Data(text) - , Result(res) - , CompileOptions(copts) - , RunOptions(ropts) - { - } + TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0) + : Regexp(re) + , Data(text) + , Result(res) + , CompileOptions(copts) + , RunOptions(ropts) + { + } }; -struct TSubstTest: public TRegTest { +struct TSubstTest: public TRegTest { const char* Replacement; const char* Replacement2; @@ -29,15 +29,15 @@ struct TSubstTest: public TRegTest { : TRegTest(re, text, res, REG_EXTENDED, REGEXP_GLOBAL) , Replacement(repl) , Replacement2(repl2) - { - } + { + } }; -const TRegTest REGTEST_DATA[] = { - TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0), +const TRegTest REGTEST_DATA[] = { + TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0), TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL), TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL), - TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)}; + TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)}; const TSubstTest SUBSTTEST_DATA[] = { TSubstTest("([a-zA-Z]*[0-9]+) (_[a-z]+)", "Xxx123 534 ___124 bsd _A ZXC _L 141 _sd dsfg QWE123 _bbb", "141 XXX/_sd", "$1 XXX/$2", "$2$2$2 YY$1Y/$2")}; @@ -48,7 +48,7 @@ private: private: UNIT_TEST_SUITE(TRegexpTest); - UNIT_TEST(TestRe) + UNIT_TEST(TestRe) UNIT_TEST(TestSubst) UNIT_TEST(TestOffEndOfBuffer); UNIT_TEST_SUITE_END(); diff --git a/library/cpp/regex/pcre/ya.make b/library/cpp/regex/pcre/ya.make index d34911f103..4971c6f35a 100644 --- a/library/cpp/regex/pcre/ya.make +++ b/library/cpp/regex/pcre/ya.make @@ -1,4 +1,4 @@ -LIBRARY() +LIBRARY() OWNER(g:util) diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp index 2e507e4b67..8645d6cd4f 100644 --- a/library/cpp/regex/pire/extraencodings.cpp +++ b/library/cpp/regex/pire/extraencodings.cpp @@ -8,73 +8,73 @@ #include "pire.h" namespace NPire { - namespace { - // A one-byte encoding which is capable of transforming upper half of the character - // table to/from Unicode chars. - class TOneByte: public TEncoding { - public: - TOneByte(ECharset doccode) { - Table_ = CodePageByCharset(doccode)->unicode; - for (size_t i = 0; i < 256; ++i) - Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i))); - } + namespace { + // A one-byte encoding which is capable of transforming upper half of the character + // table to/from Unicode chars. + class TOneByte: public TEncoding { + public: + TOneByte(ECharset doccode) { + Table_ = CodePageByCharset(doccode)->unicode; + for (size_t i = 0; i < 256; ++i) + Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i))); + } - wchar32 FromLocal(const char*& begin, const char* end) const override { - if (begin != end) - return Table_[static_cast<unsigned char>(*begin++)]; - else - ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()"; - } + wchar32 FromLocal(const char*& begin, const char* end) const override { + if (begin != end) + return Table_[static_cast<unsigned char>(*begin++)]; + else + ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()"; + } - TString ToLocal(wchar32 c) const override { - THashMap<wchar32, char>::const_iterator i = Reverse_.find(c); - if (i != Reverse_.end()) - return TString(1, i->second); - else - return TString(); - } + TString ToLocal(wchar32 c) const override { + THashMap<wchar32, char>::const_iterator i = Reverse_.find(c); + if (i != Reverse_.end()) + return TString(1, i->second); + else + return TString(); + } - void AppendDot(TFsm& fsm) const override { - fsm.AppendDot(); - } + void AppendDot(TFsm& fsm) const override { + fsm.AppendDot(); + } - private: - const wchar32* Table_; - THashMap<wchar32, char> Reverse_; - }; + private: + const wchar32* Table_; + THashMap<wchar32, char> Reverse_; + }; - template <unsigned N> - struct TOneByteHelper: public TOneByte { - inline TOneByteHelper() - : TOneByte((ECharset)N) - { - } - }; - } + template <unsigned N> + struct TOneByteHelper: public TOneByte { + inline TOneByteHelper() + : TOneByte((ECharset)N) + { + } + }; + } - namespace NEncodings { - const NPire::TEncoding& Koi8r() { - return *Singleton<TOneByteHelper<CODES_KOI8>>(); - } + namespace NEncodings { + const NPire::TEncoding& Koi8r() { + return *Singleton<TOneByteHelper<CODES_KOI8>>(); + } - const NPire::TEncoding& Cp1251() { - return *Singleton<TOneByteHelper<CODES_WIN>>(); + const NPire::TEncoding& Cp1251() { + return *Singleton<TOneByteHelper<CODES_WIN>>(); } - const NPire::TEncoding& Get(ECharset encoding) { - switch (encoding) { - case CODES_WIN: - return Cp1251(); - case CODES_KOI8: - return Koi8r(); - case CODES_ASCII: - return NPire::NEncodings::Latin1(); - case CODES_UTF8: - return NPire::NEncodings::Utf8(); - default: - ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding; - } - } + const NPire::TEncoding& Get(ECharset encoding) { + switch (encoding) { + case CODES_WIN: + return Cp1251(); + case CODES_KOI8: + return Koi8r(); + case CODES_ASCII: + return NPire::NEncodings::Latin1(); + case CODES_UTF8: + return NPire::NEncodings::Utf8(); + default: + ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding; + } + } } diff --git a/library/cpp/regex/pire/inline/ya.make b/library/cpp/regex/pire/inline/ya.make index d4850f7b45..5a83468746 100644 --- a/library/cpp/regex/pire/inline/ya.make +++ b/library/cpp/regex/pire/inline/ya.make @@ -6,8 +6,8 @@ OWNER( g:util davenger ) - -PEERDIR( + +PEERDIR( ADDINCL library/cpp/regex/pire ) diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp index f788beb85f..498a8abc25 100644 --- a/library/cpp/regex/pire/pcre2pire.cpp +++ b/library/cpp/regex/pire/pcre2pire.cpp @@ -2,7 +2,7 @@ #include <util/generic/vector.h> #include <util/generic/yexception.h> -TString Pcre2Pire(const TString& src) { +TString Pcre2Pire(const TString& src) { TVector<char> result; result.reserve(src.size() + 1); diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h index 46e45b9193..b4d3b34205 100644 --- a/library/cpp/regex/pire/pcre2pire.h +++ b/library/cpp/regex/pire/pcre2pire.h @@ -1,5 +1,5 @@ -#pragma once - +#pragma once + // Author: smikler@yandex-team.ru #include <util/generic/string.h> diff --git a/library/cpp/regex/pire/pire.h b/library/cpp/regex/pire/pire.h index 286fecd693..148301f39d 100644 --- a/library/cpp/regex/pire/pire.h +++ b/library/cpp/regex/pire/pire.h @@ -41,9 +41,9 @@ namespace NPire { using TError = Pire::Error; // Helper functions - using Pire::LongestPrefix; - using Pire::LongestSuffix; - using Pire::Matches; + using Pire::LongestPrefix; + using Pire::LongestSuffix; + using Pire::Matches; using Pire::MmappedScanner; using Pire::Run; using Pire::Runner; @@ -55,8 +55,8 @@ namespace NPire { using namespace Pire::Consts; namespace NFeatures { - using Pire::Features::AndNotSupport; - using Pire::Features::Capture; + using Pire::Features::AndNotSupport; + using Pire::Features::Capture; using Pire::Features::CaseInsensitive; using Pire::Features::GlueSimilarGlyphs; } @@ -65,8 +65,8 @@ namespace NPire { using Pire::Encodings::Latin1; using Pire::Encodings::Utf8; - const NPire::TEncoding& Koi8r(); - const NPire::TEncoding& Cp1251(); + const NPire::TEncoding& Koi8r(); + const NPire::TEncoding& Cp1251(); const NPire::TEncoding& Get(ECharset encoding); } diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index 94bba4064b..d5424e359a 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -1,7 +1,7 @@ #pragma once - -#include "pire.h" - + +#include "pire.h" + #include <library/cpp/charset/doccodes.h> #include <library/cpp/charset/recyr.hh> #include <util/generic/maybe.h> @@ -10,26 +10,26 @@ #include <util/generic/vector.h> #include <util/generic/yexception.h> -namespace NRegExp { +namespace NRegExp { struct TMatcher; - + struct TFsmBase { struct TOptions { inline TOptions& SetCaseInsensitive(bool v) noexcept { CaseInsensitive = v; return *this; } - + inline TOptions& SetSurround(bool v) noexcept { Surround = v; return *this; } - + inline TOptions& SetCapture(size_t pos) noexcept { CapturePos = pos; return *this; - } - + } + inline TOptions& SetCharset(ECharset charset) noexcept { Charset = charset; return *this; @@ -68,64 +68,64 @@ namespace NRegExp { if (opts.CaseInsensitive) { lexer.AddFeature(NPire::NFeatures::CaseInsensitive()); - } - + } + if (opts.CapturePos) { lexer.AddFeature(NPire::NFeatures::Capture(*opts.CapturePos)); - } - + } + if (opts.AndNotSupport) { lexer.AddFeature(NPire::NFeatures::AndNotSupport()); } switch (opts.Charset) { - case CODES_UNKNOWN: - break; - case CODES_UTF8: - lexer.SetEncoding(NPire::NEncodings::Utf8()); - break; - case CODES_KOI8: - lexer.SetEncoding(NPire::NEncodings::Koi8r()); - break; - default: - lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset)); - break; + case CODES_UNKNOWN: + break; + case CODES_UTF8: + lexer.SetEncoding(NPire::NEncodings::Utf8()); + break; + case CODES_KOI8: + lexer.SetEncoding(NPire::NEncodings::Koi8r()); + break; + default: + lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset)); + break; } NPire::TFsm ret = lexer.Parse(); if (opts.Surround) { ret.Surround(); - } - + } + if (needDetermine) { ret.Determine(); } - + return ret; } }; - + template <class TScannerType> class TFsmParser: public TFsmBase { public: typedef TScannerType TScanner; - + public: inline explicit TFsmParser(const TStringBuf& regexp, const TOptions& opts = TOptions(), bool needDetermine = true) : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) { } - + inline const TScanner& GetScanner() const noexcept { return Scanner; } - + static inline TFsmParser False() { return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>()); } - + inline explicit TFsmParser(const TScanner& compiled) : Scanner(compiled) { @@ -135,12 +135,12 @@ namespace NRegExp { private: TScanner Scanner; - }; - + }; + class TFsm: public TFsmParser<NPire::TNonrelocScanner> { public: inline explicit TFsm(const TStringBuf& regexp, - const TOptions& opts = TOptions()) + const TOptions& opts = TOptions()) : TFsmParser<TScanner>(regexp, opts) { } @@ -150,7 +150,7 @@ namespace NRegExp { { } - static inline TFsm Glue(const TFsm& l, const TFsm& r) { + static inline TFsm Glue(const TFsm& l, const TFsm& r) { return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner())); } @@ -160,23 +160,23 @@ namespace NRegExp { } }; - static inline TFsm operator|(const TFsm& l, const TFsm& r) { - return TFsm::Glue(l, r); - } - - struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> { + static inline TFsm operator|(const TFsm& l, const TFsm& r) { + return TFsm::Glue(l, r); + } + + struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> { inline explicit TCapturingFsm(const TStringBuf& regexp, - TOptions opts = TOptions()) + TOptions opts = TOptions()) : TFsmParser<TScanner>(regexp, - opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) { + opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) { } - + inline TCapturingFsm(const TFsmParser<TScanner>& fsm) : TFsmParser<TScanner>(fsm) { } }; - + struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { inline explicit TSlowCapturingFsm(const TStringBuf& regexp, TOptions opts = TOptions()) @@ -194,43 +194,43 @@ namespace NRegExp { class TMatcherBase { public: typedef typename TFsm::TScanner::State TState; - + public: inline explicit TMatcherBase(const TFsm& fsm) : Fsm(fsm) { Fsm.GetScanner().Initialize(State); } - + inline bool Final() const noexcept { return GetScanner().Final(GetState()); } - + protected: inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept { if (addBegin) { NPire::Step(GetScanner(), State, NPire::BeginMark); - } + } NPire::Run(GetScanner(), State, data, data + len); if (addEnd) { NPire::Step(GetScanner(), State, NPire::EndMark); } } - + inline const typename TFsm::TScanner& GetScanner() const noexcept { return Fsm.GetScanner(); } - + inline const TState& GetState() const noexcept { return State; } - + private: const TFsm& Fsm; TState State; - }; + }; - struct TMatcher : TMatcherBase<TFsm> { + struct TMatcher : TMatcherBase<TFsm> { inline explicit TMatcher(const TFsm& fsm) : TMatcherBase<TFsm>(fsm) { @@ -334,4 +334,4 @@ namespace NRegExp { return *this; } }; -} +} diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index e7206de9ad..294bc65fa7 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -1,21 +1,21 @@ #include <library/cpp/testing/unittest/registar.h> - + #include <library/cpp/regex/pire/regexp.h> #include <library/cpp/regex/pire/pcre2pire.h> - + Y_UNIT_TEST_SUITE(TRegExp) { - using namespace NRegExp; - + using namespace NRegExp; + Y_UNIT_TEST(False) { - UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final()); + UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final()); UNIT_ASSERT(!TMatcher(TFsm::False()).Match(TStringBuf{}).Final()); - } - + } + Y_UNIT_TEST(Surround) { - UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); - UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final()); - } - + UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); + UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final()); + } + Y_UNIT_TEST(Boundaries) { UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); @@ -29,13 +29,13 @@ Y_UNIT_TEST_SUITE(TRegExp) { .Match(TStringBuf("q"), false, false) .Match(TStringBuf("w"), false, false) .Match(TStringBuf("b"), false, true) - .Final()); + .Final()); } Y_UNIT_TEST(Case) { - UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final()); - UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final()); - } + UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final()); + UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final()); + } Y_UNIT_TEST(UnicodeCase) { UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final()); @@ -114,7 +114,7 @@ Y_UNIT_TEST_SUITE(TRegExp) { Y_UNIT_TEST(Capture3) { TCapturingFsm fsm("http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", - TFsm::TOptions().SetCapture(2)); + TFsm::TOptions().SetCapture(2)); TSearcher searcher(fsm); searcher.Search("http://vkontakte.ru/id100500"); @@ -124,7 +124,7 @@ Y_UNIT_TEST_SUITE(TRegExp) { Y_UNIT_TEST(Capture4) { TCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", - TFsm::TOptions().SetCharset(CODES_UTF8)); + TFsm::TOptions().SetCharset(CODES_UTF8)); TSearcher searcher(fsm); searcher.Search(" Здравствуйте, Уважаемый (-ая)! "); @@ -315,4 +315,4 @@ Y_UNIT_TEST_SUITE(TRegExp) { UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), ""); UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))"); } -} +} diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make index 8776695f40..0277d88f8c 100644 --- a/library/cpp/regex/pire/ut/ya.make +++ b/library/cpp/regex/pire/ut/ya.make @@ -6,10 +6,10 @@ OWNER( g:util davenger ) - + SET(PIRETESTSDIR contrib/libs/pire/ut) -CFLAGS(-DPIRE_NO_CONFIG) +CFLAGS(-DPIRE_NO_CONFIG) PEERDIR( library/cpp/regex/pire @@ -18,11 +18,11 @@ PEERDIR( SRCDIR( ${PIRETESTSDIR} ) - -ADDINCL( - contrib/libs/pire/pire - contrib/libs/pire/ut -) + +ADDINCL( + contrib/libs/pire/pire + contrib/libs/pire/ut +) SRCS( pire_ut.cpp diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make index c857e6d18b..7d14c3b043 100644 --- a/library/cpp/regex/pire/ya.make +++ b/library/cpp/regex/pire/ya.make @@ -6,7 +6,7 @@ OWNER( davenger pg ) - + CFLAGS(-DPIRE_NO_CONFIG) SRCDIR(contrib/libs/pire/pire) diff --git a/library/cpp/regex/ya.make b/library/cpp/regex/ya.make index 15b0d1aeda..71fc9a6a43 100644 --- a/library/cpp/regex/ya.make +++ b/library/cpp/regex/ya.make @@ -1,14 +1,14 @@ -RECURSE( - glob +RECURSE( + glob hyperscan hyperscan/ut - libregex - pcre - pire - pire/inline - pire/ut + libregex + pcre + pire + pire/inline + pire/ut pire2hyperscan pire2hyperscan/ut regexp_classifier regexp_classifier/ut -) +) |