about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/regex
diff options
context:
space:
mode:
author: Alexander Gololobov <davenger@yandex-team.com> 2022-02-10 16:47:37 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:37 +0300
commit: 39608cdb86363c75ce55b2b9a69841c3b71f22cf (patch)
tree: 4ec132c1665bd4d68e3628aa18d937c70d32413b /library/cpp/regex
parent: 54295b9bd4dc45c54d804084fd846d945148a7f0 (diff)
download: ydb-39608cdb86363c75ce55b2b9a69841c3b71f22cf.tar.gz
Restoring authorship annotation for Alexander Gololobov <davenger@yandex-team.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- library/cpp/regex/pire/extraencodings.cpp | 40
-rw-r--r-- library/cpp/regex/pire/inline/ya.make | 16
-rw-r--r-- library/cpp/regex/pire/pire.h | 80
-rw-r--r-- library/cpp/regex/pire/regexp.h | 20
-rw-r--r-- library/cpp/regex/pire/ut/regexp_ut.cpp | 50
-rw-r--r-- library/cpp/regex/pire/ut/ya.make | 28
-rw-r--r-- library/cpp/regex/pire/ya.make | 38
7 files changed, 136 insertions, 136 deletions
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp
index 2e507e4b67..965a0c77de 100644
--- a/library/cpp/regex/pire/extraencodings.cpp
+++ b/library/cpp/regex/pire/extraencodings.cpp
@@ -1,13 +1,13 @@
-#include <util/system/defaults.h>
-#include <util/system/yassert.h>
+#include <util/system/defaults.h>
+#include <util/system/yassert.h>
#include <library/cpp/charset/codepage.h>
-#include <util/generic/singleton.h>
-#include <util/generic/yexception.h>
+#include <util/generic/singleton.h>
+#include <util/generic/yexception.h>
#include <library/cpp/charset/doccodes.h>
-#include "pire.h"
-
-namespace NPire {
+#include "pire.h"
+
+namespace NPire {
namespace {
// A one-byte encoding which is capable of transforming upper half of the character
// table to/from Unicode chars.
@@ -18,14 +18,14 @@ namespace NPire {
for (size_t i = 0; i < 256; ++i)
Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
}
-
+
wchar32 FromLocal(const char*& begin, const char* end) const override {
if (begin != end)
return Table_[static_cast<unsigned char>(*begin++)];
else
ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
}
-
+
TString ToLocal(wchar32 c) const override {
THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
if (i != Reverse_.end())
@@ -33,16 +33,16 @@ namespace NPire {
else
return TString();
}
-
+
void AppendDot(TFsm& fsm) const override {
fsm.AppendDot();
}
-
+
private:
const wchar32* Table_;
THashMap<wchar32, char> Reverse_;
};
-
+
template <unsigned N>
struct TOneByteHelper: public TOneByte {
inline TOneByteHelper()
@@ -51,16 +51,16 @@ namespace NPire {
}
};
}
-
+
namespace NEncodings {
const NPire::TEncoding& Koi8r() {
return *Singleton<TOneByteHelper<CODES_KOI8>>();
}
-
+
const NPire::TEncoding& Cp1251() {
return *Singleton<TOneByteHelper<CODES_WIN>>();
- }
-
+ }
+
const NPire::TEncoding& Get(ECharset encoding) {
switch (encoding) {
case CODES_WIN:
@@ -75,7 +75,7 @@ namespace NPire {
ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
}
}
-
- }
-
-}
+
+ }
+
+}
diff --git a/library/cpp/regex/pire/inline/ya.make b/library/cpp/regex/pire/inline/ya.make
index d4850f7b45..78a44d80d7 100644
--- a/library/cpp/regex/pire/inline/ya.make
+++ b/library/cpp/regex/pire/inline/ya.make
@@ -1,5 +1,5 @@
PROGRAM(pire_inline)
-
+
CFLAGS(-DPIRE_NO_CONFIG)
OWNER(
@@ -9,14 +9,14 @@ OWNER(
PEERDIR(
ADDINCL library/cpp/regex/pire
-)
-
+)
+
SRCDIR(
contrib/libs/pire/pire
)
-
-SRCS(
+
+SRCS(
inline.l
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/regex/pire/pire.h b/library/cpp/regex/pire/pire.h
index 286fecd693..e8f6f7cfd1 100644
--- a/library/cpp/regex/pire/pire.h
+++ b/library/cpp/regex/pire/pire.h
@@ -1,19 +1,19 @@
-#pragma once
-
-#ifndef PIRE_NO_CONFIG
-#define PIRE_NO_CONFIG
-#endif
-
-#include <contrib/libs/pire/pire/pire.h>
-#include <contrib/libs/pire/pire/extra.h>
-
+#pragma once
+
+#ifndef PIRE_NO_CONFIG
+#define PIRE_NO_CONFIG
+#endif
+
+#include <contrib/libs/pire/pire/pire.h>
+#include <contrib/libs/pire/pire/extra.h>
+
#include <library/cpp/charset/doccodes.h>
-namespace NPire {
+namespace NPire {
using TChar = Pire::Char;
using Pire::MaxChar;
-
- // Scanner classes
+
+ // Scanner classes
using TScanner = Pire::Scanner;
using TNonrelocScanner = Pire::NonrelocScanner;
using TScannerNoMask = Pire::ScannerNoMask;
@@ -27,11 +27,11 @@ namespace NPire {
using TCapturingScanner = Pire::CapturingScanner;
using TSlowCapturingScanner = Pire::SlowCapturingScanner;
using TCountingScanner = Pire::CountingScanner;
-
+
template <typename T1, typename T2>
using TScannerPair = Pire::ScannerPair<T1, T2>;
- // Helper classes
+ // Helper classes
using TFsm = Pire::Fsm;
using TLexer = Pire::Lexer;
using TTerm = Pire::Term;
@@ -39,38 +39,38 @@ namespace NPire {
using TFeature = Pire::Feature;
using TFeaturePtr = Pire::Feature::Ptr;
using TError = Pire::Error;
-
- // Helper functions
+
+ // Helper functions
using Pire::LongestPrefix;
using Pire::LongestSuffix;
using Pire::Matches;
- using Pire::MmappedScanner;
- using Pire::Run;
+ using Pire::MmappedScanner;
+ using Pire::Run;
using Pire::Runner;
- using Pire::ShortestPrefix;
- using Pire::ShortestSuffix;
- using Pire::Step;
-
- using namespace Pire::SpecialChar;
- using namespace Pire::Consts;
-
- namespace NFeatures {
+ using Pire::ShortestPrefix;
+ using Pire::ShortestSuffix;
+ using Pire::Step;
+
+ using namespace Pire::SpecialChar;
+ using namespace Pire::Consts;
+
+ namespace NFeatures {
using Pire::Features::AndNotSupport;
using Pire::Features::Capture;
- using Pire::Features::CaseInsensitive;
- using Pire::Features::GlueSimilarGlyphs;
- }
-
- namespace NEncodings {
- using Pire::Encodings::Latin1;
- using Pire::Encodings::Utf8;
-
+ using Pire::Features::CaseInsensitive;
+ using Pire::Features::GlueSimilarGlyphs;
+ }
+
+ namespace NEncodings {
+ using Pire::Encodings::Latin1;
+ using Pire::Encodings::Utf8;
+
const NPire::TEncoding& Koi8r();
const NPire::TEncoding& Cp1251();
const NPire::TEncoding& Get(ECharset encoding);
- }
-
- namespace NTokenTypes {
- using namespace Pire::TokenTypes;
- }
-}
+ }
+
+ namespace NTokenTypes {
+ using namespace Pire::TokenTypes;
+ }
+}
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h
index 94bba4064b..aeb66a8a64 100644
--- a/library/cpp/regex/pire/regexp.h
+++ b/library/cpp/regex/pire/regexp.h
@@ -54,13 +54,13 @@ namespace NRegExp {
lexer.Assign(regexp.data(), regexp.data() + regexp.size());
} else {
TVector<wchar32> ucs4(regexp.size() + 1);
- size_t inRead = 0;
- size_t outWritten = 0;
+ size_t inRead = 0;
+ size_t outWritten = 0;
int recodeRes = RecodeToUnicode(opts.Charset, regexp.data(), ucs4.data(),
regexp.size(), regexp.size(), inRead, outWritten);
Y_ASSERT(recodeRes == RECODE_OK);
Y_ASSERT(outWritten < ucs4.size());
- ucs4[outWritten] = 0;
+ ucs4[outWritten] = 0;
lexer.Assign(ucs4.begin(),
ucs4.begin() + std::char_traits<wchar32>::length(ucs4.data()));
@@ -207,12 +207,12 @@ namespace NRegExp {
}
protected:
- inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept {
- if (addBegin) {
+ inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept {
+ if (addBegin) {
NPire::Step(GetScanner(), State, NPire::BeginMark);
}
NPire::Run(GetScanner(), State, data, data + len);
- if (addEnd) {
+ if (addEnd) {
NPire::Step(GetScanner(), State, NPire::EndMark);
}
}
@@ -236,8 +236,8 @@ namespace NRegExp {
{
}
- inline TMatcher& Match(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
- Run(data, len, addBegin, addEnd);
+ inline TMatcher& Match(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
+ Run(data, len, addBegin, addEnd);
return *this;
}
@@ -267,9 +267,9 @@ namespace NRegExp {
return GetState().Captured();
}
- inline TSearcher& Search(const char* data, size_t len, bool addBegin = true, bool addEnd = true) noexcept {
+ inline TSearcher& Search(const char* data, size_t len, bool addBegin = true, bool addEnd = true) noexcept {
Data = TStringBuf(data, len);
- Run(data, len, addBegin, addEnd);
+ Run(data, len, addBegin, addEnd);
return *this;
}
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index e7206de9ad..7c517bc583 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -17,41 +17,41 @@ Y_UNIT_TEST_SUITE(TRegExp) {
}
Y_UNIT_TEST(Boundaries) {
- UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
- UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final());
UNIT_ASSERT(TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final());
UNIT_ASSERT(TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final());
UNIT_ASSERT(!TMatcher(TFsm("qw$", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final());
UNIT_ASSERT(!TMatcher(TFsm("^qw", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final());
-
- UNIT_ASSERT(TMatcher(TFsm("^aqwb$", TFsm::TOptions().SetSurround(true)))
+
+ UNIT_ASSERT(TMatcher(TFsm("^aqwb$", TFsm::TOptions().SetSurround(true)))
.Match(TStringBuf("a"), true, false)
.Match(TStringBuf("q"), false, false)
.Match(TStringBuf("w"), false, false)
.Match(TStringBuf("b"), false, true)
.Final());
- }
-
+ }
+
Y_UNIT_TEST(Case) {
UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final());
UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final());
}
-
+
Y_UNIT_TEST(UnicodeCase) {
UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final());
UNIT_ASSERT(!TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(false))).Match("Ab").Final());
}
Y_UNIT_TEST(Utf) {
- NRegExp::TFsmBase::TOptions opts;
- opts.Charset = CODES_UTF8;
- opts.Surround = true;
- UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("wtf").Final());
- UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("чзн").Final());
- UNIT_ASSERT(TMatcher(TFsm("ч.*", opts)).Match("чзн").Final());
- UNIT_ASSERT(!TMatcher(TFsm("чзн", opts)).Match("чзх").Final());
- }
-
+ NRegExp::TFsmBase::TOptions opts;
+ opts.Charset = CODES_UTF8;
+ opts.Surround = true;
+ UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("wtf").Final());
+ UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("чзн").Final());
+ UNIT_ASSERT(TMatcher(TFsm("ч.*", opts)).Match("чзн").Final());
+ UNIT_ASSERT(!TMatcher(TFsm("чзн", opts)).Match("чзх").Final());
+ }
+
Y_UNIT_TEST(AndNot) {
NRegExp::TFsmBase::TOptions opts;
opts.AndNotSupport = true;
@@ -84,15 +84,15 @@ Y_UNIT_TEST_SUITE(TRegExp) {
}
Y_UNIT_TEST(Glue) {
- TFsm glued =
- TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true)) |
- TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false)) |
- TFsm("abc", TFsm::TOptions().SetCaseInsensitive(false));
- UNIT_ASSERT(TMatcher(glued).Match("Qw").Final());
- UNIT_ASSERT(TMatcher(glued).Match("Qw").Final());
- UNIT_ASSERT(TMatcher(glued).Match("abc").Final());
- UNIT_ASSERT(!TMatcher(glued).Match("Abc").Final());
- }
+ TFsm glued =
+ TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true)) |
+ TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false)) |
+ TFsm("abc", TFsm::TOptions().SetCaseInsensitive(false));
+ UNIT_ASSERT(TMatcher(glued).Match("Qw").Final());
+ UNIT_ASSERT(TMatcher(glued).Match("Qw").Final());
+ UNIT_ASSERT(TMatcher(glued).Match("abc").Final());
+ UNIT_ASSERT(!TMatcher(glued).Match("Abc").Final());
+ }
Y_UNIT_TEST(Capture1) {
TCapturingFsm fsm("here we have user_id=([a-z0-9]+);");
diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make
index 8776695f40..d0a2301816 100644
--- a/library/cpp/regex/pire/ut/ya.make
+++ b/library/cpp/regex/pire/ut/ya.make
@@ -1,20 +1,20 @@
# this test in not linked into build tree with ReCURSE and is built by unittest/library
UNITTEST()
-
+
OWNER(
g:util
davenger
)
SET(PIRETESTSDIR contrib/libs/pire/ut)
-
+
CFLAGS(-DPIRE_NO_CONFIG)
-
+
PEERDIR(
library/cpp/regex/pire
)
-
+
SRCDIR(
${PIRETESTSDIR}
)
@@ -23,22 +23,22 @@ ADDINCL(
contrib/libs/pire/pire
contrib/libs/pire/ut
)
-
-SRCS(
- pire_ut.cpp
- capture_ut.cpp
- count_ut.cpp
+
+SRCS(
+ pire_ut.cpp
+ capture_ut.cpp
+ count_ut.cpp
glyph_ut.cpp
- easy_ut.cpp
+ easy_ut.cpp
read_unicode_ut.cpp
- regexp_ut.cpp
+ regexp_ut.cpp
approx_matching_ut.cpp
-)
-
+)
+
SIZE(MEDIUM)
TIMEOUT(600)
PIRE_INLINE(inline_ut.cpp)
-END()
+END()
diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make
index c857e6d18b..0f788b35b5 100644
--- a/library/cpp/regex/pire/ya.make
+++ b/library/cpp/regex/pire/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
+LIBRARY()
+
OWNER(
g:util
g:antiinfra
@@ -8,33 +8,33 @@ OWNER(
)
CFLAGS(-DPIRE_NO_CONFIG)
-
+
SRCDIR(contrib/libs/pire/pire)
-
-SRCS(
+
+SRCS(
pcre2pire.cpp
- classes.cpp
- encoding.cpp
- fsm.cpp
- scanner_io.cpp
- easy.cpp
- scanners/null.cpp
- extra/capture.cpp
- extra/count.cpp
- extra/glyphs.cpp
- re_lexer.cpp
+ classes.cpp
+ encoding.cpp
+ fsm.cpp
+ scanner_io.cpp
+ easy.cpp
+ scanners/null.cpp
+ extra/capture.cpp
+ extra/count.cpp
+ extra/glyphs.cpp
+ re_lexer.cpp
re_parser.y
read_unicode.cpp
- extraencodings.cpp
+ extraencodings.cpp
approx_matching.cpp
half_final_fsm.cpp
minimize.h
-)
-
+)
+
PEERDIR(
library/cpp/charset
)
-END()
+END()
RECURSE_FOR_TESTS(ut)