diff options
author | Alexander Gololobov <davenger@yandex-team.com> | 2022-02-10 16:47:37 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:37 +0300 |
commit | 39608cdb86363c75ce55b2b9a69841c3b71f22cf (patch) | |
tree | 4ec132c1665bd4d68e3628aa18d937c70d32413b /library/cpp/regex | |
parent | 54295b9bd4dc45c54d804084fd846d945148a7f0 (diff) | |
download | ydb-39608cdb86363c75ce55b2b9a69841c3b71f22cf.tar.gz |
Restoring authorship annotation for Alexander Gololobov <davenger@yandex-team.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- | library/cpp/regex/pire/extraencodings.cpp | 40 | ||||
-rw-r--r-- | library/cpp/regex/pire/inline/ya.make | 16 | ||||
-rw-r--r-- | library/cpp/regex/pire/pire.h | 80 | ||||
-rw-r--r-- | library/cpp/regex/pire/regexp.h | 20 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 50 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/ya.make | 28 | ||||
-rw-r--r-- | library/cpp/regex/pire/ya.make | 38 |
7 files changed, 136 insertions, 136 deletions
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp index 2e507e4b67..965a0c77de 100644 --- a/library/cpp/regex/pire/extraencodings.cpp +++ b/library/cpp/regex/pire/extraencodings.cpp @@ -1,13 +1,13 @@ -#include <util/system/defaults.h> -#include <util/system/yassert.h> +#include <util/system/defaults.h> +#include <util/system/yassert.h> #include <library/cpp/charset/codepage.h> -#include <util/generic/singleton.h> -#include <util/generic/yexception.h> +#include <util/generic/singleton.h> +#include <util/generic/yexception.h> #include <library/cpp/charset/doccodes.h> -#include "pire.h" - -namespace NPire { +#include "pire.h" + +namespace NPire { namespace { // A one-byte encoding which is capable of transforming upper half of the character // table to/from Unicode chars. @@ -18,14 +18,14 @@ namespace NPire { for (size_t i = 0; i < 256; ++i) Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i))); } - + wchar32 FromLocal(const char*& begin, const char* end) const override { if (begin != end) return Table_[static_cast<unsigned char>(*begin++)]; else ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()"; } - + TString ToLocal(wchar32 c) const override { THashMap<wchar32, char>::const_iterator i = Reverse_.find(c); if (i != Reverse_.end()) @@ -33,16 +33,16 @@ namespace NPire { else return TString(); } - + void AppendDot(TFsm& fsm) const override { fsm.AppendDot(); } - + private: const wchar32* Table_; THashMap<wchar32, char> Reverse_; }; - + template <unsigned N> struct TOneByteHelper: public TOneByte { inline TOneByteHelper() @@ -51,16 +51,16 @@ namespace NPire { } }; } - + namespace NEncodings { const NPire::TEncoding& Koi8r() { return *Singleton<TOneByteHelper<CODES_KOI8>>(); } - + const NPire::TEncoding& Cp1251() { return *Singleton<TOneByteHelper<CODES_WIN>>(); - } - + } + const NPire::TEncoding& Get(ECharset encoding) { switch (encoding) { case CODES_WIN: @@ -75,7 +75,7 @@ namespace NPire { ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding; } } - - } - -} + + } + +} diff --git a/library/cpp/regex/pire/inline/ya.make b/library/cpp/regex/pire/inline/ya.make index d4850f7b45..78a44d80d7 100644 --- a/library/cpp/regex/pire/inline/ya.make +++ b/library/cpp/regex/pire/inline/ya.make @@ -1,5 +1,5 @@ PROGRAM(pire_inline) - + CFLAGS(-DPIRE_NO_CONFIG) OWNER( @@ -9,14 +9,14 @@ OWNER( PEERDIR( ADDINCL library/cpp/regex/pire -) - +) + SRCDIR( contrib/libs/pire/pire ) - -SRCS( + +SRCS( inline.l -) - -END() +) + +END() diff --git a/library/cpp/regex/pire/pire.h b/library/cpp/regex/pire/pire.h index 286fecd693..e8f6f7cfd1 100644 --- a/library/cpp/regex/pire/pire.h +++ b/library/cpp/regex/pire/pire.h @@ -1,19 +1,19 @@ -#pragma once - -#ifndef PIRE_NO_CONFIG -#define PIRE_NO_CONFIG -#endif - -#include <contrib/libs/pire/pire/pire.h> -#include <contrib/libs/pire/pire/extra.h> - +#pragma once + +#ifndef PIRE_NO_CONFIG +#define PIRE_NO_CONFIG +#endif + +#include <contrib/libs/pire/pire/pire.h> +#include <contrib/libs/pire/pire/extra.h> + #include <library/cpp/charset/doccodes.h> -namespace NPire { +namespace NPire { using TChar = Pire::Char; using Pire::MaxChar; - - // Scanner classes + + // Scanner classes using TScanner = Pire::Scanner; using TNonrelocScanner = Pire::NonrelocScanner; using TScannerNoMask = Pire::ScannerNoMask; @@ -27,11 +27,11 @@ namespace NPire { using TCapturingScanner = Pire::CapturingScanner; using TSlowCapturingScanner = Pire::SlowCapturingScanner; using TCountingScanner = Pire::CountingScanner; - + template <typename T1, typename T2> using TScannerPair = Pire::ScannerPair<T1, T2>; - // Helper classes + // Helper classes using TFsm = Pire::Fsm; using TLexer = Pire::Lexer; using TTerm = Pire::Term; @@ -39,38 +39,38 @@ namespace NPire { using TFeature = Pire::Feature; using TFeaturePtr = Pire::Feature::Ptr; using TError = Pire::Error; - - // Helper functions + + // Helper functions using Pire::LongestPrefix; using Pire::LongestSuffix; using Pire::Matches; - using Pire::MmappedScanner; - using Pire::Run; + using Pire::MmappedScanner; + using Pire::Run; using Pire::Runner; - using Pire::ShortestPrefix; - using Pire::ShortestSuffix; - using Pire::Step; - - using namespace Pire::SpecialChar; - using namespace Pire::Consts; - - namespace NFeatures { + using Pire::ShortestPrefix; + using Pire::ShortestSuffix; + using Pire::Step; + + using namespace Pire::SpecialChar; + using namespace Pire::Consts; + + namespace NFeatures { using Pire::Features::AndNotSupport; using Pire::Features::Capture; - using Pire::Features::CaseInsensitive; - using Pire::Features::GlueSimilarGlyphs; - } - - namespace NEncodings { - using Pire::Encodings::Latin1; - using Pire::Encodings::Utf8; - + using Pire::Features::CaseInsensitive; + using Pire::Features::GlueSimilarGlyphs; + } + + namespace NEncodings { + using Pire::Encodings::Latin1; + using Pire::Encodings::Utf8; + const NPire::TEncoding& Koi8r(); const NPire::TEncoding& Cp1251(); const NPire::TEncoding& Get(ECharset encoding); - } - - namespace NTokenTypes { - using namespace Pire::TokenTypes; - } -} + } + + namespace NTokenTypes { + using namespace Pire::TokenTypes; + } +} diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index 94bba4064b..aeb66a8a64 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -54,13 +54,13 @@ namespace NRegExp { lexer.Assign(regexp.data(), regexp.data() + regexp.size()); } else { TVector<wchar32> ucs4(regexp.size() + 1); - size_t inRead = 0; - size_t outWritten = 0; + size_t inRead = 0; + size_t outWritten = 0; int recodeRes = RecodeToUnicode(opts.Charset, regexp.data(), ucs4.data(), regexp.size(), regexp.size(), inRead, outWritten); Y_ASSERT(recodeRes == RECODE_OK); Y_ASSERT(outWritten < ucs4.size()); - ucs4[outWritten] = 0; + ucs4[outWritten] = 0; lexer.Assign(ucs4.begin(), ucs4.begin() + std::char_traits<wchar32>::length(ucs4.data())); @@ -207,12 +207,12 @@ namespace NRegExp { } protected: - inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept { - if (addBegin) { + inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept { + if (addBegin) { NPire::Step(GetScanner(), State, NPire::BeginMark); } NPire::Run(GetScanner(), State, data, data + len); - if (addEnd) { + if (addEnd) { NPire::Step(GetScanner(), State, NPire::EndMark); } } @@ -236,8 +236,8 @@ namespace NRegExp { { } - inline TMatcher& Match(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { - Run(data, len, addBegin, addEnd); + inline TMatcher& Match(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { + Run(data, len, addBegin, addEnd); return *this; } @@ -267,9 +267,9 @@ namespace NRegExp { return GetState().Captured(); } - inline TSearcher& Search(const char* data, size_t len, bool addBegin = true, bool addEnd = true) noexcept { + inline TSearcher& Search(const char* data, size_t len, bool addBegin = true, bool addEnd = true) noexcept { Data = TStringBuf(data, len); - Run(data, len, addBegin, addEnd); + Run(data, len, addBegin, addEnd); return *this; } diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index e7206de9ad..7c517bc583 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -17,41 +17,41 @@ Y_UNIT_TEST_SUITE(TRegExp) { } Y_UNIT_TEST(Boundaries) { - UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); - UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); + UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); + UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); UNIT_ASSERT(TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final()); UNIT_ASSERT(TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final()); UNIT_ASSERT(!TMatcher(TFsm("qw$", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final()); UNIT_ASSERT(!TMatcher(TFsm("^qw", TFsm::TOptions().SetSurround(true))).Match(TStringBuf("aqwb"), true, true).Final()); - - UNIT_ASSERT(TMatcher(TFsm("^aqwb$", TFsm::TOptions().SetSurround(true))) + + UNIT_ASSERT(TMatcher(TFsm("^aqwb$", TFsm::TOptions().SetSurround(true))) .Match(TStringBuf("a"), true, false) .Match(TStringBuf("q"), false, false) .Match(TStringBuf("w"), false, false) .Match(TStringBuf("b"), false, true) .Final()); - } - + } + Y_UNIT_TEST(Case) { UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final()); UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final()); } - + Y_UNIT_TEST(UnicodeCase) { UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final()); UNIT_ASSERT(!TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(false))).Match("Ab").Final()); } Y_UNIT_TEST(Utf) { - NRegExp::TFsmBase::TOptions opts; - opts.Charset = CODES_UTF8; - opts.Surround = true; - UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("wtf").Final()); - UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("чзн").Final()); - UNIT_ASSERT(TMatcher(TFsm("ч.*", opts)).Match("чзн").Final()); - UNIT_ASSERT(!TMatcher(TFsm("чзн", opts)).Match("чзх").Final()); - } - + NRegExp::TFsmBase::TOptions opts; + opts.Charset = CODES_UTF8; + opts.Surround = true; + UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("wtf").Final()); + UNIT_ASSERT(TMatcher(TFsm(".*", opts)).Match("чзн").Final()); + UNIT_ASSERT(TMatcher(TFsm("ч.*", opts)).Match("чзн").Final()); + UNIT_ASSERT(!TMatcher(TFsm("чзн", opts)).Match("чзх").Final()); + } + Y_UNIT_TEST(AndNot) { NRegExp::TFsmBase::TOptions opts; opts.AndNotSupport = true; @@ -84,15 +84,15 @@ Y_UNIT_TEST_SUITE(TRegExp) { } Y_UNIT_TEST(Glue) { - TFsm glued = - TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true)) | - TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false)) | - TFsm("abc", TFsm::TOptions().SetCaseInsensitive(false)); - UNIT_ASSERT(TMatcher(glued).Match("Qw").Final()); - UNIT_ASSERT(TMatcher(glued).Match("Qw").Final()); - UNIT_ASSERT(TMatcher(glued).Match("abc").Final()); - UNIT_ASSERT(!TMatcher(glued).Match("Abc").Final()); - } + TFsm glued = + TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true)) | + TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false)) | + TFsm("abc", TFsm::TOptions().SetCaseInsensitive(false)); + UNIT_ASSERT(TMatcher(glued).Match("Qw").Final()); + UNIT_ASSERT(TMatcher(glued).Match("Qw").Final()); + UNIT_ASSERT(TMatcher(glued).Match("abc").Final()); + UNIT_ASSERT(!TMatcher(glued).Match("Abc").Final()); + } Y_UNIT_TEST(Capture1) { TCapturingFsm fsm("here we have user_id=([a-z0-9]+);"); diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make index 8776695f40..d0a2301816 100644 --- a/library/cpp/regex/pire/ut/ya.make +++ b/library/cpp/regex/pire/ut/ya.make @@ -1,20 +1,20 @@ # this test in not linked into build tree with ReCURSE and is built by unittest/library UNITTEST() - + OWNER( g:util davenger ) SET(PIRETESTSDIR contrib/libs/pire/ut) - + CFLAGS(-DPIRE_NO_CONFIG) - + PEERDIR( library/cpp/regex/pire ) - + SRCDIR( ${PIRETESTSDIR} ) @@ -23,22 +23,22 @@ ADDINCL( contrib/libs/pire/pire contrib/libs/pire/ut ) - -SRCS( - pire_ut.cpp - capture_ut.cpp - count_ut.cpp + +SRCS( + pire_ut.cpp + capture_ut.cpp + count_ut.cpp glyph_ut.cpp - easy_ut.cpp + easy_ut.cpp read_unicode_ut.cpp - regexp_ut.cpp + regexp_ut.cpp approx_matching_ut.cpp -) - +) + SIZE(MEDIUM) TIMEOUT(600) PIRE_INLINE(inline_ut.cpp) -END() +END() diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make index c857e6d18b..0f788b35b5 100644 --- a/library/cpp/regex/pire/ya.make +++ b/library/cpp/regex/pire/ya.make @@ -1,5 +1,5 @@ -LIBRARY() - +LIBRARY() + OWNER( g:util g:antiinfra @@ -8,33 +8,33 @@ OWNER( ) CFLAGS(-DPIRE_NO_CONFIG) - + SRCDIR(contrib/libs/pire/pire) - -SRCS( + +SRCS( pcre2pire.cpp - classes.cpp - encoding.cpp - fsm.cpp - scanner_io.cpp - easy.cpp - scanners/null.cpp - extra/capture.cpp - extra/count.cpp - extra/glyphs.cpp - re_lexer.cpp + classes.cpp + encoding.cpp + fsm.cpp + scanner_io.cpp + easy.cpp + scanners/null.cpp + extra/capture.cpp + extra/count.cpp + extra/glyphs.cpp + re_lexer.cpp re_parser.y read_unicode.cpp - extraencodings.cpp + extraencodings.cpp approx_matching.cpp half_final_fsm.cpp minimize.h -) - +) + PEERDIR( library/cpp/charset ) -END() +END() RECURSE_FOR_TESTS(ut) |