diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/regex/pire | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex/pire')
-rw-r--r-- | library/cpp/regex/pire/extraencodings.cpp | 116 | ||||
-rw-r--r-- | library/cpp/regex/pire/inline/ya.make | 4 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.cpp | 2 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.h | 4 | ||||
-rw-r--r-- | library/cpp/regex/pire/pire.h | 14 | ||||
-rw-r--r-- | library/cpp/regex/pire/regexp.h | 110 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 36 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/ya.make | 14 | ||||
-rw-r--r-- | library/cpp/regex/pire/ya.make | 2 |
9 files changed, 151 insertions, 151 deletions
diff --git a/library/cpp/regex/pire/extraencodings.cpp b/library/cpp/regex/pire/extraencodings.cpp index 8645d6cd4f..2e507e4b67 100644 --- a/library/cpp/regex/pire/extraencodings.cpp +++ b/library/cpp/regex/pire/extraencodings.cpp @@ -8,73 +8,73 @@ #include "pire.h" namespace NPire { - namespace { - // A one-byte encoding which is capable of transforming upper half of the character - // table to/from Unicode chars. - class TOneByte: public TEncoding { - public: - TOneByte(ECharset doccode) { - Table_ = CodePageByCharset(doccode)->unicode; - for (size_t i = 0; i < 256; ++i) - Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i))); - } + namespace { + // A one-byte encoding which is capable of transforming upper half of the character + // table to/from Unicode chars. + class TOneByte: public TEncoding { + public: + TOneByte(ECharset doccode) { + Table_ = CodePageByCharset(doccode)->unicode; + for (size_t i = 0; i < 256; ++i) + Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i))); + } - wchar32 FromLocal(const char*& begin, const char* end) const override { - if (begin != end) - return Table_[static_cast<unsigned char>(*begin++)]; - else - ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()"; - } + wchar32 FromLocal(const char*& begin, const char* end) const override { + if (begin != end) + return Table_[static_cast<unsigned char>(*begin++)]; + else + ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()"; + } - TString ToLocal(wchar32 c) const override { - THashMap<wchar32, char>::const_iterator i = Reverse_.find(c); - if (i != Reverse_.end()) - return TString(1, i->second); - else - return TString(); - } + TString ToLocal(wchar32 c) const override { + THashMap<wchar32, char>::const_iterator i = Reverse_.find(c); + if (i != Reverse_.end()) + return TString(1, i->second); + else + return TString(); + } - void AppendDot(TFsm& fsm) const override { - fsm.AppendDot(); - } + void AppendDot(TFsm& fsm) const override { + fsm.AppendDot(); + } - private: - const wchar32* Table_; - THashMap<wchar32, char> Reverse_; - }; + private: + const wchar32* Table_; + THashMap<wchar32, char> Reverse_; + }; - template <unsigned N> - struct TOneByteHelper: public TOneByte { - inline TOneByteHelper() - : TOneByte((ECharset)N) - { - } - }; - } + template <unsigned N> + struct TOneByteHelper: public TOneByte { + inline TOneByteHelper() + : TOneByte((ECharset)N) + { + } + }; + } - namespace NEncodings { - const NPire::TEncoding& Koi8r() { - return *Singleton<TOneByteHelper<CODES_KOI8>>(); - } + namespace NEncodings { + const NPire::TEncoding& Koi8r() { + return *Singleton<TOneByteHelper<CODES_KOI8>>(); + } - const NPire::TEncoding& Cp1251() { - return *Singleton<TOneByteHelper<CODES_WIN>>(); + const NPire::TEncoding& Cp1251() { + return *Singleton<TOneByteHelper<CODES_WIN>>(); } - const NPire::TEncoding& Get(ECharset encoding) { - switch (encoding) { - case CODES_WIN: - return Cp1251(); - case CODES_KOI8: - return Koi8r(); - case CODES_ASCII: - return NPire::NEncodings::Latin1(); - case CODES_UTF8: - return NPire::NEncodings::Utf8(); - default: - ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding; - } - } + const NPire::TEncoding& Get(ECharset encoding) { + switch (encoding) { + case CODES_WIN: + return Cp1251(); + case CODES_KOI8: + return Koi8r(); + case CODES_ASCII: + return NPire::NEncodings::Latin1(); + case CODES_UTF8: + return NPire::NEncodings::Utf8(); + default: + ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding; + } + } } diff --git a/library/cpp/regex/pire/inline/ya.make b/library/cpp/regex/pire/inline/ya.make index 5a83468746..d4850f7b45 100644 --- a/library/cpp/regex/pire/inline/ya.make +++ b/library/cpp/regex/pire/inline/ya.make @@ -6,8 +6,8 @@ OWNER( g:util davenger ) - -PEERDIR( + +PEERDIR( ADDINCL library/cpp/regex/pire ) diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp index 498a8abc25..f788beb85f 100644 --- a/library/cpp/regex/pire/pcre2pire.cpp +++ b/library/cpp/regex/pire/pcre2pire.cpp @@ -2,7 +2,7 @@ #include <util/generic/vector.h> #include <util/generic/yexception.h> -TString Pcre2Pire(const TString& src) { +TString Pcre2Pire(const TString& src) { TVector<char> result; result.reserve(src.size() + 1); diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h index b4d3b34205..46e45b9193 100644 --- a/library/cpp/regex/pire/pcre2pire.h +++ b/library/cpp/regex/pire/pcre2pire.h @@ -1,5 +1,5 @@ -#pragma once - +#pragma once + // Author: smikler@yandex-team.ru #include <util/generic/string.h> diff --git a/library/cpp/regex/pire/pire.h b/library/cpp/regex/pire/pire.h index 148301f39d..286fecd693 100644 --- a/library/cpp/regex/pire/pire.h +++ b/library/cpp/regex/pire/pire.h @@ -41,9 +41,9 @@ namespace NPire { using TError = Pire::Error; // Helper functions - using Pire::LongestPrefix; - using Pire::LongestSuffix; - using Pire::Matches; + using Pire::LongestPrefix; + using Pire::LongestSuffix; + using Pire::Matches; using Pire::MmappedScanner; using Pire::Run; using Pire::Runner; @@ -55,8 +55,8 @@ namespace NPire { using namespace Pire::Consts; namespace NFeatures { - using Pire::Features::AndNotSupport; - using Pire::Features::Capture; + using Pire::Features::AndNotSupport; + using Pire::Features::Capture; using Pire::Features::CaseInsensitive; using Pire::Features::GlueSimilarGlyphs; } @@ -65,8 +65,8 @@ namespace NPire { using Pire::Encodings::Latin1; using Pire::Encodings::Utf8; - const NPire::TEncoding& Koi8r(); - const NPire::TEncoding& Cp1251(); + const NPire::TEncoding& Koi8r(); + const NPire::TEncoding& Cp1251(); const NPire::TEncoding& Get(ECharset encoding); } diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index d5424e359a..94bba4064b 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -1,7 +1,7 @@ #pragma once - -#include "pire.h" - + +#include "pire.h" + #include <library/cpp/charset/doccodes.h> #include <library/cpp/charset/recyr.hh> #include <util/generic/maybe.h> @@ -10,26 +10,26 @@ #include <util/generic/vector.h> #include <util/generic/yexception.h> -namespace NRegExp { +namespace NRegExp { struct TMatcher; - + struct TFsmBase { struct TOptions { inline TOptions& SetCaseInsensitive(bool v) noexcept { CaseInsensitive = v; return *this; } - + inline TOptions& SetSurround(bool v) noexcept { Surround = v; return *this; } - + inline TOptions& SetCapture(size_t pos) noexcept { CapturePos = pos; return *this; - } - + } + inline TOptions& SetCharset(ECharset charset) noexcept { Charset = charset; return *this; @@ -68,64 +68,64 @@ namespace NRegExp { if (opts.CaseInsensitive) { lexer.AddFeature(NPire::NFeatures::CaseInsensitive()); - } - + } + if (opts.CapturePos) { lexer.AddFeature(NPire::NFeatures::Capture(*opts.CapturePos)); - } - + } + if (opts.AndNotSupport) { lexer.AddFeature(NPire::NFeatures::AndNotSupport()); } switch (opts.Charset) { - case CODES_UNKNOWN: - break; - case CODES_UTF8: - lexer.SetEncoding(NPire::NEncodings::Utf8()); - break; - case CODES_KOI8: - lexer.SetEncoding(NPire::NEncodings::Koi8r()); - break; - default: - lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset)); - break; + case CODES_UNKNOWN: + break; + case CODES_UTF8: + lexer.SetEncoding(NPire::NEncodings::Utf8()); + break; + case CODES_KOI8: + lexer.SetEncoding(NPire::NEncodings::Koi8r()); + break; + default: + lexer.SetEncoding(NPire::NEncodings::Get(opts.Charset)); + break; } NPire::TFsm ret = lexer.Parse(); if (opts.Surround) { ret.Surround(); - } - + } + if (needDetermine) { ret.Determine(); } - + return ret; } }; - + template <class TScannerType> class TFsmParser: public TFsmBase { public: typedef TScannerType TScanner; - + public: inline explicit TFsmParser(const TStringBuf& regexp, const TOptions& opts = TOptions(), bool needDetermine = true) : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) { } - + inline const TScanner& GetScanner() const noexcept { return Scanner; } - + static inline TFsmParser False() { return TFsmParser(NPire::TFsm::MakeFalse().Compile<TScanner>()); } - + inline explicit TFsmParser(const TScanner& compiled) : Scanner(compiled) { @@ -135,12 +135,12 @@ namespace NRegExp { private: TScanner Scanner; - }; - + }; + class TFsm: public TFsmParser<NPire::TNonrelocScanner> { public: inline explicit TFsm(const TStringBuf& regexp, - const TOptions& opts = TOptions()) + const TOptions& opts = TOptions()) : TFsmParser<TScanner>(regexp, opts) { } @@ -150,7 +150,7 @@ namespace NRegExp { { } - static inline TFsm Glue(const TFsm& l, const TFsm& r) { + static inline TFsm Glue(const TFsm& l, const TFsm& r) { return TFsm(TScanner::Glue(l.GetScanner(), r.GetScanner())); } @@ -160,23 +160,23 @@ namespace NRegExp { } }; - static inline TFsm operator|(const TFsm& l, const TFsm& r) { - return TFsm::Glue(l, r); - } - - struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> { + static inline TFsm operator|(const TFsm& l, const TFsm& r) { + return TFsm::Glue(l, r); + } + + struct TCapturingFsm : TFsmParser<NPire::TCapturingScanner> { inline explicit TCapturingFsm(const TStringBuf& regexp, - TOptions opts = TOptions()) + TOptions opts = TOptions()) : TFsmParser<TScanner>(regexp, - opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) { + opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1)) { } - + inline TCapturingFsm(const TFsmParser<TScanner>& fsm) : TFsmParser<TScanner>(fsm) { } }; - + struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { inline explicit TSlowCapturingFsm(const TStringBuf& regexp, TOptions opts = TOptions()) @@ -194,43 +194,43 @@ namespace NRegExp { class TMatcherBase { public: typedef typename TFsm::TScanner::State TState; - + public: inline explicit TMatcherBase(const TFsm& fsm) : Fsm(fsm) { Fsm.GetScanner().Initialize(State); } - + inline bool Final() const noexcept { return GetScanner().Final(GetState()); } - + protected: inline void Run(const char* data, size_t len, bool addBegin, bool addEnd) noexcept { if (addBegin) { NPire::Step(GetScanner(), State, NPire::BeginMark); - } + } NPire::Run(GetScanner(), State, data, data + len); if (addEnd) { NPire::Step(GetScanner(), State, NPire::EndMark); } } - + inline const typename TFsm::TScanner& GetScanner() const noexcept { return Fsm.GetScanner(); } - + inline const TState& GetState() const noexcept { return State; } - + private: const TFsm& Fsm; TState State; - }; + }; - struct TMatcher : TMatcherBase<TFsm> { + struct TMatcher : TMatcherBase<TFsm> { inline explicit TMatcher(const TFsm& fsm) : TMatcherBase<TFsm>(fsm) { @@ -334,4 +334,4 @@ namespace NRegExp { return *this; } }; -} +} diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index 294bc65fa7..e7206de9ad 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -1,21 +1,21 @@ #include <library/cpp/testing/unittest/registar.h> - + #include <library/cpp/regex/pire/regexp.h> #include <library/cpp/regex/pire/pcre2pire.h> - + Y_UNIT_TEST_SUITE(TRegExp) { - using namespace NRegExp; - + using namespace NRegExp; + Y_UNIT_TEST(False) { - UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final()); + UNIT_ASSERT(!TMatcher(TFsm::False()).Match("").Final()); UNIT_ASSERT(!TMatcher(TFsm::False()).Match(TStringBuf{}).Final()); - } - + } + Y_UNIT_TEST(Surround) { - UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); - UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final()); - } - + UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); + UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetSurround(false))).Match("aqwb").Final()); + } + Y_UNIT_TEST(Boundaries) { UNIT_ASSERT(!TMatcher(TFsm("qwb$", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); UNIT_ASSERT(!TMatcher(TFsm("^aqw", TFsm::TOptions().SetSurround(true))).Match("aqwb").Final()); @@ -29,13 +29,13 @@ Y_UNIT_TEST_SUITE(TRegExp) { .Match(TStringBuf("q"), false, false) .Match(TStringBuf("w"), false, false) .Match(TStringBuf("b"), false, true) - .Final()); + .Final()); } Y_UNIT_TEST(Case) { - UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final()); - UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final()); - } + UNIT_ASSERT(TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(true))).Match("Qw").Final()); + UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final()); + } Y_UNIT_TEST(UnicodeCase) { UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final()); @@ -114,7 +114,7 @@ Y_UNIT_TEST_SUITE(TRegExp) { Y_UNIT_TEST(Capture3) { TCapturingFsm fsm("http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", - TFsm::TOptions().SetCapture(2)); + TFsm::TOptions().SetCapture(2)); TSearcher searcher(fsm); searcher.Search("http://vkontakte.ru/id100500"); @@ -124,7 +124,7 @@ Y_UNIT_TEST_SUITE(TRegExp) { Y_UNIT_TEST(Capture4) { TCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", - TFsm::TOptions().SetCharset(CODES_UTF8)); + TFsm::TOptions().SetCharset(CODES_UTF8)); TSearcher searcher(fsm); searcher.Search(" Здравствуйте, Уважаемый (-ая)! "); @@ -315,4 +315,4 @@ Y_UNIT_TEST_SUITE(TRegExp) { UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), ""); UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))"); } -} +} diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make index 0277d88f8c..8776695f40 100644 --- a/library/cpp/regex/pire/ut/ya.make +++ b/library/cpp/regex/pire/ut/ya.make @@ -6,10 +6,10 @@ OWNER( g:util davenger ) - + SET(PIRETESTSDIR contrib/libs/pire/ut) -CFLAGS(-DPIRE_NO_CONFIG) +CFLAGS(-DPIRE_NO_CONFIG) PEERDIR( library/cpp/regex/pire @@ -18,11 +18,11 @@ PEERDIR( SRCDIR( ${PIRETESTSDIR} ) - -ADDINCL( - contrib/libs/pire/pire - contrib/libs/pire/ut -) + +ADDINCL( + contrib/libs/pire/pire + contrib/libs/pire/ut +) SRCS( pire_ut.cpp diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make index 7d14c3b043..c857e6d18b 100644 --- a/library/cpp/regex/pire/ya.make +++ b/library/cpp/regex/pire/ya.make @@ -6,7 +6,7 @@ OWNER( davenger pg ) - + CFLAGS(-DPIRE_NO_CONFIG) SRCDIR(contrib/libs/pire/pire) |