diff options
author | thegeorg <thegeorg@yandex-team.com> | 2025-06-25 00:23:21 +0300 |
---|---|---|
committer | thegeorg <thegeorg@yandex-team.com> | 2025-06-25 00:38:34 +0300 |
commit | e7147783ae6a23ee6675fa9f8ca6f43c6af17bc3 (patch) | |
tree | 454e5df12108188dd07fff8193566892d22e5909 /library/cpp/regex/pire/ut/common.h | |
parent | ebc5e196362b795c9a1ac8efa9d5a997cf07b1a4 (diff) | |
download | ydb-e7147783ae6a23ee6675fa9f8ca6f43c6af17bc3.tar.gz |
pire was achived on GitHub, move the code into library/cpp/regex/pire
commit_hash:018daf4645e87c4e0b31e1191af4e75e48f6d958
Diffstat (limited to 'library/cpp/regex/pire/ut/common.h')
-rw-r--r-- | library/cpp/regex/pire/ut/common.h | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/library/cpp/regex/pire/ut/common.h b/library/cpp/regex/pire/ut/common.h new file mode 100644 index 00000000000..d79eedafb73 --- /dev/null +++ b/library/cpp/regex/pire/ut/common.h @@ -0,0 +1,224 @@ +/* + * common.h -- + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_TEST_COMMON_H_INCLUDED +#define PIRE_TEST_COMMON_H_INCLUDED + +#include <stdio.h> +#include <pire.h> +#include <stub/stl.h> +#include <stub/defaults.h> +#include <stub/lexical_cast.h> +#include "stub/cppunit.h" + +using namespace Pire; + +/***************************************************************************** +* Helpers +*****************************************************************************/ + +inline Pire::Fsm ParseRegexp(const char* str, const char* options = "", const Pire::Encoding** enc = 0) +{ + Pire::Lexer lexer; + TVector<wchar32> ucs4; + + bool surround = true; + for (; *options; ++options) { + if (*options == 'i') + lexer.AddFeature(Pire::Features::CaseInsensitive()); + else if (*options == 'u') + lexer.SetEncoding(Pire::Encodings::Utf8()); + else if (*options == 'n') + surround = false; + else if (*options == 'a') + lexer.AddFeature(Pire::Features::AndNotSupport()); + else + throw std::invalid_argument("Unknown option: " + ystring(1, *options)); + } + + if (enc) + *enc = &lexer.Encoding(); + + lexer.Encoding().FromLocal(str, str + strlen(str), std::back_inserter(ucs4)); + lexer.Assign(ucs4.begin(), ucs4.end()); + + Pire::Fsm fsm = lexer.Parse(); + if (surround) + fsm.Surround(); + return fsm; +} + +inline bool HasError(const char* regexp) { + try { + ParseRegexp(regexp); + return false; + } catch (Pire::Error& ex) { + return true; + } +} + +struct Scanners { + Pire::Scanner fast; + Pire::NonrelocScanner nonreloc; + Pire::SimpleScanner simple; + Pire::SlowScanner slow; + Pire::ScannerNoMask fastNoMask; + Pire::NonrelocScannerNoMask nonrelocNoMask; + Pire::HalfFinalScanner halfFinal; + Pire::HalfFinalScannerNoMask halfFinalNoMask; + Pire::NonrelocHalfFinalScanner nonrelocHalfFinal; + Pire::NonrelocHalfFinalScannerNoMask nonrelocHalfFinalNoMask; + + Scanners(const Pire::Fsm& fsm, size_t distance = 0) + : fast(Pire::Fsm(fsm).Compile<Pire::Scanner>(distance)) + , nonreloc(Pire::Fsm(fsm).Compile<Pire::NonrelocScanner>(distance)) + , simple(Pire::Fsm(fsm).Compile<Pire::SimpleScanner>(distance)) + , slow(Pire::Fsm(fsm).Compile<Pire::SlowScanner>(distance)) + , fastNoMask(Pire::Fsm(fsm).Compile<Pire::ScannerNoMask>(distance)) + , nonrelocNoMask(Pire::Fsm(fsm).Compile<Pire::NonrelocScannerNoMask>(distance)) + , halfFinal(Pire::Fsm(fsm).Compile<Pire::HalfFinalScanner>(distance)) + , halfFinalNoMask(Pire::Fsm(fsm).Compile<Pire::HalfFinalScannerNoMask>(distance)) + , nonrelocHalfFinal(Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScanner>(distance)) + , nonrelocHalfFinalNoMask(Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScannerNoMask>(distance)) + {} + + Scanners(const char* str, const char* options = "") + { + Pire::Fsm fsm = ParseRegexp(str, options); + fast = Pire::Fsm(fsm).Compile<Pire::Scanner>(); + nonreloc = Pire::Fsm(fsm).Compile<Pire::NonrelocScanner>(); + simple = Pire::Fsm(fsm).Compile<Pire::SimpleScanner>(); + slow = Pire::Fsm(fsm).Compile<Pire::SlowScanner>(); + fastNoMask = Pire::Fsm(fsm).Compile<Pire::ScannerNoMask>(); + nonrelocNoMask = Pire::Fsm(fsm).Compile<Pire::NonrelocScannerNoMask>(); + halfFinal = Pire::Fsm(fsm).Compile<Pire::HalfFinalScanner>(); + halfFinalNoMask = Pire::Fsm(fsm).Compile<Pire::HalfFinalScannerNoMask>(); + nonrelocHalfFinal = Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScanner>(); + nonrelocHalfFinalNoMask = Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScannerNoMask>(); + } +}; + +#ifdef PIRE_DEBUG + +template <class Scanner> +inline ystring DbgState(const Scanner& scanner, typename Scanner::State state) +{ + return ToString(scanner.StateIndex(state)) + (scanner.Final(state) ? ystring(" [final]") : ystring()); +} +/* +inline ystring DbgState(const Pire::SimpleScanner& scanner, Pire::SimpleScanner::State state) +{ + return ToString(scanner.StateIndex(state)) + (scanner.Final(state) ? ystring(" [final]") : ystring()); +} +*/ +inline ystring DbgState(const Pire::SlowScanner& scanner, const Pire::SlowScanner::State& state) +{ + return ystring("(") + Join(state.states.begin(), state.states.end(), ", ") + ystring(")") + (scanner.Final(state) ? ystring(" [final]") : ystring()); +} + +template<class Scanner> +void DbgRun(const Scanner& scanner, typename Scanner::State& state, const char* begin, const char* end) +{ + for (; begin != end; ++begin) { + char tmp[8]; + if (*begin >= 32) { + tmp[0] = *begin; + tmp[1] = 0; + } else + snprintf(tmp, sizeof(tmp)-1, "\\%03o", (unsigned char) *begin); + std::clog << DbgState(scanner, state) << " --[" << tmp << "]--> "; + scanner.Next(state, (unsigned char) *begin); + std::clog << DbgState(scanner, state) << "\n"; + } +} + +#define Run DbgRun +#endif + +template<class Scanner> +typename Scanner::State RunRegexp(const Scanner& scanner, const ystring& str) +{ + PIRE_IFDEBUG(std::clog << "--- checking against " << str << "\n"); + + typename Scanner::State state; + scanner.Initialize(state); + Step(scanner, state, BeginMark); + Run(scanner, state, str.c_str(), str.c_str() + str.length()); + Step(scanner, state, EndMark); + return state; +} + +template<class Scanner> +typename Scanner::State RunRegexp(const Scanner& scanner, const char* str) +{ + return RunRegexp(scanner, ystring(str)); +} + +template<class Scanner> +bool Matches(const Scanner& scanner, const ystring& str) +{ + auto state = RunRegexp(scanner, str); + auto result = scanner.AcceptedRegexps(state); + return result.first != result.second; +} + +template<class Scanner> +bool Matches(const Scanner& scanner, const char* str) +{ + return Matches(scanner, ystring(str)); +} + +#define SCANNER(fsm) for (Scanners m_scanners(fsm), *m_flag = &m_scanners; m_flag; m_flag = 0) +#define APPROXIMATE_SCANNER(fsm, distance) for (Scanners m_scanners(fsm, distance), *m_flag = &m_scanners; m_flag; m_flag = 0) +#define REGEXP(pattern) for (Scanners m_scanners(pattern), *m_flag = &m_scanners; m_flag; m_flag = 0) +#define REGEXP2(pattern,flags) for (Scanners m_scanners(pattern, flags), *m_flag = &m_scanners; m_flag; m_flag = 0) +#define ACCEPTS(str) \ + do {\ + UNIT_ASSERT(Matches(m_scanners.fast, str));\ + UNIT_ASSERT(Matches(m_scanners.nonreloc, str));\ + UNIT_ASSERT(Matches(m_scanners.simple, str));\ + UNIT_ASSERT(Matches(m_scanners.slow, str));\ + UNIT_ASSERT(Matches(m_scanners.fastNoMask, str));\ + UNIT_ASSERT(Matches(m_scanners.nonrelocNoMask, str));\ + UNIT_ASSERT(Matches(m_scanners.halfFinal, str));\ + UNIT_ASSERT(Matches(m_scanners.halfFinalNoMask, str));\ + UNIT_ASSERT(Matches(m_scanners.nonrelocHalfFinal, str));\ + UNIT_ASSERT(Matches(m_scanners.nonrelocHalfFinalNoMask, str));\ + } while (false) + +#define DENIES(str) \ + do {\ + UNIT_ASSERT(!Matches(m_scanners.fast, str));\ + UNIT_ASSERT(!Matches(m_scanners.nonreloc, str));\ + UNIT_ASSERT(!Matches(m_scanners.simple, str));\ + UNIT_ASSERT(!Matches(m_scanners.slow, str));\ + UNIT_ASSERT(!Matches(m_scanners.fastNoMask, str));\ + UNIT_ASSERT(!Matches(m_scanners.nonrelocNoMask, str));\ + UNIT_ASSERT(!Matches(m_scanners.halfFinal, str));\ + UNIT_ASSERT(!Matches(m_scanners.halfFinalNoMask, str));\ + UNIT_ASSERT(!Matches(m_scanners.nonrelocHalfFinal, str));\ + UNIT_ASSERT(!Matches(m_scanners.nonrelocHalfFinalNoMask, str));\ + } while (false) + + +#endif |