diff options
author | Alexander Gololobov <davenger@yandex-team.com> | 2022-02-10 16:47:37 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:37 +0300 |
commit | 39608cdb86363c75ce55b2b9a69841c3b71f22cf (patch) | |
tree | 4ec132c1665bd4d68e3628aa18d937c70d32413b /contrib/libs | |
parent | 54295b9bd4dc45c54d804084fd846d945148a7f0 (diff) | |
download | ydb-39608cdb86363c75ce55b2b9a69841c3b71f22cf.tar.gz |
Restoring authorship annotation for Alexander Gololobov <davenger@yandex-team.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs')
52 files changed, 6689 insertions, 6689 deletions
diff --git a/contrib/libs/grpc/src/cpp/common/channel_arguments.cc b/contrib/libs/grpc/src/cpp/common/channel_arguments.cc index 5a5dd91b5e..998bd5699b 100644 --- a/contrib/libs/grpc/src/cpp/common/channel_arguments.cc +++ b/contrib/libs/grpc/src/cpp/common/channel_arguments.cc @@ -65,15 +65,15 @@ ChannelArguments::ChannelArguments(const ChannelArguments& other) } } -ChannelArguments::~ChannelArguments() { +ChannelArguments::~ChannelArguments() { grpc_core::ExecCtx exec_ctx; for (auto& arg : args_) { if (arg.type == GRPC_ARG_POINTER) { arg.value.pointer.vtable->destroy(arg.value.pointer.p); - } - } -} - + } + } +} + void ChannelArguments::Swap(ChannelArguments& other) { args_.swap(other.args_); strings_.swap(other.strings_); @@ -190,7 +190,7 @@ void ChannelArguments::SetPointerWithVtable( arg.type = GRPC_ARG_POINTER; strings_.push_back(key); arg.key = const_cast<char*>(strings_.back().c_str()); - arg.value.pointer.p = vtable->copy(value); + arg.value.pointer.p = vtable->copy(value); arg.value.pointer.vtable = vtable; args_.push_back(arg); } diff --git a/contrib/libs/pire/Makefile.am b/contrib/libs/pire/Makefile.am index a9e8908fb6..31eb7b3e7c 100644 --- a/contrib/libs/pire/Makefile.am +++ b/contrib/libs/pire/Makefile.am @@ -1,2 +1,2 @@ -ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = pire tests pkg samples +ACLOCAL_AMFLAGS = -I m4 +SUBDIRS = pire tests pkg samples diff --git a/contrib/libs/pire/README b/contrib/libs/pire/README index 1791486f8e..13ed2d6c7a 100644 --- a/contrib/libs/pire/README +++ b/contrib/libs/pire/README @@ -1,6 +1,6 @@ -This is PIRE, Perl Incompatible Regular Expressions library. - -For detailed information about what it is, how to build and use it, -see http://wiki.yandex-team.ru/DmitrijjProkopcev/pire . - -Please report bugs to dprokoptsev@yandex-team.ru or davenger@yandex-team.ru. +This is PIRE, Perl Incompatible Regular Expressions library. + +For detailed information about what it is, how to build and use it, +see http://wiki.yandex-team.ru/DmitrijjProkopcev/pire . + +Please report bugs to dprokoptsev@yandex-team.ru or davenger@yandex-team.ru. diff --git a/contrib/libs/pire/configure.ac b/contrib/libs/pire/configure.ac index 49f235129c..2068c63a7e 100644 --- a/contrib/libs/pire/configure.ac +++ b/contrib/libs/pire/configure.ac @@ -1,47 +1,47 @@ -AC_PREREQ([2.63]) -AC_INIT([pire], [0.0.2], [dprokoptsev@yandex-team.ru]) -AM_INIT_AUTOMAKE([foreign -Wall]) -AC_CONFIG_SRCDIR([pire/classes.cpp]) -AC_CONFIG_HEADERS([config.h]) -AC_CONFIG_MACRO_DIR([m4]) - -AC_LANG_CPLUSPLUS - -# Require neccessary binaries to build ourselves -AC_PROG_CXX -AC_PROG_CC -AC_PROG_LEX -AC_PROG_YACC -AC_PROG_LIBTOOL - -# Check for cppunit -AM_PATH_CPPUNIT([0.0.0],[with_unittests=yes],[ - AC_WARN([cppunit not found. Unit tests will not compile and run.]) - with_unittests=no -]) -AM_CONDITIONAL([WITH_UNITTESTS], [test x"$with_unittests" = xyes]) - -# Just for conscience' sake -AC_CHECK_HEADERS([stdlib.h string.h sys/time.h]) -AC_HEADER_STDBOOL -AC_C_INLINE -AC_TYPE_SIZE_T -AC_CHECK_TYPES([ptrdiff_t]) -AC_FUNC_ERROR_AT_LINE -AC_FUNC_MALLOC -AC_CHECK_FUNCS([memset strchr]) - -# Require little-endian platform -AC_C_BIGENDIAN -if test x"$ac_cv_c_bigendian" = xyes; then - AC_ERROR([pire has not been ported to big-endian platforms yet.]) -fi - -# Optional features -AC_ARG_ENABLE([extra], AS_HELP_STRING([--enable-extra], [Add extra functionality (capturing scanner, etc...)])) -AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Make Pire dump all constructed FSMs to std::clog (useless unless debugging Pire)])) -AM_CONDITIONAL([ENABLE_EXTRA], [test x"$enable_extra" = xyes]) -AM_CONDITIONAL([ENABLE_DEBUG], [test x"$enable_debug" = xyes]) - -AC_CONFIG_FILES([Makefile pire/Makefile tests/Makefile pkg/Makefile samples/Makefile samples/bench/Makefile]) -AC_OUTPUT +AC_PREREQ([2.63]) +AC_INIT([pire], [0.0.2], [dprokoptsev@yandex-team.ru]) +AM_INIT_AUTOMAKE([foreign -Wall]) +AC_CONFIG_SRCDIR([pire/classes.cpp]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_MACRO_DIR([m4]) + +AC_LANG_CPLUSPLUS + +# Require neccessary binaries to build ourselves +AC_PROG_CXX +AC_PROG_CC +AC_PROG_LEX +AC_PROG_YACC +AC_PROG_LIBTOOL + +# Check for cppunit +AM_PATH_CPPUNIT([0.0.0],[with_unittests=yes],[ + AC_WARN([cppunit not found. Unit tests will not compile and run.]) + with_unittests=no +]) +AM_CONDITIONAL([WITH_UNITTESTS], [test x"$with_unittests" = xyes]) + +# Just for conscience' sake +AC_CHECK_HEADERS([stdlib.h string.h sys/time.h]) +AC_HEADER_STDBOOL +AC_C_INLINE +AC_TYPE_SIZE_T +AC_CHECK_TYPES([ptrdiff_t]) +AC_FUNC_ERROR_AT_LINE +AC_FUNC_MALLOC +AC_CHECK_FUNCS([memset strchr]) + +# Require little-endian platform +AC_C_BIGENDIAN +if test x"$ac_cv_c_bigendian" = xyes; then + AC_ERROR([pire has not been ported to big-endian platforms yet.]) +fi + +# Optional features +AC_ARG_ENABLE([extra], AS_HELP_STRING([--enable-extra], [Add extra functionality (capturing scanner, etc...)])) +AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Make Pire dump all constructed FSMs to std::clog (useless unless debugging Pire)])) +AM_CONDITIONAL([ENABLE_EXTRA], [test x"$enable_extra" = xyes]) +AM_CONDITIONAL([ENABLE_DEBUG], [test x"$enable_debug" = xyes]) + +AC_CONFIG_FILES([Makefile pire/Makefile tests/Makefile pkg/Makefile samples/Makefile samples/bench/Makefile]) +AC_OUTPUT diff --git a/contrib/libs/pire/pire/Makefile.am b/contrib/libs/pire/pire/Makefile.am index 09ef211704..f2d09a2fb7 100644 --- a/contrib/libs/pire/pire/Makefile.am +++ b/contrib/libs/pire/pire/Makefile.am @@ -1,121 +1,121 @@ - -AM_CXXFLAGS = -Wall -if ENABLE_DEBUG -AM_CXXFLAGS += -DPIRE_DEBUG -endif -if ENABLE_CHECKED -AM_CXXFLAGS += -DPIRE_CHECKED -endif - -lib_LTLIBRARIES = libpire.la -libpire_la_SOURCES = \ - align.h \ - any.h \ - classes.cpp \ - defs.h \ - determine.h \ - encoding.cpp \ - encoding.h \ - extra.h \ - fsm.cpp \ - fsm.h \ - fwd.h \ - glue.cpp \ - glue.h \ + +AM_CXXFLAGS = -Wall +if ENABLE_DEBUG +AM_CXXFLAGS += -DPIRE_DEBUG +endif +if ENABLE_CHECKED +AM_CXXFLAGS += -DPIRE_CHECKED +endif + +lib_LTLIBRARIES = libpire.la +libpire_la_SOURCES = \ + align.h \ + any.h \ + classes.cpp \ + defs.h \ + determine.h \ + encoding.cpp \ + encoding.h \ + extra.h \ + fsm.cpp \ + fsm.h \ + fwd.h \ + glue.cpp \ + glue.h \ minimize.h \ half_final_fsm.cpp \ half_final_fsm.h \ - partition.h \ - pire.h \ - re_lexer.cpp \ - re_lexer.h \ - run.h \ - scanner_io.cpp \ - vbitset.h \ - re_parser.ypp \ + partition.h \ + pire.h \ + re_lexer.cpp \ + re_lexer.h \ + run.h \ + scanner_io.cpp \ + vbitset.h \ + re_parser.ypp \ scanners/half_final.h \ - scanners/loaded.h \ - scanners/multi.h \ - scanners/slow.h \ - scanners/simple.h \ - scanners/common.h \ - scanners/pair.h \ - stub/stl.h \ - stub/lexical_cast.h \ - stub/saveload.h \ - stub/singleton.h \ - stub/utf8.cpp \ - stub/utf8.h \ - stub/noncopyable.h \ - stub/codepage_h.h \ - stub/doccodes_h.h \ - stub/unidata_h.h \ - stub/unidata_cpp.h - -if ENABLE_EXTRA -libpire_la_SOURCES += \ - extra/capture.cpp \ - extra/capture.h \ - extra/count.cpp \ - extra/count.h \ - extra/glyphs.cpp \ - extra/glyphs.h -endif - -pire_hdrdir = $(includedir)/pire -pire_hdr_HEADERS = \ - align.h \ - any.h \ - defs.h \ - determine.h \ - encoding.h \ - extra.h \ - fsm.h \ - fwd.h \ - glue.h \ + scanners/loaded.h \ + scanners/multi.h \ + scanners/slow.h \ + scanners/simple.h \ + scanners/common.h \ + scanners/pair.h \ + stub/stl.h \ + stub/lexical_cast.h \ + stub/saveload.h \ + stub/singleton.h \ + stub/utf8.cpp \ + stub/utf8.h \ + stub/noncopyable.h \ + stub/codepage_h.h \ + stub/doccodes_h.h \ + stub/unidata_h.h \ + stub/unidata_cpp.h + +if ENABLE_EXTRA +libpire_la_SOURCES += \ + extra/capture.cpp \ + extra/capture.h \ + extra/count.cpp \ + extra/count.h \ + extra/glyphs.cpp \ + extra/glyphs.h +endif + +pire_hdrdir = $(includedir)/pire +pire_hdr_HEADERS = \ + align.h \ + any.h \ + defs.h \ + determine.h \ + encoding.h \ + extra.h \ + fsm.h \ + fwd.h \ + glue.h \ minimize.h \ half_final_fsm.h \ - partition.h \ - pire.h \ - re_lexer.h \ - re_parser.h \ - run.h \ - static_assert.h \ - vbitset.h - -if ENABLE_EXTRA -pire_extradir = $(includedir)/pire/extra -pire_extra_HEADERS = \ - extra/capture.h \ - extra/count.h \ - extra/glyphs.h -endif - -pire_scannersdir = $(includedir)/pire/scanners -pire_scanners_HEADERS = \ - scanners/common.h \ + partition.h \ + pire.h \ + re_lexer.h \ + re_parser.h \ + run.h \ + static_assert.h \ + vbitset.h + +if ENABLE_EXTRA +pire_extradir = $(includedir)/pire/extra +pire_extra_HEADERS = \ + extra/capture.h \ + extra/count.h \ + extra/glyphs.h +endif + +pire_scannersdir = $(includedir)/pire/scanners +pire_scanners_HEADERS = \ + scanners/common.h \ scanners/half_final.h \ - scanners/multi.h \ - scanners/slow.h \ - scanners/simple.h \ - scanners/loaded.h \ - scanners/pair.h - -pire_stubdir = $(includedir)/pire/stub -pire_stub_HEADERS = \ - stub/stl.h \ - stub/defaults.h \ - stub/singleton.h \ - stub/saveload.h \ - stub/lexical_cast.h - -bin_PROGRAMS = pire_inline - -pire_inline_SOURCES = inline.lpp stub/hacks.h stub/memstreams.h -pire_inline_LDADD = libpire.la - -BUILT_SOURCES = re_parser.h re_parser.cpp -CLEANFILES = re_parser.h re_parser.cpp - -AM_YFLAGS = -d - + scanners/multi.h \ + scanners/slow.h \ + scanners/simple.h \ + scanners/loaded.h \ + scanners/pair.h + +pire_stubdir = $(includedir)/pire/stub +pire_stub_HEADERS = \ + stub/stl.h \ + stub/defaults.h \ + stub/singleton.h \ + stub/saveload.h \ + stub/lexical_cast.h + +bin_PROGRAMS = pire_inline + +pire_inline_SOURCES = inline.lpp stub/hacks.h stub/memstreams.h +pire_inline_LDADD = libpire.la + +BUILT_SOURCES = re_parser.h re_parser.cpp +CLEANFILES = re_parser.h re_parser.cpp + +AM_YFLAGS = -d + diff --git a/contrib/libs/pire/pire/align.h b/contrib/libs/pire/pire/align.h index fea084b598..c1941b7120 100644 --- a/contrib/libs/pire/pire/align.h +++ b/contrib/libs/pire/pire/align.h @@ -1,103 +1,103 @@ -/* - * align.h -- functions for positioning streams and memory pointers - * to word boundaries - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * align.h -- functions for positioning streams and memory pointers + * to word boundaries * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_ALIGN_H -#define PIRE_ALIGN_H - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_ALIGN_H +#define PIRE_ALIGN_H + #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/saveload.h> -#include "platform.h" - -namespace Pire { - - namespace Impl { - - template<class T> - inline T AlignUp(T t, size_t bound) - { - return (T) (((size_t) t + (bound-1)) & ~(bound-1)); - } - - template<class T> - inline T AlignDown(T t, size_t bound) - { - return (T) ((size_t) t & ~(bound-1)); - } - - inline void AlignSave(yostream* s, size_t size) - { - size_t tail = AlignUp(size, sizeof(size_t)) - size; - if (tail) { - static const char buf[sizeof(MaxSizeWord)] = {0}; - SavePodArray(s, buf, tail); - } - } - - inline void AlignLoad(yistream* s, size_t size) - { - size_t tail = AlignUp(size, sizeof(size_t)) - size; - if (tail) { - char buf[sizeof(MaxSizeWord)]; - LoadPodArray(s, buf, tail); - } - } - - template<class T> - inline void AlignedSaveArray(yostream* s, const T* array, size_t count) - { - SavePodArray(s, array, count); - AlignSave(s, sizeof(*array) * count); - } - - template<class T> - inline void AlignedLoadArray(yistream* s, T* array, size_t count) - { - LoadPodArray(s, array, count); - AlignLoad(s, sizeof(*array) * count); - } - - template<class T> - inline bool IsAligned(T t, size_t bound) - { - return ((size_t) t & (bound-1)) == 0; - } - - inline const void* AlignPtr(const size_t*& p, size_t& size) - { - if (!IsAligned(p, sizeof(size_t))) { - const size_t* next = AlignUp(p, sizeof(size_t)); - if (next > p+size) - throw Error("EOF reached in NPire::Impl::align"); - size -= (next - p); - p = next; - } - return (const void*) p; - } - - } - -} - -#endif +#include "platform.h" + +namespace Pire { + + namespace Impl { + + template<class T> + inline T AlignUp(T t, size_t bound) + { + return (T) (((size_t) t + (bound-1)) & ~(bound-1)); + } + + template<class T> + inline T AlignDown(T t, size_t bound) + { + return (T) ((size_t) t & ~(bound-1)); + } + + inline void AlignSave(yostream* s, size_t size) + { + size_t tail = AlignUp(size, sizeof(size_t)) - size; + if (tail) { + static const char buf[sizeof(MaxSizeWord)] = {0}; + SavePodArray(s, buf, tail); + } + } + + inline void AlignLoad(yistream* s, size_t size) + { + size_t tail = AlignUp(size, sizeof(size_t)) - size; + if (tail) { + char buf[sizeof(MaxSizeWord)]; + LoadPodArray(s, buf, tail); + } + } + + template<class T> + inline void AlignedSaveArray(yostream* s, const T* array, size_t count) + { + SavePodArray(s, array, count); + AlignSave(s, sizeof(*array) * count); + } + + template<class T> + inline void AlignedLoadArray(yistream* s, T* array, size_t count) + { + LoadPodArray(s, array, count); + AlignLoad(s, sizeof(*array) * count); + } + + template<class T> + inline bool IsAligned(T t, size_t bound) + { + return ((size_t) t & (bound-1)) == 0; + } + + inline const void* AlignPtr(const size_t*& p, size_t& size) + { + if (!IsAligned(p, sizeof(size_t))) { + const size_t* next = AlignUp(p, sizeof(size_t)); + if (next > p+size) + throw Error("EOF reached in NPire::Impl::align"); + size -= (next - p); + p = next; + } + return (const void*) p; + } + + } + +} + +#endif diff --git a/contrib/libs/pire/pire/any.h b/contrib/libs/pire/pire/any.h index 4646d25781..f1c8ab9ab9 100644 --- a/contrib/libs/pire/pire/any.h +++ b/contrib/libs/pire/pire/any.h @@ -1,131 +1,131 @@ -/* - * any.h -- a wrapper capable of holding a value of arbitrary type. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * any.h -- a wrapper capable of holding a value of arbitrary type. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_ANY_H -#define PIRE_ANY_H - - -#include <typeinfo> - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_ANY_H +#define PIRE_ANY_H + + +#include <typeinfo> + #include <contrib/libs/pire/pire/stub/stl.h> - -namespace Pire { - -class Any { - -public: + +namespace Pire { + +class Any { + +public: Any() = default; - - Any(const Any& any) - { - if (any.h) - h = any.h->Duplicate(); - } - - Any& operator= (Any any) - { - any.Swap(*this); - return *this; - } - - template <class T> - Any(const T& t) - : h(new Holder<T>(t)) - { - } - - bool Empty() const { - return !h; - } - template <class T> - bool IsA() const { - return h && h->IsA(typeid(T)); - } - - template <class T> - T& As() - { - if (h && IsA<T>()) - return *reinterpret_cast<T*>(h->Ptr()); - else - throw Pire::Error("type mismatch"); - } - - template <class T> - const T& As() const - { - if (h && IsA<T>()) - return *reinterpret_cast<const T*>(h->Ptr()); - else - throw Pire::Error("type mismatch"); - } - + + Any(const Any& any) + { + if (any.h) + h = any.h->Duplicate(); + } + + Any& operator= (Any any) + { + any.Swap(*this); + return *this; + } + + template <class T> + Any(const T& t) + : h(new Holder<T>(t)) + { + } + + bool Empty() const { + return !h; + } + template <class T> + bool IsA() const { + return h && h->IsA(typeid(T)); + } + + template <class T> + T& As() + { + if (h && IsA<T>()) + return *reinterpret_cast<T*>(h->Ptr()); + else + throw Pire::Error("type mismatch"); + } + + template <class T> + const T& As() const + { + if (h && IsA<T>()) + return *reinterpret_cast<const T*>(h->Ptr()); + else + throw Pire::Error("type mismatch"); + } + void Swap(Any& a) noexcept { - DoSwap(h, a.h); - } - -private: - - struct AbstractHolder { - virtual ~AbstractHolder() { - } + DoSwap(h, a.h); + } + +private: + + struct AbstractHolder { + virtual ~AbstractHolder() { + } virtual THolder<AbstractHolder> Duplicate() const = 0; - virtual bool IsA(const std::type_info& id) const = 0; - virtual void* Ptr() = 0; - virtual const void* Ptr() const = 0; - }; - - template <class T> - struct Holder: public AbstractHolder { - Holder(T t) - : d(t) - { - } + virtual bool IsA(const std::type_info& id) const = 0; + virtual void* Ptr() = 0; + virtual const void* Ptr() const = 0; + }; + + template <class T> + struct Holder: public AbstractHolder { + Holder(T t) + : d(t) + { + } THolder<AbstractHolder> Duplicate() const { return THolder<AbstractHolder>(new Holder<T>(d)); - } - bool IsA(const std::type_info& id) const { - return id == typeid(T); - } - void* Ptr() { - return &d; - } - const void* Ptr() const { - return &d; - } - private: - T d; - }; - + } + bool IsA(const std::type_info& id) const { + return id == typeid(T); + } + void* Ptr() { + return &d; + } + const void* Ptr() const { + return &d; + } + private: + T d; + }; + THolder<AbstractHolder> h; -}; - -} - -namespace std { - inline void swap(Pire::Any& a, Pire::Any& b) { - a.Swap(b); - } -} - -#endif +}; + +} + +namespace std { + inline void swap(Pire::Any& a, Pire::Any& b) { + a.Swap(b); + } +} + +#endif diff --git a/contrib/libs/pire/pire/classes.cpp b/contrib/libs/pire/pire/classes.cpp index bbf021737d..d928d76866 100644 --- a/contrib/libs/pire/pire/classes.cpp +++ b/contrib/libs/pire/pire/classes.cpp @@ -1,152 +1,152 @@ -/* - * classes.cpp -- implementation for Pire::CharClasses feature. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * classes.cpp -- implementation for Pire::CharClasses feature. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/singleton.h> #include <contrib/libs/pire/pire/stub/noncopyable.h> #include <contrib/libs/pire/pire/stub/utf8.h> -#include "re_lexer.h" - -namespace Pire { - -namespace { - - class CharClassesTable: private NonCopyable { - private: - class CharClass { - public: - CharClass() {} - explicit CharClass(wchar32 ch) { m_bounds.push_back(ymake_pair(ch, ch)); } - CharClass(wchar32 lower, wchar32 upper) { m_bounds.push_back(ymake_pair(lower, upper)); } - - CharClass& operator |= (const CharClass& cc) - { - std::copy(cc.m_bounds.begin(), cc.m_bounds.end(), std::back_inserter(m_bounds)); - return *this; - } - - CharClass operator | (const CharClass& cc) const - { - CharClass r(*this); - r |= cc; - return r; - } - +#include "re_lexer.h" + +namespace Pire { + +namespace { + + class CharClassesTable: private NonCopyable { + private: + class CharClass { + public: + CharClass() {} + explicit CharClass(wchar32 ch) { m_bounds.push_back(ymake_pair(ch, ch)); } + CharClass(wchar32 lower, wchar32 upper) { m_bounds.push_back(ymake_pair(lower, upper)); } + + CharClass& operator |= (const CharClass& cc) + { + std::copy(cc.m_bounds.begin(), cc.m_bounds.end(), std::back_inserter(m_bounds)); + return *this; + } + + CharClass operator | (const CharClass& cc) const + { + CharClass r(*this); + r |= cc; + return r; + } + TSet<wchar32> ToSet() const - { + { TSet<wchar32> ret; for (auto&& bound : m_bounds) for (wchar32 c = bound.first; c <= bound.second; ++c) - ret.insert(c); - return ret; - } - - private: + ret.insert(c); + return ret; + } + + private: TVector<ypair<wchar32, wchar32> > m_bounds; - }; - - public: - bool Has(wchar32 wc) const - { - return (m_classes.find(to_lower(wc & ~ControlMask)) != m_classes.end()); - } - + }; + + public: + bool Has(wchar32 wc) const + { + return (m_classes.find(to_lower(wc & ~ControlMask)) != m_classes.end()); + } + TSet<wchar32> Get(wchar32 wc) const - { + { auto it = m_classes.find(to_lower(wc & ~ControlMask)); - if (it == m_classes.end()) - throw Error("Unknown character class"); - return it->second.ToSet(); - } - - CharClassesTable() - { - m_classes['l'] = CharClass('A', 'Z') | CharClass('a', 'z'); - m_classes['c'] - = CharClass(0x0410, 0x044F) // Russian capital A to Russan capital YA, Russian small A to Russian small YA - | CharClass(0x0401) // Russian capital Yo - | CharClass(0x0451) // Russian small Yo - ; - - m_classes['w'] = m_classes['l'] | m_classes['c']; - m_classes['d'] = CharClass('0', '9'); - m_classes['s'] - = CharClass(' ') | CharClass('\t') | CharClass('\r') | CharClass('\n') - | CharClass(0x00A0) // Non-breaking space - ; - - // A few special classes which do not have any negation - m_classes['n'] = CharClass('\n'); - m_classes['r'] = CharClass('\r'); - m_classes['t'] = CharClass('\t'); - } - + if (it == m_classes.end()) + throw Error("Unknown character class"); + return it->second.ToSet(); + } + + CharClassesTable() + { + m_classes['l'] = CharClass('A', 'Z') | CharClass('a', 'z'); + m_classes['c'] + = CharClass(0x0410, 0x044F) // Russian capital A to Russan capital YA, Russian small A to Russian small YA + | CharClass(0x0401) // Russian capital Yo + | CharClass(0x0451) // Russian small Yo + ; + + m_classes['w'] = m_classes['l'] | m_classes['c']; + m_classes['d'] = CharClass('0', '9'); + m_classes['s'] + = CharClass(' ') | CharClass('\t') | CharClass('\r') | CharClass('\n') + | CharClass(0x00A0) // Non-breaking space + ; + + // A few special classes which do not have any negation + m_classes['n'] = CharClass('\n'); + m_classes['r'] = CharClass('\r'); + m_classes['t'] = CharClass('\t'); + } + TMap<wchar32, CharClass> m_classes; - }; - - class CharClassesImpl: public Feature { - public: - CharClassesImpl(): m_table(Singleton<CharClassesTable>()) {} - int Priority() const { return 10; } - - void Alter(Term& t) - { - if (t.Value().IsA<Term::CharacterRange>()) { - const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>(); - typedef Term::CharacterRange::first_type CharSet; - const CharSet& old = range.first; - CharSet altered; - bool pos = false; - bool neg = false; + }; + + class CharClassesImpl: public Feature { + public: + CharClassesImpl(): m_table(Singleton<CharClassesTable>()) {} + int Priority() const { return 10; } + + void Alter(Term& t) + { + if (t.Value().IsA<Term::CharacterRange>()) { + const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>(); + typedef Term::CharacterRange::first_type CharSet; + const CharSet& old = range.first; + CharSet altered; + bool pos = false; + bool neg = false; for (auto&& i : old) if (i.size() == 1 && (i[0] & ControlMask) == Control && m_table->Has(i[0])) { if (is_upper(i[0] & ~ControlMask)) - neg = true; - else - pos = true; - + neg = true; + else + pos = true; + TSet<wchar32> klass = m_table->Get(i[0]); for (auto&& j : klass) altered.insert(Term::String(1, j)); - } else + } else altered.insert(i); - - if (neg && (pos || range.second)) - Error("Positive and negative character ranges mixed"); - t = Term(t.Type(), Term::CharacterRange(altered, neg || range.second)); - } - } - - private: - CharClassesTable* m_table; - }; - -} - -namespace Features { + + if (neg && (pos || range.second)) + Error("Positive and negative character ranges mixed"); + t = Term(t.Type(), Term::CharacterRange(altered, neg || range.second)); + } + } + + private: + CharClassesTable* m_table; + }; + +} + +namespace Features { Feature::Ptr CharClasses() { return Feature::Ptr(new CharClassesImpl); } -} - -} - +} + +} + diff --git a/contrib/libs/pire/pire/defs.h b/contrib/libs/pire/pire/defs.h index 19d785d7d7..c1e7780ef9 100644 --- a/contrib/libs/pire/pire/defs.h +++ b/contrib/libs/pire/pire/defs.h @@ -1,112 +1,112 @@ -/* - * defs.h -- common Pire definitions. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_DEFS_H -#define PIRE_DEFS_H - -#ifndef PIRE_NO_CONFIG -#include <pire/config.h> -#endif -#include <stdlib.h> - -#if defined(_MSC_VER) -#define PIRE_HAVE_DECLSPEC_ALIGN -#else -#define PIRE_HAVE_ALIGNAS -#endif - -#define PIRE_HAVE_LAMBDAS - -namespace Pire { - -#ifdef PIRE_DEBUG -# define PIRE_IFDEBUG(x) x -#else -# define PIRE_IFDEBUG(x) -#endif - -#ifdef PIRE_CHECKED -# define PIRE_IF_CHECKED(e) e -#else -# define PIRE_IF_CHECKED(e) -#endif - - - typedef unsigned short Char; - - namespace SpecialChar { - enum { - Epsilon = 257, - BeginMark = 258, - EndMark = 259, - - // Actual size of input alphabet - MaxCharUnaligned = 260, - - // Size of letter transition tables, must be a multiple of the machine word size - MaxChar = (MaxCharUnaligned + (sizeof(void*)-1)) & ~(sizeof(void*)-1) - }; - } - - using namespace SpecialChar; - - namespace Impl { -#ifndef PIRE_WORDS_BIGENDIAN - inline size_t ToLittleEndian(size_t val) { return val; } -#else - template<unsigned N> - inline size_t SwapBytes(size_t val) - { - static const size_t Mask = (1 << (N/2)) - 1; - return ((SwapBytes<N/2>(val) & Mask) << (N/2)) | SwapBytes<N/2>(val >> (N/2)); - } - - template<> - inline size_t SwapBytes<8>(size_t val) { return val & 0xFF; } - - inline size_t ToLittleEndian(size_t val) { return SwapBytes<sizeof(val)*8>(val); } -#endif - - struct Struct { void* p; }; - } -} - -#ifndef PIRE_ALIGNED_DECL -# if defined(PIRE_HAVE_ALIGNAS) -# define PIRE_ALIGNED_DECL(x) alignas(::Pire::Impl::Struct) static const char x[] -# elif defined(PIRE_HAVE_ATTR_ALIGNED) -# define PIRE_ALIGNED_DECL(x) static const char x[] __attribute__((aligned(sizeof(void*)))) -# elif defined(PIRE_HAVE_DECLSPEC_ALIGN) -# define PIRE_ALIGNED_DECL(x) __declspec(align(8)) static const char x[] -# endif -#endif - -#ifndef PIRE_LITERAL -# if defined(PIRE_HAVE_LAMBDAS) -# define PIRE_LITERAL(data) ([]() -> const char* { PIRE_ALIGNED_DECL(__pire_regexp__) = data; return __pire_regexp__; })() -# elif defined(PIRE_HAVE_SCOPED_EXPR) -# define PIRE_LITERAL(data) ({ PIRE_ALIGNED_DECL(__pire_regexp__) = data; __pire_regexp__; }) -# endif -#endif - -#endif +/* + * defs.h -- common Pire definitions. + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_DEFS_H +#define PIRE_DEFS_H + +#ifndef PIRE_NO_CONFIG +#include <pire/config.h> +#endif +#include <stdlib.h> + +#if defined(_MSC_VER) +#define PIRE_HAVE_DECLSPEC_ALIGN +#else +#define PIRE_HAVE_ALIGNAS +#endif + +#define PIRE_HAVE_LAMBDAS + +namespace Pire { + +#ifdef PIRE_DEBUG +# define PIRE_IFDEBUG(x) x +#else +# define PIRE_IFDEBUG(x) +#endif + +#ifdef PIRE_CHECKED +# define PIRE_IF_CHECKED(e) e +#else +# define PIRE_IF_CHECKED(e) +#endif + + + typedef unsigned short Char; + + namespace SpecialChar { + enum { + Epsilon = 257, + BeginMark = 258, + EndMark = 259, + + // Actual size of input alphabet + MaxCharUnaligned = 260, + + // Size of letter transition tables, must be a multiple of the machine word size + MaxChar = (MaxCharUnaligned + (sizeof(void*)-1)) & ~(sizeof(void*)-1) + }; + } + + using namespace SpecialChar; + + namespace Impl { +#ifndef PIRE_WORDS_BIGENDIAN + inline size_t ToLittleEndian(size_t val) { return val; } +#else + template<unsigned N> + inline size_t SwapBytes(size_t val) + { + static const size_t Mask = (1 << (N/2)) - 1; + return ((SwapBytes<N/2>(val) & Mask) << (N/2)) | SwapBytes<N/2>(val >> (N/2)); + } + + template<> + inline size_t SwapBytes<8>(size_t val) { return val & 0xFF; } + + inline size_t ToLittleEndian(size_t val) { return SwapBytes<sizeof(val)*8>(val); } +#endif + + struct Struct { void* p; }; + } +} + +#ifndef PIRE_ALIGNED_DECL +# if defined(PIRE_HAVE_ALIGNAS) +# define PIRE_ALIGNED_DECL(x) alignas(::Pire::Impl::Struct) static const char x[] +# elif defined(PIRE_HAVE_ATTR_ALIGNED) +# define PIRE_ALIGNED_DECL(x) static const char x[] __attribute__((aligned(sizeof(void*)))) +# elif defined(PIRE_HAVE_DECLSPEC_ALIGN) +# define PIRE_ALIGNED_DECL(x) __declspec(align(8)) static const char x[] +# endif +#endif + +#ifndef PIRE_LITERAL +# if defined(PIRE_HAVE_LAMBDAS) +# define PIRE_LITERAL(data) ([]() -> const char* { PIRE_ALIGNED_DECL(__pire_regexp__) = data; return __pire_regexp__; })() +# elif defined(PIRE_HAVE_SCOPED_EXPR) +# define PIRE_LITERAL(data) ({ PIRE_ALIGNED_DECL(__pire_regexp__) = data; __pire_regexp__; }) +# endif +#endif + +#endif diff --git a/contrib/libs/pire/pire/determine.h b/contrib/libs/pire/pire/determine.h index fb48fdd0b3..ddadfa1c75 100644 --- a/contrib/libs/pire/pire/determine.h +++ b/contrib/libs/pire/pire/determine.h @@ -1,145 +1,145 @@ -/* - * determine.h -- the FSM determination routine. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * determine.h -- the FSM determination routine. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_DETERMINE_H -#define PIRE_DETERMINE_H - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_DETERMINE_H +#define PIRE_DETERMINE_H + #include <contrib/libs/pire/pire/stub/stl.h> -#include "partition.h" - -namespace Pire { - namespace Impl { - - /** - * An interface of a determination task. - * You don't have to derive from this class; it is just a start point template. - */ - class DetermineTask { - private: - struct ImplementationSpecific1; - struct ImplementationSpecific2; - - public: - /// A type representing a new state (may be a set of old states, a pair of them, etc...) - typedef ImplementationSpecific1 State; - - /// A type of letter equivalence classes table. - typedef Partition<char, ImplementationSpecific2> LettersTbl; - - /// A container used for storing map of states to thier indices. +#include "partition.h" + +namespace Pire { + namespace Impl { + + /** + * An interface of a determination task. + * You don't have to derive from this class; it is just a start point template. + */ + class DetermineTask { + private: + struct ImplementationSpecific1; + struct ImplementationSpecific2; + + public: + /// A type representing a new state (may be a set of old states, a pair of them, etc...) + typedef ImplementationSpecific1 State; + + /// A type of letter equivalence classes table. + typedef Partition<char, ImplementationSpecific2> LettersTbl; + + /// A container used for storing map of states to thier indices. typedef TMap<State, size_t> InvStates; - - /// Should return used letters' partition. - const LettersTbl& Letters() const; - - /// Should return initial state (surprise!) - State Initial() const; - - /// Should calculate next state, given the current state and a letter. - State Next(State state, Char letter) const; - - /// Should return true iff the state need to be processed. - bool IsRequired(const State& /*state*/) const { return true; } - - /// Called when the set of new states is closed. + + /// Should return used letters' partition. + const LettersTbl& Letters() const; + + /// Should return initial state (surprise!) + State Initial() const; + + /// Should calculate next state, given the current state and a letter. + State Next(State state, Char letter) const; + + /// Should return true iff the state need to be processed. + bool IsRequired(const State& /*state*/) const { return true; } + + /// Called when the set of new states is closed. void AcceptStates(const TVector<State>& newstates); - - /// Called for each transition from one new state to another. - void Connect(size_t from, size_t to, Char letter); - - typedef bool Result; - Result Success() { return true; } - Result Failure() { return false; } - }; - - /** - * A helper function for FSM determining and all determine-like algorithms - * like scanners' agglutination. - * - * Given an indirectly specified automaton (through Task::Initial() and Task::Next() - * functions, see above), performs a breadth-first traversal, finding and enumerating - * all effectively reachable states. Then passes all found states and transitions - * between them back to the task. - * - * Initial state is always placed at zero position. - * - * Please note that the function does not take care of any payload (including final flags); - * it is the task's responsibility to agglutinate them properly. - * - * Returns task.Succeed() if everything was done; task.Failure() if maximum limit of state count was reached. - */ - template<class Task> - typename Task::Result Determine(Task& task, size_t maxSize) - { - typedef typename Task::State State; - typedef typename Task::InvStates InvStates; + + /// Called for each transition from one new state to another. + void Connect(size_t from, size_t to, Char letter); + + typedef bool Result; + Result Success() { return true; } + Result Failure() { return false; } + }; + + /** + * A helper function for FSM determining and all determine-like algorithms + * like scanners' agglutination. + * + * Given an indirectly specified automaton (through Task::Initial() and Task::Next() + * functions, see above), performs a breadth-first traversal, finding and enumerating + * all effectively reachable states. Then passes all found states and transitions + * between them back to the task. + * + * Initial state is always placed at zero position. + * + * Please note that the function does not take care of any payload (including final flags); + * it is the task's responsibility to agglutinate them properly. + * + * Returns task.Succeed() if everything was done; task.Failure() if maximum limit of state count was reached. + */ + template<class Task> + typename Task::Result Determine(Task& task, size_t maxSize) + { + typedef typename Task::State State; + typedef typename Task::InvStates InvStates; typedef TDeque< TVector<size_t> > TransitionTable; - + TVector<State> states; - InvStates invstates; - TransitionTable transitions; + InvStates invstates; + TransitionTable transitions; TVector<size_t> stateIndices; - - states.push_back(task.Initial()); - invstates.insert(typename InvStates::value_type(states[0], 0)); - - for (size_t stateIdx = 0; stateIdx < states.size(); ++stateIdx) { - if (!task.IsRequired(states[stateIdx])) - continue; - TransitionTable::value_type row(task.Letters().Size()); + + states.push_back(task.Initial()); + invstates.insert(typename InvStates::value_type(states[0], 0)); + + for (size_t stateIdx = 0; stateIdx < states.size(); ++stateIdx) { + if (!task.IsRequired(states[stateIdx])) + continue; + TransitionTable::value_type row(task.Letters().Size()); for (auto&& letter : task.Letters()) { State newState = task.Next(states[stateIdx], letter.first); auto i = invstates.find(newState); - if (i == invstates.end()) { - if (!maxSize--) - return task.Failure(); - i = invstates.insert(typename InvStates::value_type(newState, states.size())).first; - states.push_back(newState); - } + if (i == invstates.end()) { + if (!maxSize--) + return task.Failure(); + i = invstates.insert(typename InvStates::value_type(newState, states.size())).first; + states.push_back(newState); + } row[letter.second.first] = i->second; - } - transitions.push_back(row); - stateIndices.push_back(stateIdx); - } - + } + transitions.push_back(row); + stateIndices.push_back(stateIdx); + } + TVector<Char> invletters(task.Letters().Size()); for (auto&& letter : task.Letters()) invletters[letter.second.first] = letter.first; - - task.AcceptStates(states); - size_t from = 0; - for (TransitionTable::iterator i = transitions.begin(), ie = transitions.end(); i != ie; ++i, ++from) { + + task.AcceptStates(states); + size_t from = 0; + for (TransitionTable::iterator i = transitions.begin(), ie = transitions.end(); i != ie; ++i, ++from) { TVector<Char>::iterator l = invletters.begin(); - for (TransitionTable::value_type::iterator j = i->begin(), je = i->end(); j != je; ++j, ++l) - task.Connect(stateIndices[from], *j, *l); - } - return task.Success(); - } + for (TransitionTable::value_type::iterator j = i->begin(), je = i->end(); j != je; ++j, ++l) + task.Connect(stateIndices[from], *j, *l); + } + return task.Success(); + } // Faster transition table representation for determined FSM typedef TVector<size_t> DeterminedTransitions; - } -} - -#endif + } +} + +#endif diff --git a/contrib/libs/pire/pire/easy.cpp b/contrib/libs/pire/pire/easy.cpp index bcb56c693b..61e4384fab 100644 --- a/contrib/libs/pire/pire/easy.cpp +++ b/contrib/libs/pire/pire/easy.cpp @@ -1,33 +1,33 @@ -/* - * easy.cpp -- static variables for Pire Easy facilities. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * easy.cpp -- static variables for Pire Easy facilities. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - -#include "easy.h" - -namespace Pire { - -const Option<const Encoding&> UTF8(&Pire::Encodings::Utf8); -const Option<const Encoding&> LATIN1(&Pire::Encodings::Latin1); - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + +#include "easy.h" + +namespace Pire { + +const Option<const Encoding&> UTF8(&Pire::Encodings::Utf8); +const Option<const Encoding&> LATIN1(&Pire::Encodings::Latin1); + const Option<Feature::Ptr> I(&Pire::Features::CaseInsensitive); const Option<Feature::Ptr> ANDNOT(&Pire::Features::AndNotSupport); - -} + +} diff --git a/contrib/libs/pire/pire/easy.h b/contrib/libs/pire/pire/easy.h index c70e965353..a784252c5f 100644 --- a/contrib/libs/pire/pire/easy.h +++ b/contrib/libs/pire/pire/easy.h @@ -1,249 +1,249 @@ -/* - * easy.h -- Pire Easy facilities. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * easy.h -- Pire Easy facilities. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -/** - * For those who never reads documentation, does not need any mysterious features - * there is a fast and easy way to start using Pire. - * - * Just type: - * - * Pire::Regexp sc("pattern of (my regexp)*", Pire::UTF8 | Pire::I); - * if (sc.Matches("pattern of my regexp")) - * std::cout << "Hooray!" << std::endl; - * - * Or, to go more crazy: - * - * if ("pattern of my regexp" ==~ sc) - * std::cout << "What a perversion..." << std::endl; - * - * Scanner's constructor takes a pattern and a "bitwise ORed" combination of "flags". - * Available "flags" are: - * I - case insensitivity; - * ANDNOT - support for additional operations (& and ~) inside the pattern; - * UTF8 - treat pattern input sequence as UTF-8 (surprise!) - * LATIN1 - guess what? - * - * (In fact, those are not "flags" and not "bitwise ORed". See code for details.) - */ - -#ifndef PIRE_EASY_H_INCLUDED -#define PIRE_EASY_H_INCLUDED - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +/** + * For those who never reads documentation, does not need any mysterious features + * there is a fast and easy way to start using Pire. + * + * Just type: + * + * Pire::Regexp sc("pattern of (my regexp)*", Pire::UTF8 | Pire::I); + * if (sc.Matches("pattern of my regexp")) + * std::cout << "Hooray!" << std::endl; + * + * Or, to go more crazy: + * + * if ("pattern of my regexp" ==~ sc) + * std::cout << "What a perversion..." << std::endl; + * + * Scanner's constructor takes a pattern and a "bitwise ORed" combination of "flags". + * Available "flags" are: + * I - case insensitivity; + * ANDNOT - support for additional operations (& and ~) inside the pattern; + * UTF8 - treat pattern input sequence as UTF-8 (surprise!) + * LATIN1 - guess what? + * + * (In fact, those are not "flags" and not "bitwise ORed". See code for details.) + */ + +#ifndef PIRE_EASY_H_INCLUDED +#define PIRE_EASY_H_INCLUDED + #include <iterator> #include <contrib/libs/pire/pire/stub/stl.h> -#include "pire.h" -#include "vbitset.h" - -namespace Pire { - -template<class Arg> class Option; - -class Options { -public: - Options(): m_encoding(&Pire::Encodings::Latin1()) {} - ~Options() { Clear(); } - - void Add(const Pire::Encoding& encoding) { m_encoding = &encoding; } +#include "pire.h" +#include "vbitset.h" + +namespace Pire { + +template<class Arg> class Option; + +class Options { +public: + Options(): m_encoding(&Pire::Encodings::Latin1()) {} + ~Options() { Clear(); } + + void Add(const Pire::Encoding& encoding) { m_encoding = &encoding; } void Add(Feature::Ptr&& feature) { m_features.push_back(std::move(feature)); } - - struct Proxy { - Options* const o; - /*implicit*/ Proxy(Options* opts): o(opts) {} - }; - operator Proxy() { return Proxy(this); } - - Options(Options& o): m_encoding(o.m_encoding) { m_features.swap(o.m_features); } + + struct Proxy { + Options* const o; + /*implicit*/ Proxy(Options* opts): o(opts) {} + }; + operator Proxy() { return Proxy(this); } + + Options(Options& o): m_encoding(o.m_encoding) { m_features.swap(o.m_features); } Options& operator = (Options& o) { m_encoding = o.m_encoding; m_features = std::move(o.m_features); o.Clear(); return *this; } - - Options(Proxy p): m_encoding(p.o->m_encoding) { m_features.swap(p.o->m_features); } + + Options(Proxy p): m_encoding(p.o->m_encoding) { m_features.swap(p.o->m_features); } Options& operator = (Proxy p) { m_encoding = p.o->m_encoding; m_features = std::move(p.o->m_features); p.o->Clear(); return *this; } - - void Apply(Lexer& lexer) - { - lexer.SetEncoding(*m_encoding); + + void Apply(Lexer& lexer) + { + lexer.SetEncoding(*m_encoding); for (auto&& i : m_features) { lexer.AddFeature(i); i = 0; - } - m_features.clear(); - } - - template<class ArgT> - /*implicit*/ Options(const Option<ArgT>& option); - - const Pire::Encoding& Encoding() const { return *m_encoding; } - -private: - const Pire::Encoding* m_encoding; + } + m_features.clear(); + } + + template<class ArgT> + /*implicit*/ Options(const Option<ArgT>& option); + + const Pire::Encoding& Encoding() const { return *m_encoding; } + +private: + const Pire::Encoding* m_encoding; TVector<Feature::Ptr> m_features; - - void Clear() - { - m_features.clear(); - } -}; - -template<class Arg> -class Option { -public: - typedef Arg (*Ctor)(); - - Option(Ctor ctor): m_ctor(ctor) {} - - friend Options operator | (Options::Proxy options, const Option<Arg>& self) - { - Options ret(options); - ret.Add((*self.m_ctor)()); - return ret; - } - - template<class Arg2> - friend Options operator | (const Option<Arg2>& a, const Option<Arg>& b) - { - return Options() | a | b; - } - -private: - Ctor m_ctor; -}; - - -extern const Option<const Encoding&> UTF8; -extern const Option<const Encoding&> LATIN1; - + + void Clear() + { + m_features.clear(); + } +}; + +template<class Arg> +class Option { +public: + typedef Arg (*Ctor)(); + + Option(Ctor ctor): m_ctor(ctor) {} + + friend Options operator | (Options::Proxy options, const Option<Arg>& self) + { + Options ret(options); + ret.Add((*self.m_ctor)()); + return ret; + } + + template<class Arg2> + friend Options operator | (const Option<Arg2>& a, const Option<Arg>& b) + { + return Options() | a | b; + } + +private: + Ctor m_ctor; +}; + + +extern const Option<const Encoding&> UTF8; +extern const Option<const Encoding&> LATIN1; + extern const Option<Feature::Ptr> I; extern const Option<Feature::Ptr> ANDNOT; - - -class Regexp { -public: - template<class Pattern> - explicit Regexp(Pattern pattern, Options options = Options()) - { - Init(PatternBounds(pattern), options); - } - - template<class Pattern, class Arg> - Regexp(Pattern pattern, Option<Arg> option) - { - Init(PatternBounds(pattern), Options() | option); - } - - explicit Regexp(Scanner sc): m_scanner(sc) {} - explicit Regexp(SlowScanner ssc): m_slow(ssc) {} - + + +class Regexp { +public: + template<class Pattern> + explicit Regexp(Pattern pattern, Options options = Options()) + { + Init(PatternBounds(pattern), options); + } + + template<class Pattern, class Arg> + Regexp(Pattern pattern, Option<Arg> option) + { + Init(PatternBounds(pattern), Options() | option); + } + + explicit Regexp(Scanner sc): m_scanner(sc) {} + explicit Regexp(SlowScanner ssc): m_slow(ssc) {} + bool Matches(TStringBuf buf) const - { - if (!m_scanner.Empty()) + { + if (!m_scanner.Empty()) return Runner(m_scanner).Begin().Run(buf).End(); - else + else return Runner(m_slow).Begin().Run(buf).End(); - } + } bool Matches(const char* begin, const char* end) const - { + { return Matches(TStringBuf(begin, end)); - } - - /// A helper class allowing '==~' operator for regexps - class MatchProxy { - public: - MatchProxy(const Regexp& re): m_re(&re) {} - friend bool operator == (const char* str, const MatchProxy& re) { return re.m_re->Matches(str); } - friend bool operator == (const ystring& str, const MatchProxy& re) { return re.m_re->Matches(str); } - - private: - const Regexp* m_re; - }; - MatchProxy operator ~() const { return MatchProxy(*this); } - -private: - Scanner m_scanner; - SlowScanner m_slow; - - ypair<const char*, const char*> PatternBounds(const ystring& pattern) - { - static const char c = 0; - return pattern.empty() ? ymake_pair(&c, &c) : ymake_pair(pattern.c_str(), pattern.c_str() + pattern.size()); - } - - ypair<const char*, const char*> PatternBounds(const char* pattern) - { - return ymake_pair(pattern, pattern + strlen(pattern)); - } - - void Init(ypair<const char*, const char*> rawPattern, Options options) - { + } + + /// A helper class allowing '==~' operator for regexps + class MatchProxy { + public: + MatchProxy(const Regexp& re): m_re(&re) {} + friend bool operator == (const char* str, const MatchProxy& re) { return re.m_re->Matches(str); } + friend bool operator == (const ystring& str, const MatchProxy& re) { return re.m_re->Matches(str); } + + private: + const Regexp* m_re; + }; + MatchProxy operator ~() const { return MatchProxy(*this); } + +private: + Scanner m_scanner; + SlowScanner m_slow; + + ypair<const char*, const char*> PatternBounds(const ystring& pattern) + { + static const char c = 0; + return pattern.empty() ? ymake_pair(&c, &c) : ymake_pair(pattern.c_str(), pattern.c_str() + pattern.size()); + } + + ypair<const char*, const char*> PatternBounds(const char* pattern) + { + return ymake_pair(pattern, pattern + strlen(pattern)); + } + + void Init(ypair<const char*, const char*> rawPattern, Options options) + { TVector<wchar32> pattern; - options.Encoding().FromLocal(rawPattern.first, rawPattern.second, std::back_inserter(pattern)); - - Lexer lexer(pattern); - options.Apply(lexer); - Fsm fsm = lexer.Parse(); - - if (!BeginsWithCircumflex(fsm)) - fsm.PrependAnything(); - fsm.AppendAnything(); - - if (fsm.Determine()) - m_scanner = fsm.Compile<Scanner>(); - else - m_slow = fsm.Compile<SlowScanner>(); - } - - static bool BeginsWithCircumflex(const Fsm& fsm) - { - typedef Fsm::StatesSet Set; + options.Encoding().FromLocal(rawPattern.first, rawPattern.second, std::back_inserter(pattern)); + + Lexer lexer(pattern); + options.Apply(lexer); + Fsm fsm = lexer.Parse(); + + if (!BeginsWithCircumflex(fsm)) + fsm.PrependAnything(); + fsm.AppendAnything(); + + if (fsm.Determine()) + m_scanner = fsm.Compile<Scanner>(); + else + m_slow = fsm.Compile<SlowScanner>(); + } + + static bool BeginsWithCircumflex(const Fsm& fsm) + { + typedef Fsm::StatesSet Set; TDeque<size_t> queue; - BitSet handled(fsm.Size()); - - queue.push_back(fsm.Initial()); - handled.Set(fsm.Initial()); - - while (!queue.empty()) { - Set s = fsm.Destinations(queue.front(), SpecialChar::Epsilon); + BitSet handled(fsm.Size()); + + queue.push_back(fsm.Initial()); + handled.Set(fsm.Initial()); + + while (!queue.empty()) { + Set s = fsm.Destinations(queue.front(), SpecialChar::Epsilon); for (auto&& i : s) { if (!handled.Test(i)) { handled.Set(i); queue.push_back(i); - } - } - + } + } + TSet<Char> lets = fsm.OutgoingLetters(queue.front()); - lets.erase(SpecialChar::Epsilon); - lets.erase(SpecialChar::BeginMark); - if (!lets.empty()) - return false; - - queue.pop_front(); - } - - return true; - } -}; - -}; - -#endif + lets.erase(SpecialChar::Epsilon); + lets.erase(SpecialChar::BeginMark); + if (!lets.empty()) + return false; + + queue.pop_front(); + } + + return true; + } +}; + +}; + +#endif diff --git a/contrib/libs/pire/pire/encoding.cpp b/contrib/libs/pire/pire/encoding.cpp index 842e2b534d..37ea1225bb 100644 --- a/contrib/libs/pire/pire/encoding.cpp +++ b/contrib/libs/pire/pire/encoding.cpp @@ -1,134 +1,134 @@ -/* - * encoding.cpp -- implementation of the encodings shipped with Pire. +/* + * encoding.cpp -- implementation of the encodings shipped with Pire. + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include <stdexcept> + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include <stdexcept> #include <util/charset/utf8.h> -#include <utility> +#include <utility> #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/stub/utf8.h> #include <contrib/libs/pire/pire/stub/singleton.h> -#include "encoding.h" -#include "fsm.h" - - -namespace Pire { - -namespace { - - class Latin1: public Encoding { - public: +#include "encoding.h" +#include "fsm.h" + + +namespace Pire { + +namespace { + + class Latin1: public Encoding { + public: Latin1() : Encoding() {} - wchar32 FromLocal(const char*& begin, const char* end) const - { - if (begin == end) - throw Error("EOF reached in Pire::Latin1::fromLocal()"); - else if (static_cast<unsigned char>(*begin) >= 0x80) - throw Error("Pire::Latin1::fromLocal(): wrong character encountered (>=0x80)"); - else - return (wchar32) *begin++; - } - - ystring ToLocal(wchar32 ch) const - { - if (ch < 0x80) - return ystring(1, (char) ch); - else - return ystring(); - } - - void AppendDot(Fsm& fsm) const { fsm.AppendDot(); } - }; - - namespace UtfRanges { - - static const size_t MaxLen = 4; + wchar32 FromLocal(const char*& begin, const char* end) const + { + if (begin == end) + throw Error("EOF reached in Pire::Latin1::fromLocal()"); + else if (static_cast<unsigned char>(*begin) >= 0x80) + throw Error("Pire::Latin1::fromLocal(): wrong character encountered (>=0x80)"); + else + return (wchar32) *begin++; + } + + ystring ToLocal(wchar32 ch) const + { + if (ch < 0x80) + return ystring(1, (char) ch); + else + return ystring(); + } + + void AppendDot(Fsm& fsm) const { fsm.AppendDot(); } + }; + + namespace UtfRanges { + + static const size_t MaxLen = 4; static const size_t First[MaxLen][2] = { {0x00, 0x80}, {0xC0, 0xE0}, {0xE0, 0xF0}, {0xF0, 0xF8} - }; + }; static const size_t Next[2] = {0x80, 0xC0}; - } + } + - - class Utf8: public Encoding { - public: + class Utf8: public Encoding { + public: Utf8() : Encoding() {} - wchar32 FromLocal(const char*& begin, const char* end) const - { - wchar32 rune; - size_t len; + wchar32 FromLocal(const char*& begin, const char* end) const + { + wchar32 rune; + size_t len; if (SafeReadUTF8Char(rune, len, reinterpret_cast<const unsigned char*>(begin), reinterpret_cast<const unsigned char*>(end)) != RECODE_OK) - throw Error("Error reading UTF8 sequence"); - begin += len; - return rune; - } - - ystring ToLocal(wchar32 c) const - { + throw Error("Error reading UTF8 sequence"); + begin += len; + return rune; + } + + ystring ToLocal(wchar32 c) const + { ystring ret(UTF8RuneLenByUCS(c), ' '); - size_t len; - unsigned char* p = (unsigned char*) &*ret.begin(); + size_t len; + unsigned char* p = (unsigned char*) &*ret.begin(); if (SafeWriteUTF8Char(c, len, p, p + ret.size()) != RECODE_OK) Y_ASSERT(!"Pire::UTF8::toLocal(): Internal error"); - return ret; - } - - void AppendDot(Fsm& fsm) const - { - size_t last = fsm.Resize(fsm.Size() + UtfRanges::MaxLen); - for (size_t i = 0; i < UtfRanges::MaxLen; ++i) + return ret; + } + + void AppendDot(Fsm& fsm) const + { + size_t last = fsm.Resize(fsm.Size() + UtfRanges::MaxLen); + for (size_t i = 0; i < UtfRanges::MaxLen; ++i) for (size_t letter = UtfRanges::First[i][0]; letter < UtfRanges::First[i][1]; ++letter) - fsm.ConnectFinal(fsm.Size() - i - 1, letter); - for (size_t i = 0; i < UtfRanges::MaxLen - 1; ++i) + fsm.ConnectFinal(fsm.Size() - i - 1, letter); + for (size_t i = 0; i < UtfRanges::MaxLen - 1; ++i) for (size_t letter = UtfRanges::Next[0]; letter < UtfRanges::Next[1]; ++letter) - fsm.Connect(last + i, last + i + 1, letter); - fsm.ClearFinal(); - fsm.SetFinal(fsm.Size() - 1, true); - fsm.SetIsDetermined(false); - } - }; -} - -namespace Encodings { - + fsm.Connect(last + i, last + i + 1, letter); + fsm.ClearFinal(); + fsm.SetFinal(fsm.Size() - 1, true); + fsm.SetIsDetermined(false); + } + }; +} + +namespace Encodings { + const Encoding& Utf8() { static const Pire::Utf8 utf8; return utf8; } - + const Encoding& Latin1() { static const Pire::Latin1 latin1; return latin1; } -} - -} +} + +} diff --git a/contrib/libs/pire/pire/encoding.h b/contrib/libs/pire/pire/encoding.h index b2c8bb9b41..b4117afa45 100644 --- a/contrib/libs/pire/pire/encoding.h +++ b/contrib/libs/pire/pire/encoding.h @@ -1,71 +1,71 @@ -/* - * encoding.h -- the interface of Encoding. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * encoding.h -- the interface of Encoding. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_ENCODING_H -#define PIRE_ENCODING_H - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_ENCODING_H +#define PIRE_ENCODING_H + + #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/stub/stl.h> - -namespace Pire { - -class Fsm; - -class Encoding { -public: - virtual ~Encoding() {} - - /// Should read bytes from @p begin and return the corresponding Unicode - /// character, advancing @p begin. - virtual wchar32 FromLocal(const char*& begin, const char* end) const = 0; - - /// Opposite to FromLocal(), transforms given Unicode character into - /// the string in the encoding. - virtual ystring ToLocal(wchar32 c) const = 0; - - /// Given the FSM, should append the representation of a dot in the ecoding - /// to that FSM. - virtual void AppendDot(Fsm&) const = 0; - - template<class OutputIter> - OutputIter FromLocal(const char* begin, const char* end, OutputIter iter) const - { - while (begin != end) { - *iter = FromLocal(begin, end); - ++iter; - } - return iter; - } -}; - -namespace Encodings { - const Encoding& Latin1(); - const Encoding& Utf8(); - -}; - - -}; - -#endif + +namespace Pire { + +class Fsm; + +class Encoding { +public: + virtual ~Encoding() {} + + /// Should read bytes from @p begin and return the corresponding Unicode + /// character, advancing @p begin. + virtual wchar32 FromLocal(const char*& begin, const char* end) const = 0; + + /// Opposite to FromLocal(), transforms given Unicode character into + /// the string in the encoding. + virtual ystring ToLocal(wchar32 c) const = 0; + + /// Given the FSM, should append the representation of a dot in the ecoding + /// to that FSM. + virtual void AppendDot(Fsm&) const = 0; + + template<class OutputIter> + OutputIter FromLocal(const char* begin, const char* end, OutputIter iter) const + { + while (begin != end) { + *iter = FromLocal(begin, end); + ++iter; + } + return iter; + } +}; + +namespace Encodings { + const Encoding& Latin1(); + const Encoding& Utf8(); + +}; + + +}; + +#endif diff --git a/contrib/libs/pire/pire/extra.h b/contrib/libs/pire/pire/extra.h index 2e4358acdd..373607838d 100644 --- a/contrib/libs/pire/pire/extra.h +++ b/contrib/libs/pire/pire/extra.h @@ -1,33 +1,33 @@ -/* - * extra.h -- a single include file, which enables additional features, - * unnecessary for major part of users. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * extra.h -- a single include file, which enables additional features, + * unnecessary for major part of users. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_EXTRA_H -#define PIRE_EXTRA_H - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_EXTRA_H +#define PIRE_EXTRA_H + + #include <contrib/libs/pire/pire/extra/capture.h> #include <contrib/libs/pire/pire/extra/count.h> #include <contrib/libs/pire/pire/extra/glyphs.h> - -#endif + +#endif diff --git a/contrib/libs/pire/pire/extra/capture.cpp b/contrib/libs/pire/pire/extra/capture.cpp index fb4cdf6d81..ea9e287f00 100644 --- a/contrib/libs/pire/pire/extra/capture.cpp +++ b/contrib/libs/pire/pire/extra/capture.cpp @@ -1,48 +1,48 @@ -/* - * capture.cpp -- a helper for compiling CapturingScanner - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * capture.cpp -- a helper for compiling CapturingScanner * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include <stdexcept> - -#include "capture.h" + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include <stdexcept> -namespace Pire { - -namespace { - class CaptureImpl: public Feature { - public: - CaptureImpl(size_t pos) - : State(0) - , Pos(pos) - , Level(0) +#include "capture.h" + +namespace Pire { + +namespace { + class CaptureImpl: public Feature { + public: + CaptureImpl(size_t pos) + : State(0) + , Pos(pos) + , Level(0) , StateRepetition(NoRepetition) - {} - + {} + bool Accepts(wchar32 c) const { return c == '(' || c == '+' || c == '*' || c == '?' || c == '{'; } - Term Lex() - { + Term Lex() + { wchar32 c = GetChar(); if (!Accepts(c)) - Error("How did we get here?!.."); + Error("How did we get here?!.."); if (c != '(') { wchar32 next = PeekChar(); if (next == '?') { @@ -53,13 +53,13 @@ namespace { StateRepetition = GreedyRepetition; } else if (State == 0 && Pos > 1) - --Pos; - else if (State == 0 && Pos == 1) { - State = 1; - Level = 0; - } else if (State == 1) { - ++Level; - } + --Pos; + else if (State == 0 && Pos == 1) { + State = 1; + Level = 0; + } else if (State == 1) { + ++Level; + } if (c == '(') return Term(TokenTypes::Open); else if (c == '+') @@ -72,24 +72,24 @@ namespace { UngetChar(c); return Term(0); } - } - - void Parenthesized(Fsm& fsm) - { + } + + void Parenthesized(Fsm& fsm) + { if (StateRepetition != NoRepetition) { bool greedy = (StateRepetition == GreedyRepetition); SetRepetitionMark(fsm, greedy); StateRepetition = NoRepetition; } else if (State == 1 && Level == 0) { - SetCaptureMark(fsm); - State = 2; - } else if (State == 1 && Level > 0) - --Level; - } - private: - unsigned State; - size_t Pos; - size_t Level; + SetCaptureMark(fsm); + State = 2; + } else if (State == 1 && Level > 0) + --Level; + } + private: + unsigned State; + size_t Pos; + size_t Level; RepetitionTypes StateRepetition; void SetRepetitionMark(Fsm& fsm, bool greedy) @@ -108,28 +108,28 @@ namespace { fsm.SetIsDetermined(false); } - void SetCaptureMark(Fsm& fsm) - { - fsm.Resize(fsm.Size() + 2); - fsm.Connect(fsm.Size() - 2, fsm.Initial()); - fsm.ConnectFinal(fsm.Size() - 1); - - fsm.SetOutput(fsm.Size() - 2, fsm.Initial(), CapturingScanner::BeginCapture); - for (size_t state = 0; state < fsm.Size() - 2; ++state) - if (fsm.IsFinal(state)) - fsm.SetOutput(state, fsm.Size() - 1, CapturingScanner::EndCapture); - - fsm.SetInitial(fsm.Size() - 2); - fsm.ClearFinal(); - fsm.SetFinal(fsm.Size() - 1, true); - fsm.SetIsDetermined(false); - } - - void FinishBuild() {} - }; -} - -namespace Features { + void SetCaptureMark(Fsm& fsm) + { + fsm.Resize(fsm.Size() + 2); + fsm.Connect(fsm.Size() - 2, fsm.Initial()); + fsm.ConnectFinal(fsm.Size() - 1); + + fsm.SetOutput(fsm.Size() - 2, fsm.Initial(), CapturingScanner::BeginCapture); + for (size_t state = 0; state < fsm.Size() - 2; ++state) + if (fsm.IsFinal(state)) + fsm.SetOutput(state, fsm.Size() - 1, CapturingScanner::EndCapture); + + fsm.SetInitial(fsm.Size() - 2); + fsm.ClearFinal(); + fsm.SetFinal(fsm.Size() - 1, true); + fsm.SetIsDetermined(false); + } + + void FinishBuild() {} + }; +} + +namespace Features { Feature::Ptr Capture(size_t pos) { return Feature::Ptr(new CaptureImpl(pos)); } -}; -} +}; +} diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h index 8399914a67..1c7ada9b56 100644 --- a/contrib/libs/pire/pire/extra/capture.h +++ b/contrib/libs/pire/pire/extra/capture.h @@ -1,30 +1,30 @@ -/* - * capture.h -- definition of CapturingScanner - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * capture.h -- definition of CapturingScanner * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_EXTRA_CAPTURE_H -#define PIRE_EXTRA_CAPTURE_H - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_EXTRA_CAPTURE_H +#define PIRE_EXTRA_CAPTURE_H + + #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/scanners/loaded.h> #include <contrib/libs/pire/pire/scanners/multi.h> @@ -32,77 +32,77 @@ #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/re_lexer.h> #include <contrib/libs/pire/pire/run.h> - + #include <array> -namespace Pire { - -/** -* A capturing scanner. -* Requires source FSM to be deterministic, matches input string -* against a single regexp (taking O(strlen(str)) time) and -* captures a substring between a single pair of parentheses. -* -* Requires regexp pattern to satisfy certain conditions -* (I still do not know exactly what they are :) ) -*/ -class CapturingScanner: public LoadedScanner { -public: - enum { - NoAction = 0, - BeginCapture = 1, - EndCapture = 2, - - FinalFlag = 1 - }; - - class State { - public: - bool Captured() const { return (m_begin != npos) && (m_end != npos); } - size_t Begin() const { return m_begin; } - size_t End() const { return m_end; } - private: - static const size_t npos = static_cast<size_t>(-1); - size_t m_state; - size_t m_begin; - size_t m_end; - size_t m_counter; - friend class CapturingScanner; - -#ifdef PIRE_DEBUG - friend yostream& operator << (yostream& s, const State& state) - { - s << state.m_state; - if (state.m_begin != State::npos || state.m_end != npos) { - s << " ["; - if (state.m_begin != State::npos) - s << 'b'; - if (state.m_end != State::npos) - s << 'e'; - s << "]"; - } - return s; - } -#endif - }; - - void Initialize(State& state) const - { - state.m_state = m.initial; - state.m_begin = state.m_end = State::npos; - state.m_counter = 0; - } - - void TakeAction(State& s, Action a) const - { +namespace Pire { + +/** +* A capturing scanner. +* Requires source FSM to be deterministic, matches input string +* against a single regexp (taking O(strlen(str)) time) and +* captures a substring between a single pair of parentheses. +* +* Requires regexp pattern to satisfy certain conditions +* (I still do not know exactly what they are :) ) +*/ +class CapturingScanner: public LoadedScanner { +public: + enum { + NoAction = 0, + BeginCapture = 1, + EndCapture = 2, + + FinalFlag = 1 + }; + + class State { + public: + bool Captured() const { return (m_begin != npos) && (m_end != npos); } + size_t Begin() const { return m_begin; } + size_t End() const { return m_end; } + private: + static const size_t npos = static_cast<size_t>(-1); + size_t m_state; + size_t m_begin; + size_t m_end; + size_t m_counter; + friend class CapturingScanner; + +#ifdef PIRE_DEBUG + friend yostream& operator << (yostream& s, const State& state) + { + s << state.m_state; + if (state.m_begin != State::npos || state.m_end != npos) { + s << " ["; + if (state.m_begin != State::npos) + s << 'b'; + if (state.m_end != State::npos) + s << 'e'; + s << "]"; + } + return s; + } +#endif + }; + + void Initialize(State& state) const + { + state.m_state = m.initial; + state.m_begin = state.m_end = State::npos; + state.m_counter = 0; + } + + void TakeAction(State& s, Action a) const + { if ((a & BeginCapture) && !s.Captured()) s.m_begin = s.m_counter - 1; else if (a & EndCapture) { if (s.m_end == State::npos) s.m_end = s.m_counter - 1; } - } - + } + Char Translate(Char ch) const { return m_letters[static_cast<size_t>(ch)]; @@ -117,47 +117,47 @@ public: return x.action; } - Action Next(State& s, Char c) const - { + Action Next(State& s, Char c) const + { return NextTranslated(s, Translate(c)); - } - - Action Next(const State& current, State& n, Char c) const - { - n = current; - return Next(n, c); - } - - bool CanStop(const State& s) const - { - return Final(s); - } - - bool Final(const State& s) const { return m_tags[(reinterpret_cast<Transition*>(s.m_state) - m_jumps) / m.lettersCount] & FinalFlag; } - - bool Dead(const State&) const { return false; } - - CapturingScanner() {} - CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {} + } + + Action Next(const State& current, State& n, Char c) const + { + n = current; + return Next(n, c); + } + + bool CanStop(const State& s) const + { + return Final(s); + } + + bool Final(const State& s) const { return m_tags[(reinterpret_cast<Transition*>(s.m_state) - m_jumps) / m.lettersCount] & FinalFlag; } + + bool Dead(const State&) const { return false; } + + CapturingScanner() {} + CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {} explicit CapturingScanner(Fsm& fsm, size_t distance = 0) - { + { if (distance) { fsm = CreateApproxFsm(fsm, distance); } - fsm.Canonize(); - Init(fsm.Size(), fsm.Letters(), fsm.Initial()); - BuildScanner(fsm, *this); - } - - void Swap(CapturingScanner& s) { LoadedScanner::Swap(s); } - CapturingScanner& operator = (const CapturingScanner& s) { CapturingScanner(s).Swap(*this); return *this; } - - size_t StateIndex(const State& s) const { return StateIdx(s.m_state); } - -private: - - friend void BuildScanner<CapturingScanner>(const Fsm&, CapturingScanner&); -}; + fsm.Canonize(); + Init(fsm.Size(), fsm.Letters(), fsm.Initial()); + BuildScanner(fsm, *this); + } + + void Swap(CapturingScanner& s) { LoadedScanner::Swap(s); } + CapturingScanner& operator = (const CapturingScanner& s) { CapturingScanner(s).Swap(*this); return *this; } + + size_t StateIndex(const State& s) const { return StateIdx(s.m_state); } + +private: + + friend void BuildScanner<CapturingScanner>(const Fsm&, CapturingScanner&); +}; enum RepetitionTypes { // They are sorted by their priorities NonGreedyRepetition, @@ -582,11 +582,11 @@ public: } }; -namespace Features { +namespace Features { Feature::Ptr Capture(size_t pos); -} - -} - - -#endif +} + +} + + +#endif diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp index 468ff61d92..f79dba506c 100644 --- a/contrib/libs/pire/pire/extra/count.cpp +++ b/contrib/libs/pire/pire/extra/count.cpp @@ -1,26 +1,26 @@ -/* - * count.cpp -- CountingScanner compiling routine +/* + * count.cpp -- CountingScanner compiling routine + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + #include "count.h" #include <contrib/libs/pire/pire/fsm.h> @@ -31,8 +31,8 @@ #include <contrib/libs/pire/pire/stub/stl.h> #include <tuple> - -namespace Pire { + +namespace Pire { namespace Impl { @@ -740,103 +740,103 @@ void CountingFsm::SwapTaskOutputs(CountingFsmTask& task) { } -namespace { - Pire::Fsm FsmForDot() { Pire::Fsm f; f.AppendDot(); return f; } - Pire::Fsm FsmForChar(Pire::Char c) { Pire::Fsm f; f.AppendSpecial(c); return f; } -} - -CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep) -{ - Fsm res = re; - res.Surround(); - Fsm sep_re = ((sep & ~res) /* | Fsm()*/) + re; - sep_re.Determine(); - - Fsm dup = sep_re; - for (size_t i = 0; i < dup.Size(); ++i) - dup.SetTag(i, Matched); - size_t oldsize = sep_re.Size(); - sep_re.Import(dup); - for (Fsm::FinalTable::const_iterator i = sep_re.Finals().begin(), ie = sep_re.Finals().end(); i != ie; ++i) - if (*i < oldsize) - sep_re.Connect(*i, oldsize + *i); - - sep_re |= (FsmForDot() | FsmForChar(Pire::BeginMark) | FsmForChar(Pire::EndMark)); - - // Make a full Cartesian product of two sep_res - sep_re.Determine(); - sep_re.Unsparse(); +namespace { + Pire::Fsm FsmForDot() { Pire::Fsm f; f.AppendDot(); return f; } + Pire::Fsm FsmForChar(Pire::Char c) { Pire::Fsm f; f.AppendSpecial(c); return f; } +} + +CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep) +{ + Fsm res = re; + res.Surround(); + Fsm sep_re = ((sep & ~res) /* | Fsm()*/) + re; + sep_re.Determine(); + + Fsm dup = sep_re; + for (size_t i = 0; i < dup.Size(); ++i) + dup.SetTag(i, Matched); + size_t oldsize = sep_re.Size(); + sep_re.Import(dup); + for (Fsm::FinalTable::const_iterator i = sep_re.Finals().begin(), ie = sep_re.Finals().end(); i != ie; ++i) + if (*i < oldsize) + sep_re.Connect(*i, oldsize + *i); + + sep_re |= (FsmForDot() | FsmForChar(Pire::BeginMark) | FsmForChar(Pire::EndMark)); + + // Make a full Cartesian product of two sep_res + sep_re.Determine(); + sep_re.Unsparse(); TSet<size_t> dead = sep_re.DeadStates(); - - PIRE_IFDEBUG(Cdbg << "=== Original FSM ===" << Endl << sep_re << ">>> " << sep_re.Size() << " states, dead: [" << Join(dead.begin(), dead.end(), ", ") << "]" << Endl); - - Fsm sq; - - typedef ypair<size_t, size_t> NewState; + + PIRE_IFDEBUG(Cdbg << "=== Original FSM ===" << Endl << sep_re << ">>> " << sep_re.Size() << " states, dead: [" << Join(dead.begin(), dead.end(), ", ") << "]" << Endl); + + Fsm sq; + + typedef ypair<size_t, size_t> NewState; TVector<NewState> states; TMap<NewState, size_t> invstates; - - states.push_back(NewState(sep_re.Initial(), sep_re.Initial())); - invstates.insert(ymake_pair(states.back(), states.size() - 1)); - - // TODO: this loop reminds me a general determination task... - for (size_t curstate = 0; curstate < states.size(); ++curstate) { - - unsigned long tag = sep_re.Tag(states[curstate].first); - if (tag) - sq.SetTag(curstate, tag); - sq.SetFinal(curstate, sep_re.IsFinal(states[curstate].first)); - - PIRE_IFDEBUG(Cdbg << "State " << curstate << " = (" << states[curstate].first << ", " << states[curstate].second << ")" << Endl); - for (Fsm::LettersTbl::ConstIterator lit = sep_re.Letters().Begin(), lie = sep_re.Letters().End(); lit != lie; ++lit) { - - Char letter = lit->first; - - const Fsm::StatesSet& mr = sep_re.Destinations(states[curstate].first, letter); - const Fsm::StatesSet& br = sep_re.Destinations(states[curstate].second, letter); - - if (mr.size() != 1) + + states.push_back(NewState(sep_re.Initial(), sep_re.Initial())); + invstates.insert(ymake_pair(states.back(), states.size() - 1)); + + // TODO: this loop reminds me a general determination task... + for (size_t curstate = 0; curstate < states.size(); ++curstate) { + + unsigned long tag = sep_re.Tag(states[curstate].first); + if (tag) + sq.SetTag(curstate, tag); + sq.SetFinal(curstate, sep_re.IsFinal(states[curstate].first)); + + PIRE_IFDEBUG(Cdbg << "State " << curstate << " = (" << states[curstate].first << ", " << states[curstate].second << ")" << Endl); + for (Fsm::LettersTbl::ConstIterator lit = sep_re.Letters().Begin(), lie = sep_re.Letters().End(); lit != lie; ++lit) { + + Char letter = lit->first; + + const Fsm::StatesSet& mr = sep_re.Destinations(states[curstate].first, letter); + const Fsm::StatesSet& br = sep_re.Destinations(states[curstate].second, letter); + + if (mr.size() != 1) Y_ASSERT(!"Wrong transition size for main"); - if (br.size() != 1) + if (br.size() != 1) Y_ASSERT(!"Wrong transition size for backup"); - - NewState ns(*mr.begin(), *br.begin()); + + NewState ns(*mr.begin(), *br.begin()); PIRE_IFDEBUG(NewState savedNs = ns); - unsigned long outputs = 0; - - PIRE_IFDEBUG(ystring dbgout); - if (dead.find(ns.first) != dead.end()) { - PIRE_IFDEBUG(dbgout = ((sep_re.Tag(ns.first) & Matched) ? ", ++cur" : ", max <- cur")); - outputs = DeadFlag | (sep_re.Tag(ns.first) & Matched); - ns.first = ns.second; - } - if (sep_re.IsFinal(ns.first) || (sep_re.IsFinal(ns.second) && !(sep_re.Tag(ns.first) & Matched))) - ns.second = sep_re.Initial(); - - PIRE_IFDEBUG(if (ns != savedNs) Cdbg << "Diverted transition to (" << savedNs.first << ", " << savedNs.second << ") on " << (char) letter << " to (" << ns.first << ", " << ns.second << ")" << dbgout << Endl); - + unsigned long outputs = 0; + + PIRE_IFDEBUG(ystring dbgout); + if (dead.find(ns.first) != dead.end()) { + PIRE_IFDEBUG(dbgout = ((sep_re.Tag(ns.first) & Matched) ? ", ++cur" : ", max <- cur")); + outputs = DeadFlag | (sep_re.Tag(ns.first) & Matched); + ns.first = ns.second; + } + if (sep_re.IsFinal(ns.first) || (sep_re.IsFinal(ns.second) && !(sep_re.Tag(ns.first) & Matched))) + ns.second = sep_re.Initial(); + + PIRE_IFDEBUG(if (ns != savedNs) Cdbg << "Diverted transition to (" << savedNs.first << ", " << savedNs.second << ") on " << (char) letter << " to (" << ns.first << ", " << ns.second << ")" << dbgout << Endl); + TMap<NewState, size_t>::iterator nsi = invstates.find(ns); - if (nsi == invstates.end()) { - PIRE_IFDEBUG(Cdbg << "New state " << states.size() << " = (" << ns.first << ", " << ns.second << ")" << Endl); - states.push_back(ns); - nsi = invstates.insert(ymake_pair(states.back(), states.size() - 1)).first; - sq.Resize(states.size()); - } - + if (nsi == invstates.end()) { + PIRE_IFDEBUG(Cdbg << "New state " << states.size() << " = (" << ns.first << ", " << ns.second << ")" << Endl); + states.push_back(ns); + nsi = invstates.insert(ymake_pair(states.back(), states.size() - 1)).first; + sq.Resize(states.size()); + } + for (TVector<Char>::const_iterator li = lit->second.second.begin(), le = lit->second.second.end(); li != le; ++li) - sq.Connect(curstate, nsi->second, *li); - if (outputs) - sq.SetOutput(curstate, nsi->second, outputs); - } - } - - sq.Determine(); - - PIRE_IFDEBUG(Cdbg << "=== FSM ===" << Endl << sq << Endl); - Init(sq.Size(), sq.Letters(), sq.Initial(), 1); - BuildScanner(sq, *this); -} - + sq.Connect(curstate, nsi->second, *li); + if (outputs) + sq.SetOutput(curstate, nsi->second, outputs); + } + } + + sq.Determine(); + + PIRE_IFDEBUG(Cdbg << "=== FSM ===" << Endl << sq << Endl); + Init(sq.Size(), sq.Letters(), sq.Initial(), 1); + BuildScanner(sq, *this); +} + namespace Impl { template <class AdvancedScanner> AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) { @@ -848,7 +848,7 @@ AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* if (simple) { *simple = countingFsm.Simple(); } - + const auto& determined = countingFsm.Determined(); const auto& letters = countingFsm.Letters(); @@ -877,11 +877,11 @@ NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm& } -namespace Impl { - +namespace Impl { + template<class Scanner> class CountingScannerGlueTask: public ScannerGlueCommon<Scanner> { -public: +public: using typename ScannerGlueCommon<Scanner>::State; using TAction = typename Scanner::Action; using InternalState = typename Scanner::InternalState; @@ -889,36 +889,36 @@ public: CountingScannerGlueTask(const Scanner& lhs, const Scanner& rhs) : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters)) - { - } + { + } void AcceptStates(const TVector<State>& states) - { - States = states; + { + States = states; this->SetSc(THolder<Scanner>(new Scanner)); this->Sc().Init(states.size(), this->Letters(), 0, this->Lhs().RegexpsCount() + this->Rhs().RegexpsCount()); - for (size_t i = 0; i < states.size(); ++i) + for (size_t i = 0; i < states.size(); ++i) this->Sc().SetTag(i, this->Lhs().m_tags[this->Lhs().StateIdx(states[i].first)] | (this->Rhs().m_tags[this->Rhs().StateIdx(states[i].second)] << 3)); - } + } - void Connect(size_t from, size_t to, Char letter) - { + void Connect(size_t from, size_t to, Char letter) + { this->Sc().SetJump(from, letter, to, Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount())); - } + } protected: TVector<State> States; TAction Action(const Scanner& sc, InternalState state, Char letter) const - { + { size_t state_index = sc.StateIdx(state); size_t transition_index = sc.TransitionIndex(state_index, letter); const auto& tr = sc.m_jumps[transition_index]; return tr.action; - } -}; - + } +}; + class NoGlueLimitCountingScannerGlueTask : public CountingScannerGlueTask<NoGlueLimitCountingScanner> { public: using ActionIndex = NoGlueLimitCountingScanner::ActionIndex; @@ -980,18 +980,18 @@ private: }; -} +} -CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */) -{ +CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */) +{ if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { return CountingScanner(); } static constexpr size_t DefMaxSize = 250000; Impl::CountingScannerGlueTask<CountingScanner> task(lhs, rhs); - return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); -} - + return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); +} + AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScanner& lhs, const AdvancedCountingScanner& rhs, size_t maxSize /* = 0 */) { if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { @@ -1000,7 +1000,7 @@ AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScan static constexpr size_t DefMaxSize = 250000; Impl::CountingScannerGlueTask<AdvancedCountingScanner> task(lhs, rhs); return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); -} +} NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */) { diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h index bd1526b98d..deaa4c2314 100644 --- a/contrib/libs/pire/pire/extra/count.h +++ b/contrib/libs/pire/pire/extra/count.h @@ -1,38 +1,38 @@ -/* - * count.h -- definition of the counting scanner +/* + * count.h -- definition of the counting scanner + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_EXTRA_COUNT_H -#define PIRE_EXTRA_COUNT_H - + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_EXTRA_COUNT_H +#define PIRE_EXTRA_COUNT_H + #include <contrib/libs/pire/pire/scanners/loaded.h> #include <contrib/libs/pire/pire/fsm.h> - + #include <algorithm> -namespace Pire { -class Fsm; +namespace Pire { +class Fsm; -namespace Impl { +namespace Impl { template<class T> class ScannerGlueCommon; @@ -43,8 +43,8 @@ namespace Impl { template <class AdvancedScanner> AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple); -}; - +}; + template<size_t I> class IncrementPerformer { public: @@ -110,38 +110,38 @@ public: } }; -/** - * A scanner which counts occurences of the - * given regexp separated by another regexp - * in input text. - */ +/** + * A scanner which counts occurences of the + * given regexp separated by another regexp + * in input text. + */ template<class DerivedScanner, class State> class BaseCountingScanner: public LoadedScanner { -public: - enum { - IncrementAction = 1, - ResetAction = 2, - - FinalFlag = 0, - DeadFlag = 1, - }; - - void Initialize(State& state) const - { - state.m_state = m.initial; - memset(&state.m_current, 0, sizeof(state.m_current)); - memset(&state.m_total, 0, sizeof(state.m_total)); - state.m_updatedMask = 0; - } - +public: + enum { + IncrementAction = 1, + ResetAction = 2, + + FinalFlag = 0, + DeadFlag = 1, + }; + + void Initialize(State& state) const + { + state.m_state = m.initial; + memset(&state.m_current, 0, sizeof(state.m_current)); + memset(&state.m_total, 0, sizeof(state.m_total)); + state.m_updatedMask = 0; + } + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION void TakeAction(State& s, Action a) const { static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a); } - bool CanStop(const State&) const { return false; } - + bool CanStop(const State&) const { return false; } + Char Translate(Char ch) const { return m_letters[static_cast<size_t>(ch)]; @@ -154,55 +154,55 @@ public: return x.action; } - Action Next(State& s, Char c) const - { + Action Next(State& s, Char c) const + { return NextTranslated(s, Translate(c)); - } - - Action Next(const State& current, State& n, Char c) const - { - n = current; - return Next(n, c); - } - - bool Final(const State& /*state*/) const { return false; } - - bool Dead(const State&) const { return false; } - + } + + Action Next(const State& current, State& n, Char c) const + { + n = current; + return Next(n, c); + } + + bool Final(const State& /*state*/) const { return false; } + + bool Dead(const State&) const { return false; } + using LoadedScanner::Swap; - size_t StateIndex(const State& s) const { return StateIdx(s.m_state); } - + size_t StateIndex(const State& s) const { return StateIdx(s.m_state); } + protected: - using LoadedScanner::Init; + using LoadedScanner::Init; using LoadedScanner::InternalState; - + template<size_t ActualReCount> - void PerformIncrement(State& s, Action mask) const - { - if (mask) { + void PerformIncrement(State& s, Action mask) const + { + if (mask) { IncrementPerformer<ActualReCount>::Do(s, mask); - s.m_updatedMask |= ((size_t)mask) << MAX_RE_COUNT; - } - } - + s.m_updatedMask |= ((size_t)mask) << MAX_RE_COUNT; + } + } + template<size_t ActualReCount> - void PerformReset(State& s, Action mask) const - { - mask &= s.m_updatedMask; - if (mask) { + void PerformReset(State& s, Action mask) const + { + mask &= s.m_updatedMask; + if (mask) { ResetPerformer<ActualReCount>::Do(s, mask); s.m_updatedMask &= (Action)~mask; - } - } - - void Next(InternalState& s, Char c) const - { + } + } + + void Next(InternalState& s, Char c) const + { Transition x = reinterpret_cast<const Transition*>(s)[Translate(c)]; - s += SignExtend(x.shift); - } + s += SignExtend(x.shift); + } }; - + template <size_t MAX_RE_COUNT> class CountingState { public: @@ -258,21 +258,21 @@ public: } private: - Action RemapAction(Action action) - { - if (action == (Matched | DeadFlag)) - return 1; - else if (action == DeadFlag) - return 1 << MAX_RE_COUNT; - else - return 0; - } - - friend void BuildScanner<CountingScanner>(const Fsm&, CountingScanner&); - friend class Impl::ScannerGlueCommon<CountingScanner>; + Action RemapAction(Action action) + { + if (action == (Matched | DeadFlag)) + return 1; + else if (action == DeadFlag) + return 1 << MAX_RE_COUNT; + else + return 0; + } + + friend void BuildScanner<CountingScanner>(const Fsm&, CountingScanner&); + friend class Impl::ScannerGlueCommon<CountingScanner>; friend class Impl::CountingScannerGlueTask<CountingScanner>; -}; - +}; + class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { public: using State = CountingState<MAX_RE_COUNT>; @@ -329,10 +329,10 @@ public: ++m_current[regexp_id]; m_total[regexp_id] = ymax(m_total[regexp_id], m_current[regexp_id]); } - + template<size_t I> friend class IncrementPerformer; - + template<size_t I> friend class ResetPerformer; @@ -352,7 +352,7 @@ private: s << state.m_current[i] << '/' << state.m_total[i] << ' '; return s << ')'; } -#endif +#endif }; diff --git a/contrib/libs/pire/pire/extra/glyphs.cpp b/contrib/libs/pire/pire/extra/glyphs.cpp index a14d2baa56..9bf7d1bd65 100644 --- a/contrib/libs/pire/pire/extra/glyphs.cpp +++ b/contrib/libs/pire/pire/extra/glyphs.cpp @@ -1,144 +1,144 @@ -/* - * glyphs.cpp -- implementation for the GlueSimilarGlyphs feature. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * glyphs.cpp -- implementation for the GlueSimilarGlyphs feature. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include <algorithm> -#include <map> -#include <list> -#include <set> -#include <vector> -#include <utility> - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include <algorithm> +#include <map> +#include <list> +#include <set> +#include <vector> +#include <utility> + #include <contrib/libs/pire/pire/stub/singleton.h> #include <contrib/libs/pire/pire/stub/noncopyable.h> #include <contrib/libs/pire/pire/stub/utf8.h> #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/re_lexer.h> -namespace Pire { - -namespace { - - /* - * A class providing a function which returns a character - * whose glyph resembles that of given char, if any; - * otherwise returns given char itself. - */ - class GlyphTable { - private: +namespace Pire { + +namespace { + + /* + * A class providing a function which returns a character + * whose glyph resembles that of given char, if any; + * otherwise returns given char itself. + */ + class GlyphTable { + private: TList< TVector<wchar32> > m_classes; TMap<wchar32, TVector<wchar32>*> m_map; - - struct GlyphClass { + + struct GlyphClass { TVector<wchar32>* m_class; TMap<wchar32, TVector<wchar32>*> *m_map; - - GlyphClass& operator << (wchar32 c) - { - m_class->push_back(c); - m_map->insert(ymake_pair(c, m_class)); - return *this; - } - }; - - GlyphClass Class() - { - GlyphClass cl; + + GlyphClass& operator << (wchar32 c) + { + m_class->push_back(c); + m_map->insert(ymake_pair(c, m_class)); + return *this; + } + }; + + GlyphClass Class() + { + GlyphClass cl; m_classes.push_back(TVector<wchar32>()); - cl.m_class = &m_classes.back(); - cl.m_map = &m_map; - return cl; - } - - public: - + cl.m_class = &m_classes.back(); + cl.m_map = &m_map; + return cl; + } + + public: + const TVector<wchar32>& Klass(wchar32 x) const - { + { TMap<wchar32, TVector<wchar32>*>::const_iterator i = m_map.find(x); - if (i != m_map.end()) - return *i->second; - else + if (i != m_map.end()) + return *i->second; + else return DefaultValue< TVector<wchar32> >(); - } - - GlyphTable() - { - Class() << 'A' << 0x0410; - Class() << 'B' << 0x0412; - Class() << 'C' << 0x0421; - Class() << 'E' << 0x0415 << 0x0401; - Class() << 'H' << 0x041D; - Class() << 'K' << 0x041A; - Class() << 'M' << 0x041C; - Class() << 'O' << 0x041E; - Class() << 'P' << 0x0420; - Class() << 'T' << 0x0422; - Class() << 'X' << 0x0425; - - Class() << 'a' << 0x0430; - Class() << 'c' << 0x0441; - Class() << 'e' << 0x0435 << 0x0451; - Class() << 'm' << 0x0442; - Class() << 'o' << 0x043E; - Class() << 'p' << 0x0440; - Class() << 'u' << 0x0438; - Class() << 'x' << 0x0445; - Class() << 'y' << 0x0443; - } - }; - - class GlueSimilarGlyphsImpl: public Feature { - public: - GlueSimilarGlyphsImpl(): m_table(Singleton<GlyphTable>()) {} - int Priority() const { return 9; } - - void Alter(Term& t) - { - if (t.Value().IsA<Term::CharacterRange>()) { - const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>(); - typedef Term::CharacterRange::first_type CharSet; - const CharSet& old = range.first; - CharSet altered; + } + + GlyphTable() + { + Class() << 'A' << 0x0410; + Class() << 'B' << 0x0412; + Class() << 'C' << 0x0421; + Class() << 'E' << 0x0415 << 0x0401; + Class() << 'H' << 0x041D; + Class() << 'K' << 0x041A; + Class() << 'M' << 0x041C; + Class() << 'O' << 0x041E; + Class() << 'P' << 0x0420; + Class() << 'T' << 0x0422; + Class() << 'X' << 0x0425; + + Class() << 'a' << 0x0430; + Class() << 'c' << 0x0441; + Class() << 'e' << 0x0435 << 0x0451; + Class() << 'm' << 0x0442; + Class() << 'o' << 0x043E; + Class() << 'p' << 0x0440; + Class() << 'u' << 0x0438; + Class() << 'x' << 0x0445; + Class() << 'y' << 0x0443; + } + }; + + class GlueSimilarGlyphsImpl: public Feature { + public: + GlueSimilarGlyphsImpl(): m_table(Singleton<GlyphTable>()) {} + int Priority() const { return 9; } + + void Alter(Term& t) + { + if (t.Value().IsA<Term::CharacterRange>()) { + const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>(); + typedef Term::CharacterRange::first_type CharSet; + const CharSet& old = range.first; + CharSet altered; for (auto&& i : old) { const TVector<wchar32>* klass = 0; if (i.size() == 1 && !(klass = &m_table->Klass(i[0]))->empty()) for (auto&& j : *klass) altered.insert(Term::String(1, j)); - else + else altered.insert(i); - } - - t = Term(t.Type(), Term::CharacterRange(altered, range.second)); - } - } - - private: - GlyphTable* m_table; - }; -} - -namespace Features { + } + + t = Term(t.Type(), Term::CharacterRange(altered, range.second)); + } + } + + private: + GlyphTable* m_table; + }; +} + +namespace Features { Feature::Ptr GlueSimilarGlyphs() { return Feature::Ptr(new GlueSimilarGlyphsImpl); } -} - -} - +} + +} + diff --git a/contrib/libs/pire/pire/extra/glyphs.h b/contrib/libs/pire/pire/extra/glyphs.h index 678b9e15c4..07c4276951 100644 --- a/contrib/libs/pire/pire/extra/glyphs.h +++ b/contrib/libs/pire/pire/extra/glyphs.h @@ -1,41 +1,41 @@ -/* - * glyphs.h -- declaration of the GlueSimilarGlyphs feature. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * glyphs.h -- declaration of the GlueSimilarGlyphs feature. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_EXTRA_GLYPHS_H -#define PIRE_EXTRA_GLYPHS_H - - -namespace Pire { -class Feature; -namespace Features { - - /** - * A feature which tells Pire not to distinguish latin - * and cyrillic letters having identical shapes - * (e.g. latin A and cyrillic A). - */ + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_EXTRA_GLYPHS_H +#define PIRE_EXTRA_GLYPHS_H + + +namespace Pire { +class Feature; +namespace Features { + + /** + * A feature which tells Pire not to distinguish latin + * and cyrillic letters having identical shapes + * (e.g. latin A and cyrillic A). + */ Feature::Ptr GlueSimilarGlyphs(); -} -} - -#endif +} +} + +#endif diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp index 984d708dfa..f2216b3aba 100644 --- a/contrib/libs/pire/pire/fsm.cpp +++ b/contrib/libs/pire/pire/fsm.cpp @@ -1,114 +1,114 @@ -/* - * fsm.cpp -- the implementation of the FSM class. +/* + * fsm.cpp -- the implementation of the FSM class. + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include <algorithm> -#include <functional> -#include <stdexcept> -#include <iostream> -#include <iterator> -#include <numeric> -#include <queue> -#include <utility> + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include <algorithm> +#include <functional> +#include <stdexcept> +#include <iostream> +#include <iterator> +#include <numeric> +#include <queue> +#include <utility> #include <iostream> #include <stdio.h> #include <contrib/libs/pire/pire/stub/lexical_cast.h> -#include "fsm.h" -#include "vbitset.h" -#include "partition.h" -#include "determine.h" +#include "fsm.h" +#include "vbitset.h" +#include "partition.h" +#include "determine.h" #include "minimize.h" -#include "platform.h" +#include "platform.h" + +namespace Pire { -namespace Pire { - -ystring CharDump(Char c) -{ - char buf[8]; +ystring CharDump(Char c) +{ + char buf[8]; if (c == '"') return ystring("\\\""); else if (c == '[' || c == ']' || c == '-' || c == '^') { snprintf(buf, sizeof(buf)-1, "\\\\%c", c); return ystring(buf); } else if (c >= 32 && c < 127) - return ystring(1, static_cast<char>(c)); - else if (c == '\n') + return ystring(1, static_cast<char>(c)); + else if (c == '\n') return ystring("\\\\n"); - else if (c == '\t') + else if (c == '\t') return ystring("\\\\t"); - else if (c == '\r') + else if (c == '\r') return ystring("\\\\r"); - else if (c < 256) { + else if (c < 256) { snprintf(buf, sizeof(buf)-1, "\\\\%03o", static_cast<int>(c)); - return ystring(buf); - } else if (c == Epsilon) - return ystring("<Epsilon>"); - else if (c == BeginMark) - return ystring("<Begin>"); - else if (c == EndMark) - return ystring("<End>"); - else - return ystring("<?" "?" "?>"); -} - -void Fsm::DumpState(yostream& s, size_t state) const -{ - // Fill in a 'row': Q -> exp(V) (for current state) + return ystring(buf); + } else if (c == Epsilon) + return ystring("<Epsilon>"); + else if (c == BeginMark) + return ystring("<Begin>"); + else if (c == EndMark) + return ystring("<End>"); + else + return ystring("<?" "?" "?>"); +} + +void Fsm::DumpState(yostream& s, size_t state) const +{ + // Fill in a 'row': Q -> exp(V) (for current state) TVector< ybitset<MaxChar> > row(Size()); for (auto&& transition : m_transitions[state]) for (auto&& transitionState : transition.second) { if (transitionState >= Size()) { std::cerr << "WTF?! Transition from " << state << " on letter " << transition.first << " leads to non-existing state " << transitionState << "\n"; Y_ASSERT(false); - } + } if (Letters().Contains(transition.first)) { const TVector<Char>& letters = Letters().Klass(Letters().Representative(transition.first)); for (auto&& letter : letters) row[transitionState].set(letter); - } else + } else row[transitionState].set(transition.first); - } - + } + bool statePrinted = false; - // Display each destination state + // Display each destination state for (auto rit = row.begin(), rie = row.end(); rit != rie; ++rit) { - unsigned begin = 0, end = 0; - + unsigned begin = 0, end = 0; + ystring delimiter; ystring label; - if (rit->test(Epsilon)) { + if (rit->test(Epsilon)) { label += delimiter + CharDump(Epsilon); delimiter = " "; - } - if (rit->test(BeginMark)) { + } + if (rit->test(BeginMark)) { label += delimiter + CharDump(BeginMark); delimiter = " "; - } - if (rit->test(EndMark)) { + } + if (rit->test(EndMark)) { label += delimiter + CharDump(EndMark); delimiter = " "; - } + } unsigned count = 0; for (unsigned i = 0; i < 256; ++i) if (rit->test(i)) @@ -130,13 +130,13 @@ void Fsm::DumpState(yostream& s, size_t state) const label += CharDump(begin) + "-" + (CharDump(end-1)); delimiter = " "; } - } + } label += "]"; delimiter = " "; } else if (count == 256) { label += delimiter + "."; delimiter = " "; - } + } if (!label.empty()) { if (!statePrinted) { s << " " << state << "[shape=\"" << (IsFinal(state) ? "double" : "") << "circle\",label=\"" << state; @@ -149,490 +149,490 @@ void Fsm::DumpState(yostream& s, size_t state) const statePrinted = true; } s << " " << state << " -> " << std::distance(row.begin(), rit) << "[label=\"" << label; - - // Display outputs + + // Display outputs auto oit = outputs.find(state); - if (oit != outputs.end()) { + if (oit != outputs.end()) { auto oit2 = oit->second.find(std::distance(row.begin(), rit)); - if (oit2 == oit->second.end()) - ; - else { + if (oit2 == oit->second.end()) + ; + else { TVector<int> payload; - for (unsigned i = 0; i < sizeof(oit2->second) * 8; ++i) - if (oit2->second & (1ul << i)) - payload.push_back(i); - if (!payload.empty()) + for (unsigned i = 0; i < sizeof(oit2->second) * 8; ++i) + if (oit2->second & (1ul << i)) + payload.push_back(i); + if (!payload.empty()) s << " (outputs: " << Join(payload.begin(), payload.end(), ", ") << ")"; - } - } + } + } s << "\"]\n"; - } - } + } + } if (statePrinted) s << '\n'; -} - +} + void Fsm::DumpTo(yostream& s, const ystring& name) const -{ +{ s << "digraph {\n \"initial\"[shape=\"plaintext\",label=\"" << name << "\"]\n\n"; - for (size_t state = 0; state < Size(); ++state) { - DumpState(s, state); - } + for (size_t state = 0; state < Size(); ++state) { + DumpState(s, state); + } s << "}\n\n"; -} - -yostream& operator << (yostream& s, const Fsm& fsm) { fsm.DumpTo(s); return s; } - - -namespace { - template<class Vector> void resizeVector(Vector& v, size_t s) { v.resize(s); } -} - -Fsm::Fsm(): - m_transitions(1), - initial(0), - letters(m_transitions), - m_sparsed(false), - determined(false), - isAlternative(false) -{ - m_final.insert(0); -} - -Fsm Fsm::MakeFalse() -{ - Fsm f; - f.SetFinal(0, false); - return f; -} - -Char Fsm::Translate(Char c) const -{ - if (!m_sparsed || c == Epsilon) - return c; - else - return Letters().Representative(c); -} - -bool Fsm::Connected(size_t from, size_t to, Char c) const -{ +} + +yostream& operator << (yostream& s, const Fsm& fsm) { fsm.DumpTo(s); return s; } + + +namespace { + template<class Vector> void resizeVector(Vector& v, size_t s) { v.resize(s); } +} + +Fsm::Fsm(): + m_transitions(1), + initial(0), + letters(m_transitions), + m_sparsed(false), + determined(false), + isAlternative(false) +{ + m_final.insert(0); +} + +Fsm Fsm::MakeFalse() +{ + Fsm f; + f.SetFinal(0, false); + return f; +} + +Char Fsm::Translate(Char c) const +{ + if (!m_sparsed || c == Epsilon) + return c; + else + return Letters().Representative(c); +} + +bool Fsm::Connected(size_t from, size_t to, Char c) const +{ auto it = m_transitions[from].find(Translate(c)); - return (it != m_transitions[from].end() && it->second.find(to) != it->second.end()); -} - -bool Fsm::Connected(size_t from, size_t to) const -{ + return (it != m_transitions[from].end() && it->second.find(to) != it->second.end()); +} + +bool Fsm::Connected(size_t from, size_t to) const +{ for (auto i = m_transitions[from].begin(), ie = m_transitions[from].end(); i != ie; ++i) - if (i->second.find(to) != i->second.end()) - return true; - return false; -} - -const Fsm::StatesSet& Fsm::Destinations(size_t from, Char c) const -{ + if (i->second.find(to) != i->second.end()) + return true; + return false; +} + +const Fsm::StatesSet& Fsm::Destinations(size_t from, Char c) const +{ auto i = m_transitions[from].find(Translate(c)); - return (i != m_transitions[from].end()) ? i->second : DefaultValue<StatesSet>(); -} - + return (i != m_transitions[from].end()) ? i->second : DefaultValue<StatesSet>(); +} + TSet<Char> Fsm::OutgoingLetters(size_t state) const -{ +{ TSet<Char> ret; for (auto&& i : m_transitions[state]) ret.insert(i.first); - return ret; -} - -size_t Fsm::Resize(size_t newSize) -{ - size_t ret = Size(); - m_transitions.resize(newSize); - return ret; -} - -void Fsm::Swap(Fsm& fsm) -{ - DoSwap(m_transitions, fsm.m_transitions); - DoSwap(initial, fsm.initial); - DoSwap(m_final, fsm.m_final); - DoSwap(letters, fsm.letters); - DoSwap(determined, fsm.determined); - DoSwap(outputs, fsm.outputs); - DoSwap(tags, fsm.tags); - DoSwap(isAlternative, fsm.isAlternative); -} - -void Fsm::SetFinal(size_t state, bool final) -{ - if (final) - m_final.insert(state); - else - m_final.erase(state); -} - -Fsm& Fsm::AppendDot() -{ - Resize(Size() + 1); - for (size_t letter = 0; letter != (1 << (sizeof(char)*8)); ++letter) - ConnectFinal(Size() - 1, letter); - ClearFinal(); - SetFinal(Size() - 1, true); - determined = false; - return *this; -} - -Fsm& Fsm::Append(char c) -{ - Resize(Size() + 1); - ConnectFinal(Size() - 1, static_cast<unsigned char>(c)); - ClearFinal(); - SetFinal(Size() - 1, true); - determined = false; - return *this; -} - -Fsm& Fsm::Append(const ystring& str) -{ + return ret; +} + +size_t Fsm::Resize(size_t newSize) +{ + size_t ret = Size(); + m_transitions.resize(newSize); + return ret; +} + +void Fsm::Swap(Fsm& fsm) +{ + DoSwap(m_transitions, fsm.m_transitions); + DoSwap(initial, fsm.initial); + DoSwap(m_final, fsm.m_final); + DoSwap(letters, fsm.letters); + DoSwap(determined, fsm.determined); + DoSwap(outputs, fsm.outputs); + DoSwap(tags, fsm.tags); + DoSwap(isAlternative, fsm.isAlternative); +} + +void Fsm::SetFinal(size_t state, bool final) +{ + if (final) + m_final.insert(state); + else + m_final.erase(state); +} + +Fsm& Fsm::AppendDot() +{ + Resize(Size() + 1); + for (size_t letter = 0; letter != (1 << (sizeof(char)*8)); ++letter) + ConnectFinal(Size() - 1, letter); + ClearFinal(); + SetFinal(Size() - 1, true); + determined = false; + return *this; +} + +Fsm& Fsm::Append(char c) +{ + Resize(Size() + 1); + ConnectFinal(Size() - 1, static_cast<unsigned char>(c)); + ClearFinal(); + SetFinal(Size() - 1, true); + determined = false; + return *this; +} + +Fsm& Fsm::Append(const ystring& str) +{ for (auto&& i : str) Append(i); - return *this; -} - -Fsm& Fsm::AppendSpecial(Char c) -{ - Resize(Size() + 1); - ConnectFinal(Size() - 1, c); - ClearFinal(); - SetFinal(Size() - 1, true); - determined = false; - return *this; -} - + return *this; +} + +Fsm& Fsm::AppendSpecial(Char c) +{ + Resize(Size() + 1); + ConnectFinal(Size() - 1, c); + ClearFinal(); + SetFinal(Size() - 1, true); + determined = false; + return *this; +} + Fsm& Fsm::AppendStrings(const TVector<ystring>& strings) -{ +{ for (auto&& i : strings) if (i.empty()) - throw Error("None of strings passed to appendStrings() can be empty"); - - Resize(Size() + 1); - size_t end = Size() - 1; - - // A local transitions table: (oldstate, char) -> newstate. - // Valid for all letters in given strings except final ones, - // which are always connected to the end state. - - // NB: since each FSM contains at least one state, - // state #0 cannot appear in LTRs. Thus we can use this - // criteria to test whether a transition has been created or not. - typedef ypair<size_t, char> Transition; + throw Error("None of strings passed to appendStrings() can be empty"); + + Resize(Size() + 1); + size_t end = Size() - 1; + + // A local transitions table: (oldstate, char) -> newstate. + // Valid for all letters in given strings except final ones, + // which are always connected to the end state. + + // NB: since each FSM contains at least one state, + // state #0 cannot appear in LTRs. Thus we can use this + // criteria to test whether a transition has been created or not. + typedef ypair<size_t, char> Transition; TMap<char, size_t> startLtr; TMap<Transition, size_t> ltr; - - // A presense of a transition in this set indicates that - // a that transition already points somewhere (either to end - // or somewhere else). Another attempt to create such transition - // will clear `determined flag. + + // A presense of a transition in this set indicates that + // a that transition already points somewhere (either to end + // or somewhere else). Another attempt to create such transition + // will clear `determined flag. TSet<Transition> usedTransitions; TSet<char> usedFirsts; - + for (const auto& str : strings) { - if (str.size() > 1) { - - // First letter: all previously final states are connected to the new state - size_t& firstJump = startLtr[str[0]]; - if (!firstJump) { - firstJump = Resize(Size() + 1); - ConnectFinal(firstJump, static_cast<unsigned char>(str[0])); - determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end()); - } - - // All other letters except last one - size_t state = firstJump; + if (str.size() > 1) { + + // First letter: all previously final states are connected to the new state + size_t& firstJump = startLtr[str[0]]; + if (!firstJump) { + firstJump = Resize(Size() + 1); + ConnectFinal(firstJump, static_cast<unsigned char>(str[0])); + determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end()); + } + + // All other letters except last one + size_t state = firstJump; for (auto cit = str.begin() + 1, cie = str.end() - 1; cit != cie; ++cit) { - size_t& newState = ltr[ymake_pair(state, *cit)]; - if (!newState) { - newState = Resize(Size() + 1); - Connect(state, newState, static_cast<unsigned char>(*cit)); - determined = determined && (usedTransitions.find(ymake_pair(state, *cit)) != usedTransitions.end()); - } - state = newState; - } - - // The last letter: connect the current state to end - unsigned char last = static_cast<unsigned char>(*(str.end() - 1)); - Connect(state, end, last); - determined = determined && (usedTransitions.find(ymake_pair(state, last)) != usedTransitions.end()); - - } else { - // The single letter: connect all the previously final states to end - ConnectFinal(end, static_cast<unsigned char>(str[0])); - determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end()); - } - } - - ClearFinal(); - SetFinal(end, true); - return *this; -} - -void Fsm::Import(const Fsm& rhs) -{ -// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "Importing"); -// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Left-hand side ===\n" << *this); -// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Right-hand side ===\n" << rhs); - - size_t oldsize = Resize(Size() + rhs.Size()); - + size_t& newState = ltr[ymake_pair(state, *cit)]; + if (!newState) { + newState = Resize(Size() + 1); + Connect(state, newState, static_cast<unsigned char>(*cit)); + determined = determined && (usedTransitions.find(ymake_pair(state, *cit)) != usedTransitions.end()); + } + state = newState; + } + + // The last letter: connect the current state to end + unsigned char last = static_cast<unsigned char>(*(str.end() - 1)); + Connect(state, end, last); + determined = determined && (usedTransitions.find(ymake_pair(state, last)) != usedTransitions.end()); + + } else { + // The single letter: connect all the previously final states to end + ConnectFinal(end, static_cast<unsigned char>(str[0])); + determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end()); + } + } + + ClearFinal(); + SetFinal(end, true); + return *this; +} + +void Fsm::Import(const Fsm& rhs) +{ +// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "Importing"); +// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Left-hand side ===\n" << *this); +// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Right-hand side ===\n" << rhs); + + size_t oldsize = Resize(Size() + rhs.Size()); + for (auto&& outer : m_transitions) { for (auto&& letter : letters) { auto targets = outer.find(letter.first); if (targets == outer.end()) - continue; + continue; for (auto&& character : letter.second.second) if (character != letter.first) outer.insert(ymake_pair(character, targets->second)); - } - } - + } + } + auto dest = m_transitions.begin() + oldsize; for (auto outer = rhs.m_transitions.begin(), outerEnd = rhs.m_transitions.end(); outer != outerEnd; ++outer, ++dest) { for (auto&& inner : *outer) { TSet<size_t> targets; std::transform(inner.second.begin(), inner.second.end(), std::inserter(targets, targets.begin()), - std::bind2nd(std::plus<size_t>(), oldsize)); + std::bind2nd(std::plus<size_t>(), oldsize)); dest->insert(ymake_pair(inner.first, targets)); - } - + } + for (auto&& letter : rhs.letters) { auto targets = dest->find(letter.first); if (targets == dest->end()) - continue; + continue; for (auto&& character : letter.second.second) if (character != letter.first) dest->insert(ymake_pair(character, targets->second)); - } - } - - // Import outputs + } + } + + // Import outputs for (auto&& output : rhs.outputs) { auto& dest = outputs[output.first + oldsize]; for (auto&& element : output.second) dest.insert(ymake_pair(element.first + oldsize, element.second)); - } - - // Import tags + } + + // Import tags for (auto&& tag : rhs.tags) tags.insert(ymake_pair(tag.first + oldsize, tag.second)); - - letters = LettersTbl(LettersEquality(m_transitions)); -} - -void Fsm::Connect(size_t from, size_t to, Char c /* = Epsilon */) -{ - m_transitions[from][c].insert(to); - ClearHints(); -} - -void Fsm::ConnectFinal(size_t to, Char c /* = Epsilon */) -{ + + letters = LettersTbl(LettersEquality(m_transitions)); +} + +void Fsm::Connect(size_t from, size_t to, Char c /* = Epsilon */) +{ + m_transitions[from][c].insert(to); + ClearHints(); +} + +void Fsm::ConnectFinal(size_t to, Char c /* = Epsilon */) +{ for (auto&& final : m_final) Connect(final, to, c); - ClearHints(); -} - -void Fsm::Disconnect(size_t from, size_t to, Char c) -{ + ClearHints(); +} + +void Fsm::Disconnect(size_t from, size_t to, Char c) +{ auto i = m_transitions[from].find(c); - if (i != m_transitions[from].end()) - i->second.erase(to); - ClearHints(); -} - -void Fsm::Disconnect(size_t from, size_t to) -{ + if (i != m_transitions[from].end()) + i->second.erase(to); + ClearHints(); +} + +void Fsm::Disconnect(size_t from, size_t to) +{ for (auto&& i : m_transitions[from]) i.second.erase(to); - ClearHints(); -} - -unsigned long Fsm::Output(size_t from, size_t to) const -{ + ClearHints(); +} + +unsigned long Fsm::Output(size_t from, size_t to) const +{ auto i = outputs.find(from); - if (i == outputs.end()) - return 0; + if (i == outputs.end()) + return 0; auto j = i->second.find(to); - if (j == i->second.end()) - return 0; - else - return j->second; -} - -Fsm& Fsm::operator += (const Fsm& rhs) -{ - size_t lhsSize = Size(); - Import(rhs); - - const TransitionRow& row = m_transitions[lhsSize + rhs.initial]; - + if (j == i->second.end()) + return 0; + else + return j->second; +} + +Fsm& Fsm::operator += (const Fsm& rhs) +{ + size_t lhsSize = Size(); + Import(rhs); + + const TransitionRow& row = m_transitions[lhsSize + rhs.initial]; + for (auto&& outer : row) for (auto&& inner : outer.second) ConnectFinal(inner, outer.first); - + auto out = rhs.outputs.find(rhs.initial); - if (out != rhs.outputs.end()) + if (out != rhs.outputs.end()) for (auto&& toAndOutput : out->second) { for (auto&& final : m_final) outputs[final].insert(ymake_pair(toAndOutput.first + lhsSize, toAndOutput.second)); - } - - ClearFinal(); + } + + ClearFinal(); for (auto&& letter : rhs.m_final) SetFinal(letter + lhsSize, true); - determined = false; - - ClearHints(); - PIRE_IFDEBUG(Cdbg << "=== After addition ===" << Endl << *this << Endl); - - return *this; -} - -Fsm& Fsm::operator |= (const Fsm& rhs) -{ - size_t lhsSize = Size(); - - Import(rhs); + determined = false; + + ClearHints(); + PIRE_IFDEBUG(Cdbg << "=== After addition ===" << Endl << *this << Endl); + + return *this; +} + +Fsm& Fsm::operator |= (const Fsm& rhs) +{ + size_t lhsSize = Size(); + + Import(rhs); for (auto&& final : rhs.m_final) m_final.insert(final + lhsSize); - if (!isAlternative && !rhs.isAlternative) { - Resize(Size() + 1); - Connect(Size() - 1, initial); - Connect(Size() - 1, lhsSize + rhs.initial); - initial = Size() - 1; - } else if (isAlternative && !rhs.isAlternative) { - Connect(initial, lhsSize + rhs.initial, Epsilon); - } else if (!isAlternative && rhs.isAlternative) { - Connect(lhsSize + rhs.initial, initial, Epsilon); - initial = rhs.initial + lhsSize; - } else if (isAlternative && rhs.isAlternative) { - const StatesSet& tos = rhs.Destinations(rhs.initial, Epsilon); + if (!isAlternative && !rhs.isAlternative) { + Resize(Size() + 1); + Connect(Size() - 1, initial); + Connect(Size() - 1, lhsSize + rhs.initial); + initial = Size() - 1; + } else if (isAlternative && !rhs.isAlternative) { + Connect(initial, lhsSize + rhs.initial, Epsilon); + } else if (!isAlternative && rhs.isAlternative) { + Connect(lhsSize + rhs.initial, initial, Epsilon); + initial = rhs.initial + lhsSize; + } else if (isAlternative && rhs.isAlternative) { + const StatesSet& tos = rhs.Destinations(rhs.initial, Epsilon); for (auto&& to : tos) { Connect(initial, to + lhsSize, Epsilon); Disconnect(rhs.initial + lhsSize, to + lhsSize, Epsilon); - } - } - - determined = false; - isAlternative = true; - return *this; -} - -Fsm& Fsm::operator &= (const Fsm& rhs) -{ - Fsm rhs2(rhs); - Complement(); - rhs2.Complement(); - *this |= rhs2; - Complement(); - return *this; -} - -Fsm& Fsm::Iterate() -{ - PIRE_IFDEBUG(Cdbg << "Iterating:" << Endl << *this << Endl); - Resize(Size() + 2); - - Connect(Size() - 2, Size() - 1); - Connect(Size() - 2, initial); - ConnectFinal(initial); - ConnectFinal(Size() - 1); - - ClearFinal(); - SetFinal(Size() - 1, true); - initial = Size() - 2; - - determined = false; - - PIRE_IFDEBUG(Cdbg << "Iterated:" << Endl << *this << Endl); - return *this; -} - -Fsm& Fsm::Complement() -{ - if (!Determine()) - throw Error("Regexp pattern too complicated"); - Minimize(); - Resize(Size() + 1); - for (size_t i = 0; i < Size(); ++i) - if (!IsFinal(i)) - Connect(i, Size() - 1); - ClearFinal(); - SetFinal(Size() - 1, true); - determined = false; - - return *this; -} - + } + } + + determined = false; + isAlternative = true; + return *this; +} + +Fsm& Fsm::operator &= (const Fsm& rhs) +{ + Fsm rhs2(rhs); + Complement(); + rhs2.Complement(); + *this |= rhs2; + Complement(); + return *this; +} + +Fsm& Fsm::Iterate() +{ + PIRE_IFDEBUG(Cdbg << "Iterating:" << Endl << *this << Endl); + Resize(Size() + 2); + + Connect(Size() - 2, Size() - 1); + Connect(Size() - 2, initial); + ConnectFinal(initial); + ConnectFinal(Size() - 1); + + ClearFinal(); + SetFinal(Size() - 1, true); + initial = Size() - 2; + + determined = false; + + PIRE_IFDEBUG(Cdbg << "Iterated:" << Endl << *this << Endl); + return *this; +} + +Fsm& Fsm::Complement() +{ + if (!Determine()) + throw Error("Regexp pattern too complicated"); + Minimize(); + Resize(Size() + 1); + for (size_t i = 0; i < Size(); ++i) + if (!IsFinal(i)) + Connect(i, Size() - 1); + ClearFinal(); + SetFinal(Size() - 1, true); + determined = false; + + return *this; +} + Fsm Fsm::operator *(size_t count) const +{ + Fsm ret; + while (count--) + ret += *this; + return ret; +} + +void Fsm::MakePrefix() +{ + RemoveDeadEnds(); + for (size_t i = 0; i < Size(); ++i) + if (!m_transitions[i].empty()) + m_final.insert(i); + ClearHints(); +} + +void Fsm::MakeSuffix() +{ + for (size_t i = 0; i < Size(); ++i) + if (i != initial) + Connect(initial, i); + ClearHints(); +} + +Fsm& Fsm::Reverse() { - Fsm ret; - while (count--) - ret += *this; - return ret; -} + Fsm out; + out.Resize(Size() + 1); + out.letters = Letters(); -void Fsm::MakePrefix() -{ - RemoveDeadEnds(); - for (size_t i = 0; i < Size(); ++i) - if (!m_transitions[i].empty()) - m_final.insert(i); - ClearHints(); -} - -void Fsm::MakeSuffix() -{ - for (size_t i = 0; i < Size(); ++i) - if (i != initial) - Connect(initial, i); - ClearHints(); -} - -Fsm& Fsm::Reverse() -{ - Fsm out; - out.Resize(Size() + 1); - out.letters = Letters(); - - // Invert transitions - for (size_t from = 0; from < Size(); ++from) + // Invert transitions + for (size_t from = 0; from < Size(); ++from) for (auto&& i : m_transitions[from]) for (auto&& j : i.second) out.Connect(j, from, i.first); - // Invert initial and final states + // Invert initial and final states out.m_final.clear(); - out.SetFinal(initial, true); + out.SetFinal(initial, true); for (auto i : m_final) out.Connect(Size(), i, Epsilon); - out.SetInitial(Size()); + out.SetInitial(Size()); - // Invert outputs + // Invert outputs for (auto&& i : outputs) for (auto&& j : i.second) out.SetOutput(j.first, i.first, j.second); - // Preserve tags (although thier semantics are usually heavily broken at this point) - out.tags = tags; - - // Apply - Swap(out); - return *this; -} + // Preserve tags (although thier semantics are usually heavily broken at this point) + out.tags = tags; + // Apply + Swap(out); + return *this; +} + TSet<size_t> Fsm::DeadStates() const -{ +{ TSet<size_t> res; for (int invert = 0; invert <= 1; ++invert) { @@ -649,26 +649,26 @@ TSet<size_t> Fsm::DeadStates() const digraph.Connect(j - m_transitions.begin(), *toSt, 0); } } - } - } - + } + } + TVector<bool> unchecked(Size(), true); TVector<bool> useless(Size(), true); TDeque<size_t> queue; - + // Put all final (or initial) states into queue, marking them useful for (size_t i = 0; i < Size(); ++i) if ((invert && IsFinal(i)) || (!invert && Initial() == i)) { useless[i] = false; queue.push_back(i); } - + // Do the breadth-first search, marking all states // from which already marked states are reachable while (!queue.empty()) { size_t to = queue.front(); queue.pop_front(); - + // All the states that are connected to this state in the transition matrix are useful const StatesSet& connections = (digraph.m_transitions[to])[0]; for (auto&& fr : connections) { @@ -677,310 +677,310 @@ TSet<size_t> Fsm::DeadStates() const useless[fr] = false; queue.push_back(fr); } - } + } // Now we consider this state checked unchecked[to] = false; - } - + } + for (size_t i = 0; i < Size(); ++i) { if (useless[i]) { res.insert(i); } - } - } - - return res; -} - -void Fsm::RemoveDeadEnds() -{ - PIRE_IFDEBUG(Cdbg << "Removing dead ends on:" << Endl << *this << Endl); - + } + } + + return res; +} + +void Fsm::RemoveDeadEnds() +{ + PIRE_IFDEBUG(Cdbg << "Removing dead ends on:" << Endl << *this << Endl); + TSet<size_t> dead = DeadStates(); - // Erase all useless states + // Erase all useless states for (auto&& i : dead) { PIRE_IFDEBUG(Cdbg << "Removing useless state " << i << Endl); m_transitions[i].clear(); for (auto&& j : m_transitions) for (auto&& k : j) k.second.erase(i); - } - ClearHints(); - - PIRE_IFDEBUG(Cdbg << "Result:" << Endl << *this << Endl); -} - -// This method is one step of Epsilon-connection removal algorithm. -// It merges transitions, tags, and outputs of 'to' state into 'from' state -void Fsm::MergeEpsilonConnection(size_t from, size_t to) -{ - unsigned long frEpsOutput = 0; - bool fsEpsOutputExists = false; - - // Is there an output for 'from'->'to' transition? - if (outputs.find(from) != outputs.end() && outputs[from].find(to) != outputs[from].end()) { - frEpsOutput = outputs[from][to]; - fsEpsOutputExists = true; - } - - // Merge transitions from 'to' state into transitions from 'from' state + } + ClearHints(); + + PIRE_IFDEBUG(Cdbg << "Result:" << Endl << *this << Endl); +} + +// This method is one step of Epsilon-connection removal algorithm. +// It merges transitions, tags, and outputs of 'to' state into 'from' state +void Fsm::MergeEpsilonConnection(size_t from, size_t to) +{ + unsigned long frEpsOutput = 0; + bool fsEpsOutputExists = false; + + // Is there an output for 'from'->'to' transition? + if (outputs.find(from) != outputs.end() && outputs[from].find(to) != outputs[from].end()) { + frEpsOutput = outputs[from][to]; + fsEpsOutputExists = true; + } + + // Merge transitions from 'to' state into transitions from 'from' state for (auto&& transition : m_transitions[to]) { TSet<size_t> connStates; std::copy(transition.second.begin(), transition.second.end(), std::inserter(m_transitions[from][transition.first], m_transitions[from][transition.first].end())); - - // If there is an output of the 'from'->'to' connection it has to be set to all - // new connections that were merged from 'to' state - if (fsEpsOutputExists) { - // Compute the set of states that are reachable from 'to' state + + // If there is an output of the 'from'->'to' connection it has to be set to all + // new connections that were merged from 'to' state + if (fsEpsOutputExists) { + // Compute the set of states that are reachable from 'to' state std::copy(transition.second.begin(), transition.second.end(), std::inserter(connStates, connStates.end())); - - // For each of these states add an output equal to the Epsilon-connection output + + // For each of these states add an output equal to the Epsilon-connection output for (auto&& newConnSt : connStates) { outputs[from][newConnSt] |= frEpsOutput; - } - } - } - - // Mark 'from' state final if 'to' state is final - if (IsFinal(to)) - SetFinal(from, true); - - // Combine tags + } + } + } + + // Mark 'from' state final if 'to' state is final + if (IsFinal(to)) + SetFinal(from, true); + + // Combine tags auto ti = tags.find(to); - if (ti != tags.end()) - tags[from] |= ti->second; - - // Merge all 'to' into 'from' outputs: - // outputs[from][i] |= (outputs[from][to] | outputs[to][i]) + if (ti != tags.end()) + tags[from] |= ti->second; + + // Merge all 'to' into 'from' outputs: + // outputs[from][i] |= (outputs[from][to] | outputs[to][i]) auto toOit = outputs.find(to); - if (toOit != outputs.end()) { + if (toOit != outputs.end()) { for (auto&& output : toOit->second) { outputs[from][output.first] |= (frEpsOutput | output.second); - } - } -} - -// Assuming the epsilon transitions is possible from 'from' to 'thru', -// finds all states which are Epsilon-reachable from 'thru' and connects -// them directly to 'from' with Epsilon transition having proper output. -// Updates inverse map of epsilon transitions as well. + } + } +} + +// Assuming the epsilon transitions is possible from 'from' to 'thru', +// finds all states which are Epsilon-reachable from 'thru' and connects +// them directly to 'from' with Epsilon transition having proper output. +// Updates inverse map of epsilon transitions as well. void Fsm::ShortCutEpsilon(size_t from, size_t thru, TVector< TSet<size_t> >& inveps) -{ - PIRE_IFDEBUG(Cdbg << "In Fsm::ShortCutEpsilon(" << from << ", " << thru << ")\n"); - const StatesSet& to = Destinations(thru, Epsilon); - Outputs::iterator outIt = outputs.find(from); - unsigned long fromThruOut = Output(from, thru); +{ + PIRE_IFDEBUG(Cdbg << "In Fsm::ShortCutEpsilon(" << from << ", " << thru << ")\n"); + const StatesSet& to = Destinations(thru, Epsilon); + Outputs::iterator outIt = outputs.find(from); + unsigned long fromThruOut = Output(from, thru); for (auto&& toElement : to) { PIRE_IFDEBUG(Cdbg << "Epsilon connecting " << from << " --> " << thru << " --> " << toElement << "\n"); Connect(from, toElement, Epsilon); inveps[toElement].insert(from); - if (outIt != outputs.end()) + if (outIt != outputs.end()) outIt->second[toElement] |= (fromThruOut | Output(thru, toElement)); } -} - -// Removes all Epsilon-connections by iterating though states and merging each Epsilon-connection -// effects from 'to' state into 'from' state -void Fsm::RemoveEpsilons() -{ - Unsparse(); - - // Build inverse map of epsilon transitions +} + +// Removes all Epsilon-connections by iterating though states and merging each Epsilon-connection +// effects from 'to' state into 'from' state +void Fsm::RemoveEpsilons() +{ + Unsparse(); + + // Build inverse map of epsilon transitions TVector< TSet<size_t> > inveps(Size()); // We have to use TSet<> here since we want it sorted - for (size_t from = 0; from != Size(); ++from) { - const StatesSet& tos = Destinations(from, Epsilon); + for (size_t from = 0; from != Size(); ++from) { + const StatesSet& tos = Destinations(from, Epsilon); for (auto&& to : tos) inveps[to].insert(from); - } + } - // Make a transitive closure of all epsilon transitions (Floyd-Warshall algorithm) - // (if there exists an epsilon-path between two states, epsilon-connect them directly) - for (size_t thru = 0; thru != Size(); ++thru) + // Make a transitive closure of all epsilon transitions (Floyd-Warshall algorithm) + // (if there exists an epsilon-path between two states, epsilon-connect them directly) + for (size_t thru = 0; thru != Size(); ++thru) for (auto&& from : inveps[thru]) - // inveps[thru] may alter during loop body, hence we cannot cache ivneps[thru].end() + // inveps[thru] may alter during loop body, hence we cannot cache ivneps[thru].end() if (from != thru) ShortCutEpsilon(from, thru, inveps); - PIRE_IFDEBUG(Cdbg << "=== After epsilons shortcut\n" << *this << Endl); + PIRE_IFDEBUG(Cdbg << "=== After epsilons shortcut\n" << *this << Endl); - // Iterate through all epsilon-connected state pairs, merging states together - for (size_t from = 0; from != Size(); ++from) { - const StatesSet& to = Destinations(from, Epsilon); + // Iterate through all epsilon-connected state pairs, merging states together + for (size_t from = 0; from != Size(); ++from) { + const StatesSet& to = Destinations(from, Epsilon); for (auto&& toElement : to) if (toElement != from) MergeEpsilonConnection(from, toElement); // it's a NOP if to == from, so don't waste time - } + } - PIRE_IFDEBUG(Cdbg << "=== After epsilons merged\n" << *this << Endl); + PIRE_IFDEBUG(Cdbg << "=== After epsilons merged\n" << *this << Endl); - // Drop all epsilon transitions + // Drop all epsilon transitions for (auto&& i : m_transitions) i.erase(Epsilon); - Sparse(); - ClearHints(); -} - -bool Fsm::LettersEquality::operator()(Char a, Char b) const -{ + Sparse(); + ClearHints(); +} + +bool Fsm::LettersEquality::operator()(Char a, Char b) const +{ for (auto&& outer : *m_tbl) { auto ia = outer.find(a); auto ib = outer.find(b); if (ia == outer.end() && ib == outer.end()) - continue; + continue; else if (ia == outer.end() || ib == outer.end() || ia->second != ib->second) { - return false; - } - } - return true; -} - + return false; + } + } + return true; +} + void Fsm::Sparse(bool needEpsilons /* = false */) -{ - letters = LettersTbl(LettersEquality(m_transitions)); - for (unsigned letter = 0; letter < MaxChar; ++letter) +{ + letters = LettersTbl(LettersEquality(m_transitions)); + for (unsigned letter = 0; letter < MaxChar; ++letter) if (letter != Epsilon || needEpsilons) - letters.Append(letter); - - m_sparsed = true; - PIRE_IFDEBUG(Cdbg << "Letter classes = " << letters << Endl); -} - -void Fsm::Unsparse() -{ + letters.Append(letter); + + m_sparsed = true; + PIRE_IFDEBUG(Cdbg << "Letter classes = " << letters << Endl); +} + +void Fsm::Unsparse() +{ for (auto&& letter : letters) for (auto&& i : m_transitions) for (auto&& j : letter.second.second) i[j] = i[letter.first]; - m_sparsed = false; -} - -// Returns a set of 'terminal states', which are those of the final states, -// from which a transition to themselves on any letter is possible. + m_sparsed = false; +} + +// Returns a set of 'terminal states', which are those of the final states, +// from which a transition to themselves on any letter is possible. TSet<size_t> Fsm::TerminalStates() const -{ +{ TSet<size_t> terminals; for (auto&& final : m_final) { - bool ok = true; + bool ok = true; for (auto&& letter : letters) { auto dests = m_transitions[final].find(letter.first); ok = ok && (dests != m_transitions[final].end() && dests->second.find(final) != dests->second.end()); - } - if (ok) + } + if (ok) terminals.insert(final); - } - return terminals; -} - -namespace Impl { -class FsmDetermineTask { -public: + } + return terminals; +} + +namespace Impl { +class FsmDetermineTask { +public: typedef TVector<size_t> State; - typedef Fsm::LettersTbl LettersTbl; + typedef Fsm::LettersTbl LettersTbl; typedef TMap<State, size_t> InvStates; - FsmDetermineTask(const Fsm& fsm) - : mFsm(fsm) - , mTerminals(fsm.TerminalStates()) - { - PIRE_IFDEBUG(Cdbg << "Terminal states: [" << Join(mTerminals.begin(), mTerminals.end(), ", ") << "]" << Endl); - } - const LettersTbl& Letters() const { return mFsm.letters; } - - State Initial() const { return State(1, mFsm.initial); } - bool IsRequired(const State& state) const - { + FsmDetermineTask(const Fsm& fsm) + : mFsm(fsm) + , mTerminals(fsm.TerminalStates()) + { + PIRE_IFDEBUG(Cdbg << "Terminal states: [" << Join(mTerminals.begin(), mTerminals.end(), ", ") << "]" << Endl); + } + const LettersTbl& Letters() const { return mFsm.letters; } + + State Initial() const { return State(1, mFsm.initial); } + bool IsRequired(const State& state) const + { for (auto&& i : state) if (mTerminals.find(i) != mTerminals.end()) - return false; - return true; - } - - State Next(const State& state, Char letter) const - { - State next; - next.reserve(20); + return false; + return true; + } + + State Next(const State& state, Char letter) const + { + State next; + next.reserve(20); for (auto&& from : state) { const auto& part = mFsm.Destinations(from, letter); - std::copy(part.begin(), part.end(), std::back_inserter(next)); - } - - std::sort(next.begin(), next.end()); - next.erase(std::unique(next.begin(), next.end()), next.end()); - PIRE_IFDEBUG(Cdbg << "Returning transition [" << Join(state.begin(), state.end(), ", ") << "] --" << letter - << "--> [" << Join(next.begin(), next.end(), ", ") << "]" << Endl); - return next; - } + std::copy(part.begin(), part.end(), std::back_inserter(next)); + } + + std::sort(next.begin(), next.end()); + next.erase(std::unique(next.begin(), next.end()), next.end()); + PIRE_IFDEBUG(Cdbg << "Returning transition [" << Join(state.begin(), state.end(), ", ") << "] --" << letter + << "--> [" << Join(next.begin(), next.end(), ", ") << "]" << Endl); + return next; + } void AcceptStates(const TVector<State>& states) - { - mNewFsm.Resize(states.size()); - mNewFsm.initial = 0; - mNewFsm.determined = true; - mNewFsm.letters = Letters(); - mNewFsm.m_final.clear(); - for (size_t ns = 0; ns < states.size(); ++ns) { - PIRE_IFDEBUG(Cdbg << "State " << ns << " = [" << Join(states[ns].begin(), states[ns].end(), ", ") << "]" << Endl); + { + mNewFsm.Resize(states.size()); + mNewFsm.initial = 0; + mNewFsm.determined = true; + mNewFsm.letters = Letters(); + mNewFsm.m_final.clear(); + for (size_t ns = 0; ns < states.size(); ++ns) { + PIRE_IFDEBUG(Cdbg << "State " << ns << " = [" << Join(states[ns].begin(), states[ns].end(), ", ") << "]" << Endl); for (auto&& j : states[ns]) { - // If it was a terminal state, connect it to itself + // If it was a terminal state, connect it to itself if (mTerminals.find(j) != mTerminals.end()) { for (auto&& letter : Letters()) mNewFsm.Connect(ns, ns, letter.first); - mNewTerminals.insert(ns); + mNewTerminals.insert(ns); PIRE_IFDEBUG(Cdbg << "State " << ns << " becomes terminal because of old state " << j << Endl); - } - } + } + } for (auto&& j : states[ns]) { - // If any state containing in our one is marked final, mark the new state final as well + // If any state containing in our one is marked final, mark the new state final as well if (mFsm.IsFinal(j)) { PIRE_IFDEBUG(Cdbg << "State " << ns << " becomes final because of old state " << j << Endl); - mNewFsm.SetFinal(ns, true); - if (mFsm.tags.empty()) - // Weve got no tags and already know that the state is final, - // hence weve done with this state and got nothing more to do. - break; - } - - // Bitwise OR all tags in states + mNewFsm.SetFinal(ns, true); + if (mFsm.tags.empty()) + // Weve got no tags and already know that the state is final, + // hence weve done with this state and got nothing more to do. + break; + } + + // Bitwise OR all tags in states auto ti = mFsm.tags.find(j); - if (ti != mFsm.tags.end()) { + if (ti != mFsm.tags.end()) { PIRE_IFDEBUG(Cdbg << "State " << ns << " carries tag " << ti->second << " because of old state " << j << Endl); - mNewFsm.tags[ns] |= ti->second; - } - } - } - // For each old state, prepare a list of new state it is contained in + mNewFsm.tags[ns] |= ti->second; + } + } + } + // For each old state, prepare a list of new state it is contained in typedef TMap< size_t, TVector<size_t> > Old2New; - Old2New old2new; - for (size_t ns = 0; ns < states.size(); ++ns) + Old2New old2new; + for (size_t ns = 0; ns < states.size(); ++ns) for (auto&& j : states[ns]) old2new[j].push_back(ns); - // Copy all outputs + // Copy all outputs for (auto&& i : mFsm.outputs) { for (auto&& j : i.second) { auto from = old2new.find(i.first); auto to = old2new.find(j.first); - if (from != old2new.end() && to != old2new.end()) { + if (from != old2new.end() && to != old2new.end()) { for (auto&& k : from->second) for (auto&& l : to->second) mNewFsm.outputs[k][l] |= j.second; - } - } - } - PIRE_IFDEBUG(Cdbg << "New terminals = [" << Join(mNewTerminals.begin(), mNewTerminals.end(), ",") << "]" << Endl); - } - - void Connect(size_t from, size_t to, Char letter) - { - PIRE_IFDEBUG(Cdbg << "Connecting " << from << " --" << letter << "--> " << to << Endl); + } + } + } + PIRE_IFDEBUG(Cdbg << "New terminals = [" << Join(mNewTerminals.begin(), mNewTerminals.end(), ",") << "]" << Endl); + } + + void Connect(size_t from, size_t to, Char letter) + { + PIRE_IFDEBUG(Cdbg << "Connecting " << from << " --" << letter << "--> " << to << Endl); Y_ASSERT(mNewTerminals.find(from) == mNewTerminals.end()); - mNewFsm.Connect(from, to, letter); - } - typedef bool Result; + mNewFsm.Connect(from, to, letter); + } + typedef bool Result; Result Success() { Fsm::Outputs oldOutputs; @@ -1003,40 +1003,40 @@ public: return true; } - Result Failure() { return false; } + Result Failure() { return false; } - Fsm& Output() { return mNewFsm; } -private: - const Fsm& mFsm; - Fsm mNewFsm; + Fsm& Output() { return mNewFsm; } +private: + const Fsm& mFsm; + Fsm mNewFsm; TSet<size_t> mTerminals; TSet<size_t> mNewTerminals; -}; -} - -bool Fsm::Determine(size_t maxsize /* = 0 */) -{ - static const unsigned MaxSize = 200000; - if (determined) - return true; - - PIRE_IFDEBUG(Cdbg << "=== Initial ===" << Endl << *this << Endl); - - RemoveEpsilons(); - PIRE_IFDEBUG(Cdbg << "=== After all epsilons removed" << Endl << *this << Endl); - - Impl::FsmDetermineTask task(*this); - if (Pire::Impl::Determine(task, maxsize ? maxsize : MaxSize)) { - task.Output().Swap(*this); - PIRE_IFDEBUG(Cdbg << "=== Determined ===" << Endl << *this << Endl); - return true; - } else - return false; -} - +}; +} + +bool Fsm::Determine(size_t maxsize /* = 0 */) +{ + static const unsigned MaxSize = 200000; + if (determined) + return true; + + PIRE_IFDEBUG(Cdbg << "=== Initial ===" << Endl << *this << Endl); + + RemoveEpsilons(); + PIRE_IFDEBUG(Cdbg << "=== After all epsilons removed" << Endl << *this << Endl); + + Impl::FsmDetermineTask task(*this); + if (Pire::Impl::Determine(task, maxsize ? maxsize : MaxSize)) { + task.Output().Swap(*this); + PIRE_IFDEBUG(Cdbg << "=== Determined ===" << Endl << *this << Endl); + return true; + } else + return false; +} + namespace Impl { class FsmMinimizeTask { -public: +public: explicit FsmMinimizeTask(const Fsm& fsm) : mFsm(fsm) , reversedTransitions(fsm.Size()) @@ -1044,7 +1044,7 @@ public: , Classes(0) { Y_ASSERT(mFsm.IsDetermined()); - + TMap<bool, size_t> FinalStateClassMap; for (size_t state = 0; state < mFsm.Size(); ++state) { @@ -1068,7 +1068,7 @@ public: } } } - + TVector<size_t>& GetStateClass() { return StateClass; } size_t& GetClassesNumber() { return Classes; } @@ -1080,22 +1080,22 @@ public: bool IsDetermined() const { return mFsm.IsDetermined(); } - + size_t Size() const { return mFsm.Size(); - } - + } + const TVector<size_t>& Previous(size_t state, size_t letter) const { return reversedTransitions[state][letter]; - } - + } + void AcceptStates() { mNewFsm.Resize(Classes); mNewFsm.letters = mFsm.letters; mNewFsm.determined = mFsm.determined; mNewFsm.m_sparsed = mFsm.m_sparsed; mNewFsm.SetFinal(0, false); - + // Unite equality classes into new states size_t fromIdx = 0; for (auto from = mFsm.m_transitions.begin(), fromEnd = mFsm.m_transitions.end(); from != fromEnd; ++from, ++fromIdx) { @@ -1109,36 +1109,36 @@ public: mNewFsm.SetFinal(dest, true); PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " becomes final because of old state " << fromIdx << Endl); } - + // Append tags auto ti = mFsm.tags.find(fromIdx); if (ti != mFsm.tags.end()) { mNewFsm.tags[dest] |= ti->second; PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " carries tag " << ti->second << " because of old state " << fromIdx << Endl); } - } + } mNewFsm.initial = StateClass[mFsm.initial]; // Restore outputs for (auto&& output : mFsm.outputs) for (auto&& output2 : output.second) mNewFsm.outputs[StateClass[output.first]].insert(ymake_pair(StateClass[output2.first], output2.second)); - } - + } + typedef bool Result; - + Result Success() { return true; } - + Result Failure() { return false; } - + Fsm& Output() { return mNewFsm; - } - + } + private: const Fsm& mFsm; Fsm mNewFsm; @@ -1147,89 +1147,89 @@ private: size_t Classes; }; } - + void Fsm::Minimize() { // Minimization algorithm is only applicable to a determined FSM. Y_ASSERT(determined); - + Impl::FsmMinimizeTask task{*this}; if (Pire::Impl::Minimize(task)) { task.Output().Swap(*this); - } -} - -Fsm& Fsm::Canonize(size_t maxSize /* = 0 */) -{ - if (!IsDetermined()) { + } +} + +Fsm& Fsm::Canonize(size_t maxSize /* = 0 */) +{ + if (!IsDetermined()) { if (!Determine(maxSize)) - throw Error("regexp pattern too complicated"); - } - Minimize(); - return *this; -} - -void Fsm::PrependAnything() -{ - size_t newstate = Size(); - Resize(Size() + 1); - for (size_t letter = 0; letter < MaxChar; ++letter) - Connect(newstate, newstate, letter); - - Connect(newstate, initial); - initial = newstate; - - determined = false; -} - -void Fsm::AppendAnything() -{ - size_t newstate = Size(); - Resize(Size() + 1); - for (size_t letter = 0; letter < MaxChar; ++letter) - Connect(newstate, newstate, letter); - - ConnectFinal(newstate); - ClearFinal(); - SetFinal(newstate, 1); - - determined = false; -} - -Fsm& Fsm::Surround() -{ - PrependAnything(); - AppendAnything(); - return *this; -} - -void Fsm::Divert(size_t from, size_t to, size_t dest) -{ - if (to == dest) - return; - - // Assign the output + throw Error("regexp pattern too complicated"); + } + Minimize(); + return *this; +} + +void Fsm::PrependAnything() +{ + size_t newstate = Size(); + Resize(Size() + 1); + for (size_t letter = 0; letter < MaxChar; ++letter) + Connect(newstate, newstate, letter); + + Connect(newstate, initial); + initial = newstate; + + determined = false; +} + +void Fsm::AppendAnything() +{ + size_t newstate = Size(); + Resize(Size() + 1); + for (size_t letter = 0; letter < MaxChar; ++letter) + Connect(newstate, newstate, letter); + + ConnectFinal(newstate); + ClearFinal(); + SetFinal(newstate, 1); + + determined = false; +} + +Fsm& Fsm::Surround() +{ + PrependAnything(); + AppendAnything(); + return *this; +} + +void Fsm::Divert(size_t from, size_t to, size_t dest) +{ + if (to == dest) + return; + + // Assign the output auto oi = outputs.find(from); - if (oi != outputs.end()) { + if (oi != outputs.end()) { auto oi2 = oi->second.find(to); - if (oi2 != oi->second.end()) { - unsigned long output = oi2->second; - oi->second.erase(oi2); - oi->second.insert(ymake_pair(dest, output)); - } - } - - // Assign the transition + if (oi2 != oi->second.end()) { + unsigned long output = oi2->second; + oi->second.erase(oi2); + oi->second.insert(ymake_pair(dest, output)); + } + } + + // Assign the transition for (auto&& i : m_transitions[from]) { auto di = i.second.find(to); if (di != i.second.end()) { i.second.erase(di); i.second.insert(dest); - } - } - - ClearHints(); -} - - -} + } + } + + ClearHints(); +} + + +} diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h index 4dad06ca06..d25d1764e3 100644 --- a/contrib/libs/pire/pire/fsm.h +++ b/contrib/libs/pire/pire/fsm.h @@ -1,283 +1,283 @@ -/* - * fsm.h -- the definition of the FSM class. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * fsm.h -- the definition of the FSM class. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_FSM_H -#define PIRE_FSM_H - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_FSM_H +#define PIRE_FSM_H + + #include <contrib/libs/pire/pire/stub/stl.h> -#include "partition.h" -#include "defs.h" - -namespace Pire { - - namespace Impl { - class FsmDetermineTask; +#include "partition.h" +#include "defs.h" + +namespace Pire { + + namespace Impl { + class FsmDetermineTask; class FsmMinimizeTask; class HalfFinalDetermineTask; - } - - /// A Flying Spaghetti Monster... no, just a Finite State Machine. - class Fsm { - public: - typedef ybitset<MaxChar> Charset; - - Fsm(); - void Swap(Fsm& fsm); - - static Fsm MakeFalse(); - - /// Current number of states - size_t Size() const { return m_transitions.size(); } - - Fsm& Append(char c); - Fsm& Append(const ystring& str); - Fsm& AppendSpecial(Char c); - - /// Efficiently appends a union of passed strings to FSM. - /// Used for ranges (e.g. [a-z]), character classes (e.g. \w, \d) - /// and case-insensitive comparison of multibyte characters, - /// when one string represents a lowercase variant of a character, - /// while another string represents its uppercase variant. + } + + /// A Flying Spaghetti Monster... no, just a Finite State Machine. + class Fsm { + public: + typedef ybitset<MaxChar> Charset; + + Fsm(); + void Swap(Fsm& fsm); + + static Fsm MakeFalse(); + + /// Current number of states + size_t Size() const { return m_transitions.size(); } + + Fsm& Append(char c); + Fsm& Append(const ystring& str); + Fsm& AppendSpecial(Char c); + + /// Efficiently appends a union of passed strings to FSM. + /// Used for ranges (e.g. [a-z]), character classes (e.g. \w, \d) + /// and case-insensitive comparison of multibyte characters, + /// when one string represents a lowercase variant of a character, + /// while another string represents its uppercase variant. Fsm& AppendStrings(const TVector<ystring>& strings); - - /// Appends a part matching a single byte (any). - Fsm& AppendDot(); - - /// Appends and prepends the FSM with the iterated dot (see above). - Fsm& Surround(); // returns *this - Fsm Surrounded() const { Fsm copy(*this); copy.Surround(); return copy; } - - Fsm& operator += (const Fsm& rhs); ///< Concatenation - Fsm& operator |= (const Fsm& rhs); ///< Alternation - Fsm& operator &= (const Fsm& rhs); ///< Conjunction - Fsm& Iterate(); ///< Klene star - Fsm& Complement(); ///< Complementation - Fsm& operator *= (size_t count) { *this = *this * count; return *this; } - - Fsm operator + (const Fsm& rhs) const { Fsm a(*this); return a += rhs; } - Fsm operator | (const Fsm& rhs) const { Fsm a(*this); return a |= rhs; } - Fsm operator & (const Fsm& rhs) const { Fsm a(*this); return a &= rhs; } - Fsm operator * () const { Fsm a(*this); return a.Iterate(); } - Fsm operator ~ () const { Fsm a(*this); return a.Complement(); } + + /// Appends a part matching a single byte (any). + Fsm& AppendDot(); + + /// Appends and prepends the FSM with the iterated dot (see above). + Fsm& Surround(); // returns *this + Fsm Surrounded() const { Fsm copy(*this); copy.Surround(); return copy; } + + Fsm& operator += (const Fsm& rhs); ///< Concatenation + Fsm& operator |= (const Fsm& rhs); ///< Alternation + Fsm& operator &= (const Fsm& rhs); ///< Conjunction + Fsm& Iterate(); ///< Klene star + Fsm& Complement(); ///< Complementation + Fsm& operator *= (size_t count) { *this = *this * count; return *this; } + + Fsm operator + (const Fsm& rhs) const { Fsm a(*this); return a += rhs; } + Fsm operator | (const Fsm& rhs) const { Fsm a(*this); return a |= rhs; } + Fsm operator & (const Fsm& rhs) const { Fsm a(*this); return a &= rhs; } + Fsm operator * () const { Fsm a(*this); return a.Iterate(); } + Fsm operator ~ () const { Fsm a(*this); return a.Complement(); } Fsm operator * (size_t count) const; - - // === Raw FSM construction === - - /// Connects two states with given transition - void Connect(size_t from, size_t to, Char c = Epsilon); - - /// Removes given character from the specified transition. - void Disconnect(size_t from, size_t to, Char c); - - /// Completely removes given transition - void Disconnect(size_t from, size_t to); - + + // === Raw FSM construction === + + /// Connects two states with given transition + void Connect(size_t from, size_t to, Char c = Epsilon); + + /// Removes given character from the specified transition. + void Disconnect(size_t from, size_t to, Char c); + + /// Completely removes given transition + void Disconnect(size_t from, size_t to); + /// Creates an FSM which matches any prefix of any word current FSM matches. - void MakePrefix(); - - /// Creates an FSM which matches any suffix of any word current FSM matches. - void MakeSuffix(); - - /// Does the one way part of Surround(). - void PrependAnything(); - void AppendAnything(); - - /// Creates an FSM which matches reversed strings matched by current FSM. - Fsm& Reverse(); - - /// Returns a set of states from which no final states are reachable + void MakePrefix(); + + /// Creates an FSM which matches any suffix of any word current FSM matches. + void MakeSuffix(); + + /// Does the one way part of Surround(). + void PrependAnything(); + void AppendAnything(); + + /// Creates an FSM which matches reversed strings matched by current FSM. + Fsm& Reverse(); + + /// Returns a set of states from which no final states are reachable TSet<size_t> DeadStates() const; - - /// Removes all dead end paths from FSM - void RemoveDeadEnds(); - - /// Determines and minimizes the FSM if neccessary. Returns *this. - Fsm& Canonize(size_t maxSize = 0); - - template<class Scanner> + + /// Removes all dead end paths from FSM + void RemoveDeadEnds(); + + /// Determines and minimizes the FSM if neccessary. Returns *this. + Fsm& Canonize(size_t maxSize = 0); + + template<class Scanner> Scanner Compile(size_t distance = 0); - - void DumpState(yostream& s, size_t state) const; + + void DumpState(yostream& s, size_t state) const; void DumpTo(yostream& s, const ystring& name = "") const; - + typedef TSet<size_t> StatesSet; typedef TMap<size_t, StatesSet> TransitionRow; typedef TVector<TransitionRow> TransitionTable; - - struct LettersEquality { - LettersEquality(const Fsm::TransitionTable& tbl): m_tbl(&tbl) {} - bool operator()(Char a, Char b) const; - private: - const Fsm::TransitionTable* m_tbl; - }; - + + struct LettersEquality { + LettersEquality(const Fsm::TransitionTable& tbl): m_tbl(&tbl) {} + bool operator()(Char a, Char b) const; + private: + const Fsm::TransitionTable* m_tbl; + }; + typedef TSet<size_t> FinalTable; - typedef Partition<Char, LettersEquality> LettersTbl; - - - /* - * A very low level FSM building interface. - * - * It is generally unwise to call any of these functions unless you are building - * your own scanner, your own ecoding or exaclty know what you are doing. - */ - unsigned long Tag(size_t state) const { Tags::const_iterator i = tags.find(state); return (i == tags.end()) ? 0 : i->second; } - void SetTag(size_t state, unsigned long tag) { tags[state] = tag; } - - unsigned long Output(size_t from, size_t to) const; - void SetOutput(size_t from, size_t to, unsigned long output) { outputs[from][to] = output; } - void ClearOutputs() { outputs.clear(); } - - const FinalTable& Finals() const { return m_final; } - bool IsFinal(size_t state) const { return m_final.find(state) != m_final.end(); } - void SetFinal(size_t size, bool final); - void ClearFinal() { m_final.clear(); } - - /// Removes all espilon transitions from the FSM. Does not change the FSMs language. - void RemoveEpsilons(); - - /// Resize FSM to newSize states. Returns old size. - size_t Resize(size_t newSize); - - /// Imports foreign transition table - void Import(const Fsm& rhs); - - /// Connects all final state with given state - void ConnectFinal(size_t to, Char c = Epsilon); - - /// Diverts all transition between two given states to @p dest, preserving outputs - void Divert(size_t from, size_t to, size_t dest); - - /// Checks whether two states are connected using given letter. - bool Connected(size_t from, size_t to, Char c) const; - - /// Returns a set of letters on which a transition from the specified state exists + typedef Partition<Char, LettersEquality> LettersTbl; + + + /* + * A very low level FSM building interface. + * + * It is generally unwise to call any of these functions unless you are building + * your own scanner, your own ecoding or exaclty know what you are doing. + */ + unsigned long Tag(size_t state) const { Tags::const_iterator i = tags.find(state); return (i == tags.end()) ? 0 : i->second; } + void SetTag(size_t state, unsigned long tag) { tags[state] = tag; } + + unsigned long Output(size_t from, size_t to) const; + void SetOutput(size_t from, size_t to, unsigned long output) { outputs[from][to] = output; } + void ClearOutputs() { outputs.clear(); } + + const FinalTable& Finals() const { return m_final; } + bool IsFinal(size_t state) const { return m_final.find(state) != m_final.end(); } + void SetFinal(size_t size, bool final); + void ClearFinal() { m_final.clear(); } + + /// Removes all espilon transitions from the FSM. Does not change the FSMs language. + void RemoveEpsilons(); + + /// Resize FSM to newSize states. Returns old size. + size_t Resize(size_t newSize); + + /// Imports foreign transition table + void Import(const Fsm& rhs); + + /// Connects all final state with given state + void ConnectFinal(size_t to, Char c = Epsilon); + + /// Diverts all transition between two given states to @p dest, preserving outputs + void Divert(size_t from, size_t to, size_t dest); + + /// Checks whether two states are connected using given letter. + bool Connected(size_t from, size_t to, Char c) const; + + /// Returns a set of letters on which a transition from the specified state exists TSet<Char> OutgoingLetters(size_t state) const; - - /// Returns a set of states where a transition from the given state using the given letter is possible - const StatesSet& Destinations(size_t from, Char letter) const; - - /// Checks whether two states are connected using any letter. - bool Connected(size_t from, size_t to) const; - size_t Initial() const { return initial; } - void SetInitial(size_t init) { initial = init; } - - const LettersTbl& Letters() const { return letters; } - - /// Determines the FSM. - /// Breaks FSM invariant of having a single final state, so high-level FSM building - /// functions (i.e. Append(), operator+(), etc...) no longer can be applied to the FSM - /// until the invariants have been manually restored. - /// return value: successful? - bool Determine(size_t maxsize = 0); - bool IsDetermined() const { return determined; } - void SetIsDetermined(bool det) { determined = det; } - - /// Minimizes amount of states in the regexp. - /// Requires a determined FSM. - void Minimize(); - - - /// Builds letters equivalence classes + + /// Returns a set of states where a transition from the given state using the given letter is possible + const StatesSet& Destinations(size_t from, Char letter) const; + + /// Checks whether two states are connected using any letter. + bool Connected(size_t from, size_t to) const; + size_t Initial() const { return initial; } + void SetInitial(size_t init) { initial = init; } + + const LettersTbl& Letters() const { return letters; } + + /// Determines the FSM. + /// Breaks FSM invariant of having a single final state, so high-level FSM building + /// functions (i.e. Append(), operator+(), etc...) no longer can be applied to the FSM + /// until the invariants have been manually restored. + /// return value: successful? + bool Determine(size_t maxsize = 0); + bool IsDetermined() const { return determined; } + void SetIsDetermined(bool det) { determined = det; } + + /// Minimizes amount of states in the regexp. + /// Requires a determined FSM. + void Minimize(); + + + /// Builds letters equivalence classes void Sparse(bool needEpsilons = false); - - /// Unpacks all letters equivalence classs back into transitions table - void Unsparse(); - - private: - - /// Transitions table :: Q x V -> exp(Q) - TransitionTable m_transitions; - - /// Initial state - size_t initial; - - /// Final states. - FinalTable m_final; - - LettersTbl letters; - - /// Does 'letters' make sense? - bool m_sparsed; - - /// Is the FSM already determined? - bool determined; - - /// Output + + /// Unpacks all letters equivalence classs back into transitions table + void Unsparse(); + + private: + + /// Transitions table :: Q x V -> exp(Q) + TransitionTable m_transitions; + + /// Initial state + size_t initial; + + /// Final states. + FinalTable m_final; + + LettersTbl letters; + + /// Does 'letters' make sense? + bool m_sparsed; + + /// Is the FSM already determined? + bool determined; + + /// Output typedef TMap< size_t, TMap<size_t, unsigned long> > Outputs; - Outputs outputs; - + Outputs outputs; + typedef TMap<size_t, unsigned long> Tags; - Tags tags; - - /// Heuristics hit: true iff this FSM is a union of two other FSMs - bool isAlternative; - + Tags tags; + + /// Heuristics hit: true iff this FSM is a union of two other FSMs + bool isAlternative; + void ShortCutEpsilon(size_t from, size_t thru, TVector< TSet<size_t> >& inveps); ///< internal - void MergeEpsilonConnection(size_t from, size_t to); ///< internal - + void MergeEpsilonConnection(size_t from, size_t to); ///< internal + TSet<size_t> TerminalStates() const; - - Char Translate(Char c) const; - - void ClearHints() { isAlternative = false; } - - friend class Impl::FsmDetermineTask; + + Char Translate(Char c) const; + + void ClearHints() { isAlternative = false; } + + friend class Impl::FsmDetermineTask; friend class Impl::FsmMinimizeTask; friend class Impl::HalfFinalDetermineTask; - }; - - template<class Scanner> + }; + + template<class Scanner> void BuildScanner(const Fsm& fsm, Scanner& r) - { + { TSet<size_t> dead; - if (Scanner::DeadFlag) - dead = fsm.DeadStates(); - - for (size_t state = 0; state < fsm.Size(); ++state) - r.SetTag(state, typename Scanner::Tag(fsm.Tag(state) - | (fsm.IsFinal(state) ? Scanner::FinalFlag : 0) - | ((dead.find(state) != dead.end()) ? Scanner::DeadFlag : 0))); - - for (size_t from = 0; from != fsm.Size(); ++from) - for (Fsm::LettersTbl::ConstIterator lit = fsm.Letters().Begin(), lie = fsm.Letters().End(); lit != lie; ++lit) { - const Fsm::StatesSet& tos = fsm.Destinations(from, lit->first); - for (Fsm::StatesSet::const_iterator to = tos.begin(), toEnd = tos.end(); to != toEnd; ++to) - r.SetJump(from, lit->first, *to, r.RemapAction(fsm.Output(from, *to))); - } - - r.FinishBuild(); - } - - template<class Scanner> + if (Scanner::DeadFlag) + dead = fsm.DeadStates(); + + for (size_t state = 0; state < fsm.Size(); ++state) + r.SetTag(state, typename Scanner::Tag(fsm.Tag(state) + | (fsm.IsFinal(state) ? Scanner::FinalFlag : 0) + | ((dead.find(state) != dead.end()) ? Scanner::DeadFlag : 0))); + + for (size_t from = 0; from != fsm.Size(); ++from) + for (Fsm::LettersTbl::ConstIterator lit = fsm.Letters().Begin(), lie = fsm.Letters().End(); lit != lie; ++lit) { + const Fsm::StatesSet& tos = fsm.Destinations(from, lit->first); + for (Fsm::StatesSet::const_iterator to = tos.begin(), toEnd = tos.end(); to != toEnd; ++to) + r.SetJump(from, lit->first, *to, r.RemapAction(fsm.Output(from, *to))); + } + + r.FinishBuild(); + } + + template<class Scanner> inline Scanner Fsm::Compile(size_t distance) - { + { return Scanner(*this, distance); - } - - yostream& operator << (yostream&, const Fsm&); -} - -#endif + } + + yostream& operator << (yostream&, const Fsm&); +} + +#endif diff --git a/contrib/libs/pire/pire/fwd.h b/contrib/libs/pire/pire/fwd.h index c2b5870b05..aa6eb6b051 100644 --- a/contrib/libs/pire/pire/fwd.h +++ b/contrib/libs/pire/pire/fwd.h @@ -1,42 +1,42 @@ -/* - * fwd.h -- forward declarations of Pire classes - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * fwd.h -- forward declarations of Pire classes * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_FWD_H -#define PIRE_FWD_H - - -namespace Pire { - - class Scanner; - class MultiScanner; - class SlowScanner; - class CapturingScanner; - class CountingScanner; - - class Fsm; - - class Lexer; - class Encoding; -} - -#endif + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_FWD_H +#define PIRE_FWD_H + + +namespace Pire { + + class Scanner; + class MultiScanner; + class SlowScanner; + class CapturingScanner; + class CountingScanner; + + class Fsm; + + class Lexer; + class Encoding; +} + +#endif diff --git a/contrib/libs/pire/pire/glue.h b/contrib/libs/pire/pire/glue.h index bac086f2f0..fb34c6cfa8 100644 --- a/contrib/libs/pire/pire/glue.h +++ b/contrib/libs/pire/pire/glue.h @@ -1,166 +1,166 @@ -/* - * glue.h -- scanner agglutination task, which can be used as - * a parameter to Determine(). - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * glue.h -- scanner agglutination task, which can be used as + * a parameter to Determine(). * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_GLUE_H -#define PIRE_GLUE_H - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_GLUE_H +#define PIRE_GLUE_H + + #include <contrib/libs/pire/pire/stub/stl.h> -#include "partition.h" - -namespace Pire { -namespace Impl { - -template <class Scanner> -class LettersEquality: public ybinary_function<Char, Char, bool> { -public: - LettersEquality(typename Scanner::Letter* lhs, typename Scanner::Letter* rhs): m_lhs(lhs), m_rhs(rhs) {} - - bool operator()(Char a, Char b) const - { - return m_lhs[a] == m_lhs[b] && m_rhs[a] == m_rhs[b]; - } - -private: - typename Scanner::Letter* m_lhs; - typename Scanner::Letter* m_rhs; -}; - -// This lookup table is used instead of std::map. -// The key idea is to specify size which is a power of 2 in order to use >> and | instead of -// divisions and remainders. -// NB: it mimics limited std::map<> behaviour, hence stl-like method names and typedefs. -template <size_t N, class State> -class GluedStateLookupTable { -public: - static const size_t MaxSize = N; - typedef ypair<State, State> key_type; - typedef size_t mapped_type; - typedef ypair<key_type, mapped_type> value_type; - typedef value_type* iterator; - typedef const value_type* const_iterator; - - GluedStateLookupTable() - : mMap(new value_type[N]) - , mFilled(N, false) - {} - +#include "partition.h" + +namespace Pire { +namespace Impl { + +template <class Scanner> +class LettersEquality: public ybinary_function<Char, Char, bool> { +public: + LettersEquality(typename Scanner::Letter* lhs, typename Scanner::Letter* rhs): m_lhs(lhs), m_rhs(rhs) {} + + bool operator()(Char a, Char b) const + { + return m_lhs[a] == m_lhs[b] && m_rhs[a] == m_rhs[b]; + } + +private: + typename Scanner::Letter* m_lhs; + typename Scanner::Letter* m_rhs; +}; + +// This lookup table is used instead of std::map. +// The key idea is to specify size which is a power of 2 in order to use >> and | instead of +// divisions and remainders. +// NB: it mimics limited std::map<> behaviour, hence stl-like method names and typedefs. +template <size_t N, class State> +class GluedStateLookupTable { +public: + static const size_t MaxSize = N; + typedef ypair<State, State> key_type; + typedef size_t mapped_type; + typedef ypair<key_type, mapped_type> value_type; + typedef value_type* iterator; + typedef const value_type* const_iterator; + + GluedStateLookupTable() + : mMap(new value_type[N]) + , mFilled(N, false) + {} + ~GluedStateLookupTable() = default; - - const_iterator end() const { + + const_iterator end() const { return mMap.Get() + MaxSize; - } - // Note that in fact mMap is sparsed and traditional [begin,end) - // traversal is unavailable; hence no begin() method here. - // end() is only valid for comparing with find() result. - const_iterator find(const key_type& st) const { - size_t ind = Search(st); + } + // Note that in fact mMap is sparsed and traditional [begin,end) + // traversal is unavailable; hence no begin() method here. + // end() is only valid for comparing with find() result. + const_iterator find(const key_type& st) const { + size_t ind = Search(st); return mFilled[ind] ? (mMap.Get() + ind) : end(); - } - - ypair<iterator, bool> insert(const value_type& v) { - size_t ind = Search(v.first); - if (!mFilled[ind]) { + } + + ypair<iterator, bool> insert(const value_type& v) { + size_t ind = Search(v.first); + if (!mFilled[ind]) { mMap[ind] = v; - mFilled[ind] = true; + mFilled[ind] = true; return ymake_pair(mMap.Get() + ind, true); - } else + } else return ymake_pair(mMap.Get() + ind, false); - } - -private: - size_t Search(const key_type& st) const { - size_t startInd = (Hash(st) % N); - for (size_t ind = startInd; ind != (startInd + N - 1) % N; ind = (ind + 1) % N) { - if (!mFilled[ind] || mMap[ind].first == st) { - return ind; - } - } + } + +private: + size_t Search(const key_type& st) const { + size_t startInd = (Hash(st) % N); + for (size_t ind = startInd; ind != (startInd + N - 1) % N; ind = (ind + 1) % N) { + if (!mFilled[ind] || mMap[ind].first == st) { + return ind; + } + } return (size_t)-1; - } - - static size_t Hash(const key_type& st) { - return size_t((st.first >> 2) ^ (st.second >> 4) ^ (st.second << 10)); - } - + } + + static size_t Hash(const key_type& st) { + return size_t((st.first >> 2) ^ (st.second >> 4) ^ (st.second << 10)); + } + TArrayHolder<value_type> mMap; TVector<bool> mFilled; - - // Noncopyable - GluedStateLookupTable(const GluedStateLookupTable&); - GluedStateLookupTable& operator = (const GluedStateLookupTable&); -}; - -template<class Scanner> -class ScannerGlueCommon { -public: - typedef Partition< Char, Impl::LettersEquality<Scanner> > LettersTbl; - - typedef ypair<typename Scanner::InternalState, typename Scanner::InternalState> State; - ScannerGlueCommon(const Scanner& lhs, const Scanner& rhs, const LettersTbl& letters) - : m_lhs(lhs) - , m_rhs(rhs) - , m_letters(letters) - { - // Form a new letters partition - for (unsigned ch = 0; ch < MaxChar; ++ch) - if (ch != Epsilon) - m_letters.Append(ch); - } - - const LettersTbl& Letters() const { return m_letters; } - - const Scanner& Lhs() const { return m_lhs; } - const Scanner& Rhs() const { return m_rhs; } - - State Initial() const { return State(Lhs().m.initial, Rhs().m.initial); } - - State Next(State state, Char letter) const - { - Lhs().Next(state.first, letter); - Rhs().Next(state.second, letter); - return state; - } - - bool IsRequired(const State& /*state*/) const { return true; } - - typedef Scanner Result; - const Scanner& Success() const { return *m_result; } - Scanner Failure() const { return Scanner(); } - -protected: - Scanner& Sc() { return *m_result; } + + // Noncopyable + GluedStateLookupTable(const GluedStateLookupTable&); + GluedStateLookupTable& operator = (const GluedStateLookupTable&); +}; + +template<class Scanner> +class ScannerGlueCommon { +public: + typedef Partition< Char, Impl::LettersEquality<Scanner> > LettersTbl; + + typedef ypair<typename Scanner::InternalState, typename Scanner::InternalState> State; + ScannerGlueCommon(const Scanner& lhs, const Scanner& rhs, const LettersTbl& letters) + : m_lhs(lhs) + , m_rhs(rhs) + , m_letters(letters) + { + // Form a new letters partition + for (unsigned ch = 0; ch < MaxChar; ++ch) + if (ch != Epsilon) + m_letters.Append(ch); + } + + const LettersTbl& Letters() const { return m_letters; } + + const Scanner& Lhs() const { return m_lhs; } + const Scanner& Rhs() const { return m_rhs; } + + State Initial() const { return State(Lhs().m.initial, Rhs().m.initial); } + + State Next(State state, Char letter) const + { + Lhs().Next(state.first, letter); + Rhs().Next(state.second, letter); + return state; + } + + bool IsRequired(const State& /*state*/) const { return true; } + + typedef Scanner Result; + const Scanner& Success() const { return *m_result; } + Scanner Failure() const { return Scanner(); } + +protected: + Scanner& Sc() { return *m_result; } void SetSc(THolder<Scanner>&& sc) { m_result = std::move(sc); } - -private: - const Scanner& m_lhs; - const Scanner& m_rhs; - LettersTbl m_letters; + +private: + const Scanner& m_lhs; + const Scanner& m_rhs; + LettersTbl m_letters; THolder<Scanner> m_result; -}; - -} -} - -#endif +}; + +} +} + +#endif diff --git a/contrib/libs/pire/pire/inline.l b/contrib/libs/pire/pire/inline.l index a4d2e1a836..67f6d80584 100644 --- a/contrib/libs/pire/pire/inline.l +++ b/contrib/libs/pire/pire/inline.l @@ -1,31 +1,31 @@ -%{ // -*- mode: c++ -*- - -/* - * inline.lpp -- a tool for inlining Pire regexps into your C++ code - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - -#include <stdio.h> -#include <vector> -#include <string> -#include <stdexcept> +%{ // -*- mode: c++ -*- + +/* + * inline.lpp -- a tool for inlining Pire regexps into your C++ code + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + +#include <stdio.h> +#include <vector> +#include <string> +#include <stdexcept> #include <contrib/libs/pire/pire/stub/hacks.h> #include <contrib/libs/pire/pire/stub/lexical_cast.h> @@ -35,238 +35,238 @@ #include "pire.h" -ystring filename = ""; -int line = 1; +ystring filename = ""; +int line = 1; TVector<ystring> args; - -#ifdef _WIN32 -#if _MCS_VER >= 1600 -static int isatty(int) { return 0; } -#endif -#endif - -class Die { -public: - Die() { - Msg = filename.empty() ? "pire_inline" : (filename + ":" + ToString(line) + ":"); - } - - - template<class T> - Die& operator << (const T& t) { - Msg += ToString(t); - return *this; - } - - - ~Die() { - fprintf(stderr, "%s\n", Msg.c_str()); - exit(1); - } -private: - ystring Msg; -}; -Die DieHelper() { - return Die(); -} - -void putChar(char c) { putc(c, yyout); } -void suppressChar(char) {} -void eatComment(void (*action)(char)); - -#define YY_FATAL_ERROR(msg) DieHelper() << msg -%} -%x Regexp -%% - - -<INITIAL>"/*" { eatComment(putChar); } -<Regexp>"/*" { eatComment(suppressChar); } -<INITIAL>"//".*\n { ++line; fprintf(yyout, "%s", yytext); } -<Regexp>"//".*\n { ++line; } -"\""([^\"]|\\.)*"\"" { fprintf(yyout, "%s", yytext); } -\n { ++line; putc('\n', yyout); } - - -<INITIAL>"PIRE_REGEXP"[:space:]*"(" { BEGIN(Regexp); args.clear(); args.push_back(ystring()); } -<Regexp>"\""([^\"]|\\.)*"\"" { - ystring& s = args.back(); - const char* p; - for (p = yytext + 1; *p && p[1]; ++p) { - if (*p == '\\') { - ++p; - if (!*p) - Die() << "string ends with a backslash"; - else if (*p == '\'' || *p == '\"' || *p == '\\') - s.push_back(*p); - else if (*p == 'n') - s.push_back('\n'); - else if (*p == 't') - s.push_back('\t'); - else if (isdigit(*p)) { - const char* beg = p; - while (isdigit(*p)) - ++p; - s.push_back(strtol(ystring(beg, p).c_str(), 0, 8)); - } else if (*p == 'x') { - const char* beg = p; - while (isdigit(*p) || (*p > 'a' && *p <= 'f') || (*p > 'A' && *p < 'F')) - ++p; - s.push_back(strtol(ystring(beg, p).c_str(), 0, 16)); - } else - Die() << "unknown escape sequence (\\" << *p << ")"; - } else - s.push_back(*p); - } - if (!*p) - Die() << "string ends with a backslash"; -} -<Regexp>[ \t] {} -<Regexp>\n { ++line; } -<Regexp>"," { args.push_back(ystring()); } -<Regexp>")" { - - if (args.size() & 1 || args.empty()) - Die() << "Usage: PIRE_REGEXP(\"regexp1\", \"flags1\" [, \"regexp2\", \"flags2\" [,...] ])"; - - bool first = true; - Pire::Scanner sc; - ystring pattern; + +#ifdef _WIN32 +#if _MCS_VER >= 1600 +static int isatty(int) { return 0; } +#endif +#endif + +class Die { +public: + Die() { + Msg = filename.empty() ? "pire_inline" : (filename + ":" + ToString(line) + ":"); + } + + + template<class T> + Die& operator << (const T& t) { + Msg += ToString(t); + return *this; + } + + + ~Die() { + fprintf(stderr, "%s\n", Msg.c_str()); + exit(1); + } +private: + ystring Msg; +}; +Die DieHelper() { + return Die(); +} + +void putChar(char c) { putc(c, yyout); } +void suppressChar(char) {} +void eatComment(void (*action)(char)); + +#define YY_FATAL_ERROR(msg) DieHelper() << msg +%} +%x Regexp +%% + + +<INITIAL>"/*" { eatComment(putChar); } +<Regexp>"/*" { eatComment(suppressChar); } +<INITIAL>"//".*\n { ++line; fprintf(yyout, "%s", yytext); } +<Regexp>"//".*\n { ++line; } +"\""([^\"]|\\.)*"\"" { fprintf(yyout, "%s", yytext); } +\n { ++line; putc('\n', yyout); } + + +<INITIAL>"PIRE_REGEXP"[:space:]*"(" { BEGIN(Regexp); args.clear(); args.push_back(ystring()); } +<Regexp>"\""([^\"]|\\.)*"\"" { + ystring& s = args.back(); + const char* p; + for (p = yytext + 1; *p && p[1]; ++p) { + if (*p == '\\') { + ++p; + if (!*p) + Die() << "string ends with a backslash"; + else if (*p == '\'' || *p == '\"' || *p == '\\') + s.push_back(*p); + else if (*p == 'n') + s.push_back('\n'); + else if (*p == 't') + s.push_back('\t'); + else if (isdigit(*p)) { + const char* beg = p; + while (isdigit(*p)) + ++p; + s.push_back(strtol(ystring(beg, p).c_str(), 0, 8)); + } else if (*p == 'x') { + const char* beg = p; + while (isdigit(*p) || (*p > 'a' && *p <= 'f') || (*p > 'A' && *p < 'F')) + ++p; + s.push_back(strtol(ystring(beg, p).c_str(), 0, 16)); + } else + Die() << "unknown escape sequence (\\" << *p << ")"; + } else + s.push_back(*p); + } + if (!*p) + Die() << "string ends with a backslash"; +} +<Regexp>[ \t] {} +<Regexp>\n { ++line; } +<Regexp>"," { args.push_back(ystring()); } +<Regexp>")" { + + if (args.size() & 1 || args.empty()) + Die() << "Usage: PIRE_REGEXP(\"regexp1\", \"flags1\" [, \"regexp2\", \"flags2\" [,...] ])"; + + bool first = true; + Pire::Scanner sc; + ystring pattern; for (auto i = args.begin(), ie = args.end(); i != ie; i += 2) { - - Pire::Lexer lexer(i->c_str(), i->c_str() + i->size()); - bool surround = false; - bool greedy = false; + + Pire::Lexer lexer(i->c_str(), i->c_str() + i->size()); + bool surround = false; + bool greedy = false; bool reverse = false; - for (const char* option = (i+1)->c_str(); *option; ++option) { - if (*option == 'i') - lexer.AddFeature(Pire::Features::CaseInsensitive()); - else if (*option == 'u') - lexer.SetEncoding(Pire::Encodings::Utf8()); - else if (*option == 's') - surround = true; - else if (*option == 'a') - lexer.AddFeature(Pire::Features::AndNotSupport()); - else if (*option == 'g') - greedy = true; + for (const char* option = (i+1)->c_str(); *option; ++option) { + if (*option == 'i') + lexer.AddFeature(Pire::Features::CaseInsensitive()); + else if (*option == 'u') + lexer.SetEncoding(Pire::Encodings::Utf8()); + else if (*option == 's') + surround = true; + else if (*option == 'a') + lexer.AddFeature(Pire::Features::AndNotSupport()); + else if (*option == 'g') + greedy = true; else if (*option == 'r') reverse = true; - else - Die() << "unknown option " << *option << ""; - } - - Pire::Fsm fsm; - try { - fsm = lexer.Parse(); - } - catch (std::exception& e) { - Die() << "" << filename << ":" << line << ": " << e.what() << ""; - } + else + Die() << "unknown option " << *option << ""; + } + + Pire::Fsm fsm; + try { + fsm = lexer.Parse(); + } + catch (std::exception& e) { + Die() << "" << filename << ":" << line << ": " << e.what() << ""; + } if (reverse) fsm.Reverse(); - if (greedy && surround) - Die() << "greedy and surround options are incompatible"; - if (greedy) - fsm = ~fsm.Surrounded() + fsm; - else if (surround) - fsm.Surround(); - - Pire::Scanner tsc(fsm); - if (first) { - pattern = *i; - first = false; - tsc.Swap(sc); - } else { - sc = Pire::Scanner::Glue(sc, tsc); - pattern += " | "; - pattern += *i; - } - } - - BufferOutput buf; - AlignedOutput stream(&buf); - Save(&stream, sc); - - fprintf(yyout, "Pire::MmappedScanner<Pire::Scanner>(PIRE_LITERAL( // %s \n \"", pattern.c_str()); - size_t pos = 5; + if (greedy && surround) + Die() << "greedy and surround options are incompatible"; + if (greedy) + fsm = ~fsm.Surrounded() + fsm; + else if (surround) + fsm.Surround(); + + Pire::Scanner tsc(fsm); + if (first) { + pattern = *i; + first = false; + tsc.Swap(sc); + } else { + sc = Pire::Scanner::Glue(sc, tsc); + pattern += " | "; + pattern += *i; + } + } + + BufferOutput buf; + AlignedOutput stream(&buf); + Save(&stream, sc); + + fprintf(yyout, "Pire::MmappedScanner<Pire::Scanner>(PIRE_LITERAL( // %s \n \"", pattern.c_str()); + size_t pos = 5; for (auto i = buf.Buffer().Begin(), ie = buf.Buffer().End(); i != ie; ++i) { - pos += fprintf(yyout, "\\x%02X", static_cast<unsigned char>(*i)); - if (pos >= 78) { - fprintf(yyout, "\"\n \""); - pos = 5; - } - } - fprintf(yyout, "\"), %u)\n#line %d \"%s\"\n", - (unsigned int) buf.Buffer().Size(), line, filename.c_str()); - BEGIN(INITIAL); -} -<INITIAL>. { putc(*yytext, yyout); } - - - - -%% - -void eatComment(void (*action)(char)) -{ - int c; - action('/'); action('*'); - for (;;) { - while ((c = yyinput()) != EOF && c != '*') { - if (c == '\n') - ++line; - action(c); - } - if (c == '*') { - action(c); - while ((c = yyinput()) == '*') - action(c); - if (c == '/') { - action(c); - break; - } - } - if (c == EOF) - Die() << "EOF in comment"; - } -} - -int yywrap() { return 1; } - - -int main(int argc, char** argv) -{ - // Suppress warnings - static_cast<void>(&yy_fatal_error); - static_cast<void>(&yyunput); - - - try { - const char* outfile = 0; - if (argc >= 3 && !strcmp(argv[1], "-o")) { - outfile = argv[2]; - argv += 2, argc -= 2; - } - if (argc == 2) - filename = ystring(argv[1]); - else if (argc > 2) - Die() << "usage: pire_inline [-o outfile] [infile]"; - - yyin = stdin, yyout = stdout; - if (outfile && (yyout = fopen(outfile, "w")) == NULL) - Die() << "cannot open file " << outfile << " for writing"; - if (!filename.empty()) { - if ((yyin = fopen(filename.c_str(), "r")) == NULL) - Die() << "cannot open file " << filename.c_str() << "\n"; - } else - filename = "(stdin)"; - - - yylex(); - return 0; - } - catch (std::exception& e) { - fprintf(stderr, "%s\n", e.what()); - return 1; - } -} + pos += fprintf(yyout, "\\x%02X", static_cast<unsigned char>(*i)); + if (pos >= 78) { + fprintf(yyout, "\"\n \""); + pos = 5; + } + } + fprintf(yyout, "\"), %u)\n#line %d \"%s\"\n", + (unsigned int) buf.Buffer().Size(), line, filename.c_str()); + BEGIN(INITIAL); +} +<INITIAL>. { putc(*yytext, yyout); } + + + + +%% + +void eatComment(void (*action)(char)) +{ + int c; + action('/'); action('*'); + for (;;) { + while ((c = yyinput()) != EOF && c != '*') { + if (c == '\n') + ++line; + action(c); + } + if (c == '*') { + action(c); + while ((c = yyinput()) == '*') + action(c); + if (c == '/') { + action(c); + break; + } + } + if (c == EOF) + Die() << "EOF in comment"; + } +} + +int yywrap() { return 1; } + + +int main(int argc, char** argv) +{ + // Suppress warnings + static_cast<void>(&yy_fatal_error); + static_cast<void>(&yyunput); + + + try { + const char* outfile = 0; + if (argc >= 3 && !strcmp(argv[1], "-o")) { + outfile = argv[2]; + argv += 2, argc -= 2; + } + if (argc == 2) + filename = ystring(argv[1]); + else if (argc > 2) + Die() << "usage: pire_inline [-o outfile] [infile]"; + + yyin = stdin, yyout = stdout; + if (outfile && (yyout = fopen(outfile, "w")) == NULL) + Die() << "cannot open file " << outfile << " for writing"; + if (!filename.empty()) { + if ((yyin = fopen(filename.c_str(), "r")) == NULL) + Die() << "cannot open file " << filename.c_str() << "\n"; + } else + filename = "(stdin)"; + + + yylex(); + return 0; + } + catch (std::exception& e) { + fprintf(stderr, "%s\n", e.what()); + return 1; + } +} diff --git a/contrib/libs/pire/pire/partition.h b/contrib/libs/pire/pire/partition.h index 85a9af8863..c41cf5c335 100644 --- a/contrib/libs/pire/pire/partition.h +++ b/contrib/libs/pire/pire/partition.h @@ -1,193 +1,193 @@ -/* - * partition.h -- a disjoint set of pairwise equivalent items - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * partition.h -- a disjoint set of pairwise equivalent items * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_PARTITION_H -#define PIRE_PARTITION_H - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_PARTITION_H +#define PIRE_PARTITION_H + + #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/singleton.h> - -namespace Pire { - -/* -* A class which forms a disjoint set of pairwise equivalent items, -* depending on given equivalence relation. -*/ -template<class T, class Eq> -class Partition { -private: + +namespace Pire { + +/* +* A class which forms a disjoint set of pairwise equivalent items, +* depending on given equivalence relation. +*/ +template<class T, class Eq> +class Partition { +private: typedef TMap< T, ypair< size_t, TVector<T> > > Set; - -public: - Partition(const Eq& eq) - : m_eq(eq) - , m_maxidx(0) - { - } - - /// Appends a new item into partition, creating new equivalience class if neccessary. - void Append(const T& t) { - DoAppend(m_set, t); - } - - typedef typename Set::const_iterator ConstIterator; - - ConstIterator Begin() const { - return m_set.begin(); - } + +public: + Partition(const Eq& eq) + : m_eq(eq) + , m_maxidx(0) + { + } + + /// Appends a new item into partition, creating new equivalience class if neccessary. + void Append(const T& t) { + DoAppend(m_set, t); + } + + typedef typename Set::const_iterator ConstIterator; + + ConstIterator Begin() const { + return m_set.begin(); + } ConstIterator begin() const { return m_set.begin(); } - ConstIterator End() const { - return m_set.end(); - } + ConstIterator End() const { + return m_set.end(); + } ConstIterator end() const { return m_set.end(); } - size_t Size() const { - return m_set.size(); - } - bool Empty() const { - return m_set.empty(); - } - - /// Returns an item equal to @p t. It is guaranteed that: - /// - representative(a) equals representative(b) iff a is equivalent to b; - /// - representative(a) is equivalent to a. - const T& Representative(const T& t) const - { + size_t Size() const { + return m_set.size(); + } + bool Empty() const { + return m_set.empty(); + } + + /// Returns an item equal to @p t. It is guaranteed that: + /// - representative(a) equals representative(b) iff a is equivalent to b; + /// - representative(a) is equivalent to a. + const T& Representative(const T& t) const + { auto it = m_inv.find(t); - if (it != m_inv.end()) - return it->second; - else - return DefaultValue<T>(); - } - - bool Contains(const T& t) const - { - return m_inv.find(t) != m_inv.end(); - } - - /// Returns an index of set containing @p t. It is guaranteed that: - /// - index(a) equals index(b) iff a is equivalent to b; - /// - 0 <= index(a) < size(). - size_t Index(const T& t) const - { + if (it != m_inv.end()) + return it->second; + else + return DefaultValue<T>(); + } + + bool Contains(const T& t) const + { + return m_inv.find(t) != m_inv.end(); + } + + /// Returns an index of set containing @p t. It is guaranteed that: + /// - index(a) equals index(b) iff a is equivalent to b; + /// - 0 <= index(a) < size(). + size_t Index(const T& t) const + { auto it = m_inv.find(t); - if (it == m_inv.end()) - throw Error("Partition::index(): attempted to obtain an index of nonexistent item"); + if (it == m_inv.end()) + throw Error("Partition::index(): attempted to obtain an index of nonexistent item"); auto it2 = m_set.find(it->second); Y_ASSERT(it2 != m_set.end()); - return it2->second.first; - } - /// Returns the whole equivalence class of @p t (i.e. item @p i - /// is returned iff representative(i) == representative(t)). + return it2->second.first; + } + /// Returns the whole equivalence class of @p t (i.e. item @p i + /// is returned iff representative(i) == representative(t)). const TVector<T>& Klass(const T& t) const - { + { auto it = m_inv.find(t); - if (it == m_inv.end()) - throw Error("Partition::index(): attempted to obtain an index of nonexistent item"); + if (it == m_inv.end()) + throw Error("Partition::index(): attempted to obtain an index of nonexistent item"); auto it2 = m_set.find(it->second); Y_ASSERT(it2 != m_set.end()); - return it2->second.second; - } - - bool operator == (const Partition& rhs) const { return m_set == rhs.m_set; } - bool operator != (const Partition& rhs) const { return !(*this == rhs); } - - /// Splits the current sets into smaller ones, using given equivalence relation. - /// Requires given relation imply previous one (set either in ctor or - /// in preceeding calls to split()), but performs faster. - /// Replaces previous relation with given one. - void Split(const Eq& eq) - { - m_eq = eq; - + return it2->second.second; + } + + bool operator == (const Partition& rhs) const { return m_set == rhs.m_set; } + bool operator != (const Partition& rhs) const { return !(*this == rhs); } + + /// Splits the current sets into smaller ones, using given equivalence relation. + /// Requires given relation imply previous one (set either in ctor or + /// in preceeding calls to split()), but performs faster. + /// Replaces previous relation with given one. + void Split(const Eq& eq) + { + m_eq = eq; + for (auto&& element : m_set) if (element.second.second.size() > 1) { TVector<T>& v = element.second.second; auto bound = std::partition(v.begin(), v.end(), std::bind2nd(m_eq, v[0])); - if (bound == v.end()) - continue; - - Set delta; + if (bound == v.end()) + continue; + + Set delta; for (auto it = bound, ie = v.end(); it != ie; ++it) - DoAppend(delta, *it); - - v.erase(bound, v.end()); - m_set.insert(delta.begin(), delta.end()); - } - } - -private: - Eq m_eq; - Set m_set; + DoAppend(delta, *it); + + v.erase(bound, v.end()); + m_set.insert(delta.begin(), delta.end()); + } + } + +private: + Eq m_eq; + Set m_set; TMap<T, T> m_inv; - size_t m_maxidx; - - void DoAppend(Set& set, const T& t) - { + size_t m_maxidx; + + void DoAppend(Set& set, const T& t) + { auto it = set.begin(); auto end = set.end(); - for (; it != end; ++it) - if (m_eq(it->first, t)) { - it->second.second.push_back(t); - m_inv[t] = it->first; - break; - } - - if (it == end) { - // Begin new set + for (; it != end; ++it) + if (m_eq(it->first, t)) { + it->second.second.push_back(t); + m_inv[t] = it->first; + break; + } + + if (it == end) { + // Begin new set TVector<T> v(1, t); - set.insert(ymake_pair(t, ymake_pair(m_maxidx++, v))); - m_inv[t] = t; - } - } -}; - -// Mainly for debugging -template<class T, class Eq> -yostream& operator << (yostream& stream, const Partition<T, Eq>& partition) -{ - stream << "Partition {\n"; + set.insert(ymake_pair(t, ymake_pair(m_maxidx++, v))); + m_inv[t] = t; + } + } +}; + +// Mainly for debugging +template<class T, class Eq> +yostream& operator << (yostream& stream, const Partition<T, Eq>& partition) +{ + stream << "Partition {\n"; for (auto&& partitionElement : partition) { stream << " Class " << partitionElement.second.first << " \"" << partitionElement.first << "\" { "; - bool first = false; + bool first = false; for (auto&& element : partitionElement.second.second) { - if (first) - stream << ", "; - else - first = true; + if (first) + stream << ", "; + else + first = true; stream << element; - } - stream << " }\n"; - } - stream << "}"; - return stream; -} - -} - - -#endif + } + stream << " }\n"; + } + stream << "}"; + return stream; +} + +} + + +#endif diff --git a/contrib/libs/pire/pire/pire.h b/contrib/libs/pire/pire/pire.h index 12eb84ccb6..d4d3acd92d 100644 --- a/contrib/libs/pire/pire/pire.h +++ b/contrib/libs/pire/pire/pire.h @@ -1,38 +1,38 @@ -/* - * pire.h -- a single include file for end-users - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * pire.h -- a single include file for end-users * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_PIRE_H -#define PIRE_PIRE_H - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_PIRE_H +#define PIRE_PIRE_H + #include <contrib/libs/pire/pire/scanners/multi.h> #include <contrib/libs/pire/pire/scanners/half_final.h> #include <contrib/libs/pire/pire/scanners/simple.h> #include <contrib/libs/pire/pire/scanners/slow.h> #include <contrib/libs/pire/pire/scanners/pair.h> - -#include "re_lexer.h" -#include "fsm.h" -#include "encoding.h" -#include "run.h" - -#endif + +#include "re_lexer.h" +#include "fsm.h" +#include "encoding.h" +#include "run.h" + +#endif diff --git a/contrib/libs/pire/pire/platform.h b/contrib/libs/pire/pire/platform.h index 54ded6b387..c0504b7ce3 100644 --- a/contrib/libs/pire/pire/platform.h +++ b/contrib/libs/pire/pire/platform.h @@ -1,47 +1,47 @@ -/* - * platform.h -- hardware and OS specific stuff - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - -#ifndef PIRE_PLATFORM_H_INCLUDED -#define PIRE_PLATFORM_H_INCLUDED - +/* + * platform.h -- hardware and OS specific stuff + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + +#ifndef PIRE_PLATFORM_H_INCLUDED +#define PIRE_PLATFORM_H_INCLUDED + #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/static_assert.h> - + #ifndef PIRE_FORCED_INLINE -#ifdef __GNUC__ +#ifdef __GNUC__ #define PIRE_FORCED_INLINE inline __attribute__((__always_inline__)) -#elif _MSC_VER +#elif _MSC_VER #define PIRE_FORCED_INLINE __forceinline -#else +#else #define PIRE_FORCED_INLINE inline -#endif -#endif - -#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) -#define PIRE_HOT_FUNCTION __attribute__ ((hot)) -#else -#define PIRE_HOT_FUNCTION -#endif - +#endif +#endif + +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) +#define PIRE_HOT_FUNCTION __attribute__ ((hot)) +#else +#define PIRE_HOT_FUNCTION +#endif + #ifndef PIRE_LIKELY #ifdef __GNUC__ #define PIRE_LIKELY(x) (__builtin_expect((x), 1)) @@ -58,27 +58,27 @@ #endif #endif -#ifdef _MSC_VER -#include <stdio.h> -#include <stdarg.h> - -namespace Pire { - -#if _MCS_VER >= 1600 -#ifdef _WIN64 -typedef i64 ssize_t; -#else -typedef i32 ssize_t; -#endif -#endif - -inline int snprintf(char *str, size_t size, const char *format, ...) -{ +#ifdef _MSC_VER +#include <stdio.h> +#include <stdarg.h> + +namespace Pire { + +#if _MCS_VER >= 1600 +#ifdef _WIN64 +typedef i64 ssize_t; +#else +typedef i32 ssize_t; +#endif +#endif + +inline int snprintf(char *str, size_t size, const char *format, ...) +{ va_list argptr; va_start(argptr, format); int i = _vsnprintf(str, size-1, format, argptr); va_end(argptr); - + // A workaround for some bug if (i < 0) { str[size - 1] = '\x00'; @@ -87,14 +87,14 @@ inline int snprintf(char *str, size_t size, const char *format, ...) str[i] = '\x00'; } return i; -} - -} -#endif - -namespace Pire { -namespace Impl { - +} + +} +#endif + +namespace Pire { +namespace Impl { + // A portable way to define a constant like `(size_t)0101010101010101ull' without any warnings. template<unsigned Pos, unsigned char Byte> struct DoGenerateConst { @@ -112,11 +112,11 @@ struct GenerateConst { }; -// Common implementation of mask comparison logic suitable for -// any instruction set -struct BasicInstructionSet { +// Common implementation of mask comparison logic suitable for +// any instruction set +struct BasicInstructionSet { typedef size_t Vector; - + // Check bytes in the chunk against bytes in the mask static inline Vector CheckBytes(Vector mask, Vector chunk) { @@ -125,66 +125,66 @@ struct BasicInstructionSet { size_t mc = chunk ^ mask; return ((mc - mask0x01) & ~mc & mask0x80); } - + static inline Vector Or(Vector mask1, Vector mask2) { return (mask1 | mask2); } - + static inline bool IsAnySet(Vector mask) { return (mask != 0); } -}; - -}} - -#if defined(__SSE2__) -#include <emmintrin.h> - -namespace Pire { -namespace Impl { - -// SSE2-optimized mask comparison logic -struct AvailSSE2 { +}; + +}} + +#if defined(__SSE2__) +#include <emmintrin.h> + +namespace Pire { +namespace Impl { + +// SSE2-optimized mask comparison logic +struct AvailSSE2 { typedef __m128i Vector; - + static inline Vector CheckBytes(Vector mask, Vector chunk) { return _mm_cmpeq_epi8(mask, chunk); } - + static inline Vector Or(Vector mask1, Vector mask2) { return _mm_or_si128(mask1, mask2); } - + static inline bool IsAnySet(Vector mask) { return _mm_movemask_epi8(mask); } -}; - -typedef AvailSSE2 AvailInstructionSet; - -inline AvailSSE2::Vector ToLittleEndian(AvailSSE2::Vector x) { return x; } - -}} - -#elif defined(__MMX__) -#include <mmintrin.h> - -namespace Pire { -namespace Impl { - -// MMX-optimized mask comparison logic -struct AvailMMX { +}; + +typedef AvailSSE2 AvailInstructionSet; + +inline AvailSSE2::Vector ToLittleEndian(AvailSSE2::Vector x) { return x; } + +}} + +#elif defined(__MMX__) +#include <mmintrin.h> + +namespace Pire { +namespace Impl { + +// MMX-optimized mask comparison logic +struct AvailMMX { typedef __m64 Vector; - + static inline Vector CheckBytes(Vector mask, Vector chunk) { return _mm_cmpeq_pi8(mask, chunk); } - + static inline Vector Or(Vector mask1, Vector mask2) { return _mm_or_si64(mask1, mask2); } - + static inline bool IsAnySet(Vector mask) { union { @@ -194,68 +194,68 @@ struct AvailMMX { mmxMask = mask; return ui64Mask; } -}; - -typedef AvailMMX AvailInstructionSet; - -inline AvailMMX::Vector ToLittleEndian(AvailMMX::Vector x) { return x; } - -}} - -#else // no SSE and MMX - -namespace Pire { -namespace Impl { - -typedef BasicInstructionSet AvailInstructionSet; - -}} - -#endif - -namespace Pire { -namespace Impl { - -typedef AvailInstructionSet::Vector Word; - -inline Word CheckBytes(Word mask, Word chunk) { return AvailInstructionSet::CheckBytes(mask, chunk); } - -inline Word Or(Word mask1, Word mask2) { return AvailInstructionSet::Or(mask1, mask2); } - -inline bool IsAnySet(Word mask) { return AvailInstructionSet::IsAnySet(mask); } - -// MaxSizeWord type is largest integer type supported by the plaform including -// all possible SSE extensions that are are known for this platform (even if these -// extensions are not available at compile time) -// It is used for alignments and save/load data structures to produce data format -// compatible between all platforms with the same endianness and pointer size -template <size_t Size> struct MaxWordSizeHelper; - -// Maximum size of SSE register is 128 bit on x86 and x86_64 -template <> -struct MaxWordSizeHelper<16> { +}; + +typedef AvailMMX AvailInstructionSet; + +inline AvailMMX::Vector ToLittleEndian(AvailMMX::Vector x) { return x; } + +}} + +#else // no SSE and MMX + +namespace Pire { +namespace Impl { + +typedef BasicInstructionSet AvailInstructionSet; + +}} + +#endif + +namespace Pire { +namespace Impl { + +typedef AvailInstructionSet::Vector Word; + +inline Word CheckBytes(Word mask, Word chunk) { return AvailInstructionSet::CheckBytes(mask, chunk); } + +inline Word Or(Word mask1, Word mask2) { return AvailInstructionSet::Or(mask1, mask2); } + +inline bool IsAnySet(Word mask) { return AvailInstructionSet::IsAnySet(mask); } + +// MaxSizeWord type is largest integer type supported by the plaform including +// all possible SSE extensions that are are known for this platform (even if these +// extensions are not available at compile time) +// It is used for alignments and save/load data structures to produce data format +// compatible between all platforms with the same endianness and pointer size +template <size_t Size> struct MaxWordSizeHelper; + +// Maximum size of SSE register is 128 bit on x86 and x86_64 +template <> +struct MaxWordSizeHelper<16> { struct MaxSizeWord { char val[16]; }; -}; - -typedef MaxWordSizeHelper<16>::MaxSizeWord MaxSizeWord; - -// MaxSizeWord size should be a multiple of size_t size and a multipe of Word size -PIRE_STATIC_ASSERT( +}; + +typedef MaxWordSizeHelper<16>::MaxSizeWord MaxSizeWord; + +// MaxSizeWord size should be a multiple of size_t size and a multipe of Word size +PIRE_STATIC_ASSERT( (sizeof(MaxSizeWord) % sizeof(size_t) == 0) && (sizeof(MaxSizeWord) % sizeof(Word) == 0)); - -inline size_t FillSizeT(char c) -{ + +inline size_t FillSizeT(char c) +{ size_t w = c; w &= 0x0ff; for (size_t i = 8; i != sizeof(size_t)*8; i <<= 1) w = (w << i) | w; return w; -} - -}} - -#endif - +} + +}} + +#endif + diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp index 132fbeb039..c2258dd759 100644 --- a/contrib/libs/pire/pire/re_lexer.cpp +++ b/contrib/libs/pire/pire/re_lexer.cpp @@ -1,28 +1,28 @@ -/* - * re_lexer.cpp -- implementation of Lexer class +/* + * re_lexer.cpp -- implementation of Lexer class + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include <ctype.h> -#include <stdexcept> + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include <ctype.h> +#include <stdexcept> #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/utf8.h> @@ -32,24 +32,24 @@ #include "re_lexer.h" #include "re_parser.h" #include "read_unicode.h" + - -namespace Pire { - -namespace Impl { +namespace Pire { + +namespace Impl { int yre_parse(Pire::Lexer& lexer); -} - -Term Term::Character(wchar32 c) { Term::CharacterRange cr; cr.first.insert(Term::String(1, c)); cr.second = false; return Term(TokenTypes::Letters, cr); } -Term Term::Repetition(int lower, int upper) { return Term(TokenTypes::Count, RepetitionCount(lower, upper)); } -Term Term::Dot() { return Term(TokenTypes::Dot, DotTag()); } -Term Term::BeginMark() { return Term(TokenTypes::BeginMark, BeginTag()); } -Term Term::EndMark() { return Term(TokenTypes::EndMark, EndTag()); } - +} + +Term Term::Character(wchar32 c) { Term::CharacterRange cr; cr.first.insert(Term::String(1, c)); cr.second = false; return Term(TokenTypes::Letters, cr); } +Term Term::Repetition(int lower, int upper) { return Term(TokenTypes::Count, RepetitionCount(lower, upper)); } +Term Term::Dot() { return Term(TokenTypes::Dot, DotTag()); } +Term Term::BeginMark() { return Term(TokenTypes::BeginMark, BeginTag()); } +Term Term::EndMark() { return Term(TokenTypes::EndMark, EndTag()); } + Lexer::~Lexer() = default; - -wchar32 Lexer::GetChar() -{ + +wchar32 Lexer::GetChar() +{ if (m_input.empty()) return End; else if (m_input.front() == '\\') { @@ -64,23 +64,23 @@ wchar32 Lexer::GetChar() m_input.pop_front(); return ch; } -} - -wchar32 Lexer::PeekChar() -{ +} + +wchar32 Lexer::PeekChar() +{ if (m_input.empty()) return End; else return m_input.front(); -} - -void Lexer::UngetChar(wchar32 c) -{ +} + +void Lexer::UngetChar(wchar32 c) +{ if (c != End) m_input.push_front(c); -} - -namespace { +} + +namespace { class CompareFeaturesByPriority: public ybinary_function<const Feature::Ptr&, const Feature::Ptr&, bool> { public: bool operator()(const Feature::Ptr& a, const Feature::Ptr& b) const @@ -88,15 +88,15 @@ namespace { return a->Priority() < b->Priority(); } }; -} - +} + Lexer& Lexer::AddFeature(Feature::Ptr& feature) -{ +{ feature->m_lexer = this; m_features.insert(LowerBound(m_features.begin(), m_features.end(), feature, CompareFeaturesByPriority()), std::move(feature)); return *this; -} - +} + Lexer& Lexer::AddFeature(Feature::Ptr&& feature) { feature->m_lexer = this; @@ -104,8 +104,8 @@ Lexer& Lexer::AddFeature(Feature::Ptr&& feature) return *this; } -Term Lexer::DoLex() -{ +Term Lexer::DoLex() +{ static const char* controls = "|().*+?^$\\"; for (;;) { UngetChar(GetChar()); @@ -120,7 +120,7 @@ Term Lexer::DoLex() } } ch = GetChar(); - + if (ch == '|') return Term(TokenTypes::Or); else if (ch == '(') { @@ -144,15 +144,15 @@ Term Lexer::DoLex() else return Term::Character(ch); } -} - -Term Lexer::Lex() -{ +} + +Term Lexer::Lex() +{ Term t = DoLex(); - + for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i) (*i)->Alter(t); - + if (t.Value().IsA<Term::CharacterRange>()) { const auto& chars = t.Value().As<Term::CharacterRange>(); //std::cerr << "lex: type " << t.type() << "; chars = { " << join(chars.first.begin(), chars.first.end(), ", ") << " }" << std::endl; @@ -186,25 +186,25 @@ Term Lexer::Lex() else if (type == TokenTypes::End) type = 0; return Term(type, t.Value()); -} +} -void Lexer::Parenthesized(Fsm& fsm) -{ +void Lexer::Parenthesized(Fsm& fsm) +{ for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i) (*i)->Parenthesized(fsm); -} - -wchar32 Feature::CorrectChar(wchar32 c, const char* controls) -{ +} + +wchar32 Feature::CorrectChar(wchar32 c, const char* controls) +{ bool ctrl = (strchr(controls, c & 0xFF) != 0); if ((c & ControlMask) == Control && ctrl) return c & ~ControlMask; if (c <= 0xFF && ctrl) return c | Control; return c; -} - -namespace { +} + +namespace { class EnableUnicodeSequencesImpl : public UnicodeReader { public: bool Accepts(wchar32 c) const { @@ -219,7 +219,7 @@ namespace { class CharacterRangeReader: public UnicodeReader { public: bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); } - + Term Lex() { static const char* controls = "^[]-\\"; @@ -227,14 +227,14 @@ namespace { wchar32 ch = CorrectChar(GetChar(), controls); if (ch == '[' || ch == ']') return Term::Character(ch); - + Term::CharacterRange cs; ch = CorrectChar(GetChar(), controls); if (ch == (Control | '^')) { cs.second = true; ch = CorrectChar(GetChar(), controls); } - + bool firstUnicode; wchar32 unicodeSymbol = 0; @@ -281,15 +281,15 @@ namespace { } if (ch == End) Error("Unexpected end of pattern"); - + return Term(TokenTypes::Letters, cs); } }; - + class RepetitionCountReader: public Feature { public: bool Accepts(wchar32 c) const { return c == '{' || c == (Control | '{') || c == (Control | '}'); } - + Term Lex() { wchar32 ch = GetChar(); @@ -297,17 +297,17 @@ namespace { return Term::Character(ch & ~ControlMask); ch = GetChar(); int lower = 0, upper = 0; - + if (!is_digit(ch)) Error("Wrong repetition count"); - + for (; is_digit(ch); ch = GetChar()) lower = lower * 10 + (ch - '0'); if (ch == '}') return Term::Repetition(lower, lower); else if (ch != ',') Error("Wrong repetition count"); - + ch = GetChar(); if (ch == '}') return Term::Repetition(lower, Inf); @@ -315,13 +315,13 @@ namespace { Error("Wrong repetition count"); for (; is_digit(ch); ch = GetChar()) upper = upper * 10 + (ch - '0'); - + if (ch != '}') Error("Wrong repetition count"); return Term::Repetition(lower, upper); } }; - + class CaseInsensitiveImpl: public Feature { public: void Alter(Term& t) @@ -363,30 +363,30 @@ namespace { } } }; -} - -namespace Features { +} + +namespace Features { Feature::Ptr CaseInsensitive() { return Feature::Ptr(new CaseInsensitiveImpl); } Feature::Ptr CharClasses(); Feature::Ptr AndNotSupport() { return Feature::Ptr(new AndNotSupportImpl); } -}; - -void Lexer::InstallDefaultFeatures() -{ +}; + +void Lexer::InstallDefaultFeatures() +{ AddFeature(Feature::Ptr(new CharacterRangeReader)); AddFeature(Feature::Ptr(new RepetitionCountReader)); AddFeature(Features::CharClasses()); AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl)); -} - -Fsm Lexer::Parse() -{ +} + +Fsm Lexer::Parse() +{ if (!Impl::yre_parse(*this)) return m_retval.As<Fsm>(); else { Error("Syntax error in regexp"); return Fsm(); // Make compiler happy } -} - -} +} + +} diff --git a/contrib/libs/pire/pire/re_lexer.h b/contrib/libs/pire/pire/re_lexer.h index 5591c16d34..e397a38d5c 100644 --- a/contrib/libs/pire/pire/re_lexer.h +++ b/contrib/libs/pire/pire/re_lexer.h @@ -1,244 +1,244 @@ -/* - * re_lexer.h -- definition required for parsing regexps - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * re_lexer.h -- definition required for parsing regexps * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_RE_LEXER_H -#define PIRE_RE_LEXER_H - - -#include <vector> -#include <stack> -#include <set> -#include <utility> -#include <stdexcept> -#include <utility> -#include <string.h> + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_RE_LEXER_H +#define PIRE_RE_LEXER_H + + +#include <vector> +#include <stack> +#include <set> +#include <utility> +#include <stdexcept> +#include <utility> +#include <string.h> #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/stub/stl.h> - + #include "encoding.h" #include "any.h" -namespace Pire { - -namespace Consts { -enum { Inf = -1 }; - -static const wchar32 Control = 0xF000; -static const wchar32 ControlMask = 0xFF00; -static const wchar32 End = Control | 0xFF; -}; - -using namespace Consts; - -namespace TokenTypes { -enum { - None = 0, - Letters, - Count, - Dot, - Open, - Close, - Or, - And, - Not, - BeginMark, - EndMark, - End -}; -} - -/** -* A single terminal character in regexp pattern. -* Consists of a type (a character, a repetition count, an opening parenthesis, etc...) -* and optional value. -*/ -class Term { -public: +namespace Pire { + +namespace Consts { +enum { Inf = -1 }; + +static const wchar32 Control = 0xF000; +static const wchar32 ControlMask = 0xFF00; +static const wchar32 End = Control | 0xFF; +}; + +using namespace Consts; + +namespace TokenTypes { +enum { + None = 0, + Letters, + Count, + Dot, + Open, + Close, + Or, + And, + Not, + BeginMark, + EndMark, + End +}; +} + +/** +* A single terminal character in regexp pattern. +* Consists of a type (a character, a repetition count, an opening parenthesis, etc...) +* and optional value. +*/ +class Term { +public: typedef TVector<wchar32> String; typedef TSet<String> Strings; - - typedef ypair<int, int> RepetitionCount; - typedef ypair<Strings, bool> CharacterRange; - - struct DotTag {}; - struct BeginTag {}; - struct EndTag {}; - - Term(int type): m_type(type) {} - template<class T> Term(int type, T t): m_type(type), m_value(t) {} - Term(int type, const Any& value): m_type(type), m_value(value) {} - - static Term Character(wchar32 c); - static Term Repetition(int lower, int upper); - static Term Dot(); - static Term BeginMark(); - static Term EndMark(); - - int Type() const { return m_type; } - const Any& Value() const { return m_value; } -private: - int m_type; - Any m_value; -}; - -class Feature; - -/** -* A class performing regexp pattern parsing. -*/ -class Lexer { -public: - // One-size-fits-all constructor set. - Lexer() - : m_encoding(&Encodings::Latin1()) - { InstallDefaultFeatures(); } - - explicit Lexer(const char* str) - : m_encoding(&Encodings::Latin1()) - { - InstallDefaultFeatures(); - Assign(str, str + strlen(str)); - } - template<class T> explicit Lexer(const T& t) - : m_encoding(&Encodings::Latin1()) - { - InstallDefaultFeatures(); - Assign(t.begin(), t.end()); - } - - template<class Iter> Lexer(Iter begin, Iter end) - : m_encoding(&Encodings::Latin1()) - { - InstallDefaultFeatures(); - Assign(begin, end); - } - ~Lexer(); - - template<class Iter> void Assign(Iter begin, Iter end) - { - m_input.clear(); - std::copy(begin, end, std::back_inserter(m_input)); - } - - /// The main lexer function. Extracts and returns the next term in input sequence. - Term Lex(); - /// Installs an additional lexer feature. + + typedef ypair<int, int> RepetitionCount; + typedef ypair<Strings, bool> CharacterRange; + + struct DotTag {}; + struct BeginTag {}; + struct EndTag {}; + + Term(int type): m_type(type) {} + template<class T> Term(int type, T t): m_type(type), m_value(t) {} + Term(int type, const Any& value): m_type(type), m_value(value) {} + + static Term Character(wchar32 c); + static Term Repetition(int lower, int upper); + static Term Dot(); + static Term BeginMark(); + static Term EndMark(); + + int Type() const { return m_type; } + const Any& Value() const { return m_value; } +private: + int m_type; + Any m_value; +}; + +class Feature; + +/** +* A class performing regexp pattern parsing. +*/ +class Lexer { +public: + // One-size-fits-all constructor set. + Lexer() + : m_encoding(&Encodings::Latin1()) + { InstallDefaultFeatures(); } + + explicit Lexer(const char* str) + : m_encoding(&Encodings::Latin1()) + { + InstallDefaultFeatures(); + Assign(str, str + strlen(str)); + } + template<class T> explicit Lexer(const T& t) + : m_encoding(&Encodings::Latin1()) + { + InstallDefaultFeatures(); + Assign(t.begin(), t.end()); + } + + template<class Iter> Lexer(Iter begin, Iter end) + : m_encoding(&Encodings::Latin1()) + { + InstallDefaultFeatures(); + Assign(begin, end); + } + ~Lexer(); + + template<class Iter> void Assign(Iter begin, Iter end) + { + m_input.clear(); + std::copy(begin, end, std::back_inserter(m_input)); + } + + /// The main lexer function. Extracts and returns the next term in input sequence. + Term Lex(); + /// Installs an additional lexer feature. /// We declare both lvalue and rvalue reference types to fix some linker errors. Lexer& AddFeature(THolder<Feature>& a); Lexer& AddFeature(THolder<Feature>&& a); - - const Pire::Encoding& Encoding() const { return *m_encoding; } - Lexer& SetEncoding(const Pire::Encoding& encoding) { m_encoding = &encoding; return *this; } + + const Pire::Encoding& Encoding() const { return *m_encoding; } + Lexer& SetEncoding(const Pire::Encoding& encoding) { m_encoding = &encoding; return *this; } void SetError(const char* msg) { errmsg = msg; } void SetError(ystring msg) { errmsg = msg; } ystring& GetError() { return errmsg; } - - Any& Retval() { return m_retval; } - - Fsm Parse(); - - void Parenthesized(Fsm& fsm); - -private: - Term DoLex(); - - wchar32 GetChar(); - wchar32 PeekChar(); - void UngetChar(wchar32 c); - - void Error(const char* msg) { throw Pire::Error(msg); } - - void InstallDefaultFeatures(); - + + Any& Retval() { return m_retval; } + + Fsm Parse(); + + void Parenthesized(Fsm& fsm); + +private: + Term DoLex(); + + wchar32 GetChar(); + wchar32 PeekChar(); + void UngetChar(wchar32 c); + + void Error(const char* msg) { throw Pire::Error(msg); } + + void InstallDefaultFeatures(); + TDeque<wchar32> m_input; - const Pire::Encoding* m_encoding; + const Pire::Encoding* m_encoding; TVector<THolder<Feature>> m_features; - Any m_retval; + Any m_retval; ystring errmsg; - - friend class Feature; - - Lexer(const Lexer&); - Lexer& operator = (const Lexer&); -}; - -/** -* A basic class for Pire customization. -* Features can be installed in the lexer and alter its behaviour. -*/ -class Feature { -public: + + friend class Feature; + + Lexer(const Lexer&); + Lexer& operator = (const Lexer&); +}; + +/** +* A basic class for Pire customization. +* Features can be installed in the lexer and alter its behaviour. +*/ +class Feature { +public: /// Precedence of features. The less the priority, the earlier - /// will Lex() be called, and the later will Alter() and Parenthesized() be called. - virtual int Priority() const { return 50; } - - /// Lexer will call this function to check whether the feature - /// wants to handle the next part of the input sequence in its - /// specific way. If it does not, features Lex() will not be called. - virtual bool Accepts(wchar32 /*c*/) const { return false; } - /// Should eat up some part of the input sequence, handle it - /// somehow and produce a terminal. - virtual Term Lex() { return Term(0); } - - /// This function recieves a shiny new terminal, and the feature - /// has a chance to hack it somehow if it wants. - virtual void Alter(Term&) {} - /// This function recieves a parenthesized part of a pattern, and the feature - /// has a chance to hack it somehow if it wants (its the way to implement - /// those perl-style (?@#$%:..) clauses). - virtual void Parenthesized(Fsm&) {} - + /// will Lex() be called, and the later will Alter() and Parenthesized() be called. + virtual int Priority() const { return 50; } + + /// Lexer will call this function to check whether the feature + /// wants to handle the next part of the input sequence in its + /// specific way. If it does not, features Lex() will not be called. + virtual bool Accepts(wchar32 /*c*/) const { return false; } + /// Should eat up some part of the input sequence, handle it + /// somehow and produce a terminal. + virtual Term Lex() { return Term(0); } + + /// This function recieves a shiny new terminal, and the feature + /// has a chance to hack it somehow if it wants. + virtual void Alter(Term&) {} + /// This function recieves a parenthesized part of a pattern, and the feature + /// has a chance to hack it somehow if it wants (its the way to implement + /// those perl-style (?@#$%:..) clauses). + virtual void Parenthesized(Fsm&) {} + using Ptr = THolder<Feature>; - + virtual ~Feature() = default; -protected: - - // These functions are exposed versions of the corresponding lexer functions. - const Pire::Encoding& Encoding() const { return m_lexer->Encoding(); } - wchar32 GetChar() { return m_lexer->GetChar(); } - wchar32 PeekChar() { return m_lexer->PeekChar(); } - void UngetChar(wchar32 c) { m_lexer->UngetChar(c); } - wchar32 CorrectChar(wchar32 c, const char* controls); - void Error(const char* msg) { m_lexer->Error(msg); } - -private: - friend class Lexer; - Lexer* m_lexer; -}; - -namespace Features { - /// Disables case sensitivity +protected: + + // These functions are exposed versions of the corresponding lexer functions. + const Pire::Encoding& Encoding() const { return m_lexer->Encoding(); } + wchar32 GetChar() { return m_lexer->GetChar(); } + wchar32 PeekChar() { return m_lexer->PeekChar(); } + void UngetChar(wchar32 c) { m_lexer->UngetChar(c); } + wchar32 CorrectChar(wchar32 c, const char* controls); + void Error(const char* msg) { m_lexer->Error(msg); } + +private: + friend class Lexer; + Lexer* m_lexer; +}; + +namespace Features { + /// Disables case sensitivity Feature::Ptr CaseInsensitive(); - - /** - * Adds two more operations: - * (pattern1)&(pattern2) -- matches those strings which match both /pattern1/ and /pattern2/; - * ~(pattern) -- matches those strings which do not match /pattern/. - */ + + /** + * Adds two more operations: + * (pattern1)&(pattern2) -- matches those strings which match both /pattern1/ and /pattern2/; + * ~(pattern) -- matches those strings which do not match /pattern/. + */ Feature::Ptr AndNotSupport(); -} - -} - -#endif +} + +} + +#endif diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y index dbad88e287..292c275ebd 100644 --- a/contrib/libs/pire/pire/re_parser.y +++ b/contrib/libs/pire/pire/re_parser.y @@ -1,80 +1,80 @@ -%{ // -*- mode: c++ -*- - -/* - * re_parser.ypp -- the main regexp parsing routine - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +%{ // -*- mode: c++ -*- + +/* + * re_parser.ypp -- the main regexp parsing routine * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifdef _MSC_VER -// Disable yacc warnings -#pragma warning(disable: 4060) // switch contains no 'case' or 'default' statements -#pragma warning(disable: 4065) // switch contains 'default' but no 'case' statements -#pragma warning(disable: 4102) // unreferenced label 'yyerrlabl' + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifdef _MSC_VER +// Disable yacc warnings +#pragma warning(disable: 4060) // switch contains no 'case' or 'default' statements +#pragma warning(disable: 4065) // switch contains 'default' but no 'case' statements +#pragma warning(disable: 4102) // unreferenced label 'yyerrlabl' #pragma warning(disable: 4702) // unreachable code -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic ignored "-Wuninitialized" // 'yylval' may be used uninitialized -#endif - -#include <stdexcept> - +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wuninitialized" // 'yylval' may be used uninitialized +#endif + +#include <stdexcept> + #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/re_lexer.h> #include <contrib/libs/pire/pire/any.h> #include <contrib/libs/pire/pire/stub/stl.h> - -#define YYSTYPE Any* -#define YYSTYPE_IS_TRIVIAL 0 - -namespace { - -using namespace Pire; -using Pire::Fsm; -using Pire::Encoding; - -int yylex(YYSTYPE*, Lexer&); + +#define YYSTYPE Any* +#define YYSTYPE_IS_TRIVIAL 0 + +namespace { + +using namespace Pire; +using Pire::Fsm; +using Pire::Encoding; + +int yylex(YYSTYPE*, Lexer&); void yyerror(Pire::Lexer&, const char*); - -Fsm& ConvertToFSM(const Encoding& encoding, Any* any); -void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr); - -%} - + +Fsm& ConvertToFSM(const Encoding& encoding, Any* any); +void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr); + +%} + %parse-param { Pire::Lexer& rlex } %lex-param { Pire::Lexer& rlex } %pure-parser - -// Terminal declarations -%term YRE_LETTERS -%term YRE_COUNT -%term YRE_DOT -%term YRE_AND -%term YRE_NOT - + +// Terminal declarations +%term YRE_LETTERS +%term YRE_COUNT +%term YRE_DOT +%term YRE_AND +%term YRE_NOT + %destructor { delete $$; } <> -%% - -regexp +%% + +regexp : alternative { ConvertToFSM(rlex.Encoding(), $1); @@ -83,23 +83,23 @@ regexp $$ = nullptr; } ; - -alternative + +alternative : conjunction | alternative '|' conjunction { ConvertToFSM(rlex.Encoding(), ($$ = $1)) |= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; } ; - -conjunction + +conjunction : negation | conjunction YRE_AND negation { ConvertToFSM(rlex.Encoding(), ($$ = $1)) &= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; } ; - -negation + +negation : concatenation | YRE_NOT concatenation { ConvertToFSM(rlex.Encoding(), ($$ = $2)).Complement(); delete $1; } ; - -concatenation + +concatenation : { $$ = new Any(Fsm()); } | concatenation iteration { @@ -113,8 +113,8 @@ concatenation delete $2; } ; - -iteration + +iteration : term | term YRE_COUNT { @@ -122,8 +122,8 @@ iteration $$ = new Any(orig); Fsm& cur = $$->As<Fsm>(); const Term::RepetitionCount& repc = $2->As<Term::RepetitionCount>(); - - + + if (repc.first == 0 && repc.second == 1) { Fsm empty; cur |= empty; @@ -144,19 +144,19 @@ iteration delete $2; } ; - -term + +term : YRE_LETTERS | YRE_DOT | '^' | '$' | '(' alternative ')' { $$ = $2; rlex.Parenthesized($$->As<Fsm>()); delete $1; delete $3; } ; - -%% - -int yylex(YYSTYPE* lval, Pire::Lexer& rlex) -{ + +%% + +int yylex(YYSTYPE* lval, Pire::Lexer& rlex) +{ try { Pire::Term term = rlex.Lex(); if (!term.Value().Empty()) @@ -168,18 +168,18 @@ int yylex(YYSTYPE* lval, Pire::Lexer& rlex) rlex.SetError(e.what()); return 0; } -} - +} + void yyerror(Pire::Lexer& rlex, const char* str) -{ +{ if (rlex.GetError().length() == 0) rlex.SetError(ystring("Regexp parse error: ").append(str)); -} - -void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr) -{ +} + +void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr) +{ TVector<ystring> strings; - + for (auto&& i : cr.first) { ystring s; for (auto&& j : i) { @@ -199,16 +199,16 @@ void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& c a = Fsm::MakeFalse(); else a.AppendStrings(strings); -} - -Fsm& ConvertToFSM(const Encoding& encoding, Any* any) -{ +} + +Fsm& ConvertToFSM(const Encoding& encoding, Any* any) +{ if (any->IsA<Fsm>()) return any->As<Fsm>(); - + Any ret = Fsm(); Fsm& a = ret.As<Fsm>(); - + if (any->IsA<Term::DotTag>()) { encoding.AppendDot(a); } else if (any->IsA<Term::BeginTag>()) { @@ -229,11 +229,11 @@ Fsm& ConvertToFSM(const Encoding& encoding, Any* any) } any->Swap(ret); return a; -} - -} - -namespace Pire { +} + +} + +namespace Pire { namespace Impl { int yre_parse(Pire::Lexer& rlex) { @@ -244,4 +244,4 @@ namespace Pire { return rc; } } -} +} diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h index f6e1ff734d..a2f3a2fc8b 100644 --- a/contrib/libs/pire/pire/run.h +++ b/contrib/libs/pire/pire/run.h @@ -1,113 +1,113 @@ -/* - * run.h -- routines for running scanners on strings. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * run.h -- routines for running scanners on strings. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_RE_SCANNER_H -#define PIRE_RE_SCANNER_H - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_RE_SCANNER_H +#define PIRE_RE_SCANNER_H + #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/memstreams.h> #include <contrib/libs/pire/pire/scanners/pair.h> #include "platform.h" -#include "defs.h" - +#include "defs.h" + #include <string> -namespace Pire { - - template<class Scanner> - struct StDumper { - StDumper(const Scanner& sc, typename Scanner::State st): m_sc(&sc), m_st(st) {} - void Dump(yostream& stream) const { stream << m_sc->StateIndex(m_st) << (m_sc->Final(m_st) ? " [final]" : ""); } - private: - const Scanner* m_sc; - typename Scanner::State m_st; - }; - - template<class Scanner> StDumper<Scanner> StDump(const Scanner& sc, typename Scanner::State st) { return StDumper<Scanner>(sc, st); } - template<class Scanner> yostream& operator << (yostream& stream, const StDumper<Scanner>& stdump) { stdump.Dump(stream); return stream; } -} - -namespace Pire { - -template<class Scanner> +namespace Pire { + + template<class Scanner> + struct StDumper { + StDumper(const Scanner& sc, typename Scanner::State st): m_sc(&sc), m_st(st) {} + void Dump(yostream& stream) const { stream << m_sc->StateIndex(m_st) << (m_sc->Final(m_st) ? " [final]" : ""); } + private: + const Scanner* m_sc; + typename Scanner::State m_st; + }; + + template<class Scanner> StDumper<Scanner> StDump(const Scanner& sc, typename Scanner::State st) { return StDumper<Scanner>(sc, st); } + template<class Scanner> yostream& operator << (yostream& stream, const StDumper<Scanner>& stdump) { stdump.Dump(stream); return stream; } +} + +namespace Pire { + +template<class Scanner> PIRE_FORCED_INLINE PIRE_HOT_FUNCTION -void Step(const Scanner& scanner, typename Scanner::State& state, Char ch) -{ +void Step(const Scanner& scanner, typename Scanner::State& state, Char ch) +{ Y_ASSERT(ch < MaxCharUnaligned); - typename Scanner::Action a = scanner.Next(state, ch); - scanner.TakeAction(state, a); -} - -namespace Impl { - - enum Action { Continue, Stop }; - - template<class Scanner> - struct RunPred { + typename Scanner::Action a = scanner.Next(state, ch); + scanner.TakeAction(state, a); +} + +namespace Impl { + + enum Action { Continue, Stop }; + + template<class Scanner> + struct RunPred { PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action operator()(const Scanner&, const typename Scanner::State&, const char*) const { return Continue; } - }; + Action operator()(const Scanner&, const typename Scanner::State&, const char*) const { return Continue; } + }; - template<class Scanner> - struct ShortestPrefixPred { - explicit ShortestPrefixPred(const char*& pos): m_pos(&pos) {} - + template<class Scanner> + struct ShortestPrefixPred { + explicit ShortestPrefixPred(const char*& pos): m_pos(&pos) {} + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const - { - if (sc.Final(st)) { - *m_pos = pos; - return Stop; - } else { + Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const + { + if (sc.Final(st)) { + *m_pos = pos; + return Stop; + } else { return (sc.Dead(st) ? Stop : Continue); - } - } - private: - const char** m_pos; - }; + } + } + private: + const char** m_pos; + }; - template<class Scanner> - struct LongestPrefixPred { - explicit LongestPrefixPred(const char*& pos): m_pos(&pos) {} + template<class Scanner> + struct LongestPrefixPred { + explicit LongestPrefixPred(const char*& pos): m_pos(&pos) {} PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const - { - if (sc.Final(st)) - *m_pos = pos; - return (sc.Dead(st) ? Stop : Continue); - } - private: - const char** m_pos; - }; - -} - -#ifndef PIRE_DEBUG - -namespace Impl { - + Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const + { + if (sc.Final(st)) + *m_pos = pos; + return (sc.Dead(st) ? Stop : Continue); + } + private: + const char** m_pos; + }; + +} + +#ifndef PIRE_DEBUG + +namespace Impl { + template<class Scanner, class Pred> PIRE_FORCED_INLINE PIRE_HOT_FUNCTION Action SafeRunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred) @@ -128,168 +128,168 @@ namespace Impl { return Continue; } - /// Effectively runs a scanner on a short data chunk, fit completely into one machine word. - template<class Scanner, class Pred> + /// Effectively runs a scanner on a short data chunk, fit completely into one machine word. + template<class Scanner, class Pred> PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action RunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred) - { + Action RunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred) + { Y_ASSERT(pos <= sizeof(size_t)); Y_ASSERT(size <= sizeof(size_t)); Y_ASSERT(pos + size <= sizeof(size_t)); - + if (PIRE_UNLIKELY(size == 0)) return Continue; - size_t chunk = Impl::ToLittleEndian(*p) >> 8*pos; - const char* ptr = (const char*) p + pos + size + 1; - - for (size_t i = size; i != 0; --i) { - Step(scanner, state, chunk & 0xFF); - if (pred(scanner, state, ptr - i) == Stop) - return Stop; - chunk >>= 8; - } - return Continue; - } - - template<class Scanner> - struct AlignedRunner { - - // Generic version for LongestPrefix()/ShortestPrefix() impelementations - template<class Pred> - static inline PIRE_HOT_FUNCTION - Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, Pred stop) - { - typename Scanner::State st = state; - Action ret = Continue; - for (; begin != end && (ret = RunChunk(scanner, st, begin, 0, sizeof(void*), stop)) == Continue; ++begin) - ; - state = st; - return ret; - } - - // A special version for Run() impelementation that skips predicate checks - static inline PIRE_HOT_FUNCTION - Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, RunPred<Scanner>) - { - typename Scanner::State st = state; - for (; begin != end; ++begin) { - size_t chunk = *begin; - for (size_t i = sizeof(chunk); i != 0; --i) { - Step(scanner, st, chunk & 0xFF); - chunk >>= 8; - } - } - state = st; - return Continue; - } - }; - - /// The main function: runs a scanner through given memory range. - template<class Scanner, class Pred> + size_t chunk = Impl::ToLittleEndian(*p) >> 8*pos; + const char* ptr = (const char*) p + pos + size + 1; + + for (size_t i = size; i != 0; --i) { + Step(scanner, state, chunk & 0xFF); + if (pred(scanner, state, ptr - i) == Stop) + return Stop; + chunk >>= 8; + } + return Continue; + } + + template<class Scanner> + struct AlignedRunner { + + // Generic version for LongestPrefix()/ShortestPrefix() impelementations + template<class Pred> + static inline PIRE_HOT_FUNCTION + Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, Pred stop) + { + typename Scanner::State st = state; + Action ret = Continue; + for (; begin != end && (ret = RunChunk(scanner, st, begin, 0, sizeof(void*), stop)) == Continue; ++begin) + ; + state = st; + return ret; + } + + // A special version for Run() impelementation that skips predicate checks + static inline PIRE_HOT_FUNCTION + Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, RunPred<Scanner>) + { + typename Scanner::State st = state; + for (; begin != end; ++begin) { + size_t chunk = *begin; + for (size_t i = sizeof(chunk); i != 0; --i) { + Step(scanner, st, chunk & 0xFF); + chunk >>= 8; + } + } + state = st; + return Continue; + } + }; + + /// The main function: runs a scanner through given memory range. + template<class Scanner, class Pred> inline void DoRun(const Scanner& scanner, typename Scanner::State& st, TStringBuf str, Pred pred) - { - + { + const size_t* head = reinterpret_cast<const size_t*>((reinterpret_cast<uintptr_t>(str.begin())) & ~(sizeof(size_t)-1)); const size_t* tail = reinterpret_cast<const size_t*>((reinterpret_cast<uintptr_t>(str.end())) & ~(sizeof(size_t)-1)); - + size_t headSize = (sizeof(size_t) - (str.begin() - (const char*)head)); // The distance from @p begin to the end of the word containing @p begin size_t tailSize = str.end() - (const char*) tail; // The distance from the beginning of the word containing @p end to the @p end - + Y_ASSERT(headSize >= 1 && headSize <= sizeof(size_t)); Y_ASSERT(tailSize < sizeof(size_t)); - - if (head == tail) { + + if (head == tail) { Impl::SafeRunChunk(scanner, st, head, sizeof(size_t) - headSize, str.end() - str.begin(), pred); - return; - } - - // st is passed by reference to this function. If we use it directly on each step the compiler will have to - // update it in memory because of pointer aliasing assumptions. Copying it into a local var allows the - // compiler to store it in a register. This saves some instructions and cycles - typename Scanner::State state = st; - + return; + } + + // st is passed by reference to this function. If we use it directly on each step the compiler will have to + // update it in memory because of pointer aliasing assumptions. Copying it into a local var allows the + // compiler to store it in a register. This saves some instructions and cycles + typename Scanner::State state = st; + if (str.begin() != (const char*) head) { if (Impl::RunChunk(scanner, state, head, sizeof(size_t) - headSize, headSize, pred) == Stop) { - st = state; - return; - } - ++head; - } - - if (Impl::AlignedRunner<Scanner>::RunAligned(scanner, state, head, tail, pred) == Stop) { - st = state; - return; - } - - if (tailSize) + st = state; + return; + } + ++head; + } + + if (Impl::AlignedRunner<Scanner>::RunAligned(scanner, state, head, tail, pred) == Stop) { + st = state; + return; + } + + if (tailSize) Impl::SafeRunChunk(scanner, state, tail, 0, tailSize, pred); - - st = state; - } - -} - -/// Runs two scanners through given memory range simultaneously. -/// This is several percent faster than running them independently. -template<class Scanner1, class Scanner2> + + st = state; + } + +} + +/// Runs two scanners through given memory range simultaneously. +/// This is several percent faster than running them independently. +template<class Scanner1, class Scanner2> inline void Run(const Scanner1& scanner1, const Scanner2& scanner2, typename Scanner1::State& state1, typename Scanner2::State& state2, TStringBuf str) -{ - typedef ScannerPair<Scanner1, Scanner2> Scanners; - Scanners pair(scanner1, scanner2); - typename Scanners::State states(state1, state2); +{ + typedef ScannerPair<Scanner1, Scanner2> Scanners; + Scanners pair(scanner1, scanner2); + typename Scanners::State states(state1, state2); Run(pair, states, str); - state1 = states.first; - state2 = states.second; -} - -#else - -namespace Impl { - /// A debug version of all Run() methods. - template<class Scanner, class Pred> - inline void DoRun(const Scanner& scanner, typename Scanner::State& state, const char* begin, const char* end, Pred pred) - { - Cdbg << "Running regexp on string " << ystring(begin, ymin(end - begin, static_cast<ptrdiff_t>(100u))) << Endl; - Cdbg << "Initial state " << StDump(scanner, state) << Endl; - - if (pred(scanner, state, begin) == Stop) { - Cdbg << " exiting" << Endl; - return; - } - - for (; begin != end; ++begin) { - Step(scanner, state, (unsigned char)*begin); - Cdbg << *begin << " => state " << StDump(scanner, state) << Endl; - if (pred(scanner, state, begin + 1) == Stop) { - Cdbg << " exiting" << Endl; - return; - } - } - } -} - -#endif + state1 = states.first; + state2 = states.second; +} + +#else + +namespace Impl { + /// A debug version of all Run() methods. + template<class Scanner, class Pred> + inline void DoRun(const Scanner& scanner, typename Scanner::State& state, const char* begin, const char* end, Pred pred) + { + Cdbg << "Running regexp on string " << ystring(begin, ymin(end - begin, static_cast<ptrdiff_t>(100u))) << Endl; + Cdbg << "Initial state " << StDump(scanner, state) << Endl; + + if (pred(scanner, state, begin) == Stop) { + Cdbg << " exiting" << Endl; + return; + } + + for (; begin != end; ++begin) { + Step(scanner, state, (unsigned char)*begin); + Cdbg << *begin << " => state " << StDump(scanner, state) << Endl; + if (pred(scanner, state, begin + 1) == Stop) { + Cdbg << " exiting" << Endl; + return; + } + } + } +} + +#endif -template<class Scanner> +template<class Scanner> void Run(const Scanner& sc, typename Scanner::State& st, TStringBuf str) { Impl::DoRun(sc, st, str, Impl::RunPred<Scanner>()); } template<class Scanner> -void Run(const Scanner& sc, typename Scanner::State& st, const char* begin, const char* end) -{ +void Run(const Scanner& sc, typename Scanner::State& st, const char* begin, const char* end) +{ Run(sc, st, TStringBuf(begin, end)); -} - +} + /// Returns default constructed string_view{} if there is no matching prefix /// Returns str.substr(0, 0) if matching prefix is empty -template<class Scanner> +template<class Scanner> std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false) -{ - typename Scanner::State st; - sc.Initialize(st); +{ + typename Scanner::State st; + sc.Initialize(st); if (throughBeginMark) Pire::Step(sc, st, BeginMark); const char* pos = (sc.Final(st) ? str.data() : nullptr); @@ -300,11 +300,11 @@ std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool thr pos = str.data() + str.size(); } return pos ? str.substr(0, pos - str.data()) : std::string_view{}; -} - -template<class Scanner> +} + +template<class Scanner> const char* LongestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false) -{ +{ auto prefix = LongestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark); return prefix.data() + prefix.size(); } @@ -314,11 +314,11 @@ const char* LongestPrefix(const Scanner& sc, const char* begin, const char* end, template<class Scanner> std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false) { - typename Scanner::State st; - sc.Initialize(st); + typename Scanner::State st; + sc.Initialize(st); if (throughBeginMark) Pire::Step(sc, st, BeginMark); - if (sc.Final(st)) + if (sc.Final(st)) return str.substr(0, 0); const char* pos = nullptr; Impl::DoRun(sc, st, str, Impl::ShortestPrefixPred<Scanner>(pos)); @@ -328,8 +328,8 @@ std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool th pos = str.data() + str.size(); } return pos ? str.substr(0, pos - str.data()) : std::string_view{}; -} - +} + template<class Scanner> const char* ShortestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false) { @@ -338,30 +338,30 @@ const char* ShortestPrefix(const Scanner& sc, const char* begin, const char* end } -/// The same as above, but scans string in reverse direction -/// (consider using Fsm::Reverse() for using in this function). +/// The same as above, but scans string in reverse direction +/// (consider using Fsm::Reverse() for using in this function). /// Returns default constructed string_view{} if there is no matching suffix /// Returns str.substr(str.size(), 0) if matching suffix is empty -template<class Scanner> +template<class Scanner> inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false) -{ - typename Scanner::State state; - scanner.Initialize(state); +{ + typename Scanner::State state; + scanner.Initialize(state); if (throughEndMark) Step(scanner, state, EndMark); PIRE_IFDEBUG(Cdbg << "Running LongestSuffix on string " << ystring(str) << Endl); - PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); - + PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); + std::string_view suffix{}; auto begin = str.data() + str.size(); while (begin != str.data() && !scanner.Dead(state)) { - if (scanner.Final(state)) + if (scanner.Final(state)) suffix = str.substr(begin - str.data()); --begin; Step(scanner, state, (unsigned char)*begin); PIRE_IFDEBUG(Cdbg << *begin << " => state " << StDump(scanner, state) << Endl); - } - if (scanner.Final(state)) + } + if (scanner.Final(state)) suffix = str.substr(begin - str.data()); if (throughBeginMark) { Step(scanner, state, BeginMark); @@ -369,97 +369,97 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s suffix = str.substr(begin - str.data()); } return suffix; -} - +} + template<class Scanner> inline const char* LongestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false) { auto suffix = LongestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark); return suffix.data() ? suffix.data() - 1 : nullptr; } -/// The same as above, but scans string in reverse direction +/// The same as above, but scans string in reverse direction /// Returns default constructed string_view{} if there is no matching suffix /// Returns str.substr(str.size(), 0) if matching suffix is empty -template<class Scanner> +template<class Scanner> inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false) -{ +{ auto begin = str.data() + str.size(); - typename Scanner::State state; - scanner.Initialize(state); + typename Scanner::State state; + scanner.Initialize(state); if (throughEndMark) Step(scanner, state, EndMark); PIRE_IFDEBUG(Cdbg << "Running ShortestSuffix on string " << ystring(str) << Endl); PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); - + while (begin != str.data() && !scanner.Final(state) && !scanner.Dead(state)) { --begin; scanner.Next(state, (unsigned char)*begin); PIRE_IFDEBUG(Cdbg << *rbegin << " => state " << StDump(scanner, state) << Endl); - } + } if (throughBeginMark) Step(scanner, state, BeginMark); return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{}; -} - +} + template<class Scanner> inline const char* ShortestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false) { auto suffix = ShortestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark); return suffix.data() ? suffix.data() - 1 : nullptr; } - - -template<class Scanner> -class RunHelper { -public: - RunHelper(const Scanner& sc, typename Scanner::State st): Sc(&sc), St(st) {} - explicit RunHelper(const Scanner& sc): Sc(&sc) { Sc->Initialize(St); } - - RunHelper<Scanner>& Step(Char letter) { Pire::Step(*Sc, St, letter); return *this; } + + +template<class Scanner> +class RunHelper { +public: + RunHelper(const Scanner& sc, typename Scanner::State st): Sc(&sc), St(st) {} + explicit RunHelper(const Scanner& sc): Sc(&sc) { Sc->Initialize(St); } + + RunHelper<Scanner>& Step(Char letter) { Pire::Step(*Sc, St, letter); return *this; } RunHelper<Scanner>& Run(TStringBuf str) { Pire::Run(*Sc, St, str); return *this; } RunHelper<Scanner>& Run(const char* begin, const char* end) { return Run(TStringBuf(begin, end)); } RunHelper<Scanner>& Run(const char* begin, size_t size) { return Run(TStringBuf(begin, begin + size)); } - RunHelper<Scanner>& Begin() { return Step(BeginMark); } - RunHelper<Scanner>& End() { return Step(EndMark); } - - const typename Scanner::State& State() const { return St; } - struct Tag {}; - operator const Tag*() const { return Sc->Final(St) ? (const Tag*) this : 0; } - bool operator ! () const { return !Sc->Final(St); } - -private: - const Scanner* Sc; - typename Scanner::State St; -}; - -template<class Scanner> -RunHelper<Scanner> Runner(const Scanner& sc) { return RunHelper<Scanner>(sc); } - -template<class Scanner> -RunHelper<Scanner> Runner(const Scanner& sc, typename Scanner::State st) { return RunHelper<Scanner>(sc, st); } - - -/// Provided for testing purposes and convinience -template<class Scanner> + RunHelper<Scanner>& Begin() { return Step(BeginMark); } + RunHelper<Scanner>& End() { return Step(EndMark); } + + const typename Scanner::State& State() const { return St; } + struct Tag {}; + operator const Tag*() const { return Sc->Final(St) ? (const Tag*) this : 0; } + bool operator ! () const { return !Sc->Final(St); } + +private: + const Scanner* Sc; + typename Scanner::State St; +}; + +template<class Scanner> +RunHelper<Scanner> Runner(const Scanner& sc) { return RunHelper<Scanner>(sc); } + +template<class Scanner> +RunHelper<Scanner> Runner(const Scanner& sc, typename Scanner::State st) { return RunHelper<Scanner>(sc, st); } + + +/// Provided for testing purposes and convinience +template<class Scanner> bool Matches(const Scanner& scanner, TStringBuf str) { return Runner(scanner).Run(str); } template<class Scanner> -bool Matches(const Scanner& scanner, const char* begin, const char* end) -{ +bool Matches(const Scanner& scanner, const char* begin, const char* end) +{ return Runner(scanner).Run(TStringBuf(begin, end)); -} - -/// Constructs an inline scanner in one statement -template<class Scanner> -Scanner MmappedScanner(const char* ptr, size_t size) -{ - Scanner s; - s.Mmap(ptr, size); - return s; -} - -} - -#endif +} + +/// Constructs an inline scanner in one statement +template<class Scanner> +Scanner MmappedScanner(const char* ptr, size_t size) +{ + Scanner s; + s.Mmap(ptr, size); + return s; +} + +} + +#endif diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp index 3956e3c6ed..22fcccf665 100644 --- a/contrib/libs/pire/pire/scanner_io.cpp +++ b/contrib/libs/pire/pire/scanner_io.cpp @@ -1,26 +1,26 @@ -/* - * scanner_io.cpp -- scanner serialization and deserialization - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * scanner_io.cpp -- scanner serialization and deserialization * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/saveload.h> #include <contrib/libs/pire/pire/scanners/common.h> @@ -28,75 +28,75 @@ #include <contrib/libs/pire/pire/scanners/simple.h> #include <contrib/libs/pire/pire/scanners/loaded.h> -#include "align.h" - -namespace Pire { - -void SimpleScanner::Save(yostream* s) const -{ +#include "align.h" + +namespace Pire { + +void SimpleScanner::Save(yostream* s) const +{ SavePodType(s, Header(ScannerIOTypes::SimpleScanner, sizeof(m))); - Impl::AlignSave(s, sizeof(Header)); - Locals mc = m; - mc.initial -= reinterpret_cast<size_t>(m_transitions); - SavePodType(s, mc); - Impl::AlignSave(s, sizeof(mc)); - SavePodType(s, Empty()); - Impl::AlignSave(s, sizeof(Empty())); - if (!Empty()) { + Impl::AlignSave(s, sizeof(Header)); + Locals mc = m; + mc.initial -= reinterpret_cast<size_t>(m_transitions); + SavePodType(s, mc); + Impl::AlignSave(s, sizeof(mc)); + SavePodType(s, Empty()); + Impl::AlignSave(s, sizeof(Empty())); + if (!Empty()) { Y_ASSERT(m_buffer); Impl::AlignedSaveArray(s, m_buffer.Get(), BufSize()); - } -} - -void SimpleScanner::Load(yistream* s) -{ - SimpleScanner sc; + } +} + +void SimpleScanner::Load(yistream* s) +{ + SimpleScanner sc; Impl::ValidateHeader(s, ScannerIOTypes::SimpleScanner, sizeof(sc.m)); - LoadPodType(s, sc.m); - Impl::AlignLoad(s, sizeof(sc.m)); - bool empty; - LoadPodType(s, empty); - Impl::AlignLoad(s, sizeof(empty)); - if (empty) { - sc.Alias(Null()); - } else { + LoadPodType(s, sc.m); + Impl::AlignLoad(s, sizeof(sc.m)); + bool empty; + LoadPodType(s, empty); + Impl::AlignLoad(s, sizeof(empty)); + if (empty) { + sc.Alias(Null()); + } else { sc.m_buffer = BufferType(new char[sc.BufSize()]); Impl::AlignedLoadArray(s, sc.m_buffer.Get(), sc.BufSize()); sc.Markup(sc.m_buffer.Get()); - sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions); - } - Swap(sc); -} - -void SlowScanner::Save(yostream* s) const -{ + sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions); + } + Swap(sc); +} + +void SlowScanner::Save(yostream* s) const +{ SavePodType(s, Header(ScannerIOTypes::SlowScanner, sizeof(m))); - Impl::AlignSave(s, sizeof(Header)); - SavePodType(s, m); - Impl::AlignSave(s, sizeof(m)); - SavePodType(s, Empty()); - Impl::AlignSave(s, sizeof(Empty())); - if (!Empty()) { + Impl::AlignSave(s, sizeof(Header)); + SavePodType(s, m); + Impl::AlignSave(s, sizeof(m)); + SavePodType(s, Empty()); + Impl::AlignSave(s, sizeof(Empty())); + if (!Empty()) { Y_ASSERT(!m_vec.empty()); - Impl::AlignedSaveArray(s, m_letters, MaxChar); - Impl::AlignedSaveArray(s, m_finals, m.statesCount); - - size_t c = 0; - SavePodType<size_t>(s, 0); + Impl::AlignedSaveArray(s, m_letters, MaxChar); + Impl::AlignedSaveArray(s, m_finals, m.statesCount); + + size_t c = 0; + SavePodType<size_t>(s, 0); for (auto&& i : m_vec) { size_t n = c + i.size(); - SavePodType(s, n); - c = n; - } - Impl::AlignSave(s, (m_vec.size() + 1) * sizeof(size_t)); - - size_t size = 0; + SavePodType(s, n); + c = n; + } + Impl::AlignSave(s, (m_vec.size() + 1) * sizeof(size_t)); + + size_t size = 0; for (auto&& i : m_vec) if (!i.empty()) { SavePodArray(s, &(i)[0], i.size()); size += sizeof(unsigned) * i.size(); - } - Impl::AlignSave(s, size); + } + Impl::AlignSave(s, size); if (need_actions) { size_t pos = 0; for (TVector< TVector< Action > >::const_iterator i = m_actionsvec.begin(), ie = m_actionsvec.end(); i != ie; ++i) @@ -106,55 +106,55 @@ void SlowScanner::Save(yostream* s) const } Impl::AlignSave(s, pos); } - } -} - -void SlowScanner::Load(yistream* s) -{ - SlowScanner sc; + } +} + +void SlowScanner::Load(yistream* s) +{ + SlowScanner sc; Impl::ValidateHeader(s, ScannerIOTypes::SlowScanner, sizeof(sc.m)); - LoadPodType(s, sc.m); - Impl::AlignLoad(s, sizeof(sc.m)); - bool empty; - LoadPodType(s, empty); - Impl::AlignLoad(s, sizeof(empty)); + LoadPodType(s, sc.m); + Impl::AlignLoad(s, sizeof(sc.m)); + bool empty; + LoadPodType(s, empty); + Impl::AlignLoad(s, sizeof(empty)); sc.need_actions = need_actions; - if (empty) { - sc.Alias(Null()); - } else { - sc.m_vec.resize(sc.m.lettersCount * sc.m.statesCount); + if (empty) { + sc.Alias(Null()); + } else { + sc.m_vec.resize(sc.m.lettersCount * sc.m.statesCount); if (sc.need_actions) sc.m_actionsvec.resize(sc.m.lettersCount * sc.m.statesCount); - sc.m_vecptr = &sc.m_vec; - - sc.alloc(sc.m_letters, MaxChar); - Impl::AlignedLoadArray(s, sc.m_letters, MaxChar); - - sc.alloc(sc.m_finals, sc.m.statesCount); - Impl::AlignedLoadArray(s, sc.m_finals, sc.m.statesCount); - - size_t c; - LoadPodType(s, c); + sc.m_vecptr = &sc.m_vec; + + sc.alloc(sc.m_letters, MaxChar); + Impl::AlignedLoadArray(s, sc.m_letters, MaxChar); + + sc.alloc(sc.m_finals, sc.m.statesCount); + Impl::AlignedLoadArray(s, sc.m_finals, sc.m.statesCount); + + size_t c; + LoadPodType(s, c); auto act = sc.m_actionsvec.begin(); for (auto&& i : sc.m_vec) { - size_t n; - LoadPodType(s, n); + size_t n; + LoadPodType(s, n); i.resize(n - c); if (sc.need_actions) { act->resize(n - c); ++act; } - c = n; - } - Impl::AlignLoad(s, (m_vec.size() + 1) * sizeof(size_t)); - - size_t size = 0; + c = n; + } + Impl::AlignLoad(s, (m_vec.size() + 1) * sizeof(size_t)); + + size_t size = 0; for (auto&& i : sc.m_vec) if (!i.empty()) { LoadPodArray(s, &(i)[0], i.size()); size += sizeof(unsigned) * i.size(); - } - Impl::AlignLoad(s, size); + } + Impl::AlignLoad(s, size); size_t actSize = 0; if (sc.need_actions) { for (auto&& i : sc.m_actionsvec) { @@ -165,53 +165,53 @@ void SlowScanner::Load(yistream* s) } Impl::AlignLoad(s, actSize); } - } - Swap(sc); -} - + } + Swap(sc); +} + void LoadedScanner::Save(yostream* s) const { Save(s, ScannerIOTypes::LoadedScanner); } void LoadedScanner::Save(yostream* s, ui32 type) const -{ +{ Y_ASSERT(type == ScannerIOTypes::LoadedScanner || type == ScannerIOTypes::NoGlueLimitCountingScanner); SavePodType(s, Header(type, sizeof(m))); - Impl::AlignSave(s, sizeof(Header)); - Locals mc = m; - mc.initial -= reinterpret_cast<size_t>(m_jumps); - SavePodType(s, mc); - Impl::AlignSave(s, sizeof(mc)); - - Impl::AlignedSaveArray(s, m_letters, MaxChar); - Impl::AlignedSaveArray(s, m_jumps, m.statesCount * m.lettersCount); - Impl::AlignedSaveArray(s, m_tags, m.statesCount); -} - + Impl::AlignSave(s, sizeof(Header)); + Locals mc = m; + mc.initial -= reinterpret_cast<size_t>(m_jumps); + SavePodType(s, mc); + Impl::AlignSave(s, sizeof(mc)); + + Impl::AlignedSaveArray(s, m_letters, MaxChar); + Impl::AlignedSaveArray(s, m_jumps, m.statesCount * m.lettersCount); + Impl::AlignedSaveArray(s, m_tags, m.statesCount); +} + void LoadedScanner::Load(yistream* s) { Load(s, nullptr); } void LoadedScanner::Load(yistream* s, ui32* type) -{ - LoadedScanner sc; +{ + LoadedScanner sc; Header header = Impl::ValidateHeader(s, ScannerIOTypes::LoadedScanner, sizeof(sc.m)); if (type) { *type = header.Type; } - LoadPodType(s, sc.m); - Impl::AlignLoad(s, sizeof(sc.m)); + LoadPodType(s, sc.m); + Impl::AlignLoad(s, sizeof(sc.m)); sc.m_buffer = BufferType(new char[sc.BufSize()]); sc.Markup(sc.m_buffer.Get()); - Impl::AlignedLoadArray(s, sc.m_letters, MaxChar); - Impl::AlignedLoadArray(s, sc.m_jumps, sc.m.statesCount * sc.m.lettersCount); + Impl::AlignedLoadArray(s, sc.m_letters, MaxChar); + Impl::AlignedLoadArray(s, sc.m_jumps, sc.m.statesCount * sc.m.lettersCount); if (header.Version == Header::RE_VERSION_WITH_MACTIONS) { TVector<Action> actions(sc.m.statesCount * sc.m.lettersCount); Impl::AlignedLoadArray(s, actions.data(), actions.size()); } - Impl::AlignedLoadArray(s, sc.m_tags, sc.m.statesCount); - sc.m.initial += reinterpret_cast<size_t>(sc.m_jumps); - Swap(sc); -} - -} + Impl::AlignedLoadArray(s, sc.m_tags, sc.m.statesCount); + sc.m.initial += reinterpret_cast<size_t>(sc.m_jumps); + Swap(sc); +} + +} diff --git a/contrib/libs/pire/pire/scanners/common.h b/contrib/libs/pire/pire/scanners/common.h index de5ea0af7b..4d03c1e4bc 100644 --- a/contrib/libs/pire/pire/scanners/common.h +++ b/contrib/libs/pire/pire/scanners/common.h @@ -1,35 +1,35 @@ -/* - * common.h -- common declaration for Pire scanners - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * common.h -- common declaration for Pire scanners * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - -#ifndef PIRE_SCANNERS_COMMON_H_INCLUDED -#define PIRE_SCANNERS_COMMON_H_INCLUDED - -#include <stdlib.h> + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + +#ifndef PIRE_SCANNERS_COMMON_H_INCLUDED +#define PIRE_SCANNERS_COMMON_H_INCLUDED + +#include <stdlib.h> #include <contrib/libs/pire/pire/align.h> #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/defs.h> #include <contrib/libs/pire/pire/platform.h> - -namespace Pire { + +namespace Pire { namespace ScannerIOTypes { enum { NoScanner = 0, @@ -40,84 +40,84 @@ namespace Pire { NoGlueLimitCountingScanner = 5, }; } - - struct Header { - ui32 Magic; - ui32 Version; - ui32 PtrSize; - ui32 MaxWordSize; - ui32 Type; - ui32 HdrSize; - - static const ui32 MAGIC = 0x45524950; // "PIRE" on litte-endian + + struct Header { + ui32 Magic; + ui32 Version; + ui32 PtrSize; + ui32 MaxWordSize; + ui32 Type; + ui32 HdrSize; + + static const ui32 MAGIC = 0x45524950; // "PIRE" on litte-endian static const ui32 RE_VERSION = 7; // Should be incremented each time when the format of serialized scanner changes static const ui32 RE_VERSION_WITH_MACTIONS = 6; // LoadedScanner with m_actions, which is ignored - - explicit Header(ui32 type, size_t hdrsize) - : Magic(MAGIC) - , Version(RE_VERSION) - , PtrSize(sizeof(void*)) - , MaxWordSize(sizeof(Impl::MaxSizeWord)) - , Type(type) + + explicit Header(ui32 type, size_t hdrsize) + : Magic(MAGIC) + , Version(RE_VERSION) + , PtrSize(sizeof(void*)) + , MaxWordSize(sizeof(Impl::MaxSizeWord)) + , Type(type) , HdrSize((ui32)hdrsize) - {} - - void Validate(ui32 type, size_t hdrsize) const - { - if (Magic != MAGIC || PtrSize != sizeof(void*) || MaxWordSize != sizeof(Impl::MaxSizeWord)) - throw Error("Serialized regexp incompatible with your system"); + {} + + void Validate(ui32 type, size_t hdrsize) const + { + if (Magic != MAGIC || PtrSize != sizeof(void*) || MaxWordSize != sizeof(Impl::MaxSizeWord)) + throw Error("Serialized regexp incompatible with your system"); if (Version != RE_VERSION && Version != RE_VERSION_WITH_MACTIONS) - throw Error("You are trying to used an incompatible version of a serialized regexp"); + throw Error("You are trying to used an incompatible version of a serialized regexp"); if (type != ScannerIOTypes::NoScanner && type != Type && !(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) { - throw Error("Serialized regexp incompatible with your system"); + throw Error("Serialized regexp incompatible with your system"); } if (hdrsize != 0 && HdrSize != hdrsize) throw Error("Serialized regexp incompatible with your system"); - } - }; - - namespace Impl { - inline const void* AdvancePtr(const size_t*& ptr, size_t& size, size_t delta) - { - ptr = (const size_t*) ((const char*) ptr + delta); - size -= delta; - return (const void*) ptr; - } - - template<class T> - inline void MapPtr(T*& field, size_t count, const size_t*& p, size_t& size) - { - if (size < count * sizeof(*field)) - throw Error("EOF reached while mapping Pire::SlowScanner"); - field = (T*) p; - Impl::AdvancePtr(p, size, count * sizeof(*field)); - Impl::AlignPtr(p, size); - } - - inline void CheckAlign(const void* ptr, size_t bound = sizeof(size_t)) - { - if (!IsAligned(ptr, bound)) - throw Error("Tried to mmap scanner at misaligned address"); - } - + } + }; + + namespace Impl { + inline const void* AdvancePtr(const size_t*& ptr, size_t& size, size_t delta) + { + ptr = (const size_t*) ((const char*) ptr + delta); + size -= delta; + return (const void*) ptr; + } + + template<class T> + inline void MapPtr(T*& field, size_t count, const size_t*& p, size_t& size) + { + if (size < count * sizeof(*field)) + throw Error("EOF reached while mapping Pire::SlowScanner"); + field = (T*) p; + Impl::AdvancePtr(p, size, count * sizeof(*field)); + Impl::AlignPtr(p, size); + } + + inline void CheckAlign(const void* ptr, size_t bound = sizeof(size_t)) + { + if (!IsAligned(ptr, bound)) + throw Error("Tried to mmap scanner at misaligned address"); + } + inline Header ValidateHeader(const size_t*& ptr, size_t& size, ui32 type, size_t hdrsize) - { - const Header* hdr; - MapPtr(hdr, 1, ptr, size); - hdr->Validate(type, hdrsize); + { + const Header* hdr; + MapPtr(hdr, 1, ptr, size); + hdr->Validate(type, hdrsize); return *hdr; - } - + } + inline Header ValidateHeader(yistream* s, ui32 type, size_t hdrsize) - { + { Header hdr(ScannerIOTypes::NoScanner, 0); - LoadPodType(s, hdr); - AlignLoad(s, sizeof(hdr)); - hdr.Validate(type, hdrsize); + LoadPodType(s, hdr); + AlignLoad(s, sizeof(hdr)); + hdr.Validate(type, hdrsize); return hdr; - } - } -} - -#endif + } + } +} + +#endif diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h index 120dc403b7..7d5d6a50d7 100644 --- a/contrib/libs/pire/pire/scanners/loaded.h +++ b/contrib/libs/pire/pire/scanners/loaded.h @@ -1,108 +1,108 @@ -/* - * loaded.h -- a definition of the LoadedScanner +/* + * loaded.h -- a definition of the LoadedScanner + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_SCANNERS_LOADED_H -#define PIRE_SCANNERS_LOADED_H - -#include <string.h> + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_SCANNERS_LOADED_H +#define PIRE_SCANNERS_LOADED_H + +#include <string.h> #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/partition.h> -#include "common.h" - -#ifdef PIRE_DEBUG -#include <iostream> -#endif - -namespace Pire { - -/** -* A loaded scanner -- the deterministic scanner having actions -* associated with states and transitions -* -* Not a complete scanner itself (hence abstract), this class provides -* infrastructure for regexp-based algorithms (e.g. counts or captures), -* supporting major part of scanner construction, (de)serialization, -* mmap()-ing, etc. -* -* It is a good idea to override copy ctor, operator= and swap() -* in subclasses to avoid mixing different scanner types in these methods. -* Also please note that subclasses should not have any data members of thier own. -*/ -class LoadedScanner { -public: - typedef ui8 Letter; - typedef ui32 Action; - typedef ui8 Tag; - - typedef size_t InternalState; - - union Transition { - size_t raw; // alignment hint for compiler - struct { - ui32 shift; - Action action; - }; - }; - - // Override in subclass, if neccessary +#include "common.h" + +#ifdef PIRE_DEBUG +#include <iostream> +#endif + +namespace Pire { + +/** +* A loaded scanner -- the deterministic scanner having actions +* associated with states and transitions +* +* Not a complete scanner itself (hence abstract), this class provides +* infrastructure for regexp-based algorithms (e.g. counts or captures), +* supporting major part of scanner construction, (de)serialization, +* mmap()-ing, etc. +* +* It is a good idea to override copy ctor, operator= and swap() +* in subclasses to avoid mixing different scanner types in these methods. +* Also please note that subclasses should not have any data members of thier own. +*/ +class LoadedScanner { +public: + typedef ui8 Letter; + typedef ui32 Action; + typedef ui8 Tag; + + typedef size_t InternalState; + + union Transition { + size_t raw; // alignment hint for compiler + struct { + ui32 shift; + Action action; + }; + }; + + // Override in subclass, if neccessary enum { - FinalFlag = 0, - DeadFlag = 0 - }; - + FinalFlag = 0, + DeadFlag = 0 + }; + static const size_t MAX_RE_COUNT = 16; protected: - LoadedScanner() { Alias(Null()); } + LoadedScanner() { Alias(Null()); } - LoadedScanner(const LoadedScanner& s): m(s.m) - { - if (s.m_buffer) { + LoadedScanner(const LoadedScanner& s): m(s.m) + { + if (s.m_buffer) { m_buffer = BufferType(new char [BufSize()]); memcpy(m_buffer.Get(), s.m_buffer.Get(), BufSize()); Markup(m_buffer.Get()); - m.initial = (InternalState)m_jumps + (s.m.initial - (InternalState)s.m_jumps); - } else { - Alias(s); - } - } - - void Swap(LoadedScanner& s) - { - DoSwap(m_buffer, s.m_buffer); - DoSwap(m.statesCount, s.m.statesCount); - DoSwap(m.lettersCount, s.m.lettersCount); - DoSwap(m.regexpsCount, s.m.regexpsCount); - DoSwap(m.initial, s.m.initial); - DoSwap(m_letters, s.m_letters); - DoSwap(m_jumps, s.m_jumps); - DoSwap(m_tags, s.m_tags); - } - - LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; } + m.initial = (InternalState)m_jumps + (s.m.initial - (InternalState)s.m_jumps); + } else { + Alias(s); + } + } + + void Swap(LoadedScanner& s) + { + DoSwap(m_buffer, s.m_buffer); + DoSwap(m.statesCount, s.m.statesCount); + DoSwap(m.lettersCount, s.m.lettersCount); + DoSwap(m.regexpsCount, s.m.regexpsCount); + DoSwap(m.initial, s.m.initial); + DoSwap(m_letters, s.m_letters); + DoSwap(m_jumps, s.m_jumps); + DoSwap(m_tags, s.m_tags); + } + + LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; } LoadedScanner (LoadedScanner&& other) : LoadedScanner() { Swap(other); } @@ -110,14 +110,14 @@ protected: Swap(other); return *this; } - -public: - size_t Size() const { return m.statesCount; } - - bool Empty() const { return m_jumps == Null().m_jumps; } - - size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; } - + +public: + size_t Size() const { return m.statesCount; } + + bool Empty() const { return m_jumps == Null().m_jumps; } + + size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; } + size_t LettersCount() const { return m.lettersCount; } const void* Mmap(const void* ptr, size_t size) { @@ -125,93 +125,93 @@ public: } const void* Mmap(const void* ptr, size_t size, ui32* type) - { - Impl::CheckAlign(ptr); - LoadedScanner s; - const size_t* p = reinterpret_cast<const size_t*>(ptr); + { + Impl::CheckAlign(ptr); + LoadedScanner s; + const size_t* p = reinterpret_cast<const size_t*>(ptr); Header header = Impl::ValidateHeader(p, size, ScannerIOTypes::LoadedScanner, sizeof(s.m)); if (type) { *type = header.Type; } + + Locals* locals; + Impl::MapPtr(locals, 1, p, size); + memcpy(&s.m, locals, sizeof(s.m)); - Locals* locals; - Impl::MapPtr(locals, 1, p, size); - memcpy(&s.m, locals, sizeof(s.m)); - - Impl::MapPtr(s.m_letters, MaxChar, p, size); - Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size); + Impl::MapPtr(s.m_letters, MaxChar, p, size); + Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size); if (header.Version == Header::RE_VERSION_WITH_MACTIONS) { Action* actions = 0; Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size); } - Impl::MapPtr(s.m_tags, s.m.statesCount, p, size); - - s.m.initial += reinterpret_cast<size_t>(s.m_jumps); - Swap(s); - - return (const void*) p; - } - + Impl::MapPtr(s.m_tags, s.m.statesCount, p, size); + + s.m.initial += reinterpret_cast<size_t>(s.m_jumps); + Swap(s); + + return (const void*) p; + } + void Save(yostream*, ui32 type) const; - void Save(yostream*) const; + void Save(yostream*) const; void Load(yistream*, ui32* type); - void Load(yistream*); - - template<class Eq> - void Init(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1) - { - m.statesCount = states; - m.lettersCount = letters.Size(); - m.regexpsCount = regexpsCount; + void Load(yistream*); + + template<class Eq> + void Init(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1) + { + m.statesCount = states; + m.lettersCount = letters.Size(); + m.regexpsCount = regexpsCount; m_buffer = BufferType(new char[BufSize()]); memset(m_buffer.Get(), 0, BufSize()); Markup(m_buffer.Get()); - - m.initial = reinterpret_cast<size_t>(m_jumps + startState * m.lettersCount); - - // Build letter translation table + + m.initial = reinterpret_cast<size_t>(m_jumps + startState * m.lettersCount); + + // Build letter translation table Fill(m_letters, m_letters + MaxChar, 0); for (auto&& letter : letters) for (auto&& character : letter.second.second) m_letters[character] = letter.second.first; - } - + } + size_t StateSize() const { return m.lettersCount * sizeof(*m_jumps); } - + size_t TransitionIndex(size_t state, Char c) const { return state * m.lettersCount + m_letters[c]; } - void SetJump(size_t oldState, Char c, size_t newState, Action action) - { + void SetJump(size_t oldState, Char c, size_t newState, Action action) + { Y_ASSERT(m_buffer); Y_ASSERT(oldState < m.statesCount); Y_ASSERT(newState < m.statesCount); - + size_t shift = (newState - oldState) * StateSize(); - Transition tr; + Transition tr; tr.shift = (ui32)shift; - tr.action = action; + tr.action = action; m_jumps[TransitionIndex(oldState, c)] = tr; - } - - Action RemapAction(Action action) { return action; } - + } + + Action RemapAction(Action action) { return action; } + void SetInitial(size_t state) { Y_ASSERT(m_buffer); m.initial = reinterpret_cast<size_t>(m_jumps + state * m.lettersCount); } void SetTag(size_t state, Tag tag) { Y_ASSERT(m_buffer); m_tags[state] = tag; } - void FinishBuild() {} - - size_t StateIdx(InternalState s) const - { - return (reinterpret_cast<Transition*>(s) - m_jumps) / m.lettersCount; - } - - i64 SignExtend(i32 i) const { return i; } - + void FinishBuild() {} + + size_t StateIdx(InternalState s) const + { + return (reinterpret_cast<Transition*>(s) - m_jumps) / m.lettersCount; + } + + i64 SignExtend(i32 i) const { return i; } + size_t BufSize() const { return @@ -221,74 +221,74 @@ public: ; } -protected: - +protected: + static const Action IncrementMask = (1 << MAX_RE_COUNT) - 1; static const Action ResetMask = IncrementMask << MAX_RE_COUNT; - - // TODO: maybe, put fields in private section and provide data accessors - - struct Locals { - ui32 statesCount; - ui32 lettersCount; - ui32 regexpsCount; - size_t initial; - } m; - + + // TODO: maybe, put fields in private section and provide data accessors + + struct Locals { + ui32 statesCount; + ui32 lettersCount; + ui32 regexpsCount; + size_t initial; + } m; + using BufferType = TArrayHolder<char>; BufferType m_buffer; - - Letter* m_letters; - Transition* m_jumps; - Tag* m_tags; - - virtual ~LoadedScanner(); - -private: + + Letter* m_letters; + Transition* m_jumps; + Tag* m_tags; + + virtual ~LoadedScanner(); + +private: explicit LoadedScanner(Fsm& fsm, size_t distance = 0) - { + { if (distance) { fsm = CreateApproxFsm(fsm, distance); } - fsm.Canonize(); - Init(fsm.Size(), fsm.Letters(), fsm.Initial()); - BuildScanner(fsm, *this); - } - - inline static const LoadedScanner& Null() - { - static const LoadedScanner n = Fsm::MakeFalse().Compile<LoadedScanner>(); - return n; - } - - void Markup(void* buf) - { - m_letters = reinterpret_cast<Letter*>(buf); - m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar); + fsm.Canonize(); + Init(fsm.Size(), fsm.Letters(), fsm.Initial()); + BuildScanner(fsm, *this); + } + + inline static const LoadedScanner& Null() + { + static const LoadedScanner n = Fsm::MakeFalse().Compile<LoadedScanner>(); + return n; + } + + void Markup(void* buf) + { + m_letters = reinterpret_cast<Letter*>(buf); + m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar); m_tags = reinterpret_cast<Tag*>(m_jumps + m.statesCount * m.lettersCount); - } - - void Alias(const LoadedScanner& s) - { - memcpy(&m, &s.m, sizeof(m)); - m_buffer = 0; - m_letters = s.m_letters; - m_jumps = s.m_jumps; - m_tags = s.m_tags; - } - - template<class Eq> - LoadedScanner(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1) - { - Init(states, letters, startState, regexpsCount); - } - + } + + void Alias(const LoadedScanner& s) + { + memcpy(&m, &s.m, sizeof(m)); + m_buffer = 0; + m_letters = s.m_letters; + m_jumps = s.m_jumps; + m_tags = s.m_tags; + } + + template<class Eq> + LoadedScanner(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1) + { + Init(states, letters, startState, regexpsCount); + } + friend class Fsm; -}; +}; inline LoadedScanner::~LoadedScanner() = default; - -} - - -#endif + +} + + +#endif diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h index 29679e416e..8b6c537836 100644 --- a/contrib/libs/pire/pire/scanners/multi.h +++ b/contrib/libs/pire/pire/scanners/multi.h @@ -1,31 +1,31 @@ -/* - * multi.h -- definition of the Scanner - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_SCANNERS_MULTI_H -#define PIRE_SCANNERS_MULTI_H - +/* + * multi.h -- definition of the Scanner + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_SCANNERS_MULTI_H +#define PIRE_SCANNERS_MULTI_H + #include <cstring> -#include <string.h> +#include <string.h> #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/partition.h> @@ -38,1094 +38,1094 @@ #include <contrib/libs/pire/pire/stub/saveload.h> #include <contrib/libs/pire/pire/stub/lexical_cast.h> -#include "common.h" - -namespace Pire { - -namespace Impl { - - inline static ssize_t SignExtend(i32 i) { return i; } - template<class T> - class ScannerGlueCommon; - - template<class T> - class ScannerGlueTask; - - // This strategy allows to mmap() saved representation of a scanner. This is achieved by - // storing shifts instead of addresses in the transition table. - struct Relocatable { - static const size_t Signature = 1; - // Please note that Transition size is hardcoded as 32 bits. - // This limits size of transition table to 4G, but compresses - // it twice compared to 64-bit transitions. In future Transition - // can be made a template parameter if this is a concern. - typedef ui32 Transition; - - typedef const void* RetvalForMmap; - - static size_t Go(size_t state, Transition shift) { return state + SignExtend(shift); } - static Transition Diff(size_t from, size_t to) { return static_cast<Transition>(to - from); } - }; - - // With this strategy the transition table stores addresses. This makes the scanner faster - // compared to mmap()-ed - struct Nonrelocatable { - static const size_t Signature = 2; - typedef size_t Transition; - - // Generates a compile-time error if Scanner<Nonrelocatable>::Mmap() - // (which is unsupported) is mistakenly called - typedef struct {} RetvalForMmap; - - static size_t Go(size_t /*state*/, Transition shift) { return shift; } - static Transition Diff(size_t /*from*/, size_t to) { return to; } - }; - - +#include "common.h" + +namespace Pire { + +namespace Impl { + + inline static ssize_t SignExtend(i32 i) { return i; } + template<class T> + class ScannerGlueCommon; + + template<class T> + class ScannerGlueTask; + + // This strategy allows to mmap() saved representation of a scanner. This is achieved by + // storing shifts instead of addresses in the transition table. + struct Relocatable { + static const size_t Signature = 1; + // Please note that Transition size is hardcoded as 32 bits. + // This limits size of transition table to 4G, but compresses + // it twice compared to 64-bit transitions. In future Transition + // can be made a template parameter if this is a concern. + typedef ui32 Transition; + + typedef const void* RetvalForMmap; + + static size_t Go(size_t state, Transition shift) { return state + SignExtend(shift); } + static Transition Diff(size_t from, size_t to) { return static_cast<Transition>(to - from); } + }; + + // With this strategy the transition table stores addresses. This makes the scanner faster + // compared to mmap()-ed + struct Nonrelocatable { + static const size_t Signature = 2; + typedef size_t Transition; + + // Generates a compile-time error if Scanner<Nonrelocatable>::Mmap() + // (which is unsupported) is mistakenly called + typedef struct {} RetvalForMmap; + + static size_t Go(size_t /*state*/, Transition shift) { return shift; } + static Transition Diff(size_t /*from*/, size_t to) { return to; } + }; + + // Scanner implementation parametrized by -// - transition table representation strategy -// - strategy for fast forwarding through memory ranges -template<class Relocation, class Shortcutting> -class Scanner { -protected: - enum { - FinalFlag = 1, - DeadFlag = 2, - Flags = FinalFlag | DeadFlag - }; - - static const size_t End = static_cast<size_t>(-1); - -public: - typedef typename Relocation::Transition Transition; - - typedef ui16 Letter; - typedef ui32 Action; - typedef ui8 Tag; - - /// Some properties of the particular state. - struct CommonRowHeader { - size_t Flags; ///< Holds FinalFlag, DeadFlag, etc... - - CommonRowHeader(): Flags(0) {} - - template <class OtherCommonRowHeader> - CommonRowHeader& operator =(const OtherCommonRowHeader& other) - { - Flags = other.Flags; - return *this; - } - }; - - typedef typename Shortcutting::template ExtendedRowHeader<Scanner> ScannerRowHeader; - - Scanner() { Alias(Null()); } +// - transition table representation strategy +// - strategy for fast forwarding through memory ranges +template<class Relocation, class Shortcutting> +class Scanner { +protected: + enum { + FinalFlag = 1, + DeadFlag = 2, + Flags = FinalFlag | DeadFlag + }; + + static const size_t End = static_cast<size_t>(-1); + +public: + typedef typename Relocation::Transition Transition; + + typedef ui16 Letter; + typedef ui32 Action; + typedef ui8 Tag; + + /// Some properties of the particular state. + struct CommonRowHeader { + size_t Flags; ///< Holds FinalFlag, DeadFlag, etc... + + CommonRowHeader(): Flags(0) {} + + template <class OtherCommonRowHeader> + CommonRowHeader& operator =(const OtherCommonRowHeader& other) + { + Flags = other.Flags; + return *this; + } + }; + + typedef typename Shortcutting::template ExtendedRowHeader<Scanner> ScannerRowHeader; + + Scanner() { Alias(Null()); } explicit Scanner(Fsm& fsm, size_t distance = 0) - { + { if (distance) { fsm = CreateApproxFsm(fsm, distance); } - fsm.Canonize(); - Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1); - BuildScanner(fsm, *this); - } - - - size_t Size() const { return m.statesCount; } - bool Empty() const { return m_transitions == Null().m_transitions; } - - typedef size_t State; - - size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; } - size_t LettersCount() const { return m.lettersCount; } - - /// Checks whether specified state is in any of the final sets - bool Final(const State& state) const { return (Header(state).Common.Flags & FinalFlag) != 0; } - - /// Checks whether specified state is 'dead' (i.e. scanner will never - /// reach any final state from current one) - bool Dead(const State& state) const { return (Header(state).Common.Flags & DeadFlag) != 0; } - - ypair<const size_t*, const size_t*> AcceptedRegexps(const State& state) const - { - size_t idx = (state - reinterpret_cast<size_t>(m_transitions)) / - (RowSize() * sizeof(Transition)); - const size_t* b = m_final + m_finalIndex[idx]; - const size_t* e = b; - while (*e != End) - ++e; - return ymake_pair(b, e); - } - - /// Returns an initial state for this scanner - void Initialize(State& state) const { state = m.initial; } - + fsm.Canonize(); + Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1); + BuildScanner(fsm, *this); + } + + + size_t Size() const { return m.statesCount; } + bool Empty() const { return m_transitions == Null().m_transitions; } + + typedef size_t State; + + size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; } + size_t LettersCount() const { return m.lettersCount; } + + /// Checks whether specified state is in any of the final sets + bool Final(const State& state) const { return (Header(state).Common.Flags & FinalFlag) != 0; } + + /// Checks whether specified state is 'dead' (i.e. scanner will never + /// reach any final state from current one) + bool Dead(const State& state) const { return (Header(state).Common.Flags & DeadFlag) != 0; } + + ypair<const size_t*, const size_t*> AcceptedRegexps(const State& state) const + { + size_t idx = (state - reinterpret_cast<size_t>(m_transitions)) / + (RowSize() * sizeof(Transition)); + const size_t* b = m_final + m_finalIndex[idx]; + const size_t* e = b; + while (*e != End) + ++e; + return ymake_pair(b, e); + } + + /// Returns an initial state for this scanner + void Initialize(State& state) const { state = m.initial; } + Char Translate(Char ch) const - { + { return m_letters[static_cast<size_t>(ch)]; } /// Handles one letter Action NextTranslated(State& state, Char letter) const { - PIRE_IFDEBUG( + PIRE_IFDEBUG( Y_ASSERT(state >= (size_t)m_transitions); Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size())); Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0); - ); - + ); + state = Relocation::Go(state, reinterpret_cast<const Transition*>(state)[letter]); - - PIRE_IFDEBUG( + + PIRE_IFDEBUG( Y_ASSERT(state >= (size_t)m_transitions); Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size())); Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0); - ); - - return 0; - } - + ); + + return 0; + } + /// Handles one character Action Next(State& state, Char c) const { return NextTranslated(state, Translate(c)); } - void TakeAction(State&, Action) const {} - + void TakeAction(State&, Action) const {} + Scanner(const Scanner& s): m(s.m) - { - if (!s.m_buffer) { - // Empty or mmap()-ed scanner - Alias(s); - } else { - // In-memory scanner - DeepCopy(s); - } - } - + { + if (!s.m_buffer) { + // Empty or mmap()-ed scanner + Alias(s); + } else { + // In-memory scanner + DeepCopy(s); + } + } + Scanner(Scanner&& s) { Alias(Null()); Swap(s); } - template<class AnotherRelocation> + template<class AnotherRelocation> Scanner(const Scanner<AnotherRelocation, Shortcutting>& s) - { - if (s.Empty()) - Alias(Null()); - else - DeepCopy(s); - } - - void Swap(Scanner& s) - { + { + if (s.Empty()) + Alias(Null()); + else + DeepCopy(s); + } + + void Swap(Scanner& s) + { Y_ASSERT(m.relocationSignature == s.m.relocationSignature); Y_ASSERT(m.shortcuttingSignature == s.m.shortcuttingSignature); - DoSwap(m_buffer, s.m_buffer); - DoSwap(m.statesCount, s.m.statesCount); - DoSwap(m.lettersCount, s.m.lettersCount); - DoSwap(m.regexpsCount, s.m.regexpsCount); - DoSwap(m.initial, s.m.initial); - DoSwap(m_letters, s.m_letters); - DoSwap(m.finalTableSize, s.m.finalTableSize); - DoSwap(m_final, s.m_final); - DoSwap(m_finalIndex, s.m_finalIndex); - DoSwap(m_transitions, s.m_transitions); - } - - Scanner& operator = (const Scanner& s) { Scanner(s).Swap(*this); return *this; } - - /* - * Constructs the scanner from mmap()-ed memory range, returning a pointer - * to unconsumed part of the buffer. - */ - typename Relocation::RetvalForMmap Mmap(const void* ptr, size_t size) - { - Impl::CheckAlign(ptr, sizeof(size_t)); - Scanner s; - - const size_t* p = reinterpret_cast<const size_t*>(ptr); + DoSwap(m_buffer, s.m_buffer); + DoSwap(m.statesCount, s.m.statesCount); + DoSwap(m.lettersCount, s.m.lettersCount); + DoSwap(m.regexpsCount, s.m.regexpsCount); + DoSwap(m.initial, s.m.initial); + DoSwap(m_letters, s.m_letters); + DoSwap(m.finalTableSize, s.m.finalTableSize); + DoSwap(m_final, s.m_final); + DoSwap(m_finalIndex, s.m_finalIndex); + DoSwap(m_transitions, s.m_transitions); + } + + Scanner& operator = (const Scanner& s) { Scanner(s).Swap(*this); return *this; } + + /* + * Constructs the scanner from mmap()-ed memory range, returning a pointer + * to unconsumed part of the buffer. + */ + typename Relocation::RetvalForMmap Mmap(const void* ptr, size_t size) + { + Impl::CheckAlign(ptr, sizeof(size_t)); + Scanner s; + + const size_t* p = reinterpret_cast<const size_t*>(ptr); Impl::ValidateHeader(p, size, ScannerIOTypes::Scanner, sizeof(m)); - if (size < sizeof(s.m)) - throw Error("EOF reached while mapping Pire::Scanner"); - - memcpy(&s.m, p, sizeof(s.m)); - if (s.m.relocationSignature != Relocation::Signature) - throw Error("Type mismatch while mmapping Pire::Scanner"); - Impl::AdvancePtr(p, size, sizeof(s.m)); - Impl::AlignPtr(p, size); - - if (Shortcutting::Signature != s.m.shortcuttingSignature) - throw Error("This scanner has different shortcutting type"); - - bool empty = *((const bool*) p); - Impl::AdvancePtr(p, size, sizeof(empty)); - Impl::AlignPtr(p, size); - - if (empty) - s.Alias(Null()); - else { - if (size < s.BufSize()) - throw Error("EOF reached while mapping NPire::Scanner"); - s.Markup(const_cast<size_t*>(p)); - Impl::AdvancePtr(p, size, s.BufSize()); - s.m.initial += reinterpret_cast<size_t>(s.m_transitions); - } - - Swap(s); - return Impl::AlignPtr(p, size); - } - - size_t StateIndex(State s) const - { - return (s - reinterpret_cast<size_t>(m_transitions)) / (RowSize() * sizeof(Transition)); - } - - /** - * Agglutinates two scanners together, producing a larger scanner. - * Checkig a string against that scanner effectively checks them against both agglutinated regexps - * (detailed information about matched regexps can be obtained with AcceptedRegexps()). - * - * Returns default-constructed scanner in case of failure - * (consult Scanner::Empty() to find out whether the operation was successful). - */ - static Scanner Glue(const Scanner& a, const Scanner& b, size_t maxSize = 0); - - // Returns the size of the memory buffer used (or required) by scanner. - size_t BufSize() const - { - return AlignUp( - MaxChar * sizeof(Letter) // Letters translation table - + m.finalTableSize * sizeof(size_t) // Final table - + m.statesCount * sizeof(size_t) // Final index - + RowSize() * m.statesCount * sizeof(Transition), // Transitions table - sizeof(size_t)); - } - - void Save(yostream*) const; - void Load(yistream*); - - ScannerRowHeader& Header(State s) { return *(ScannerRowHeader*) s; } - const ScannerRowHeader& Header(State s) const { return *(const ScannerRowHeader*) s; } - + if (size < sizeof(s.m)) + throw Error("EOF reached while mapping Pire::Scanner"); + + memcpy(&s.m, p, sizeof(s.m)); + if (s.m.relocationSignature != Relocation::Signature) + throw Error("Type mismatch while mmapping Pire::Scanner"); + Impl::AdvancePtr(p, size, sizeof(s.m)); + Impl::AlignPtr(p, size); + + if (Shortcutting::Signature != s.m.shortcuttingSignature) + throw Error("This scanner has different shortcutting type"); + + bool empty = *((const bool*) p); + Impl::AdvancePtr(p, size, sizeof(empty)); + Impl::AlignPtr(p, size); + + if (empty) + s.Alias(Null()); + else { + if (size < s.BufSize()) + throw Error("EOF reached while mapping NPire::Scanner"); + s.Markup(const_cast<size_t*>(p)); + Impl::AdvancePtr(p, size, s.BufSize()); + s.m.initial += reinterpret_cast<size_t>(s.m_transitions); + } + + Swap(s); + return Impl::AlignPtr(p, size); + } + + size_t StateIndex(State s) const + { + return (s - reinterpret_cast<size_t>(m_transitions)) / (RowSize() * sizeof(Transition)); + } + + /** + * Agglutinates two scanners together, producing a larger scanner. + * Checkig a string against that scanner effectively checks them against both agglutinated regexps + * (detailed information about matched regexps can be obtained with AcceptedRegexps()). + * + * Returns default-constructed scanner in case of failure + * (consult Scanner::Empty() to find out whether the operation was successful). + */ + static Scanner Glue(const Scanner& a, const Scanner& b, size_t maxSize = 0); + + // Returns the size of the memory buffer used (or required) by scanner. + size_t BufSize() const + { + return AlignUp( + MaxChar * sizeof(Letter) // Letters translation table + + m.finalTableSize * sizeof(size_t) // Final table + + m.statesCount * sizeof(size_t) // Final index + + RowSize() * m.statesCount * sizeof(Transition), // Transitions table + sizeof(size_t)); + } + + void Save(yostream*) const; + void Load(yistream*); + + ScannerRowHeader& Header(State s) { return *(ScannerRowHeader*) s; } + const ScannerRowHeader& Header(State s) const { return *(const ScannerRowHeader*) s; } + protected: - - struct Locals { - ui32 statesCount; - ui32 lettersCount; - ui32 regexpsCount; - size_t initial; - ui32 finalTableSize; - size_t relocationSignature; - size_t shortcuttingSignature; - } m; - + + struct Locals { + ui32 statesCount; + ui32 lettersCount; + ui32 regexpsCount; + size_t initial; + ui32 finalTableSize; + size_t relocationSignature; + size_t shortcuttingSignature; + } m; + using BufferType = TArrayHolder<char>; BufferType m_buffer; - Letter* m_letters; - - size_t* m_final; - size_t* m_finalIndex; - - Transition* m_transitions; - - inline static const Scanner& Null() - { - static const Scanner n = Fsm::MakeFalse().Compile< Scanner<Relocation, Shortcutting> >(); + Letter* m_letters; + + size_t* m_final; + size_t* m_finalIndex; + + Transition* m_transitions; + + inline static const Scanner& Null() + { + static const Scanner n = Fsm::MakeFalse().Compile< Scanner<Relocation, Shortcutting> >(); return n; - } - - // Returns transition row size in Transition's. Row size_in bytes should be a multiple of sizeof(MaxSizeWord) - size_t RowSize() const { return AlignUp(m.lettersCount + HEADER_SIZE, sizeof(MaxSizeWord)/sizeof(Transition)); } - - static const size_t HEADER_SIZE = sizeof(ScannerRowHeader) / sizeof(Transition); - PIRE_STATIC_ASSERT(sizeof(ScannerRowHeader) % sizeof(Transition) == 0); - - template<class Eq> - void Init(size_t states, const Partition<Char, Eq>& letters, size_t finalStatesCount, size_t startState, size_t regexpsCount = 1) - { + } + + // Returns transition row size in Transition's. Row size_in bytes should be a multiple of sizeof(MaxSizeWord) + size_t RowSize() const { return AlignUp(m.lettersCount + HEADER_SIZE, sizeof(MaxSizeWord)/sizeof(Transition)); } + + static const size_t HEADER_SIZE = sizeof(ScannerRowHeader) / sizeof(Transition); + PIRE_STATIC_ASSERT(sizeof(ScannerRowHeader) % sizeof(Transition) == 0); + + template<class Eq> + void Init(size_t states, const Partition<Char, Eq>& letters, size_t finalStatesCount, size_t startState, size_t regexpsCount = 1) + { std::memset(&m, 0, sizeof(m)); - m.relocationSignature = Relocation::Signature; - m.shortcuttingSignature = Shortcutting::Signature; - m.statesCount = states; - m.lettersCount = letters.Size(); - m.regexpsCount = regexpsCount; - m.finalTableSize = finalStatesCount + states; - + m.relocationSignature = Relocation::Signature; + m.shortcuttingSignature = Shortcutting::Signature; + m.statesCount = states; + m.lettersCount = letters.Size(); + m.regexpsCount = regexpsCount; + m.finalTableSize = finalStatesCount + states; + m_buffer = BufferType(new char[BufSize() + sizeof(size_t)]); memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t)); Markup(AlignUp(m_buffer.Get(), sizeof(size_t))); - - for (size_t i = 0; i != Size(); ++i) - Header(IndexToState(i)) = ScannerRowHeader(); - - m.initial = reinterpret_cast<size_t>(m_transitions + startState * RowSize()); - - // Build letter translation table + + for (size_t i = 0; i != Size(); ++i) + Header(IndexToState(i)) = ScannerRowHeader(); + + m.initial = reinterpret_cast<size_t>(m_transitions + startState * RowSize()); + + // Build letter translation table for (auto&& letter : letters) for (auto&& character : letter.second.second) m_letters[character] = letter.second.first + HEADER_SIZE; - } - - /* - * Initializes pointers depending on buffer start, letters and states count - */ - void Markup(void* ptr) - { - Impl::CheckAlign(ptr, sizeof(size_t)); - m_letters = reinterpret_cast<Letter*>(ptr); - m_final = reinterpret_cast<size_t*>(m_letters + MaxChar); - m_finalIndex = reinterpret_cast<size_t*>(m_final + m.finalTableSize); - m_transitions = reinterpret_cast<Transition*>(m_finalIndex + m.statesCount); - } - - // Makes a shallow ("weak") copy of the given scanner. - // The copied scanner does not maintain lifetime of the original's entrails. - void Alias(const Scanner<Relocation, Shortcutting>& s) - { - memcpy(&m, &s.m, sizeof(m)); + } + + /* + * Initializes pointers depending on buffer start, letters and states count + */ + void Markup(void* ptr) + { + Impl::CheckAlign(ptr, sizeof(size_t)); + m_letters = reinterpret_cast<Letter*>(ptr); + m_final = reinterpret_cast<size_t*>(m_letters + MaxChar); + m_finalIndex = reinterpret_cast<size_t*>(m_final + m.finalTableSize); + m_transitions = reinterpret_cast<Transition*>(m_finalIndex + m.statesCount); + } + + // Makes a shallow ("weak") copy of the given scanner. + // The copied scanner does not maintain lifetime of the original's entrails. + void Alias(const Scanner<Relocation, Shortcutting>& s) + { + memcpy(&m, &s.m, sizeof(m)); m_buffer.Reset(); - m_letters = s.m_letters; - m_final = s.m_final; - m_finalIndex = s.m_finalIndex; - m_transitions = s.m_transitions; - } - - template<class AnotherRelocation> - void DeepCopy(const Scanner<AnotherRelocation, Shortcutting>& s) - { - // Don't want memory leaks, but we cannot free the buffer because there might be aliased instances + m_letters = s.m_letters; + m_final = s.m_final; + m_finalIndex = s.m_finalIndex; + m_transitions = s.m_transitions; + } + + template<class AnotherRelocation> + void DeepCopy(const Scanner<AnotherRelocation, Shortcutting>& s) + { + // Don't want memory leaks, but we cannot free the buffer because there might be aliased instances Y_ASSERT(m_buffer == nullptr); - - // Ensure that specializations of Scanner across different Relocations do not touch its Locals + + // Ensure that specializations of Scanner across different Relocations do not touch its Locals static_assert(sizeof(m) == sizeof(s.m), "sizeof(m) == sizeof(s.m)"); - memcpy(&m, &s.m, sizeof(s.m)); - m.relocationSignature = Relocation::Signature; - m.shortcuttingSignature = Shortcutting::Signature; + memcpy(&m, &s.m, sizeof(s.m)); + m.relocationSignature = Relocation::Signature; + m.shortcuttingSignature = Shortcutting::Signature; m_buffer = BufferType(new char[BufSize() + sizeof(size_t)]); std::memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t)); Markup(AlignUp(m_buffer.Get(), sizeof(size_t))); - - // Values in letter-to-leterclass table take into account row header size - for (size_t c = 0; c < MaxChar; ++c) { - m_letters[c] = s.m_letters[c] - s.HEADER_SIZE + HEADER_SIZE; + + // Values in letter-to-leterclass table take into account row header size + for (size_t c = 0; c < MaxChar; ++c) { + m_letters[c] = s.m_letters[c] - s.HEADER_SIZE + HEADER_SIZE; Y_ASSERT(c == Epsilon || m_letters[c] >= HEADER_SIZE); Y_ASSERT(c == Epsilon || m_letters[c] < RowSize()); - } - memcpy(m_final, s.m_final, m.finalTableSize * sizeof(*m_final)); - memcpy(m_finalIndex, s.m_finalIndex, m.statesCount * sizeof(*m_finalIndex)); - - m.initial = IndexToState(s.StateIndex(s.m.initial)); - - for (size_t st = 0; st != m.statesCount; ++st) { - size_t oldstate = s.IndexToState(st); - size_t newstate = IndexToState(st); - Header(newstate) = s.Header(oldstate); - const typename Scanner<AnotherRelocation, Shortcutting>::Transition* os - = reinterpret_cast<const typename Scanner<AnotherRelocation, Shortcutting>::Transition*>(oldstate); - Transition* ns = reinterpret_cast<Transition*>(newstate); - - for (size_t let = 0; let != LettersCount(); ++let) { - size_t destIndex = s.StateIndex(AnotherRelocation::Go(oldstate, os[let + s.HEADER_SIZE])); - Transition tr = Relocation::Diff(newstate, IndexToState(destIndex)); - ns[let + HEADER_SIZE] = tr; + } + memcpy(m_final, s.m_final, m.finalTableSize * sizeof(*m_final)); + memcpy(m_finalIndex, s.m_finalIndex, m.statesCount * sizeof(*m_finalIndex)); + + m.initial = IndexToState(s.StateIndex(s.m.initial)); + + for (size_t st = 0; st != m.statesCount; ++st) { + size_t oldstate = s.IndexToState(st); + size_t newstate = IndexToState(st); + Header(newstate) = s.Header(oldstate); + const typename Scanner<AnotherRelocation, Shortcutting>::Transition* os + = reinterpret_cast<const typename Scanner<AnotherRelocation, Shortcutting>::Transition*>(oldstate); + Transition* ns = reinterpret_cast<Transition*>(newstate); + + for (size_t let = 0; let != LettersCount(); ++let) { + size_t destIndex = s.StateIndex(AnotherRelocation::Go(oldstate, os[let + s.HEADER_SIZE])); + Transition tr = Relocation::Diff(newstate, IndexToState(destIndex)); + ns[let + HEADER_SIZE] = tr; Y_ASSERT(Relocation::Go(newstate, tr) >= (size_t)m_transitions); Y_ASSERT(Relocation::Go(newstate, tr) < (size_t)(m_transitions + RowSize()*Size())); - } - } - } - - - size_t IndexToState(size_t stateIndex) const - { - return reinterpret_cast<size_t>(m_transitions + stateIndex * RowSize()); - } - - void SetJump(size_t oldState, Char c, size_t newState, unsigned long /*payload*/ = 0) - { + } + } + } + + + size_t IndexToState(size_t stateIndex) const + { + return reinterpret_cast<size_t>(m_transitions + stateIndex * RowSize()); + } + + void SetJump(size_t oldState, Char c, size_t newState, unsigned long /*payload*/ = 0) + { Y_ASSERT(m_buffer); Y_ASSERT(oldState < m.statesCount); Y_ASSERT(newState < m.statesCount); - - m_transitions[oldState * RowSize() + m_letters[c]] - = Relocation::Diff(IndexToState(oldState), IndexToState(newState)); - } - - unsigned long RemapAction(unsigned long action) { return action; } - - void SetInitial(size_t state) - { + + m_transitions[oldState * RowSize() + m_letters[c]] + = Relocation::Diff(IndexToState(oldState), IndexToState(newState)); + } + + unsigned long RemapAction(unsigned long action) { return action; } + + void SetInitial(size_t state) + { Y_ASSERT(m_buffer); - m.initial = IndexToState(state); - } - - void SetTag(size_t state, size_t value) - { + m.initial = IndexToState(state); + } + + void SetTag(size_t state, size_t value) + { Y_ASSERT(m_buffer); - Header(IndexToState(state)).Common.Flags = value; - } - - // Fill shortcut masks for all the states - void BuildShortcuts() - { + Header(IndexToState(state)).Common.Flags = value; + } + + // Fill shortcut masks for all the states + void BuildShortcuts() + { Y_ASSERT(m_buffer); - - // Build the mapping from letter classes to characters + + // Build the mapping from letter classes to characters TVector< TVector<char> > letters(RowSize()); - for (unsigned ch = 0; ch != 1 << (sizeof(char)*8); ++ch) - letters[m_letters[ch]].push_back(ch); - - // Loop through all states in the transition table and - // check if it is possible to setup shortcuts - for (size_t i = 0; i != Size(); ++i) { - State st = IndexToState(i); - ScannerRowHeader& header = Header(st); - Shortcutting::SetNoExit(header); - size_t ind = 0; - size_t let = HEADER_SIZE; - for (; let != LettersCount() + HEADER_SIZE; ++let) { - // Check if the transition is not the same state - if (Relocation::Go(st, reinterpret_cast<const Transition*>(st)[let]) != st) { - if (ind + letters[let].size() > Shortcutting::ExitMaskCount) - break; - // For each character setup a mask + for (unsigned ch = 0; ch != 1 << (sizeof(char)*8); ++ch) + letters[m_letters[ch]].push_back(ch); + + // Loop through all states in the transition table and + // check if it is possible to setup shortcuts + for (size_t i = 0; i != Size(); ++i) { + State st = IndexToState(i); + ScannerRowHeader& header = Header(st); + Shortcutting::SetNoExit(header); + size_t ind = 0; + size_t let = HEADER_SIZE; + for (; let != LettersCount() + HEADER_SIZE; ++let) { + // Check if the transition is not the same state + if (Relocation::Go(st, reinterpret_cast<const Transition*>(st)[let]) != st) { + if (ind + letters[let].size() > Shortcutting::ExitMaskCount) + break; + // For each character setup a mask for (auto&& character : letters[let]) { Shortcutting::SetMask(header, ind, character); - ++ind; - } - } - } - - if (let != LettersCount() + HEADER_SIZE) { - // Not enough space in ExitMasks, so reset all masks (which leads to bypassing the optimization) - Shortcutting::SetNoShortcut(header); - } - // Fill the rest of the shortcut masks with the last used mask - Shortcutting::FinishMasks(header, ind); - } - } - - // Fills final states table and builds shortcuts if possible - void FinishBuild() - { + ++ind; + } + } + } + + if (let != LettersCount() + HEADER_SIZE) { + // Not enough space in ExitMasks, so reset all masks (which leads to bypassing the optimization) + Shortcutting::SetNoShortcut(header); + } + // Fill the rest of the shortcut masks with the last used mask + Shortcutting::FinishMasks(header, ind); + } + } + + // Fills final states table and builds shortcuts if possible + void FinishBuild() + { Y_ASSERT(m_buffer); auto finalWriter = m_final; - for (size_t state = 0; state != Size(); ++state) { + for (size_t state = 0; state != Size(); ++state) { m_finalIndex[state] = finalWriter - m_final; - if (Header(IndexToState(state)).Common.Flags & FinalFlag) + if (Header(IndexToState(state)).Common.Flags & FinalFlag) *finalWriter++ = 0; *finalWriter++ = static_cast<size_t>(-1); - } - BuildShortcuts(); - } - - size_t AcceptedRegexpsCount(size_t idx) const - { - const size_t* b = m_final + m_finalIndex[idx]; - const size_t* e = b; - while (*e != End) - ++e; - return e - b; - } - - template <class Scanner> - friend void Pire::BuildScanner(const Fsm&, Scanner&); - - typedef State InternalState; // Needed for agglutination - friend class ScannerGlueCommon<Scanner>; - friend class ScannerGlueTask<Scanner>; - - template<class AnotherRelocation, class AnotherShortcutting> - friend class Scanner; - - friend struct ScannerSaver; - -#ifndef PIRE_DEBUG - friend struct AlignedRunner< Scanner<Relocation, Shortcutting> >; -#endif -}; - -// Helper class for Save/Load partial specialization -struct ScannerSaver { - template<class Shortcutting> - static void SaveScanner(const Scanner<Relocatable, Shortcutting>& scanner, yostream* s) - { - typedef Scanner<Relocatable, Shortcutting> ScannerType; - - typename ScannerType::Locals mc = scanner.m; - mc.initial -= reinterpret_cast<size_t>(scanner.m_transitions); + } + BuildShortcuts(); + } + + size_t AcceptedRegexpsCount(size_t idx) const + { + const size_t* b = m_final + m_finalIndex[idx]; + const size_t* e = b; + while (*e != End) + ++e; + return e - b; + } + + template <class Scanner> + friend void Pire::BuildScanner(const Fsm&, Scanner&); + + typedef State InternalState; // Needed for agglutination + friend class ScannerGlueCommon<Scanner>; + friend class ScannerGlueTask<Scanner>; + + template<class AnotherRelocation, class AnotherShortcutting> + friend class Scanner; + + friend struct ScannerSaver; + +#ifndef PIRE_DEBUG + friend struct AlignedRunner< Scanner<Relocation, Shortcutting> >; +#endif +}; + +// Helper class for Save/Load partial specialization +struct ScannerSaver { + template<class Shortcutting> + static void SaveScanner(const Scanner<Relocatable, Shortcutting>& scanner, yostream* s) + { + typedef Scanner<Relocatable, Shortcutting> ScannerType; + + typename ScannerType::Locals mc = scanner.m; + mc.initial -= reinterpret_cast<size_t>(scanner.m_transitions); SavePodType(s, Pire::Header(ScannerIOTypes::Scanner, sizeof(mc))); - Impl::AlignSave(s, sizeof(Pire::Header)); - SavePodType(s, mc); - Impl::AlignSave(s, sizeof(mc)); - SavePodType(s, scanner.Empty()); - Impl::AlignSave(s, sizeof(scanner.Empty())); - if (!scanner.Empty()) + Impl::AlignSave(s, sizeof(Pire::Header)); + SavePodType(s, mc); + Impl::AlignSave(s, sizeof(mc)); + SavePodType(s, scanner.Empty()); + Impl::AlignSave(s, sizeof(scanner.Empty())); + if (!scanner.Empty()) Impl::AlignedSaveArray(s, scanner.m_buffer.Get(), scanner.BufSize()); - } - - template<class Shortcutting> - static void LoadScanner(Scanner<Relocatable, Shortcutting>& scanner, yistream* s) - { - typedef Scanner<Relocatable, Shortcutting> ScannerType; - - Scanner<Relocatable, Shortcutting> sc; + } + + template<class Shortcutting> + static void LoadScanner(Scanner<Relocatable, Shortcutting>& scanner, yistream* s) + { + typedef Scanner<Relocatable, Shortcutting> ScannerType; + + Scanner<Relocatable, Shortcutting> sc; Impl::ValidateHeader(s, ScannerIOTypes::Scanner, sizeof(sc.m)); - LoadPodType(s, sc.m); - Impl::AlignLoad(s, sizeof(sc.m)); - if (Shortcutting::Signature != sc.m.shortcuttingSignature) - throw Error("This scanner has different shortcutting type"); - bool empty; - LoadPodType(s, empty); - Impl::AlignLoad(s, sizeof(empty)); - - if (empty) { - sc.Alias(ScannerType::Null()); - } else { + LoadPodType(s, sc.m); + Impl::AlignLoad(s, sizeof(sc.m)); + if (Shortcutting::Signature != sc.m.shortcuttingSignature) + throw Error("This scanner has different shortcutting type"); + bool empty; + LoadPodType(s, empty); + Impl::AlignLoad(s, sizeof(empty)); + + if (empty) { + sc.Alias(ScannerType::Null()); + } else { sc.m_buffer = TArrayHolder<char>(new char[sc.BufSize()]); Impl::AlignedLoadArray(s, sc.m_buffer.Get(), sc.BufSize()); sc.Markup(sc.m_buffer.Get()); - sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions); - } - scanner.Swap(sc); - } - - // TODO: implement more effective serialization - // of nonrelocatable scanner if necessary - - template<class Shortcutting> - static void SaveScanner(const Scanner<Nonrelocatable, Shortcutting>& scanner, yostream* s) - { - Scanner<Relocatable, Shortcutting>(scanner).Save(s); - } - - template<class Shortcutting> - static void LoadScanner(Scanner<Nonrelocatable, Shortcutting>& scanner, yistream* s) - { - Scanner<Relocatable, Shortcutting> rs; - rs.Load(s); - Scanner<Nonrelocatable, Shortcutting>(rs).Swap(scanner); - } -}; - - -template<class Relocation, class Shortcutting> -void Scanner<Relocation, Shortcutting>::Save(yostream* s) const -{ - ScannerSaver::SaveScanner(*this, s); -} - -template<class Relocation, class Shortcutting> -void Scanner<Relocation, Shortcutting>::Load(yistream* s) -{ - ScannerSaver::LoadScanner(*this, s); -} - -// Shortcutting policy that checks state exit masks -template <size_t MaskCount> -class ExitMasks { -private: - enum { - NO_SHORTCUT_MASK = 1, // the state doesn't have shortcuts - NO_EXIT_MASK = 2 // the state has only transtions to itself (we can stop the scan) - }; - - template<class ScannerRowHeader, unsigned N> - struct MaskCheckerBase { + sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions); + } + scanner.Swap(sc); + } + + // TODO: implement more effective serialization + // of nonrelocatable scanner if necessary + + template<class Shortcutting> + static void SaveScanner(const Scanner<Nonrelocatable, Shortcutting>& scanner, yostream* s) + { + Scanner<Relocatable, Shortcutting>(scanner).Save(s); + } + + template<class Shortcutting> + static void LoadScanner(Scanner<Nonrelocatable, Shortcutting>& scanner, yistream* s) + { + Scanner<Relocatable, Shortcutting> rs; + rs.Load(s); + Scanner<Nonrelocatable, Shortcutting>(rs).Swap(scanner); + } +}; + + +template<class Relocation, class Shortcutting> +void Scanner<Relocation, Shortcutting>::Save(yostream* s) const +{ + ScannerSaver::SaveScanner(*this, s); +} + +template<class Relocation, class Shortcutting> +void Scanner<Relocation, Shortcutting>::Load(yistream* s) +{ + ScannerSaver::LoadScanner(*this, s); +} + +// Shortcutting policy that checks state exit masks +template <size_t MaskCount> +class ExitMasks { +private: + enum { + NO_SHORTCUT_MASK = 1, // the state doesn't have shortcuts + NO_EXIT_MASK = 2 // the state has only transtions to itself (we can stop the scan) + }; + + template<class ScannerRowHeader, unsigned N> + struct MaskCheckerBase { static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - bool Check(const ScannerRowHeader& hdr, size_t alignOffset, Word chunk) - { - Word mask = CheckBytes(hdr.Mask(N, alignOffset), chunk); - for (int i = N-1; i >= 0; --i) { - mask = Or(mask, CheckBytes(hdr.Mask(i, alignOffset), chunk)); - } - return !IsAnySet(mask); - } + bool Check(const ScannerRowHeader& hdr, size_t alignOffset, Word chunk) + { + Word mask = CheckBytes(hdr.Mask(N, alignOffset), chunk); + for (int i = N-1; i >= 0; --i) { + mask = Or(mask, CheckBytes(hdr.Mask(i, alignOffset), chunk)); + } + return !IsAnySet(mask); + } static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) - { - for (; begin != end && Check(hdr, alignOffset, ToLittleEndian(*begin)); ++begin) {} - return begin; - } - }; - - template<class ScannerRowHeader, unsigned N, unsigned Nmax> - struct MaskChecker : MaskCheckerBase<ScannerRowHeader, N> { - typedef MaskCheckerBase<ScannerRowHeader, N> Base; - typedef MaskChecker<ScannerRowHeader, N+1, Nmax> Next; + const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) + { + for (; begin != end && Check(hdr, alignOffset, ToLittleEndian(*begin)); ++begin) {} + return begin; + } + }; + + template<class ScannerRowHeader, unsigned N, unsigned Nmax> + struct MaskChecker : MaskCheckerBase<ScannerRowHeader, N> { + typedef MaskCheckerBase<ScannerRowHeader, N> Base; + typedef MaskChecker<ScannerRowHeader, N+1, Nmax> Next; static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) - { - if (hdr.Mask(N) == hdr.Mask(N + 1)) - return Base::DoRun(hdr, alignOffset, begin, end); - else - return Next::Run(hdr, alignOffset, begin, end); - } - }; - - template<class ScannerRowHeader, unsigned N> - struct MaskChecker<ScannerRowHeader, N, N> : MaskCheckerBase<ScannerRowHeader, N> { - typedef MaskCheckerBase<ScannerRowHeader, N> Base; + const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) + { + if (hdr.Mask(N) == hdr.Mask(N + 1)) + return Base::DoRun(hdr, alignOffset, begin, end); + else + return Next::Run(hdr, alignOffset, begin, end); + } + }; + + template<class ScannerRowHeader, unsigned N> + struct MaskChecker<ScannerRowHeader, N, N> : MaskCheckerBase<ScannerRowHeader, N> { + typedef MaskCheckerBase<ScannerRowHeader, N> Base; static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) - { - return Base::DoRun(hdr, alignOffset, begin, end); - } + const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) + { + return Base::DoRun(hdr, alignOffset, begin, end); + } }; - - // Compares the ExitMask[0] value without SSE reads which seems to be more optimal - template <class Relocation> + + // Compares the ExitMask[0] value without SSE reads which seems to be more optimal + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - bool CheckFirstMask(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t val) - { - return (scanner.Header(state).Mask(0) == val); - } - -public: - - static const size_t ExitMaskCount = MaskCount; - static const size_t Signature = 0x2000 + MaskCount; - - template <class Scanner> - struct ExtendedRowHeader { - private: - /// In order to allow transition table to be aligned at sizeof(size_t) instead of - /// sizeof(Word) and still be able to read Masks at Word-aligned addresses each mask - /// occupies 2x space and only properly aligned part of it is read - enum { - SizeTInMaxSizeWord = sizeof(MaxSizeWord) / sizeof(size_t), - MaskSizeInSizeT = 2 * SizeTInMaxSizeWord, - }; - + bool CheckFirstMask(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t val) + { + return (scanner.Header(state).Mask(0) == val); + } + +public: + + static const size_t ExitMaskCount = MaskCount; + static const size_t Signature = 0x2000 + MaskCount; + + template <class Scanner> + struct ExtendedRowHeader { + private: + /// In order to allow transition table to be aligned at sizeof(size_t) instead of + /// sizeof(Word) and still be able to read Masks at Word-aligned addresses each mask + /// occupies 2x space and only properly aligned part of it is read + enum { + SizeTInMaxSizeWord = sizeof(MaxSizeWord) / sizeof(size_t), + MaskSizeInSizeT = 2 * SizeTInMaxSizeWord, + }; + public: - static const size_t ExitMaskCount = MaskCount; - - inline - const Word& Mask(size_t i, size_t alignOffset) const - { + static const size_t ExitMaskCount = MaskCount; + + inline + const Word& Mask(size_t i, size_t alignOffset) const + { Y_ASSERT(i < ExitMaskCount); Y_ASSERT(alignOffset < SizeTInMaxSizeWord); - const Word* p = (const Word*)(ExitMasksArray + alignOffset + MaskSizeInSizeT * i); + const Word* p = (const Word*)(ExitMasksArray + alignOffset + MaskSizeInSizeT * i); Y_ASSERT(IsAligned(p, sizeof(Word))); - return *p; - } + return *p; + } PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - size_t Mask(size_t i) const - { + size_t Mask(size_t i) const + { Y_ASSERT(i < ExitMaskCount); - return ExitMasksArray[MaskSizeInSizeT*i]; - } - - void SetMask(size_t i, size_t val) - { - for (size_t j = 0; j < MaskSizeInSizeT; ++j) - ExitMasksArray[MaskSizeInSizeT*i + j] = val; - } - - ExtendedRowHeader() - { - for (size_t i = 0; i < ExitMaskCount; ++i) - SetMask(i, NO_SHORTCUT_MASK); - } - - template <class OtherScanner> - ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other) - { - PIRE_STATIC_ASSERT(ExitMaskCount == ExtendedRowHeader<OtherScanner>::ExitMaskCount); - Common = other.Common; - for (size_t i = 0; i < ExitMaskCount; ++i) - SetMask(i, other.Mask(i)); - return *this; - } - - private: - /// If this state loops for all letters except particular set - /// (common thing when matching something like /.*[Aa]/), - /// each ExitMask contains that letter in each byte of size_t. - /// - /// These masks are most commonly used for fast forwarding through parts - /// of the string matching /.*/ somewhere in the middle regexp. - size_t ExitMasksArray[ExitMaskCount * MaskSizeInSizeT]; - - public: - typename Scanner::CommonRowHeader Common; - }; - - template <class Header> - static void SetNoExit(Header& header) - { - header.SetMask(0, NO_EXIT_MASK); - } - - template <class Header> - static void SetNoShortcut(Header& header) - { - header.SetMask(0, NO_SHORTCUT_MASK); - } - - template <class Header> - static void SetMask(Header& header, size_t ind, char c) - { - header.SetMask(ind, FillSizeT(c)); - } - - template <class Header> - static void FinishMasks(Header& header, size_t ind) - { - if (ind == 0) - ind = 1; - // Fill the rest of the shortcut masks with the last used mask - size_t lastMask = header.Mask(ind - 1); - while (ind != ExitMaskCount) { - header.SetMask(ind, lastMask); - ++ind; - } - } - - template <class Relocation> + return ExitMasksArray[MaskSizeInSizeT*i]; + } + + void SetMask(size_t i, size_t val) + { + for (size_t j = 0; j < MaskSizeInSizeT; ++j) + ExitMasksArray[MaskSizeInSizeT*i + j] = val; + } + + ExtendedRowHeader() + { + for (size_t i = 0; i < ExitMaskCount; ++i) + SetMask(i, NO_SHORTCUT_MASK); + } + + template <class OtherScanner> + ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other) + { + PIRE_STATIC_ASSERT(ExitMaskCount == ExtendedRowHeader<OtherScanner>::ExitMaskCount); + Common = other.Common; + for (size_t i = 0; i < ExitMaskCount; ++i) + SetMask(i, other.Mask(i)); + return *this; + } + + private: + /// If this state loops for all letters except particular set + /// (common thing when matching something like /.*[Aa]/), + /// each ExitMask contains that letter in each byte of size_t. + /// + /// These masks are most commonly used for fast forwarding through parts + /// of the string matching /.*/ somewhere in the middle regexp. + size_t ExitMasksArray[ExitMaskCount * MaskSizeInSizeT]; + + public: + typename Scanner::CommonRowHeader Common; + }; + + template <class Header> + static void SetNoExit(Header& header) + { + header.SetMask(0, NO_EXIT_MASK); + } + + template <class Header> + static void SetNoShortcut(Header& header) + { + header.SetMask(0, NO_SHORTCUT_MASK); + } + + template <class Header> + static void SetMask(Header& header, size_t ind, char c) + { + header.SetMask(ind, FillSizeT(c)); + } + + template <class Header> + static void FinishMasks(Header& header, size_t ind) + { + if (ind == 0) + ind = 1; + // Fill the rest of the shortcut masks with the last used mask + size_t lastMask = header.Mask(ind - 1); + while (ind != ExitMaskCount) { + header.SetMask(ind, lastMask); + ++ind; + } + } + + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - bool NoExit(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state) - { - return CheckFirstMask(scanner, state, NO_EXIT_MASK); - } - - template <class Relocation> + bool NoExit(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state) + { + return CheckFirstMask(scanner, state, NO_EXIT_MASK); + } + + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - bool NoShortcut(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state) - { - return CheckFirstMask(scanner, state, NO_SHORTCUT_MASK); - } - - template <class Relocation> + bool NoShortcut(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state) + { + return CheckFirstMask(scanner, state, NO_SHORTCUT_MASK); + } + + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - const Word* Run(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t alignOffset, const Word* begin, const Word* end) - { - return MaskChecker<typename Scanner<Relocation, ExitMasks<MaskCount> >::ScannerRowHeader, 0, MaskCount - 1>::Run(scanner.Header(state), alignOffset, begin, end); - } - -}; - - -// Shortcutting policy that doesn't do shortcuts -struct NoShortcuts { - - static const size_t ExitMaskCount = 0; - static const size_t Signature = 0x1000; - - template <class Scanner> - struct ExtendedRowHeader { - typename Scanner::CommonRowHeader Common; - - template <class OtherScanner> - ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other) - { - PIRE_STATIC_ASSERT(sizeof(ExtendedRowHeader) == sizeof(ExtendedRowHeader<OtherScanner>)); - Common = other.Common; - return *this; - } - }; - - template <class Header> - static void SetNoExit(Header&) {} - - template <class Header> - static void SetNoShortcut(Header&) {} - - template <class Header> - static void SetMask(Header&, size_t, char) {} - - template <class Header> - static void FinishMasks(Header&, size_t) {} - - template <class Relocation> + const Word* Run(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t alignOffset, const Word* begin, const Word* end) + { + return MaskChecker<typename Scanner<Relocation, ExitMasks<MaskCount> >::ScannerRowHeader, 0, MaskCount - 1>::Run(scanner.Header(state), alignOffset, begin, end); + } + +}; + + +// Shortcutting policy that doesn't do shortcuts +struct NoShortcuts { + + static const size_t ExitMaskCount = 0; + static const size_t Signature = 0x1000; + + template <class Scanner> + struct ExtendedRowHeader { + typename Scanner::CommonRowHeader Common; + + template <class OtherScanner> + ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other) + { + PIRE_STATIC_ASSERT(sizeof(ExtendedRowHeader) == sizeof(ExtendedRowHeader<OtherScanner>)); + Common = other.Common; + return *this; + } + }; + + template <class Header> + static void SetNoExit(Header&) {} + + template <class Header> + static void SetNoShortcut(Header&) {} + + template <class Header> + static void SetMask(Header&, size_t, char) {} + + template <class Header> + static void FinishMasks(Header&, size_t) {} + + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - bool NoExit(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State) - { - // Cannot exit prematurely - return false; - } - - template <class Relocation> + bool NoExit(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State) + { + // Cannot exit prematurely + return false; + } + + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - bool NoShortcut(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State) - { - // There's no shortcut regardless of the state - return true; - } - - template <class Relocation> + bool NoShortcut(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State) + { + // There's no shortcut regardless of the state + return true; + } + + template <class Relocation> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - const Word* Run(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State, size_t, const Word* begin, const Word*) - { - // Stop shortcutting right at the beginning - return begin; - } -}; - -#ifndef PIRE_DEBUG - -// The purpose of this template is to produce a number of ProcessChunk() calls -// instead of writing for(...){ProcessChunk()} loop that GCC refuses to unroll. -// Manually unrolled code proves to be faster -template <class Scanner, unsigned Count> -struct MultiChunk { - // Process Word-sized chunk which consist of >=1 size_t-sized chunks - template<class Pred> + const Word* Run(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State, size_t, const Word* begin, const Word*) + { + // Stop shortcutting right at the beginning + return begin; + } +}; + +#ifndef PIRE_DEBUG + +// The purpose of this template is to produce a number of ProcessChunk() calls +// instead of writing for(...){ProcessChunk()} loop that GCC refuses to unroll. +// Manually unrolled code proves to be faster +template <class Scanner, unsigned Count> +struct MultiChunk { + // Process Word-sized chunk which consist of >=1 size_t-sized chunks + template<class Pred> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action Process(const Scanner& scanner, typename Scanner::State& state, const size_t* p, Pred pred) - { - if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Continue) - return MultiChunk<Scanner, Count-1>::Process(scanner, state, ++p, pred); - else - return Stop; - } -}; - -template <class Scanner> -struct MultiChunk<Scanner, 0> { - // Process Word-sized chunk which consist of >=1 size_t-sized chunks - template<class Pred> + Action Process(const Scanner& scanner, typename Scanner::State& state, const size_t* p, Pred pred) + { + if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Continue) + return MultiChunk<Scanner, Count-1>::Process(scanner, state, ++p, pred); + else + return Stop; + } +}; + +template <class Scanner> +struct MultiChunk<Scanner, 0> { + // Process Word-sized chunk which consist of >=1 size_t-sized chunks + template<class Pred> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action Process(const Scanner&, typename Scanner::State, const size_t*, Pred) - { - return Continue; - } -}; - -// Efficiently runs a scanner through size_t-aligned memory range -template<class Relocation, class Shortcutting> -struct AlignedRunner< Scanner<Relocation, Shortcutting> > { -private: - typedef Scanner<Relocation, Shortcutting> ScannerType; - - // Processes Word-sized chuck of memory (depending on the platform a Word might - // consist of multiple size_t chuncks) - template <class Pred> + Action Process(const Scanner&, typename Scanner::State, const size_t*, Pred) + { + return Continue; + } +}; + +// Efficiently runs a scanner through size_t-aligned memory range +template<class Relocation, class Shortcutting> +struct AlignedRunner< Scanner<Relocation, Shortcutting> > { +private: + typedef Scanner<Relocation, Shortcutting> ScannerType; + + // Processes Word-sized chuck of memory (depending on the platform a Word might + // consist of multiple size_t chuncks) + template <class Pred> static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - Action RunMultiChunk(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, Pred pred) - { - return MultiChunk<ScannerType, sizeof(Word)/sizeof(size_t)>::Process(scanner, st, begin, pred); - } - - // Asserts if the scanner changes state while processing the byte range that is - // supposed to be skipped by a shortcut - static void ValidateSkip(const ScannerType& scanner, typename ScannerType::State st, const char* begin, const char* end) - { - typename ScannerType::State stateBefore = st; - for (const char* pos = begin; pos != end; ++pos) { - Step(scanner, st, (unsigned char)*pos); + Action RunMultiChunk(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, Pred pred) + { + return MultiChunk<ScannerType, sizeof(Word)/sizeof(size_t)>::Process(scanner, st, begin, pred); + } + + // Asserts if the scanner changes state while processing the byte range that is + // supposed to be skipped by a shortcut + static void ValidateSkip(const ScannerType& scanner, typename ScannerType::State st, const char* begin, const char* end) + { + typename ScannerType::State stateBefore = st; + for (const char* pos = begin; pos != end; ++pos) { + Step(scanner, st, (unsigned char)*pos); Y_ASSERT(st == stateBefore); - } - } - -public: - - template<class Pred> - static inline PIRE_HOT_FUNCTION - Action RunAligned(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, const size_t* end , Pred pred) - { + } + } + +public: + + template<class Pred> + static inline PIRE_HOT_FUNCTION + Action RunAligned(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, const size_t* end , Pred pred) + { typename ScannerType::State state = st; - const Word* head = AlignUp((const Word*) begin, sizeof(Word)); - const Word* tail = AlignDown((const Word*) end, sizeof(Word)); - for (; begin != (const size_t*) head && begin != end; ++begin) - if (RunChunk(scanner, state, begin, 0, sizeof(void*), pred) == Stop) { - st = state; - return Stop; - } - - if (begin == end) { - st = state; - return Continue; - } - if (Shortcutting::NoExit(scanner, state)) { - st = state; - return pred(scanner, state, ((const char*) end)); - } - - // Row size should be a multiple of MaxSizeWord size. Then alignOffset is the same for any state + const Word* head = AlignUp((const Word*) begin, sizeof(Word)); + const Word* tail = AlignDown((const Word*) end, sizeof(Word)); + for (; begin != (const size_t*) head && begin != end; ++begin) + if (RunChunk(scanner, state, begin, 0, sizeof(void*), pred) == Stop) { + st = state; + return Stop; + } + + if (begin == end) { + st = state; + return Continue; + } + if (Shortcutting::NoExit(scanner, state)) { + st = state; + return pred(scanner, state, ((const char*) end)); + } + + // Row size should be a multiple of MaxSizeWord size. Then alignOffset is the same for any state Y_ASSERT((scanner.RowSize()*sizeof(typename ScannerType::Transition)) % sizeof(MaxSizeWord) == 0); - size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t); - - bool noShortcut = Shortcutting::NoShortcut(scanner, state); - - while (true) { - // Do normal processing until a shortcut is possible - while (noShortcut && head != tail) { - if (RunMultiChunk(scanner, state, (const size_t*)head, pred) == Stop) { - st = state; - return Stop; - } - ++head; - noShortcut = Shortcutting::NoShortcut(scanner, state); - } - if (head == tail) - break; - - if (Shortcutting::NoExit(scanner, state)) { - st = state; - return pred(scanner, state, ((const char*) end)); - } - - // Do fast forwarding while it is possible - const Word* skipEnd = Shortcutting::Run(scanner, state, alignOffset, head, tail); - PIRE_IF_CHECKED(ValidateSkip(scanner, state, (const char*)head, (const char*)skipEnd)); - head = skipEnd; - noShortcut = true; - } - - for (size_t* p = (size_t*) tail; p != end; ++p) { - if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Stop) { - st = state; - return Stop; - } - } - - st = state; - return Continue; - } -}; - -#endif - -template<class Scanner> -class ScannerGlueTask: public ScannerGlueCommon<Scanner> { -public: - typedef ScannerGlueCommon<Scanner> Base; - typedef typename Base::State State; - using Base::Lhs; - using Base::Rhs; - using Base::Sc; - using Base::Letters; - - typedef GluedStateLookupTable<256*1024, typename Scanner::State> InvStates; - - ScannerGlueTask(const Scanner& lhs, const Scanner& rhs) - : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters)) - { - } + size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t); + + bool noShortcut = Shortcutting::NoShortcut(scanner, state); + + while (true) { + // Do normal processing until a shortcut is possible + while (noShortcut && head != tail) { + if (RunMultiChunk(scanner, state, (const size_t*)head, pred) == Stop) { + st = state; + return Stop; + } + ++head; + noShortcut = Shortcutting::NoShortcut(scanner, state); + } + if (head == tail) + break; + + if (Shortcutting::NoExit(scanner, state)) { + st = state; + return pred(scanner, state, ((const char*) end)); + } + + // Do fast forwarding while it is possible + const Word* skipEnd = Shortcutting::Run(scanner, state, alignOffset, head, tail); + PIRE_IF_CHECKED(ValidateSkip(scanner, state, (const char*)head, (const char*)skipEnd)); + head = skipEnd; + noShortcut = true; + } + + for (size_t* p = (size_t*) tail; p != end; ++p) { + if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Stop) { + st = state; + return Stop; + } + } + + st = state; + return Continue; + } +}; + +#endif + +template<class Scanner> +class ScannerGlueTask: public ScannerGlueCommon<Scanner> { +public: + typedef ScannerGlueCommon<Scanner> Base; + typedef typename Base::State State; + using Base::Lhs; + using Base::Rhs; + using Base::Sc; + using Base::Letters; + + typedef GluedStateLookupTable<256*1024, typename Scanner::State> InvStates; + + ScannerGlueTask(const Scanner& lhs, const Scanner& rhs) + : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters)) + { + } void AcceptStates(const TVector<State>& states) - { - // Make up a new scanner and fill in the final table + { + // Make up a new scanner and fill in the final table - size_t finalTableSize = 0; + size_t finalTableSize = 0; for (auto&& i : states) finalTableSize += RangeLen(Lhs().AcceptedRegexps(i.first)) + RangeLen(Rhs().AcceptedRegexps(i.second)); this->SetSc(THolder<Scanner>(new Scanner)); - Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount()); + Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount()); auto finalWriter = Sc().m_final; - for (size_t state = 0; state != states.size(); ++state) { + for (size_t state = 0; state != states.size(); ++state) { Sc().m_finalIndex[state] = finalWriter - Sc().m_final; finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter); finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter); *finalWriter++ = static_cast<size_t>(-1); - Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0) - | ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0)); - } - } - - void Connect(size_t from, size_t to, Char letter) { Sc().SetJump(from, letter, to); } + Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0) + | ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0)); + } + } - const Scanner& Success() - { - Sc().BuildShortcuts(); - return Sc(); - } + void Connect(size_t from, size_t to, Char letter) { Sc().SetJump(from, letter, to); } + + const Scanner& Success() + { + Sc().BuildShortcuts(); + return Sc(); + } private: - template<class Iter> - size_t RangeLen(ypair<Iter, Iter> range) const - { - return std::distance(range.first, range.second); - } - - template<class Iter, class OutIter> - OutIter Shift(ypair<Iter, Iter> range, size_t shift, OutIter out) const - { - for (; range.first != range.second; ++range.first, ++out) - *out = *range.first + shift; - return out; - } -}; - -} - - -template<class Relocation, class Shortcutting> -struct StDumper< Impl::Scanner<Relocation, Shortcutting> > { - - typedef Impl::Scanner<Relocation, Shortcutting> ScannerType; - - StDumper(const ScannerType& sc, typename ScannerType::State st): m_sc(&sc), m_st(st) {} - - void Dump(yostream& stream) const - { - stream << m_sc->StateIndex(m_st); - if (m_sc->Final(m_st)) - stream << " [final]"; - if (m_sc->Dead(m_st)) - stream << " [dead]"; - } -private: - const ScannerType* m_sc; - typename ScannerType::State m_st; -}; - - -template<class Relocation, class Shortcutting> -Impl::Scanner<Relocation, Shortcutting> Impl::Scanner<Relocation, Shortcutting>::Glue(const Impl::Scanner<Relocation, Shortcutting>& lhs, const Impl::Scanner<Relocation, Shortcutting>& rhs, size_t maxSize /* = 0 */) -{ - if (lhs.Empty()) - return rhs; - if (rhs.Empty()) - return lhs; - - static const size_t DefMaxSize = 80000; - Impl::ScannerGlueTask< Impl::Scanner<Relocation, Shortcutting> > task(lhs, rhs); - return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); -} - - -/** - * A compiled multiregexp. - * Can only find out whether a string matches the regexps or not, - * but takes O( str.length() ) time. - * - * In addition, multiple scanners can be agglutinated together, - * producting a scanner which can be used for checking - * strings against several regexps in a single pass. - */ -typedef Impl::Scanner<Impl::Relocatable, Impl::ExitMasks<2> > Scanner; -typedef Impl::Scanner<Impl::Relocatable, Impl::NoShortcuts> ScannerNoMask; - -/** - * Same as above, but does not allow relocation or mmap()-ing. - * On the other hand, runs almost twice as fast as the Scanner. - */ -typedef Impl::Scanner<Impl::Nonrelocatable, Impl::ExitMasks<2> > NonrelocScanner; -typedef Impl::Scanner<Impl::Nonrelocatable, Impl::NoShortcuts> NonrelocScannerNoMask; - -} - -namespace std { + template<class Iter> + size_t RangeLen(ypair<Iter, Iter> range) const + { + return std::distance(range.first, range.second); + } + + template<class Iter, class OutIter> + OutIter Shift(ypair<Iter, Iter> range, size_t shift, OutIter out) const + { + for (; range.first != range.second; ++range.first, ++out) + *out = *range.first + shift; + return out; + } +}; + +} + + +template<class Relocation, class Shortcutting> +struct StDumper< Impl::Scanner<Relocation, Shortcutting> > { + + typedef Impl::Scanner<Relocation, Shortcutting> ScannerType; + + StDumper(const ScannerType& sc, typename ScannerType::State st): m_sc(&sc), m_st(st) {} + + void Dump(yostream& stream) const + { + stream << m_sc->StateIndex(m_st); + if (m_sc->Final(m_st)) + stream << " [final]"; + if (m_sc->Dead(m_st)) + stream << " [dead]"; + } +private: + const ScannerType* m_sc; + typename ScannerType::State m_st; +}; + + +template<class Relocation, class Shortcutting> +Impl::Scanner<Relocation, Shortcutting> Impl::Scanner<Relocation, Shortcutting>::Glue(const Impl::Scanner<Relocation, Shortcutting>& lhs, const Impl::Scanner<Relocation, Shortcutting>& rhs, size_t maxSize /* = 0 */) +{ + if (lhs.Empty()) + return rhs; + if (rhs.Empty()) + return lhs; + + static const size_t DefMaxSize = 80000; + Impl::ScannerGlueTask< Impl::Scanner<Relocation, Shortcutting> > task(lhs, rhs); + return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); +} + + +/** + * A compiled multiregexp. + * Can only find out whether a string matches the regexps or not, + * but takes O( str.length() ) time. + * + * In addition, multiple scanners can be agglutinated together, + * producting a scanner which can be used for checking + * strings against several regexps in a single pass. + */ +typedef Impl::Scanner<Impl::Relocatable, Impl::ExitMasks<2> > Scanner; +typedef Impl::Scanner<Impl::Relocatable, Impl::NoShortcuts> ScannerNoMask; + +/** + * Same as above, but does not allow relocation or mmap()-ing. + * On the other hand, runs almost twice as fast as the Scanner. + */ +typedef Impl::Scanner<Impl::Nonrelocatable, Impl::ExitMasks<2> > NonrelocScanner; +typedef Impl::Scanner<Impl::Nonrelocatable, Impl::NoShortcuts> NonrelocScannerNoMask; + +} + +namespace std { inline void swap(Pire::Scanner& a, Pire::Scanner& b) { - a.Swap(b); - } - + a.Swap(b); + } + inline void swap(Pire::NonrelocScanner& a, Pire::NonrelocScanner& b) { - a.Swap(b); - } -} - - -#endif + a.Swap(b); + } +} + + +#endif diff --git a/contrib/libs/pire/pire/scanners/null.cpp b/contrib/libs/pire/pire/scanners/null.cpp index f0e21ce4d3..3a7fee7220 100644 --- a/contrib/libs/pire/pire/scanners/null.cpp +++ b/contrib/libs/pire/pire/scanners/null.cpp @@ -1,6 +1,6 @@ #include <contrib/libs/pire/pire/fsm.h> -#include "multi.h" +#include "multi.h" #include "half_final.h" -#include "simple.h" -#include "slow.h" -#include "loaded.h" +#include "simple.h" +#include "slow.h" +#include "loaded.h" diff --git a/contrib/libs/pire/pire/scanners/pair.h b/contrib/libs/pire/pire/scanners/pair.h index c12338a2a0..16fc14a59f 100644 --- a/contrib/libs/pire/pire/scanners/pair.h +++ b/contrib/libs/pire/pire/scanners/pair.h @@ -1,99 +1,99 @@ -/* - * pair.h -- definition of the pair of scanners - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * pair.h -- definition of the pair of scanners * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - -#ifndef PIRE_SCANNER_PAIR_INCLUDED -#define PIRE_SCANNER_PAIR_INCLUDED - -namespace Pire { - - /** - * A pair of scanner, providing the interface of a scanner itself. - * If you need to run two scanners on the same string, using ScannerPair - * is usually faster then running those scanners sequentially. - */ - template<class Scanner1, class Scanner2> - class ScannerPair { - public: - typedef ypair<typename Scanner1::State, typename Scanner2::State> State; - typedef ypair<typename Scanner1::Action, typename Scanner2::Action> Action; - - ScannerPair() - : m_scanner1() - , m_scanner2() - { - } - ScannerPair(const Scanner1& s1, const Scanner2& s2) - : m_scanner1(&s1) - , m_scanner2(&s2) - { - } - - void Initialize(State& state) const - { - m_scanner1->Initialize(state.first); - m_scanner2->Initialize(state.second); - } - - Action Next(State& state, Char ch) const - { - return ymake_pair( - m_scanner1->Next(state.first, ch), - m_scanner2->Next(state.second, ch) - ); - } - - void TakeAction(State& s, Action a) const - { - m_scanner1->TakeAction(s.first, a.first); - m_scanner2->TakeAction(s.second, a.second); - } - - bool Final(const State& state) const - { - return m_scanner1->Final(state.first) || m_scanner2->Final(state.second); - } - - bool Dead(const State& state) const - { - return m_scanner1->Dead(state.first) && m_scanner2->Dead(state.second); - } - - ypair<size_t, size_t> StateIndex(const State& state) const - { - return ymake_pair(m_scanner1->StateIndex(state.first), m_scanner2->StateIndex(state.second)); - } - - Scanner1& First() { return *m_scanner1; } - Scanner2& Second() { return *m_scanner2; } - - const Scanner1& First() const { return *m_scanner1; } - const Scanner2& Second() const { return *m_scanner2; } - - private: - const Scanner1* m_scanner1; - const Scanner2* m_scanner2; - }; - - -} - -#endif + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + +#ifndef PIRE_SCANNER_PAIR_INCLUDED +#define PIRE_SCANNER_PAIR_INCLUDED + +namespace Pire { + + /** + * A pair of scanner, providing the interface of a scanner itself. + * If you need to run two scanners on the same string, using ScannerPair + * is usually faster then running those scanners sequentially. + */ + template<class Scanner1, class Scanner2> + class ScannerPair { + public: + typedef ypair<typename Scanner1::State, typename Scanner2::State> State; + typedef ypair<typename Scanner1::Action, typename Scanner2::Action> Action; + + ScannerPair() + : m_scanner1() + , m_scanner2() + { + } + ScannerPair(const Scanner1& s1, const Scanner2& s2) + : m_scanner1(&s1) + , m_scanner2(&s2) + { + } + + void Initialize(State& state) const + { + m_scanner1->Initialize(state.first); + m_scanner2->Initialize(state.second); + } + + Action Next(State& state, Char ch) const + { + return ymake_pair( + m_scanner1->Next(state.first, ch), + m_scanner2->Next(state.second, ch) + ); + } + + void TakeAction(State& s, Action a) const + { + m_scanner1->TakeAction(s.first, a.first); + m_scanner2->TakeAction(s.second, a.second); + } + + bool Final(const State& state) const + { + return m_scanner1->Final(state.first) || m_scanner2->Final(state.second); + } + + bool Dead(const State& state) const + { + return m_scanner1->Dead(state.first) && m_scanner2->Dead(state.second); + } + + ypair<size_t, size_t> StateIndex(const State& state) const + { + return ymake_pair(m_scanner1->StateIndex(state.first), m_scanner2->StateIndex(state.second)); + } + + Scanner1& First() { return *m_scanner1; } + Scanner2& Second() { return *m_scanner2; } + + const Scanner1& First() const { return *m_scanner1; } + const Scanner2& Second() const { return *m_scanner2; } + + private: + const Scanner1* m_scanner1; + const Scanner2* m_scanner2; + }; + + +} + +#endif diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h index ef959aeed1..3175e105da 100644 --- a/contrib/libs/pire/pire/scanners/simple.h +++ b/contrib/libs/pire/pire/scanners/simple.h @@ -1,190 +1,190 @@ -/* - * simple.h -- the definition of the SimpleScanner +/* + * simple.h -- the definition of the SimpleScanner + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_SCANNERS_SIMPLE_H -#define PIRE_SCANNERS_SIMPLE_H - + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_SCANNERS_SIMPLE_H +#define PIRE_SCANNERS_SIMPLE_H + #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/stub/saveload.h> -#include "common.h" - -namespace Pire { - -/** - * More faster version than the Scanner, but incapable of storing multiple - * regexps and taking more memory for the same regexp. - */ -class SimpleScanner { -private: - static const size_t STATE_ROW_SIZE = MaxChar + 1; // All characters + 1 element to store final state flag - -public: - typedef size_t Transition; - typedef ui16 Letter; - typedef ui32 Action; - typedef ui8 Tag; - - SimpleScanner() { Alias(Null()); } +#include "common.h" + +namespace Pire { + +/** + * More faster version than the Scanner, but incapable of storing multiple + * regexps and taking more memory for the same regexp. + */ +class SimpleScanner { +private: + static const size_t STATE_ROW_SIZE = MaxChar + 1; // All characters + 1 element to store final state flag + +public: + typedef size_t Transition; + typedef ui16 Letter; + typedef ui32 Action; + typedef ui8 Tag; + + SimpleScanner() { Alias(Null()); } explicit SimpleScanner(Fsm& fsm, size_t distance = 0); - - size_t Size() const { return m.statesCount; } - bool Empty() const { return m_transitions == Null().m_transitions; } - - typedef size_t State; - - size_t RegexpsCount() const { return Empty() ? 0 : 1; } - size_t LettersCount() const { return MaxChar; } - - /// Checks whether specified state is in any of the final sets - bool Final(const State& state) const { return *(((const Transition*) state) - 1) != 0; } - - bool Dead(const State&) const { return false; } - + + size_t Size() const { return m.statesCount; } + bool Empty() const { return m_transitions == Null().m_transitions; } + + typedef size_t State; + + size_t RegexpsCount() const { return Empty() ? 0 : 1; } + size_t LettersCount() const { return MaxChar; } + + /// Checks whether specified state is in any of the final sets + bool Final(const State& state) const { return *(((const Transition*) state) - 1) != 0; } + + bool Dead(const State&) const { return false; } + ypair<const size_t*, const size_t*> AcceptedRegexps(const State& s) const { return Final(s) ? Accept() : Deny(); } - /// returns an initial state for this scanner - void Initialize(State& state) const { state = m.initial; } - - /// Handles one characters - Action Next(State& state, Char c) const - { - Transition shift = reinterpret_cast<const Transition*>(state)[c]; - state += shift; - return 0; - } - - bool TakeAction(State&, Action) const { return false; } - - SimpleScanner(const SimpleScanner& s): m(s.m) - { - if (!s.m_buffer) { - // Empty or mmap()-ed scanner, just copy pointers - m_buffer = 0; - m_transitions = s.m_transitions; - } else { - // In-memory scanner, perform deep copy + /// returns an initial state for this scanner + void Initialize(State& state) const { state = m.initial; } + + /// Handles one characters + Action Next(State& state, Char c) const + { + Transition shift = reinterpret_cast<const Transition*>(state)[c]; + state += shift; + return 0; + } + + bool TakeAction(State&, Action) const { return false; } + + SimpleScanner(const SimpleScanner& s): m(s.m) + { + if (!s.m_buffer) { + // Empty or mmap()-ed scanner, just copy pointers + m_buffer = 0; + m_transitions = s.m_transitions; + } else { + // In-memory scanner, perform deep copy m_buffer = BufferType(new char[BufSize()]); memcpy(m_buffer.Get(), s.m_buffer.Get(), BufSize()); Markup(m_buffer.Get()); - - m.initial += (m_transitions - s.m_transitions) * sizeof(Transition); - } - } - - // Makes a shallow ("weak") copy of the given scanner. - // The copied scanner does not maintain lifetime of the original's entrails. - void Alias(const SimpleScanner& s) - { - m = s.m; + + m.initial += (m_transitions - s.m_transitions) * sizeof(Transition); + } + } + + // Makes a shallow ("weak") copy of the given scanner. + // The copied scanner does not maintain lifetime of the original's entrails. + void Alias(const SimpleScanner& s) + { + m = s.m; m_buffer.Reset(); - m_transitions = s.m_transitions; - } - - void Swap(SimpleScanner& s) - { - DoSwap(m_buffer, s.m_buffer); - DoSwap(m.statesCount, s.m.statesCount); - DoSwap(m.initial, s.m.initial); - DoSwap(m_transitions, s.m_transitions); - } - - SimpleScanner& operator = (const SimpleScanner& s) { SimpleScanner(s).Swap(*this); return *this; } - + m_transitions = s.m_transitions; + } + + void Swap(SimpleScanner& s) + { + DoSwap(m_buffer, s.m_buffer); + DoSwap(m.statesCount, s.m.statesCount); + DoSwap(m.initial, s.m.initial); + DoSwap(m_transitions, s.m_transitions); + } + + SimpleScanner& operator = (const SimpleScanner& s) { SimpleScanner(s).Swap(*this); return *this; } + ~SimpleScanner() = default; - - /* - * Constructs the scanner from mmap()-ed memory range, returning a pointer - * to unconsumed part of the buffer. - */ - const void* Mmap(const void* ptr, size_t size) - { - Impl::CheckAlign(ptr); - SimpleScanner s; - - const size_t* p = reinterpret_cast<const size_t*>(ptr); + + /* + * Constructs the scanner from mmap()-ed memory range, returning a pointer + * to unconsumed part of the buffer. + */ + const void* Mmap(const void* ptr, size_t size) + { + Impl::CheckAlign(ptr); + SimpleScanner s; + + const size_t* p = reinterpret_cast<const size_t*>(ptr); Impl::ValidateHeader(p, size, ScannerIOTypes::SimpleScanner, sizeof(m)); - if (size < sizeof(s.m)) - throw Error("EOF reached while mapping NPire::Scanner"); - - memcpy(&s.m, p, sizeof(s.m)); - Impl::AdvancePtr(p, size, sizeof(s.m)); - Impl::AlignPtr(p, size); - - bool empty = *((const bool*) p); - Impl::AdvancePtr(p, size, sizeof(empty)); - Impl::AlignPtr(p, size); - - if (empty) - s.Alias(Null()); - else { - if (size < s.BufSize()) - throw Error("EOF reached while mapping NPire::Scanner"); - s.Markup(const_cast<size_t*>(p)); - s.m.initial += reinterpret_cast<size_t>(s.m_transitions); - - Swap(s); - Impl::AdvancePtr(p, size, BufSize()); - } - return Impl::AlignPtr(p, size); - } - - size_t StateIndex(State s) const - { - return (s - reinterpret_cast<size_t>(m_transitions)) / (STATE_ROW_SIZE * sizeof(Transition)); - } - - // Returns the size of the memory buffer used (or required) by scanner. - size_t BufSize() const - { - return STATE_ROW_SIZE * m.statesCount * sizeof(Transition); // Transitions table - } - - void Save(yostream*) const; - void Load(yistream*); - -protected: - struct Locals { - size_t statesCount; - size_t initial; - } m; - + if (size < sizeof(s.m)) + throw Error("EOF reached while mapping NPire::Scanner"); + + memcpy(&s.m, p, sizeof(s.m)); + Impl::AdvancePtr(p, size, sizeof(s.m)); + Impl::AlignPtr(p, size); + + bool empty = *((const bool*) p); + Impl::AdvancePtr(p, size, sizeof(empty)); + Impl::AlignPtr(p, size); + + if (empty) + s.Alias(Null()); + else { + if (size < s.BufSize()) + throw Error("EOF reached while mapping NPire::Scanner"); + s.Markup(const_cast<size_t*>(p)); + s.m.initial += reinterpret_cast<size_t>(s.m_transitions); + + Swap(s); + Impl::AdvancePtr(p, size, BufSize()); + } + return Impl::AlignPtr(p, size); + } + + size_t StateIndex(State s) const + { + return (s - reinterpret_cast<size_t>(m_transitions)) / (STATE_ROW_SIZE * sizeof(Transition)); + } + + // Returns the size of the memory buffer used (or required) by scanner. + size_t BufSize() const + { + return STATE_ROW_SIZE * m.statesCount * sizeof(Transition); // Transitions table + } + + void Save(yostream*) const; + void Load(yistream*); + +protected: + struct Locals { + size_t statesCount; + size_t initial; + } m; + using BufferType = TArrayHolder<char>; BufferType m_buffer; - - Transition* m_transitions; - - inline static const SimpleScanner& Null() - { - static const SimpleScanner n = Fsm::MakeFalse().Compile<SimpleScanner>(); - return n; - } - + + Transition* m_transitions; + + inline static const SimpleScanner& Null() + { + static const SimpleScanner n = Fsm::MakeFalse().Compile<SimpleScanner>(); + return n; + } + static ypair<const size_t*, const size_t*> Accept() { static size_t v[1] = { 0 }; @@ -197,65 +197,65 @@ protected: return ymake_pair(v, v); } - /* - * Initializes pointers depending on buffer start, letters and states count - */ - void Markup(void* ptr) - { - m_transitions = reinterpret_cast<Transition*>(ptr); - } - - void SetJump(size_t oldState, Char c, size_t newState) - { + /* + * Initializes pointers depending on buffer start, letters and states count + */ + void Markup(void* ptr) + { + m_transitions = reinterpret_cast<Transition*>(ptr); + } + + void SetJump(size_t oldState, Char c, size_t newState) + { Y_ASSERT(m_buffer); Y_ASSERT(oldState < m.statesCount); Y_ASSERT(newState < m.statesCount); - m_transitions[oldState * STATE_ROW_SIZE + 1 + c] - = (((newState - oldState) * STATE_ROW_SIZE) * sizeof(Transition)); - } - - unsigned long RemapAction(unsigned long action) { return action; } - - void SetInitial(size_t state) - { + m_transitions[oldState * STATE_ROW_SIZE + 1 + c] + = (((newState - oldState) * STATE_ROW_SIZE) * sizeof(Transition)); + } + + unsigned long RemapAction(unsigned long action) { return action; } + + void SetInitial(size_t state) + { Y_ASSERT(m_buffer); - m.initial = reinterpret_cast<size_t>(m_transitions + state * STATE_ROW_SIZE + 1); - } - - void SetTag(size_t state, size_t tag) - { + m.initial = reinterpret_cast<size_t>(m_transitions + state * STATE_ROW_SIZE + 1); + } + + void SetTag(size_t state, size_t tag) + { Y_ASSERT(m_buffer); - m_transitions[state * STATE_ROW_SIZE] = tag; - } - -}; + m_transitions[state * STATE_ROW_SIZE] = tag; + } + +}; inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) -{ +{ if (distance) { fsm = CreateApproxFsm(fsm, distance); } - fsm.Canonize(); + fsm.Canonize(); - m.statesCount = fsm.Size(); + m.statesCount = fsm.Size(); m_buffer = BufferType(new char[BufSize()]); memset(m_buffer.Get(), 0, BufSize()); Markup(m_buffer.Get()); - m.initial = reinterpret_cast<size_t>(m_transitions + fsm.Initial() * STATE_ROW_SIZE + 1); - for (size_t state = 0; state < fsm.Size(); ++state) - SetTag(state, fsm.Tag(state) | (fsm.IsFinal(state) ? 1 : 0)); - - for (size_t from = 0; from != fsm.Size(); ++from) + m.initial = reinterpret_cast<size_t>(m_transitions + fsm.Initial() * STATE_ROW_SIZE + 1); + for (size_t state = 0; state < fsm.Size(); ++state) + SetTag(state, fsm.Tag(state) | (fsm.IsFinal(state) ? 1 : 0)); + + for (size_t from = 0; from != fsm.Size(); ++from) for (auto&& i : fsm.Letters()) { const auto& tos = fsm.Destinations(from, i.first); - if (tos.empty()) - continue; + if (tos.empty()) + continue; for (auto&& l : i.second.second) for (auto&& to : tos) SetJump(from, l, to); - } -} - - -} + } +} + -#endif +} + +#endif diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h index 6adfcb8c1d..fa449bb1c5 100644 --- a/contrib/libs/pire/pire/scanners/slow.h +++ b/contrib/libs/pire/pire/scanners/slow.h @@ -1,29 +1,29 @@ -/* - * slow.h -- definition of the SlowScanner +/* + * slow.h -- definition of the SlowScanner + * + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_SCANNERS_SLOW_H -#define PIRE_SCANNERS_SLOW_H - + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_SCANNERS_SLOW_H +#define PIRE_SCANNERS_SLOW_H + #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/partition.h> #include <contrib/libs/pire/pire/vbitset.h> @@ -32,271 +32,271 @@ #include <contrib/libs/pire/pire/stub/saveload.h> #include <contrib/libs/pire/pire/stub/stl.h> -#include "common.h" - -#ifdef PIRE_DEBUG -#include <iostream> +#include "common.h" + +#ifdef PIRE_DEBUG +#include <iostream> #include <contrib/libs/pire/pire/stub/lexical_cast.h> -#endif - -namespace Pire { - -/** - * A 'slow' scanner. - * Takes O( str.length() * this->m_states.size() ) time to scan string, - * but does not require FSM to be deterministic. - * Thus can be used to handle something sorta /x.{40}$/, - * where deterministic FSM contains 2^40 states and hence cannot fit - * in memory. - */ -class SlowScanner { -public: - typedef size_t Transition; - typedef ui16 Letter; - typedef ui32 Action; - typedef ui8 Tag; - +#endif + +namespace Pire { + +/** + * A 'slow' scanner. + * Takes O( str.length() * this->m_states.size() ) time to scan string, + * but does not require FSM to be deterministic. + * Thus can be used to handle something sorta /x.{40}$/, + * where deterministic FSM contains 2^40 states and hence cannot fit + * in memory. + */ +class SlowScanner { +public: + typedef size_t Transition; + typedef ui16 Letter; + typedef ui32 Action; + typedef ui8 Tag; + enum { - FinalFlag = 1, - DeadFlag = 0 - }; - - struct State { + FinalFlag = 1, + DeadFlag = 0 + }; + + struct State { TVector<unsigned> states; - BitSet flags; - - State() {} - State(size_t size): flags(size) { states.reserve(size); } - void Swap(State& s) { states.swap(s.states); flags.Swap(s.flags); } - -#ifdef PIRE_DEBUG - friend yostream& operator << (yostream& stream, const State& state) { return stream << Join(state.states.begin(), state.states.end(), ", "); } -#endif - }; - + BitSet flags; + + State() {} + State(size_t size): flags(size) { states.reserve(size); } + void Swap(State& s) { states.swap(s.states); flags.Swap(s.flags); } + +#ifdef PIRE_DEBUG + friend yostream& operator << (yostream& stream, const State& state) { return stream << Join(state.states.begin(), state.states.end(), ", "); } +#endif + }; + SlowScanner(bool needActions = false) { Alias(Null()); need_actions = needActions; } - + size_t GetLettersCount() const {return m.lettersCount; }; size_t Size() const { return GetSize(); } size_t GetSize() const { return m.statesCount; } - bool Empty() const { return m_finals == Null().m_finals; } - - size_t Id() const {return (size_t) -1;} - size_t RegexpsCount() const { return Empty() ? 0 : 1; } - - void Initialize(State& state) const - { - state.states.clear(); - state.states.reserve(m.statesCount); - state.states.push_back(m.start); - BitSet(m.statesCount).Swap(state.flags); - } - + bool Empty() const { return m_finals == Null().m_finals; } + + size_t Id() const {return (size_t) -1;} + size_t RegexpsCount() const { return Empty() ? 0 : 1; } + + void Initialize(State& state) const + { + state.states.clear(); + state.states.reserve(m.statesCount); + state.states.push_back(m.start); + BitSet(m.statesCount).Swap(state.flags); + } + Char Translate(Char ch) const - { + { return m_letters[static_cast<size_t>(ch)]; } Action NextTranslated(const State& current, State& next, Char l) const { - next.flags.Clear(); - next.states.clear(); + next.flags.Clear(); + next.states.clear(); for (auto&& state : current.states) { - const unsigned* begin = 0; - const unsigned* end = 0; - if (!m_vecptr) { + const unsigned* begin = 0; + const unsigned* end = 0; + if (!m_vecptr) { const size_t* pos = m_jumpPos + state * m.lettersCount + l; - begin = m_jumps + pos[0]; - end = m_jumps + pos[1]; - } else { + begin = m_jumps + pos[0]; + end = m_jumps + pos[1]; + } else { const auto& v = (*m_vecptr)[state * m.lettersCount + l]; - if (!v.empty()) { - begin = &v[0]; - end = &v[0] + v.size(); - } - } - - for (; begin != end; ++begin) - if (!next.flags.Test(*begin)) { - next.flags.Set(*begin); - next.states.push_back(*begin); - } - } - - return 0; - } - + if (!v.empty()) { + begin = &v[0]; + end = &v[0] + v.size(); + } + } + + for (; begin != end; ++begin) + if (!next.flags.Test(*begin)) { + next.flags.Set(*begin); + next.states.push_back(*begin); + } + } + + return 0; + } + Action Next(const State& current, State& next, Char c) const { return NextTranslated(current, next, Translate(c)); } - bool TakeAction(State&, Action) const { return false; } - + bool TakeAction(State&, Action) const { return false; } + Action NextTranslated(State& s, Char l) const - { - State dest(m.statesCount); + { + State dest(m.statesCount); Action a = NextTranslated(s, dest, l); - s.Swap(dest); - return a; - } - + s.Swap(dest); + return a; + } + Action Next(State& s, Char c) const { return NextTranslated(s, Translate(c)); } - bool Final(const State& s) const - { + bool Final(const State& s) const + { for (auto&& state : s.states) if (m_finals[state]) - return true; - return false; - } - - bool Dead(const State&) const - { - return false; - } - - ypair<const size_t*, const size_t*> AcceptedRegexps(const State& s) const { - return Final(s) ? Accept() : Deny(); - } - - bool CanStop(const State& s) const { - return Final(s); - } - - const void* Mmap(const void* ptr, size_t size) - { - Impl::CheckAlign(ptr); - SlowScanner s; - const size_t* p = reinterpret_cast<const size_t*>(ptr); - + return true; + return false; + } + + bool Dead(const State&) const + { + return false; + } + + ypair<const size_t*, const size_t*> AcceptedRegexps(const State& s) const { + return Final(s) ? Accept() : Deny(); + } + + bool CanStop(const State& s) const { + return Final(s); + } + + const void* Mmap(const void* ptr, size_t size) + { + Impl::CheckAlign(ptr); + SlowScanner s; + const size_t* p = reinterpret_cast<const size_t*>(ptr); + Impl::ValidateHeader(p, size, ScannerIOTypes::SlowScanner, sizeof(s.m)); - Locals* locals; - Impl::MapPtr(locals, 1, p, size); - memcpy(&s.m, locals, sizeof(s.m)); - - bool empty = *((const bool*) p); - Impl::AdvancePtr(p, size, sizeof(empty)); - Impl::AlignPtr(p, size); - - if (empty) - s.Alias(Null()); - else { - s.m_vecptr = 0; - Impl::MapPtr(s.m_letters, MaxChar, p, size); - Impl::MapPtr(s.m_finals, s.m.statesCount, p, size); - Impl::MapPtr(s.m_jumpPos, s.m.statesCount * s.m.lettersCount + 1, p, size); - Impl::MapPtr(s.m_jumps, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size); + Locals* locals; + Impl::MapPtr(locals, 1, p, size); + memcpy(&s.m, locals, sizeof(s.m)); + + bool empty = *((const bool*) p); + Impl::AdvancePtr(p, size, sizeof(empty)); + Impl::AlignPtr(p, size); + + if (empty) + s.Alias(Null()); + else { + s.m_vecptr = 0; + Impl::MapPtr(s.m_letters, MaxChar, p, size); + Impl::MapPtr(s.m_finals, s.m.statesCount, p, size); + Impl::MapPtr(s.m_jumpPos, s.m.statesCount * s.m.lettersCount + 1, p, size); + Impl::MapPtr(s.m_jumps, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size); if (need_actions) Impl::MapPtr(s.m_actions, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size); - Swap(s); - } - return (const void*) p; - } - - void Swap(SlowScanner& s) - { - DoSwap(m_finals, s.m_finals); - DoSwap(m_jumps, s.m_jumps); + Swap(s); + } + return (const void*) p; + } + + void Swap(SlowScanner& s) + { + DoSwap(m_finals, s.m_finals); + DoSwap(m_jumps, s.m_jumps); DoSwap(m_actions, s.m_actions); - DoSwap(m_jumpPos, s.m_jumpPos); - DoSwap(m.statesCount, s.m.statesCount); - DoSwap(m.lettersCount, s.m.lettersCount); - DoSwap(m.start, s.m.start); - DoSwap(m_letters, s.m_letters); - DoSwap(m_pool, s.m_pool); - DoSwap(m_vec, s.m_vec); - - DoSwap(m_vecptr, s.m_vecptr); + DoSwap(m_jumpPos, s.m_jumpPos); + DoSwap(m.statesCount, s.m.statesCount); + DoSwap(m.lettersCount, s.m.lettersCount); + DoSwap(m.start, s.m.start); + DoSwap(m_letters, s.m_letters); + DoSwap(m_pool, s.m_pool); + DoSwap(m_vec, s.m_vec); + + DoSwap(m_vecptr, s.m_vecptr); DoSwap(need_actions, s.need_actions); DoSwap(m_actionsvec, s.m_actionsvec); - if (m_vecptr == &s.m_vec) - m_vecptr = &m_vec; - if (s.m_vecptr == &m_vec) - s.m_vecptr = &s.m_vec; - } - - SlowScanner(const SlowScanner& s) - : m(s.m) - , m_vec(s.m_vec) + if (m_vecptr == &s.m_vec) + m_vecptr = &m_vec; + if (s.m_vecptr == &m_vec) + s.m_vecptr = &s.m_vec; + } + + SlowScanner(const SlowScanner& s) + : m(s.m) + , m_vec(s.m_vec) , need_actions(s.need_actions) , m_actionsvec(s.m_actionsvec) - { - if (s.m_vec.empty()) { - // Empty or mmap()-ed scanner, just copy pointers - m_finals = s.m_finals; - m_jumps = s.m_jumps; + { + if (s.m_vec.empty()) { + // Empty or mmap()-ed scanner, just copy pointers + m_finals = s.m_finals; + m_jumps = s.m_jumps; m_actions = s.m_actions; - m_jumpPos = s.m_jumpPos; - m_letters = s.m_letters; - m_vecptr = 0; - } else { - // In-memory scanner, perform deep copy - alloc(m_letters, MaxChar); - memcpy(m_letters, s.m_letters, sizeof(*m_letters) * MaxChar); - m_jumps = 0; - m_jumpPos = 0; + m_jumpPos = s.m_jumpPos; + m_letters = s.m_letters; + m_vecptr = 0; + } else { + // In-memory scanner, perform deep copy + alloc(m_letters, MaxChar); + memcpy(m_letters, s.m_letters, sizeof(*m_letters) * MaxChar); + m_jumps = 0; + m_jumpPos = 0; m_actions = 0; - alloc(m_finals, m.statesCount); - memcpy(m_finals, s.m_finals, sizeof(*m_finals) * m.statesCount); - m_vecptr = &m_vec; - } - } + alloc(m_finals, m.statesCount); + memcpy(m_finals, s.m_finals, sizeof(*m_finals) * m.statesCount); + m_vecptr = &m_vec; + } + } explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) : need_actions(needActions) - { + { if (distance) { fsm = CreateApproxFsm(fsm, distance); } if (removeEpsilons) fsm.RemoveEpsilons(); fsm.Sparse(!removeEpsilons); - - m.statesCount = fsm.Size(); - m.lettersCount = fsm.Letters().Size(); - - m_vec.resize(m.statesCount * m.lettersCount); + + m.statesCount = fsm.Size(); + m.lettersCount = fsm.Letters().Size(); + + m_vec.resize(m.statesCount * m.lettersCount); if (need_actions) m_actionsvec.resize(m.statesCount * m.lettersCount); - m_vecptr = &m_vec; - alloc(m_letters, MaxChar); - m_jumps = 0; + m_vecptr = &m_vec; + alloc(m_letters, MaxChar); + m_jumps = 0; m_actions = 0; - m_jumpPos = 0; - alloc(m_finals, m.statesCount); - - // Build letter translation table + m_jumpPos = 0; + alloc(m_finals, m.statesCount); + + // Build letter translation table Fill(m_letters, m_letters + MaxChar, 0); for (auto&& letter : fsm.Letters()) for (auto&& character : letter.second.second) m_letters[character] = letter.second.first; - - m.start = fsm.Initial(); - BuildScanner(fsm, *this); - } - - - SlowScanner& operator = (const SlowScanner& s) { SlowScanner(s).Swap(*this); return *this; } - - ~SlowScanner() - { + + m.start = fsm.Initial(); + BuildScanner(fsm, *this); + } + + + SlowScanner& operator = (const SlowScanner& s) { SlowScanner(s).Swap(*this); return *this; } + + ~SlowScanner() + { for (auto&& i : m_pool) free(i); - } - - void Save(yostream*) const; - void Load(yistream*); - - const State& StateIndex(const State& s) const { return s; } - + } + + void Save(yostream*) const; + void Load(yistream*); + + const State& StateIndex(const State& s) const { return s; } + protected: bool IsMmaped() const { @@ -338,84 +338,84 @@ protected: return m_finals[pos]; } -private: - - struct Locals { - size_t statesCount; - size_t lettersCount; - size_t start; - } m; - - bool* m_finals; - unsigned* m_jumps; +private: + + struct Locals { + size_t statesCount; + size_t lettersCount; + size_t start; + } m; + + bool* m_finals; + unsigned* m_jumps; Action* m_actions; - size_t* m_jumpPos; - size_t* m_letters; - + size_t* m_jumpPos; + size_t* m_letters; + TVector<void*> m_pool; TVector< TVector<unsigned> > m_vec, *m_vecptr; - + bool need_actions; TVector<TVector<Action>> m_actionsvec; static const SlowScanner& Null(); - - template<class T> void alloc(T*& p, size_t size) - { - p = static_cast<T*>(malloc(size * sizeof(T))); - memset(p, 0, size * sizeof(T)); - m_pool.push_back(p); - } - - void Alias(const SlowScanner& s) + + template<class T> void alloc(T*& p, size_t size) + { + p = static_cast<T*>(malloc(size * sizeof(T))); + memset(p, 0, size * sizeof(T)); + m_pool.push_back(p); + } + + void Alias(const SlowScanner& s) { - memcpy(&m, &s.m, sizeof(m)); - m_vec.clear(); + memcpy(&m, &s.m, sizeof(m)); + m_vec.clear(); need_actions = s.need_actions; m_actionsvec.clear(); - m_finals = s.m_finals; - m_jumps = s.m_jumps; + m_finals = s.m_finals; + m_jumps = s.m_jumps; m_actions = s.m_actions; - m_jumpPos = s.m_jumpPos; - m_letters = s.m_letters; - m_vecptr = s.m_vecptr; - m_pool.clear(); - } + m_jumpPos = s.m_jumpPos; + m_letters = s.m_letters; + m_vecptr = s.m_vecptr; + m_pool.clear(); + } void SetJump(size_t oldState, Char c, size_t newState, unsigned long action) - { + { Y_ASSERT(!m_vec.empty()); Y_ASSERT(oldState < m.statesCount); Y_ASSERT(newState < m.statesCount); - - size_t idx = oldState * m.lettersCount + m_letters[c]; - m_vec[idx].push_back(newState); + + size_t idx = oldState * m.lettersCount + m_letters[c]; + m_vec[idx].push_back(newState); if (need_actions) m_actionsvec[idx].push_back(action); - } - - unsigned long RemapAction(unsigned long action) { return action; } - - void SetInitial(size_t state) { m.start = state; } - void SetTag(size_t state, ui8 tag) { m_finals[state] = (tag != 0); } - - void FinishBuild() {} - - static ypair<const size_t*, const size_t*> Accept() - { - static size_t v[1] = { 0 }; - - return ymake_pair(v, v + 1); - } - - static ypair<const size_t*, const size_t*> Deny() - { - static size_t v[1] = { 0 }; - return ymake_pair(v, v); - } - - friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&); -}; - + } + + unsigned long RemapAction(unsigned long action) { return action; } + + void SetInitial(size_t state) { m.start = state; } + void SetTag(size_t state, ui8 tag) { m_finals[state] = (tag != 0); } + + void FinishBuild() {} + + static ypair<const size_t*, const size_t*> Accept() + { + static size_t v[1] = { 0 }; + + return ymake_pair(v, v + 1); + } + + static ypair<const size_t*, const size_t*> Deny() + { + static size_t v[1] = { 0 }; + return ymake_pair(v, v); + } + + friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&); +}; + template<> inline SlowScanner Fsm::Compile(size_t distance) { return SlowScanner(*this, false, true, distance); @@ -428,27 +428,27 @@ inline const SlowScanner& SlowScanner::Null() } #ifndef PIRE_DEBUG -/// A specialization of Run(), since its state is much heavier than other ones -/// and we thus want to avoid copying states. -template<> +/// A specialization of Run(), since its state is much heavier than other ones +/// and we thus want to avoid copying states. +template<> inline void Run<SlowScanner>(const SlowScanner& scanner, SlowScanner::State& state, TStringBuf str) -{ - SlowScanner::State temp; - scanner.Initialize(temp); - - SlowScanner::State* src = &state; - SlowScanner::State* dest = &temp; - +{ + SlowScanner::State temp; + scanner.Initialize(temp); + + SlowScanner::State* src = &state; + SlowScanner::State* dest = &temp; + for (auto it = str.begin(); it != str.end(); ++it) { scanner.Next(*src, *dest, static_cast<unsigned char>(*it)); - DoSwap(src, dest); - } - if (src != &state) - state = *src; -} -#endif - -} - - -#endif + DoSwap(src, dest); + } + if (src != &state) + state = *src; +} +#endif + +} + + +#endif diff --git a/contrib/libs/pire/pire/static_assert.h b/contrib/libs/pire/pire/static_assert.h index 90dd0ff4f0..f56a899ae7 100644 --- a/contrib/libs/pire/pire/static_assert.h +++ b/contrib/libs/pire/pire/static_assert.h @@ -1,36 +1,36 @@ -/* - * static_assert.h -- compile-time assertions - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * static_assert.h -- compile-time assertions * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - -#ifndef PIRE_ASSERT_H_INCLUDED -#define PIRE_ASSERT_H_INCLUDED - -namespace Pire { namespace Impl { - - // A static (compile-tile) assertion. - // The idea was shamelessly borrowed from Boost. - template<bool x> struct StaticAssertion; - template<> struct StaticAssertion<true> {}; -#define PIRE_STATIC_ASSERT(x) \ - enum { PireStaticAssertion ## __LINE__ = sizeof(Pire::Impl::StaticAssertion<(bool) (x)>) } -}} - -#endif + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + +#ifndef PIRE_ASSERT_H_INCLUDED +#define PIRE_ASSERT_H_INCLUDED + +namespace Pire { namespace Impl { + + // A static (compile-tile) assertion. + // The idea was shamelessly borrowed from Boost. + template<bool x> struct StaticAssertion; + template<> struct StaticAssertion<true> {}; +#define PIRE_STATIC_ASSERT(x) \ + enum { PireStaticAssertion ## __LINE__ = sizeof(Pire::Impl::StaticAssertion<(bool) (x)>) } +}} + +#endif diff --git a/contrib/libs/pire/pire/stub/defaults.h b/contrib/libs/pire/pire/stub/defaults.h index 561c23251b..50900a8731 100644 --- a/contrib/libs/pire/pire/stub/defaults.h +++ b/contrib/libs/pire/pire/stub/defaults.h @@ -1,3 +1,3 @@ -#include <util/system/defaults.h> -#include <errno.h> - +#include <util/system/defaults.h> +#include <errno.h> + diff --git a/contrib/libs/pire/pire/stub/hacks.h b/contrib/libs/pire/pire/stub/hacks.h index 07319b0b37..6172a0ee2a 100644 --- a/contrib/libs/pire/pire/stub/hacks.h +++ b/contrib/libs/pire/pire/stub/hacks.h @@ -1,7 +1,7 @@ -#ifndef PIRE_STUB_USE_PIRE_H_INCLUDED -#define PIRE_STUB_USE_PIRE_H_INCLUDED - -namespace Pire {} -using namespace Pire; - -#endif +#ifndef PIRE_STUB_USE_PIRE_H_INCLUDED +#define PIRE_STUB_USE_PIRE_H_INCLUDED + +namespace Pire {} +using namespace Pire; + +#endif diff --git a/contrib/libs/pire/pire/stub/lexical_cast.h b/contrib/libs/pire/pire/stub/lexical_cast.h index a060c4dddb..68d6dc92ae 100644 --- a/contrib/libs/pire/pire/stub/lexical_cast.h +++ b/contrib/libs/pire/pire/stub/lexical_cast.h @@ -1 +1 @@ -#include <util/string/cast.h> +#include <util/string/cast.h> diff --git a/contrib/libs/pire/pire/stub/memstreams.h b/contrib/libs/pire/pire/stub/memstreams.h index 92c75ca6c9..5cfd9a7896 100644 --- a/contrib/libs/pire/pire/stub/memstreams.h +++ b/contrib/libs/pire/pire/stub/memstreams.h @@ -1,11 +1,11 @@ -#include <util/stream/mem.h> -#include <util/stream/aligned.h> -#include <util/stream/buffer.h> -#include <util/generic/buffer.h> - -namespace Pire { - typedef TBuffer Buffer; - typedef TBuffer::TIterator BufferIterator; - typedef TBufferOutput BufferOutput; - typedef TAlignedOutput AlignedOutput; +#include <util/stream/mem.h> +#include <util/stream/aligned.h> +#include <util/stream/buffer.h> +#include <util/generic/buffer.h> + +namespace Pire { + typedef TBuffer Buffer; + typedef TBuffer::TIterator BufferIterator; + typedef TBufferOutput BufferOutput; + typedef TAlignedOutput AlignedOutput; } diff --git a/contrib/libs/pire/pire/stub/noncopyable.h b/contrib/libs/pire/pire/stub/noncopyable.h index ab18546e51..1791f43638 100644 --- a/contrib/libs/pire/pire/stub/noncopyable.h +++ b/contrib/libs/pire/pire/stub/noncopyable.h @@ -1,5 +1,5 @@ -#pragma once -#include <util/generic/noncopyable.h> -namespace Pire { - typedef TNonCopyable NonCopyable; -} +#pragma once +#include <util/generic/noncopyable.h> +namespace Pire { + typedef TNonCopyable NonCopyable; +} diff --git a/contrib/libs/pire/pire/stub/saveload.h b/contrib/libs/pire/pire/stub/saveload.h index 97768ff463..6808c7a400 100644 --- a/contrib/libs/pire/pire/stub/saveload.h +++ b/contrib/libs/pire/pire/stub/saveload.h @@ -1,2 +1,2 @@ -#pragma once -#include <util/ysaveload.h> +#pragma once +#include <util/ysaveload.h> diff --git a/contrib/libs/pire/pire/stub/singleton.h b/contrib/libs/pire/pire/stub/singleton.h index f24e924460..193817f100 100644 --- a/contrib/libs/pire/pire/stub/singleton.h +++ b/contrib/libs/pire/pire/stub/singleton.h @@ -1,8 +1,8 @@ -#pragma once -#include <util/generic/singleton.h> -namespace Pire { - template<class T> - const T& DefaultValue() { - return Default<T>(); - } -} +#pragma once +#include <util/generic/singleton.h> +namespace Pire { + template<class T> + const T& DefaultValue() { + return Default<T>(); + } +} diff --git a/contrib/libs/pire/pire/stub/stl.h b/contrib/libs/pire/pire/stub/stl.h index 98ebd9f7c6..705981a7e6 100644 --- a/contrib/libs/pire/pire/stub/stl.h +++ b/contrib/libs/pire/pire/stub/stl.h @@ -1,66 +1,66 @@ -#ifndef PIRE_COMPAT_H_INCLUDED -#define PIRE_COMPAT_H_INCLUDED - -#include <bitset> -#include <algorithm> -#include <iterator> -#include <functional> -#include <utility> -#include <memory> - +#ifndef PIRE_COMPAT_H_INCLUDED +#define PIRE_COMPAT_H_INCLUDED + +#include <bitset> +#include <algorithm> +#include <iterator> +#include <functional> +#include <utility> +#include <memory> + #include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/generic/deque.h> -#include <util/generic/list.h> -#include <util/generic/map.h> -#include <util/generic/set.h> -#include <util/generic/hash.h> -#include <util/generic/hash_set.h> -#include <util/generic/ptr.h> -#include <util/generic/yexception.h> -#include <util/generic/utility.h> -#include <util/generic/algorithm.h> -#include <util/stream/input.h> -#include <util/stream/output.h> +#include <util/generic/vector.h> +#include <util/generic/deque.h> +#include <util/generic/list.h> +#include <util/generic/map.h> +#include <util/generic/set.h> +#include <util/generic/hash.h> +#include <util/generic/hash_set.h> +#include <util/generic/ptr.h> +#include <util/generic/yexception.h> +#include <util/generic/utility.h> +#include <util/generic/algorithm.h> +#include <util/stream/input.h> +#include <util/stream/output.h> #include <util/string/reverse.h> -#include <util/string/vector.h> - +#include <util/string/vector.h> + namespace Pire { using ystring = TString; template<size_t N> using ybitset = std::bitset<N>; template<typename T1, typename T2> using ypair = std::pair<T1, T2>; template<typename T> using yauto_ptr = std::auto_ptr<T>; template<typename Arg1, typename Arg2, typename Result> using ybinary_function = std::binary_function<Arg1, Arg2, Result>; - + template<typename T1, typename T2> inline ypair<T1, T2> ymake_pair(T1 v1, T2 v2) { return std::make_pair(v1, v2); } - + template<typename T> inline T ymax(T v1, T v2) { return std::max(v1, v2); } - + template<typename T> inline T ymin(T v1, T v2) { return std::min(v1, v2); } - + template<class Iter, class T> void Fill(Iter begin, Iter end, T t) { std::fill(begin, end, t); } - + class Error: public yexception { public: Error(const char* msg) { *this << msg; } Error(const ystring& msg) { *this << msg; } }; - + typedef IOutputStream yostream; typedef IInputStream yistream; template<class Iter> ystring Join(Iter begin, Iter end, const ystring& separator) { return JoinStrings(begin, end, separator); } -} - -#endif +} + +#endif diff --git a/contrib/libs/pire/pire/stub/utf8.h b/contrib/libs/pire/pire/stub/utf8.h index 51ea0479d4..189520d2cb 100644 --- a/contrib/libs/pire/pire/stub/utf8.h +++ b/contrib/libs/pire/pire/stub/utf8.h @@ -1,7 +1,7 @@ #pragma once #include <library/cpp/charset/codepage.h> -#include <util/charset/unidata.h> +#include <util/charset/unidata.h> inline wchar32 to_lower(wchar32 c) { return ToLower(c); diff --git a/contrib/libs/pire/pire/vbitset.h b/contrib/libs/pire/pire/vbitset.h index 69cb5aeba3..904c27d1cb 100644 --- a/contrib/libs/pire/pire/vbitset.h +++ b/contrib/libs/pire/pire/vbitset.h @@ -1,120 +1,120 @@ -/* - * vbitset.h -- a bitset of variable size. - * - * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, - * Alexander Gololobov <agololobov@gmail.com> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* + * vbitset.h -- a bitset of variable size. * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#ifndef PIRE_VBITSET_H -#define PIRE_VBITSET_H - - -#include <string.h> - + * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>, + * Alexander Gololobov <agololobov@gmail.com> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#ifndef PIRE_VBITSET_H +#define PIRE_VBITSET_H + + +#include <string.h> + #include <contrib/libs/pire/pire/stub/stl.h> -namespace Pire { - -#ifdef _DEBUG -#define VBITSET_CHECK_SIZE(x) CheckSize(x) -#else -#define VBITSET_CHECK_SIZE(x) x -#endif - -/// A bitset with variable width -class BitSet { -public: - typedef size_t value_type; - typedef size_t* pointer; - typedef size_t& reference; - typedef const size_t& const_reference; - - class const_iterator; - - BitSet() - : m_data(1, 1) - { - } - BitSet(size_t size) - : m_data(RoundUp(size + 1) + 1) - , m_size(size) - { - m_data[RoundDown(size)] |= (1U << Remainder(size)); - } - - void Swap(BitSet& s) - { - m_data.swap(s.m_data); - DoSwap(m_size, s.m_size); - } - - /// Sets the specified bit to 1. - void Set(size_t pos) { - m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] |= (1U << Remainder(pos)); - } - - /// Resets the specified bit to 0. - void Reset(size_t pos) { - m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] &= ~(1U << Remainder(pos)); - } - - /// Checks whether the specified bit is set to 1. - bool Test(size_t pos) const { - return (m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] & (1U << Remainder(pos))) != 0; - } - - size_t Size() const { - return m_size; - } - - void Resize(size_t newsize) - { - m_data.resize(RoundUp(newsize + 1)); - if (Remainder(newsize) && !m_data.empty()) - m_data[m_data.size() - 1] &= ((1U << Remainder(newsize)) - 1); // Clear tail - m_data[RoundDown(newsize)] |= (1U << Remainder(newsize)); - } - - /// Resets all bits to 0. - void Clear() { memset(&m_data[0], 0, m_data.size() * sizeof(ContainerType)); } - -private: - typedef unsigned char ContainerType; - static const size_t ItemSize = sizeof(ContainerType) * 8; +namespace Pire { + +#ifdef _DEBUG +#define VBITSET_CHECK_SIZE(x) CheckSize(x) +#else +#define VBITSET_CHECK_SIZE(x) x +#endif + +/// A bitset with variable width +class BitSet { +public: + typedef size_t value_type; + typedef size_t* pointer; + typedef size_t& reference; + typedef const size_t& const_reference; + + class const_iterator; + + BitSet() + : m_data(1, 1) + { + } + BitSet(size_t size) + : m_data(RoundUp(size + 1) + 1) + , m_size(size) + { + m_data[RoundDown(size)] |= (1U << Remainder(size)); + } + + void Swap(BitSet& s) + { + m_data.swap(s.m_data); + DoSwap(m_size, s.m_size); + } + + /// Sets the specified bit to 1. + void Set(size_t pos) { + m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] |= (1U << Remainder(pos)); + } + + /// Resets the specified bit to 0. + void Reset(size_t pos) { + m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] &= ~(1U << Remainder(pos)); + } + + /// Checks whether the specified bit is set to 1. + bool Test(size_t pos) const { + return (m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] & (1U << Remainder(pos))) != 0; + } + + size_t Size() const { + return m_size; + } + + void Resize(size_t newsize) + { + m_data.resize(RoundUp(newsize + 1)); + if (Remainder(newsize) && !m_data.empty()) + m_data[m_data.size() - 1] &= ((1U << Remainder(newsize)) - 1); // Clear tail + m_data[RoundDown(newsize)] |= (1U << Remainder(newsize)); + } + + /// Resets all bits to 0. + void Clear() { memset(&m_data[0], 0, m_data.size() * sizeof(ContainerType)); } + +private: + typedef unsigned char ContainerType; + static const size_t ItemSize = sizeof(ContainerType) * 8; TVector<ContainerType> m_data; - size_t m_size; - - static size_t RoundUp(size_t x) { return x / ItemSize + ((x % ItemSize) ? 1 : 0); } - static size_t RoundDown(size_t x) { return x / ItemSize; } - static size_t Remainder(size_t x) { return x % ItemSize; } - -#ifdef _DEBUG - size_t CheckSize(size_t size) const - { - if (size < m_size) - return size; - else - throw Error("BitSet: subscript out of range"); - } -#endif -}; - -} - -#endif + size_t m_size; + + static size_t RoundUp(size_t x) { return x / ItemSize + ((x % ItemSize) ? 1 : 0); } + static size_t RoundDown(size_t x) { return x / ItemSize; } + static size_t Remainder(size_t x) { return x % ItemSize; } + +#ifdef _DEBUG + size_t CheckSize(size_t size) const + { + if (size < m_size) + return size; + else + throw Error("BitSet: subscript out of range"); + } +#endif +}; + +} + +#endif |