aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/pire
diff options
context:
space:
mode:
authorAlexander Gololobov <davenger@yandex-team.com>2022-02-10 16:47:37 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:37 +0300
commit39608cdb86363c75ce55b2b9a69841c3b71f22cf (patch)
tree4ec132c1665bd4d68e3628aa18d937c70d32413b /contrib/libs/pire
parent54295b9bd4dc45c54d804084fd846d945148a7f0 (diff)
downloadydb-39608cdb86363c75ce55b2b9a69841c3b71f22cf.tar.gz
Restoring authorship annotation for Alexander Gololobov <davenger@yandex-team.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/pire')
-rw-r--r--contrib/libs/pire/Makefile.am4
-rw-r--r--contrib/libs/pire/README12
-rw-r--r--contrib/libs/pire/configure.ac94
-rw-r--r--contrib/libs/pire/pire/Makefile.am228
-rw-r--r--contrib/libs/pire/pire/align.h198
-rw-r--r--contrib/libs/pire/pire/any.h246
-rw-r--r--contrib/libs/pire/pire/classes.cpp260
-rw-r--r--contrib/libs/pire/pire/defs.h224
-rw-r--r--contrib/libs/pire/pire/determine.h252
-rw-r--r--contrib/libs/pire/pire/easy.cpp60
-rw-r--r--contrib/libs/pire/pire/easy.h444
-rw-r--r--contrib/libs/pire/pire/encoding.cpp198
-rw-r--r--contrib/libs/pire/pire/encoding.h136
-rw-r--r--contrib/libs/pire/pire/extra.h58
-rw-r--r--contrib/libs/pire/pire/extra/capture.cpp166
-rw-r--r--contrib/libs/pire/pire/extra/capture.h264
-rw-r--r--contrib/libs/pire/pire/extra/count.cpp274
-rw-r--r--contrib/libs/pire/pire/extra/count.h218
-rw-r--r--contrib/libs/pire/pire/extra/glyphs.cpp244
-rw-r--r--contrib/libs/pire/pire/extra/glyphs.h78
-rw-r--r--contrib/libs/pire/pire/fsm.cpp1584
-rw-r--r--contrib/libs/pire/pire/fsm.h512
-rw-r--r--contrib/libs/pire/pire/fwd.h82
-rw-r--r--contrib/libs/pire/pire/glue.h306
-rw-r--r--contrib/libs/pire/pire/inline.l510
-rw-r--r--contrib/libs/pire/pire/partition.h324
-rw-r--r--contrib/libs/pire/pire/pire.h64
-rw-r--r--contrib/libs/pire/pire/platform.h316
-rw-r--r--contrib/libs/pire/pire/re_lexer.cpp198
-rw-r--r--contrib/libs/pire/pire/re_lexer.h442
-rw-r--r--contrib/libs/pire/pire/re_parser.y208
-rw-r--r--contrib/libs/pire/pire/run.h604
-rw-r--r--contrib/libs/pire/pire/scanner_io.cpp276
-rw-r--r--contrib/libs/pire/pire/scanners/common.h188
-rw-r--r--contrib/libs/pire/pire/scanners/loaded.h420
-rw-r--r--contrib/libs/pire/pire/scanners/multi.h1986
-rw-r--r--contrib/libs/pire/pire/scanners/null.cpp8
-rw-r--r--contrib/libs/pire/pire/scanners/pair.h196
-rw-r--r--contrib/libs/pire/pire/scanners/simple.h420
-rw-r--r--contrib/libs/pire/pire/scanners/slow.h620
-rw-r--r--contrib/libs/pire/pire/static_assert.h70
-rw-r--r--contrib/libs/pire/pire/stub/defaults.h6
-rw-r--r--contrib/libs/pire/pire/stub/hacks.h14
-rw-r--r--contrib/libs/pire/pire/stub/lexical_cast.h2
-rw-r--r--contrib/libs/pire/pire/stub/memstreams.h20
-rw-r--r--contrib/libs/pire/pire/stub/noncopyable.h10
-rw-r--r--contrib/libs/pire/pire/stub/saveload.h4
-rw-r--r--contrib/libs/pire/pire/stub/singleton.h16
-rw-r--r--contrib/libs/pire/pire/stub/stl.h68
-rw-r--r--contrib/libs/pire/pire/stub/utf8.h2
-rw-r--r--contrib/libs/pire/pire/vbitset.h232
51 files changed, 6683 insertions, 6683 deletions
diff --git a/contrib/libs/pire/Makefile.am b/contrib/libs/pire/Makefile.am
index a9e8908fb6..31eb7b3e7c 100644
--- a/contrib/libs/pire/Makefile.am
+++ b/contrib/libs/pire/Makefile.am
@@ -1,2 +1,2 @@
-ACLOCAL_AMFLAGS = -I m4
-SUBDIRS = pire tests pkg samples
+ACLOCAL_AMFLAGS = -I m4
+SUBDIRS = pire tests pkg samples
diff --git a/contrib/libs/pire/README b/contrib/libs/pire/README
index 1791486f8e..13ed2d6c7a 100644
--- a/contrib/libs/pire/README
+++ b/contrib/libs/pire/README
@@ -1,6 +1,6 @@
-This is PIRE, Perl Incompatible Regular Expressions library.
-
-For detailed information about what it is, how to build and use it,
-see http://wiki.yandex-team.ru/DmitrijjProkopcev/pire .
-
-Please report bugs to dprokoptsev@yandex-team.ru or davenger@yandex-team.ru.
+This is PIRE, Perl Incompatible Regular Expressions library.
+
+For detailed information about what it is, how to build and use it,
+see http://wiki.yandex-team.ru/DmitrijjProkopcev/pire .
+
+Please report bugs to dprokoptsev@yandex-team.ru or davenger@yandex-team.ru.
diff --git a/contrib/libs/pire/configure.ac b/contrib/libs/pire/configure.ac
index 49f235129c..2068c63a7e 100644
--- a/contrib/libs/pire/configure.ac
+++ b/contrib/libs/pire/configure.ac
@@ -1,47 +1,47 @@
-AC_PREREQ([2.63])
-AC_INIT([pire], [0.0.2], [dprokoptsev@yandex-team.ru])
-AM_INIT_AUTOMAKE([foreign -Wall])
-AC_CONFIG_SRCDIR([pire/classes.cpp])
-AC_CONFIG_HEADERS([config.h])
-AC_CONFIG_MACRO_DIR([m4])
-
-AC_LANG_CPLUSPLUS
-
-# Require neccessary binaries to build ourselves
-AC_PROG_CXX
-AC_PROG_CC
-AC_PROG_LEX
-AC_PROG_YACC
-AC_PROG_LIBTOOL
-
-# Check for cppunit
-AM_PATH_CPPUNIT([0.0.0],[with_unittests=yes],[
- AC_WARN([cppunit not found. Unit tests will not compile and run.])
- with_unittests=no
-])
-AM_CONDITIONAL([WITH_UNITTESTS], [test x"$with_unittests" = xyes])
-
-# Just for conscience' sake
-AC_CHECK_HEADERS([stdlib.h string.h sys/time.h])
-AC_HEADER_STDBOOL
-AC_C_INLINE
-AC_TYPE_SIZE_T
-AC_CHECK_TYPES([ptrdiff_t])
-AC_FUNC_ERROR_AT_LINE
-AC_FUNC_MALLOC
-AC_CHECK_FUNCS([memset strchr])
-
-# Require little-endian platform
-AC_C_BIGENDIAN
-if test x"$ac_cv_c_bigendian" = xyes; then
- AC_ERROR([pire has not been ported to big-endian platforms yet.])
-fi
-
-# Optional features
-AC_ARG_ENABLE([extra], AS_HELP_STRING([--enable-extra], [Add extra functionality (capturing scanner, etc...)]))
-AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Make Pire dump all constructed FSMs to std::clog (useless unless debugging Pire)]))
-AM_CONDITIONAL([ENABLE_EXTRA], [test x"$enable_extra" = xyes])
-AM_CONDITIONAL([ENABLE_DEBUG], [test x"$enable_debug" = xyes])
-
-AC_CONFIG_FILES([Makefile pire/Makefile tests/Makefile pkg/Makefile samples/Makefile samples/bench/Makefile])
-AC_OUTPUT
+AC_PREREQ([2.63])
+AC_INIT([pire], [0.0.2], [dprokoptsev@yandex-team.ru])
+AM_INIT_AUTOMAKE([foreign -Wall])
+AC_CONFIG_SRCDIR([pire/classes.cpp])
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_MACRO_DIR([m4])
+
+AC_LANG_CPLUSPLUS
+
+# Require necessary binaries to build ourselves
+AC_PROG_CXX
+AC_PROG_CC
+AC_PROG_LEX
+AC_PROG_YACC
+AC_PROG_LIBTOOL
+
+# Check for cppunit
+AM_PATH_CPPUNIT([0.0.0],[with_unittests=yes],[
+ AC_WARN([cppunit not found. Unit tests will not compile and run.])
+ with_unittests=no
+])
+AM_CONDITIONAL([WITH_UNITTESTS], [test x"$with_unittests" = xyes])
+
+# Just for conscience' sake
+AC_CHECK_HEADERS([stdlib.h string.h sys/time.h])
+AC_HEADER_STDBOOL
+AC_C_INLINE
+AC_TYPE_SIZE_T
+AC_CHECK_TYPES([ptrdiff_t])
+AC_FUNC_ERROR_AT_LINE
+AC_FUNC_MALLOC
+AC_CHECK_FUNCS([memset strchr])
+
+# Require little-endian platform
+AC_C_BIGENDIAN
+if test x"$ac_cv_c_bigendian" = xyes; then
+ AC_ERROR([pire has not been ported to big-endian platforms yet.])
+fi
+
+# Optional features
+AC_ARG_ENABLE([extra], AS_HELP_STRING([--enable-extra], [Add extra functionality (capturing scanner, etc...)]))
+AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Make Pire dump all constructed FSMs to std::clog (useless unless debugging Pire)]))
+AM_CONDITIONAL([ENABLE_EXTRA], [test x"$enable_extra" = xyes])
+AM_CONDITIONAL([ENABLE_DEBUG], [test x"$enable_debug" = xyes])
+
+AC_CONFIG_FILES([Makefile pire/Makefile tests/Makefile pkg/Makefile samples/Makefile samples/bench/Makefile])
+AC_OUTPUT
diff --git a/contrib/libs/pire/pire/Makefile.am b/contrib/libs/pire/pire/Makefile.am
index 09ef211704..f2d09a2fb7 100644
--- a/contrib/libs/pire/pire/Makefile.am
+++ b/contrib/libs/pire/pire/Makefile.am
@@ -1,121 +1,121 @@
-
-AM_CXXFLAGS = -Wall
-if ENABLE_DEBUG
-AM_CXXFLAGS += -DPIRE_DEBUG
-endif
-if ENABLE_CHECKED
-AM_CXXFLAGS += -DPIRE_CHECKED
-endif
-
-lib_LTLIBRARIES = libpire.la
-libpire_la_SOURCES = \
- align.h \
- any.h \
- classes.cpp \
- defs.h \
- determine.h \
- encoding.cpp \
- encoding.h \
- extra.h \
- fsm.cpp \
- fsm.h \
- fwd.h \
- glue.cpp \
- glue.h \
+
+AM_CXXFLAGS = -Wall
+if ENABLE_DEBUG
+AM_CXXFLAGS += -DPIRE_DEBUG
+endif
+if ENABLE_CHECKED
+AM_CXXFLAGS += -DPIRE_CHECKED
+endif
+
+lib_LTLIBRARIES = libpire.la
+libpire_la_SOURCES = \
+ align.h \
+ any.h \
+ classes.cpp \
+ defs.h \
+ determine.h \
+ encoding.cpp \
+ encoding.h \
+ extra.h \
+ fsm.cpp \
+ fsm.h \
+ fwd.h \
+ glue.cpp \
+ glue.h \
minimize.h \
half_final_fsm.cpp \
half_final_fsm.h \
- partition.h \
- pire.h \
- re_lexer.cpp \
- re_lexer.h \
- run.h \
- scanner_io.cpp \
- vbitset.h \
- re_parser.ypp \
+ partition.h \
+ pire.h \
+ re_lexer.cpp \
+ re_lexer.h \
+ run.h \
+ scanner_io.cpp \
+ vbitset.h \
+ re_parser.ypp \
scanners/half_final.h \
- scanners/loaded.h \
- scanners/multi.h \
- scanners/slow.h \
- scanners/simple.h \
- scanners/common.h \
- scanners/pair.h \
- stub/stl.h \
- stub/lexical_cast.h \
- stub/saveload.h \
- stub/singleton.h \
- stub/utf8.cpp \
- stub/utf8.h \
- stub/noncopyable.h \
- stub/codepage_h.h \
- stub/doccodes_h.h \
- stub/unidata_h.h \
- stub/unidata_cpp.h
-
-if ENABLE_EXTRA
-libpire_la_SOURCES += \
- extra/capture.cpp \
- extra/capture.h \
- extra/count.cpp \
- extra/count.h \
- extra/glyphs.cpp \
- extra/glyphs.h
-endif
-
-pire_hdrdir = $(includedir)/pire
-pire_hdr_HEADERS = \
- align.h \
- any.h \
- defs.h \
- determine.h \
- encoding.h \
- extra.h \
- fsm.h \
- fwd.h \
- glue.h \
+ scanners/loaded.h \
+ scanners/multi.h \
+ scanners/slow.h \
+ scanners/simple.h \
+ scanners/common.h \
+ scanners/pair.h \
+ stub/stl.h \
+ stub/lexical_cast.h \
+ stub/saveload.h \
+ stub/singleton.h \
+ stub/utf8.cpp \
+ stub/utf8.h \
+ stub/noncopyable.h \
+ stub/codepage_h.h \
+ stub/doccodes_h.h \
+ stub/unidata_h.h \
+ stub/unidata_cpp.h
+
+if ENABLE_EXTRA
+libpire_la_SOURCES += \
+ extra/capture.cpp \
+ extra/capture.h \
+ extra/count.cpp \
+ extra/count.h \
+ extra/glyphs.cpp \
+ extra/glyphs.h
+endif
+
+pire_hdrdir = $(includedir)/pire
+pire_hdr_HEADERS = \
+ align.h \
+ any.h \
+ defs.h \
+ determine.h \
+ encoding.h \
+ extra.h \
+ fsm.h \
+ fwd.h \
+ glue.h \
minimize.h \
half_final_fsm.h \
- partition.h \
- pire.h \
- re_lexer.h \
- re_parser.h \
- run.h \
- static_assert.h \
- vbitset.h
-
-if ENABLE_EXTRA
-pire_extradir = $(includedir)/pire/extra
-pire_extra_HEADERS = \
- extra/capture.h \
- extra/count.h \
- extra/glyphs.h
-endif
-
-pire_scannersdir = $(includedir)/pire/scanners
-pire_scanners_HEADERS = \
- scanners/common.h \
+ partition.h \
+ pire.h \
+ re_lexer.h \
+ re_parser.h \
+ run.h \
+ static_assert.h \
+ vbitset.h
+
+if ENABLE_EXTRA
+pire_extradir = $(includedir)/pire/extra
+pire_extra_HEADERS = \
+ extra/capture.h \
+ extra/count.h \
+ extra/glyphs.h
+endif
+
+pire_scannersdir = $(includedir)/pire/scanners
+pire_scanners_HEADERS = \
+ scanners/common.h \
scanners/half_final.h \
- scanners/multi.h \
- scanners/slow.h \
- scanners/simple.h \
- scanners/loaded.h \
- scanners/pair.h
-
-pire_stubdir = $(includedir)/pire/stub
-pire_stub_HEADERS = \
- stub/stl.h \
- stub/defaults.h \
- stub/singleton.h \
- stub/saveload.h \
- stub/lexical_cast.h
-
-bin_PROGRAMS = pire_inline
-
-pire_inline_SOURCES = inline.lpp stub/hacks.h stub/memstreams.h
-pire_inline_LDADD = libpire.la
-
-BUILT_SOURCES = re_parser.h re_parser.cpp
-CLEANFILES = re_parser.h re_parser.cpp
-
-AM_YFLAGS = -d
-
+ scanners/multi.h \
+ scanners/slow.h \
+ scanners/simple.h \
+ scanners/loaded.h \
+ scanners/pair.h
+
+pire_stubdir = $(includedir)/pire/stub
+pire_stub_HEADERS = \
+ stub/stl.h \
+ stub/defaults.h \
+ stub/singleton.h \
+ stub/saveload.h \
+ stub/lexical_cast.h
+
+bin_PROGRAMS = pire_inline
+
+pire_inline_SOURCES = inline.lpp stub/hacks.h stub/memstreams.h
+pire_inline_LDADD = libpire.la
+
+BUILT_SOURCES = re_parser.h re_parser.cpp
+CLEANFILES = re_parser.h re_parser.cpp
+
+AM_YFLAGS = -d
+
diff --git a/contrib/libs/pire/pire/align.h b/contrib/libs/pire/pire/align.h
index fea084b598..c1941b7120 100644
--- a/contrib/libs/pire/pire/align.h
+++ b/contrib/libs/pire/pire/align.h
@@ -1,103 +1,103 @@
-/*
- * align.h -- functions for positioning streams and memory pointers
- * to word boundaries
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * align.h -- functions for positioning streams and memory pointers
+ * to word boundaries
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_ALIGN_H
-#define PIRE_ALIGN_H
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_ALIGN_H
+#define PIRE_ALIGN_H
+
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/saveload.h>
-#include "platform.h"
-
-namespace Pire {
-
- namespace Impl {
-
- template<class T>
- inline T AlignUp(T t, size_t bound)
- {
- return (T) (((size_t) t + (bound-1)) & ~(bound-1));
- }
-
- template<class T>
- inline T AlignDown(T t, size_t bound)
- {
- return (T) ((size_t) t & ~(bound-1));
- }
-
- inline void AlignSave(yostream* s, size_t size)
- {
- size_t tail = AlignUp(size, sizeof(size_t)) - size;
- if (tail) {
- static const char buf[sizeof(MaxSizeWord)] = {0};
- SavePodArray(s, buf, tail);
- }
- }
-
- inline void AlignLoad(yistream* s, size_t size)
- {
- size_t tail = AlignUp(size, sizeof(size_t)) - size;
- if (tail) {
- char buf[sizeof(MaxSizeWord)];
- LoadPodArray(s, buf, tail);
- }
- }
-
- template<class T>
- inline void AlignedSaveArray(yostream* s, const T* array, size_t count)
- {
- SavePodArray(s, array, count);
- AlignSave(s, sizeof(*array) * count);
- }
-
- template<class T>
- inline void AlignedLoadArray(yistream* s, T* array, size_t count)
- {
- LoadPodArray(s, array, count);
- AlignLoad(s, sizeof(*array) * count);
- }
-
- template<class T>
- inline bool IsAligned(T t, size_t bound)
- {
- return ((size_t) t & (bound-1)) == 0;
- }
-
- inline const void* AlignPtr(const size_t*& p, size_t& size)
- {
- if (!IsAligned(p, sizeof(size_t))) {
- const size_t* next = AlignUp(p, sizeof(size_t));
- if (next > p+size)
- throw Error("EOF reached in NPire::Impl::align");
- size -= (next - p);
- p = next;
- }
- return (const void*) p;
- }
-
- }
-
-}
-
-#endif
+#include "platform.h"
+
+namespace Pire {
+
+ namespace Impl {
+
+ template<class T>
+ inline T AlignUp(T t, size_t bound) // Rounds t (cast through size_t) up to a multiple of bound and casts back to T.
+ {
+ return (T) (((size_t) t + (bound-1)) & ~(bound-1)); // mask arithmetic: only yields a multiple of bound when bound is a power of two
+ }
+
+ template<class T>
+ inline T AlignDown(T t, size_t bound) // Rounds t (cast through size_t) down to a multiple of bound and casts back to T.
+ {
+ return (T) ((size_t) t & ~(bound-1)); // clears the low bits; only meaningful when bound is a power of two
+ }
+
+ inline void AlignSave(yostream* s, size_t size) // Emits padding after `size` bytes so the total becomes a multiple of sizeof(size_t).
+ {
+ size_t tail = AlignUp(size, sizeof(size_t)) - size; // number of padding bytes, always < sizeof(size_t)
+ if (tail) {
+ static const char buf[sizeof(MaxSizeWord)] = {0}; // zero-filled padding source; assumes sizeof(MaxSizeWord) >= tail -- TODO confirm against platform.h
+ SavePodArray(s, buf, tail);
+ }
+ }
+
+ inline void AlignLoad(yistream* s, size_t size) // Counterpart of AlignSave: reads the padding that follows `size` bytes and discards it.
+ {
+ size_t tail = AlignUp(size, sizeof(size_t)) - size; // same padding length AlignSave computes
+ if (tail) {
+ char buf[sizeof(MaxSizeWord)]; // scratch buffer; its contents are never used after the read
+ LoadPodArray(s, buf, tail);
+ }
+ }
+
+ template<class T>
+ inline void AlignedSaveArray(yostream* s, const T* array, size_t count) // Saves `count` elements, then pads the stream via AlignSave.
+ {
+ SavePodArray(s, array, count);
+ AlignSave(s, sizeof(*array) * count); // padding is computed from the byte length of the array just written
+ }
+
+ template<class T>
+ inline void AlignedLoadArray(yistream* s, T* array, size_t count) // Loads `count` elements, then skips the padding via AlignLoad.
+ {
+ LoadPodArray(s, array, count);
+ AlignLoad(s, sizeof(*array) * count); // mirrors the padding computed in AlignedSaveArray
+ }
+
+ template<class T>
+ inline bool IsAligned(T t, size_t bound) // True when t (cast to size_t) has none of the low bits selected by bound-1 set.
+ {
+ return ((size_t) t & (bound-1)) == 0; // equals "t is a multiple of bound" only when bound is a power of two
+ }
+
+ inline const void* AlignPtr(const size_t*& p, size_t& size) // Advances p to the next sizeof(size_t) boundary, shrinks size to match, and returns the new p.
+ {
+ if (!IsAligned(p, sizeof(size_t))) {
+ const size_t* next = AlignUp(p, sizeof(size_t));
+ if (next > p+size) // NOTE(review): p is a size_t*, so p+size and next-p below count size_t elements, not bytes -- confirm callers pass size in those units
+ throw Error("EOF reached in NPire::Impl::align");
+ size -= (next - p);
+ p = next;
+ }
+ return (const void*) p; // p is returned unchanged when it was already aligned
+ }
+
+ }
+
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/any.h b/contrib/libs/pire/pire/any.h
index 4646d25781..f1c8ab9ab9 100644
--- a/contrib/libs/pire/pire/any.h
+++ b/contrib/libs/pire/pire/any.h
@@ -1,131 +1,131 @@
-/*
- * any.h -- a wrapper capable of holding a value of arbitrary type.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * any.h -- a wrapper capable of holding a value of arbitrary type.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_ANY_H
-#define PIRE_ANY_H
-
-
-#include <typeinfo>
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_ANY_H
+#define PIRE_ANY_H
+
+
+#include <typeinfo>
+
#include <contrib/libs/pire/pire/stub/stl.h>
-
-namespace Pire {
-
-class Any {
-
-public:
+
+namespace Pire {
+
+class Any {
+
+public:
Any() = default;
-
- Any(const Any& any)
- {
- if (any.h)
- h = any.h->Duplicate();
- }
-
- Any& operator= (Any any)
- {
- any.Swap(*this);
- return *this;
- }
-
- template <class T>
- Any(const T& t)
- : h(new Holder<T>(t))
- {
- }
-
- bool Empty() const {
- return !h;
- }
- template <class T>
- bool IsA() const {
- return h && h->IsA(typeid(T));
- }
-
- template <class T>
- T& As()
- {
- if (h && IsA<T>())
- return *reinterpret_cast<T*>(h->Ptr());
- else
- throw Pire::Error("type mismatch");
- }
-
- template <class T>
- const T& As() const
- {
- if (h && IsA<T>())
- return *reinterpret_cast<const T*>(h->Ptr());
- else
- throw Pire::Error("type mismatch");
- }
-
+
+ Any(const Any& any)
+ {
+ if (any.h)
+ h = any.h->Duplicate();
+ }
+
+ Any& operator= (Any any)
+ {
+ any.Swap(*this);
+ return *this;
+ }
+
+ template <class T>
+ Any(const T& t)
+ : h(new Holder<T>(t))
+ {
+ }
+
+ bool Empty() const {
+ return !h;
+ }
+ template <class T>
+ bool IsA() const {
+ return h && h->IsA(typeid(T));
+ }
+
+ template <class T>
+ T& As()
+ {
+ if (h && IsA<T>())
+ return *reinterpret_cast<T*>(h->Ptr());
+ else
+ throw Pire::Error("type mismatch");
+ }
+
+ template <class T>
+ const T& As() const
+ {
+ if (h && IsA<T>())
+ return *reinterpret_cast<const T*>(h->Ptr());
+ else
+ throw Pire::Error("type mismatch");
+ }
+
void Swap(Any& a) noexcept {
- DoSwap(h, a.h);
- }
-
-private:
-
- struct AbstractHolder {
- virtual ~AbstractHolder() {
- }
+ DoSwap(h, a.h);
+ }
+
+private:
+
+ struct AbstractHolder {
+ virtual ~AbstractHolder() {
+ }
virtual THolder<AbstractHolder> Duplicate() const = 0;
- virtual bool IsA(const std::type_info& id) const = 0;
- virtual void* Ptr() = 0;
- virtual const void* Ptr() const = 0;
- };
-
- template <class T>
- struct Holder: public AbstractHolder {
- Holder(T t)
- : d(t)
- {
- }
+ virtual bool IsA(const std::type_info& id) const = 0;
+ virtual void* Ptr() = 0;
+ virtual const void* Ptr() const = 0;
+ };
+
+ template <class T>
+ struct Holder: public AbstractHolder {
+ Holder(T t)
+ : d(t)
+ {
+ }
THolder<AbstractHolder> Duplicate() const {
return THolder<AbstractHolder>(new Holder<T>(d));
- }
- bool IsA(const std::type_info& id) const {
- return id == typeid(T);
- }
- void* Ptr() {
- return &d;
- }
- const void* Ptr() const {
- return &d;
- }
- private:
- T d;
- };
-
+ }
+ bool IsA(const std::type_info& id) const {
+ return id == typeid(T);
+ }
+ void* Ptr() {
+ return &d;
+ }
+ const void* Ptr() const {
+ return &d;
+ }
+ private:
+ T d;
+ };
+
THolder<AbstractHolder> h;
-};
-
-}
-
-namespace std {
- inline void swap(Pire::Any& a, Pire::Any& b) {
- a.Swap(b);
- }
-}
-
-#endif
+};
+
+}
+
+namespace std {
+ inline void swap(Pire::Any& a, Pire::Any& b) {
+ a.Swap(b);
+ }
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/classes.cpp b/contrib/libs/pire/pire/classes.cpp
index bbf021737d..d928d76866 100644
--- a/contrib/libs/pire/pire/classes.cpp
+++ b/contrib/libs/pire/pire/classes.cpp
@@ -1,152 +1,152 @@
-/*
- * classes.cpp -- implementation for Pire::CharClasses feature.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * classes.cpp -- implementation for Pire::CharClasses feature.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/singleton.h>
#include <contrib/libs/pire/pire/stub/noncopyable.h>
#include <contrib/libs/pire/pire/stub/utf8.h>
-#include "re_lexer.h"
-
-namespace Pire {
-
-namespace {
-
- class CharClassesTable: private NonCopyable {
- private:
- class CharClass {
- public:
- CharClass() {}
- explicit CharClass(wchar32 ch) { m_bounds.push_back(ymake_pair(ch, ch)); }
- CharClass(wchar32 lower, wchar32 upper) { m_bounds.push_back(ymake_pair(lower, upper)); }
-
- CharClass& operator |= (const CharClass& cc)
- {
- std::copy(cc.m_bounds.begin(), cc.m_bounds.end(), std::back_inserter(m_bounds));
- return *this;
- }
-
- CharClass operator | (const CharClass& cc) const
- {
- CharClass r(*this);
- r |= cc;
- return r;
- }
-
+#include "re_lexer.h"
+
+namespace Pire {
+
+namespace {
+
+ class CharClassesTable: private NonCopyable {
+ private:
+ class CharClass {
+ public:
+ CharClass() {}
+ explicit CharClass(wchar32 ch) { m_bounds.push_back(ymake_pair(ch, ch)); }
+ CharClass(wchar32 lower, wchar32 upper) { m_bounds.push_back(ymake_pair(lower, upper)); }
+
+ CharClass& operator |= (const CharClass& cc)
+ {
+ std::copy(cc.m_bounds.begin(), cc.m_bounds.end(), std::back_inserter(m_bounds));
+ return *this;
+ }
+
+ CharClass operator | (const CharClass& cc) const
+ {
+ CharClass r(*this);
+ r |= cc;
+ return r;
+ }
+
TSet<wchar32> ToSet() const
- {
+ {
TSet<wchar32> ret;
for (auto&& bound : m_bounds)
for (wchar32 c = bound.first; c <= bound.second; ++c)
- ret.insert(c);
- return ret;
- }
-
- private:
+ ret.insert(c);
+ return ret;
+ }
+
+ private:
TVector<ypair<wchar32, wchar32> > m_bounds;
- };
-
- public:
- bool Has(wchar32 wc) const
- {
- return (m_classes.find(to_lower(wc & ~ControlMask)) != m_classes.end());
- }
-
+ };
+
+ public:
+ bool Has(wchar32 wc) const
+ {
+ return (m_classes.find(to_lower(wc & ~ControlMask)) != m_classes.end());
+ }
+
TSet<wchar32> Get(wchar32 wc) const
- {
+ {
auto it = m_classes.find(to_lower(wc & ~ControlMask));
- if (it == m_classes.end())
- throw Error("Unknown character class");
- return it->second.ToSet();
- }
-
- CharClassesTable()
- {
- m_classes['l'] = CharClass('A', 'Z') | CharClass('a', 'z');
- m_classes['c']
- = CharClass(0x0410, 0x044F) // Russian capital A to Russan capital YA, Russian small A to Russian small YA
- | CharClass(0x0401) // Russian capital Yo
- | CharClass(0x0451) // Russian small Yo
- ;
-
- m_classes['w'] = m_classes['l'] | m_classes['c'];
- m_classes['d'] = CharClass('0', '9');
- m_classes['s']
- = CharClass(' ') | CharClass('\t') | CharClass('\r') | CharClass('\n')
- | CharClass(0x00A0) // Non-breaking space
- ;
-
- // A few special classes which do not have any negation
- m_classes['n'] = CharClass('\n');
- m_classes['r'] = CharClass('\r');
- m_classes['t'] = CharClass('\t');
- }
-
+ if (it == m_classes.end())
+ throw Error("Unknown character class");
+ return it->second.ToSet();
+ }
+
+ CharClassesTable() // Fills m_classes with the built-in character classes, keyed by their lower-case letter.
+ {
+ m_classes['l'] = CharClass('A', 'Z') | CharClass('a', 'z');
+ m_classes['c']
+ = CharClass(0x0410, 0x044F) // Russian capital A to Russian capital YA, Russian small A to Russian small YA
+ | CharClass(0x0401) // Russian capital Yo
+ | CharClass(0x0451) // Russian small Yo
+ ;
+
+ m_classes['w'] = m_classes['l'] | m_classes['c'];
+ m_classes['d'] = CharClass('0', '9');
+ m_classes['s']
+ = CharClass(' ') | CharClass('\t') | CharClass('\r') | CharClass('\n')
+ | CharClass(0x00A0) // Non-breaking space
+ ;
+
+ // A few special classes which do not have any negation
+ m_classes['n'] = CharClass('\n');
+ m_classes['r'] = CharClass('\r');
+ m_classes['t'] = CharClass('\t');
+ }
+
TMap<wchar32, CharClass> m_classes;
- };
-
- class CharClassesImpl: public Feature {
- public:
- CharClassesImpl(): m_table(Singleton<CharClassesTable>()) {}
- int Priority() const { return 10; }
-
- void Alter(Term& t)
- {
- if (t.Value().IsA<Term::CharacterRange>()) {
- const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>();
- typedef Term::CharacterRange::first_type CharSet;
- const CharSet& old = range.first;
- CharSet altered;
- bool pos = false;
- bool neg = false;
+ };
+
+ class CharClassesImpl: public Feature {
+ public:
+ CharClassesImpl(): m_table(Singleton<CharClassesTable>()) {}
+ int Priority() const { return 10; }
+
+ void Alter(Term& t) // Replaces character-class escapes inside a CharacterRange term with the characters they stand for; other terms are left as is.
+ {
+ if (t.Value().IsA<Term::CharacterRange>()) {
+ const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>();
+ typedef Term::CharacterRange::first_type CharSet;
+ const CharSet& old = range.first;
+ CharSet altered;
+ bool pos = false; // a class written with a non-upper-case letter was expanded
+ bool neg = false; // a class written with an upper-case letter was expanded
 for (auto&& i : old)
 if (i.size() == 1 && (i[0] & ControlMask) == Control && m_table->Has(i[0])) {
 if (is_upper(i[0] & ~ControlMask))
+ neg = true;
+ else
+ pos = true;
+
 TSet<wchar32> klass = m_table->Get(i[0]);
 for (auto&& j : klass)
 altered.insert(Term::String(1, j));
+ } else
 altered.insert(i);
+
+ if (neg && (pos || range.second))
+ throw Error("Positive and negative character ranges mixed"); // previously a discarded temporary, so this check never fired
+ t = Term(t.Type(), Term::CharacterRange(altered, neg || range.second)); // an upper-case class flips the range's negation flag
+ }
+ }
+
+ private:
+ CharClassesTable* m_table;
+ };
+
+}
+
+namespace Features {
Feature::Ptr CharClasses() { return Feature::Ptr(new CharClassesImpl); }
-}
-
-}
-
+}
+
+}
+
diff --git a/contrib/libs/pire/pire/defs.h b/contrib/libs/pire/pire/defs.h
index 19d785d7d7..c1e7780ef9 100644
--- a/contrib/libs/pire/pire/defs.h
+++ b/contrib/libs/pire/pire/defs.h
@@ -1,112 +1,112 @@
-/*
- * defs.h -- common Pire definitions.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_DEFS_H
-#define PIRE_DEFS_H
-
-#ifndef PIRE_NO_CONFIG
-#include <pire/config.h>
-#endif
-#include <stdlib.h>
-
-#if defined(_MSC_VER)
-#define PIRE_HAVE_DECLSPEC_ALIGN
-#else
-#define PIRE_HAVE_ALIGNAS
-#endif
-
-#define PIRE_HAVE_LAMBDAS
-
-namespace Pire {
-
-#ifdef PIRE_DEBUG
-# define PIRE_IFDEBUG(x) x
-#else
-# define PIRE_IFDEBUG(x)
-#endif
-
-#ifdef PIRE_CHECKED
-# define PIRE_IF_CHECKED(e) e
-#else
-# define PIRE_IF_CHECKED(e)
-#endif
-
-
- typedef unsigned short Char;
-
- namespace SpecialChar {
- enum {
- Epsilon = 257,
- BeginMark = 258,
- EndMark = 259,
-
- // Actual size of input alphabet
- MaxCharUnaligned = 260,
-
- // Size of letter transition tables, must be a multiple of the machine word size
- MaxChar = (MaxCharUnaligned + (sizeof(void*)-1)) & ~(sizeof(void*)-1)
- };
- }
-
- using namespace SpecialChar;
-
- namespace Impl {
-#ifndef PIRE_WORDS_BIGENDIAN
- inline size_t ToLittleEndian(size_t val) { return val; }
-#else
- template<unsigned N>
- inline size_t SwapBytes(size_t val)
- {
- static const size_t Mask = (1 << (N/2)) - 1;
- return ((SwapBytes<N/2>(val) & Mask) << (N/2)) | SwapBytes<N/2>(val >> (N/2));
- }
-
- template<>
- inline size_t SwapBytes<8>(size_t val) { return val & 0xFF; }
-
- inline size_t ToLittleEndian(size_t val) { return SwapBytes<sizeof(val)*8>(val); }
-#endif
-
- struct Struct { void* p; };
- }
-}
-
-#ifndef PIRE_ALIGNED_DECL
-# if defined(PIRE_HAVE_ALIGNAS)
-# define PIRE_ALIGNED_DECL(x) alignas(::Pire::Impl::Struct) static const char x[]
-# elif defined(PIRE_HAVE_ATTR_ALIGNED)
-# define PIRE_ALIGNED_DECL(x) static const char x[] __attribute__((aligned(sizeof(void*))))
-# elif defined(PIRE_HAVE_DECLSPEC_ALIGN)
-# define PIRE_ALIGNED_DECL(x) __declspec(align(8)) static const char x[]
-# endif
-#endif
-
-#ifndef PIRE_LITERAL
-# if defined(PIRE_HAVE_LAMBDAS)
-# define PIRE_LITERAL(data) ([]() -> const char* { PIRE_ALIGNED_DECL(__pire_regexp__) = data; return __pire_regexp__; })()
-# elif defined(PIRE_HAVE_SCOPED_EXPR)
-# define PIRE_LITERAL(data) ({ PIRE_ALIGNED_DECL(__pire_regexp__) = data; __pire_regexp__; })
-# endif
-#endif
-
-#endif
+/*
+ * defs.h -- common Pire definitions.
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_DEFS_H
+#define PIRE_DEFS_H
+
+#ifndef PIRE_NO_CONFIG
+#include <pire/config.h>
+#endif
+#include <stdlib.h>
+
+#if defined(_MSC_VER)
+#define PIRE_HAVE_DECLSPEC_ALIGN
+#else
+#define PIRE_HAVE_ALIGNAS
+#endif
+
+#define PIRE_HAVE_LAMBDAS
+
+namespace Pire {
+
+#ifdef PIRE_DEBUG
+# define PIRE_IFDEBUG(x) x
+#else
+# define PIRE_IFDEBUG(x)
+#endif
+
+#ifdef PIRE_CHECKED
+# define PIRE_IF_CHECKED(e) e
+#else
+# define PIRE_IF_CHECKED(e)
+#endif
+
+
+ typedef unsigned short Char;
+
+ namespace SpecialChar { // symbol codes lying above the 0..255 byte range
+ enum {
+ Epsilon = 257,
+ BeginMark = 258,
+ EndMark = 259,
+
+ // Actual size of the input alphabet: one past the largest code listed above
+ MaxCharUnaligned = 260,
+
+ // Size of letter transition tables; must be a multiple of the machine word size, so MaxCharUnaligned is rounded up to a multiple of sizeof(void*)
+ MaxChar = (MaxCharUnaligned + (sizeof(void*)-1)) & ~(sizeof(void*)-1)
+ };
+ }
+
+ using namespace SpecialChar;
+
+ namespace Impl { // byte-order helpers and the alignment reference type
+#ifndef PIRE_WORDS_BIGENDIAN
+ inline size_t ToLittleEndian(size_t val) { return val; } // PIRE_WORDS_BIGENDIAN not set: value is returned unchanged
+#else
+ template<unsigned N> // reverses the byte order of the low N bits of val; N must be 8 times a power of two
+ inline size_t SwapBytes(size_t val)
+ {
+ static const size_t Mask = (size_t(1) << (N/2)) - 1; // widen before shifting: an int 1 << 32 is undefined when N == 64
+ return ((SwapBytes<N/2>(val) & Mask) << (N/2)) | SwapBytes<N/2>(val >> (N/2)); // reversed low half moves up, reversed high half moves down
+ }
+ // Recursion base: a single byte needs no reordering.
+ template<>
+ inline size_t SwapBytes<8>(size_t val) { return val & 0xFF; }
+ // PIRE_WORDS_BIGENDIAN set: reverse all sizeof(size_t) bytes of val.
+ inline size_t ToLittleEndian(size_t val) { return SwapBytes<sizeof(val)*8>(val); }
+#endif
+ // Pointer-sized struct; PIRE_ALIGNED_DECL passes it to alignas to request pointer alignment.
+ struct Struct { void* p; };
+ }
+}
+
+#ifndef PIRE_ALIGNED_DECL
+# if defined(PIRE_HAVE_ALIGNAS)
+# define PIRE_ALIGNED_DECL(x) alignas(::Pire::Impl::Struct) static const char x[]
+# elif defined(PIRE_HAVE_ATTR_ALIGNED)
+# define PIRE_ALIGNED_DECL(x) static const char x[] __attribute__((aligned(sizeof(void*))))
+# elif defined(PIRE_HAVE_DECLSPEC_ALIGN)
+# define PIRE_ALIGNED_DECL(x) __declspec(align(8)) static const char x[]
+# endif
+#endif
+
+#ifndef PIRE_LITERAL
+# if defined(PIRE_HAVE_LAMBDAS)
+# define PIRE_LITERAL(data) ([]() -> const char* { PIRE_ALIGNED_DECL(__pire_regexp__) = data; return __pire_regexp__; })()
+# elif defined(PIRE_HAVE_SCOPED_EXPR)
+# define PIRE_LITERAL(data) ({ PIRE_ALIGNED_DECL(__pire_regexp__) = data; __pire_regexp__; })
+# endif
+#endif
+
+#endif
diff --git a/contrib/libs/pire/pire/determine.h b/contrib/libs/pire/pire/determine.h
index fb48fdd0b3..ddadfa1c75 100644
--- a/contrib/libs/pire/pire/determine.h
+++ b/contrib/libs/pire/pire/determine.h
@@ -1,145 +1,145 @@
-/*
- * determine.h -- the FSM determination routine.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * determine.h -- the FSM determination routine.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_DETERMINE_H
-#define PIRE_DETERMINE_H
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_DETERMINE_H
+#define PIRE_DETERMINE_H
+
#include <contrib/libs/pire/pire/stub/stl.h>
-#include "partition.h"
-
-namespace Pire {
- namespace Impl {
-
- /**
- * An interface of a determination task.
- * You don't have to derive from this class; it is just a start point template.
- */
- class DetermineTask {
- private:
- struct ImplementationSpecific1;
- struct ImplementationSpecific2;
-
- public:
- /// A type representing a new state (may be a set of old states, a pair of them, etc...)
- typedef ImplementationSpecific1 State;
-
- /// A type of letter equivalence classes table.
- typedef Partition<char, ImplementationSpecific2> LettersTbl;
-
- /// A container used for storing map of states to thier indices.
+#include "partition.h"
+
+namespace Pire {
+ namespace Impl {
+
+ /**
+ * An interface of a determination task.
+ * You don't have to derive from this class; it is just a start point template.
+ */
+ class DetermineTask {
+ private:
+ struct ImplementationSpecific1;
+ struct ImplementationSpecific2;
+
+ public:
+ /// A type representing a new state (may be a set of old states, a pair of them, etc...)
+ typedef ImplementationSpecific1 State;
+
+ /// A type of letter equivalence classes table.
+ typedef Partition<char, ImplementationSpecific2> LettersTbl;
+
+ /// A container used for storing the map of states to their indices.
typedef TMap<State, size_t> InvStates;
-
- /// Should return used letters' partition.
- const LettersTbl& Letters() const;
-
- /// Should return initial state (surprise!)
- State Initial() const;
-
- /// Should calculate next state, given the current state and a letter.
- State Next(State state, Char letter) const;
-
- /// Should return true iff the state need to be processed.
- bool IsRequired(const State& /*state*/) const { return true; }
-
- /// Called when the set of new states is closed.
+
+ /// Should return used letters' partition.
+ const LettersTbl& Letters() const;
+
+ /// Should return initial state (surprise!)
+ State Initial() const;
+
+ /// Should calculate next state, given the current state and a letter.
+ State Next(State state, Char letter) const;
+
+ /// Should return true iff the state needs to be processed.
+ bool IsRequired(const State& /*state*/) const { return true; }
+
+ /// Called when the set of new states is closed.
void AcceptStates(const TVector<State>& newstates);
-
- /// Called for each transition from one new state to another.
- void Connect(size_t from, size_t to, Char letter);
-
- typedef bool Result;
- Result Success() { return true; }
- Result Failure() { return false; }
- };
-
- /**
- * A helper function for FSM determining and all determine-like algorithms
- * like scanners' agglutination.
- *
- * Given an indirectly specified automaton (through Task::Initial() and Task::Next()
- * functions, see above), performs a breadth-first traversal, finding and enumerating
- * all effectively reachable states. Then passes all found states and transitions
- * between them back to the task.
- *
- * Initial state is always placed at zero position.
- *
- * Please note that the function does not take care of any payload (including final flags);
- * it is the task's responsibility to agglutinate them properly.
- *
- * Returns task.Succeed() if everything was done; task.Failure() if maximum limit of state count was reached.
- */
- template<class Task>
- typename Task::Result Determine(Task& task, size_t maxSize)
- {
- typedef typename Task::State State;
- typedef typename Task::InvStates InvStates;
+
+ /// Called for each transition from one new state to another.
+ void Connect(size_t from, size_t to, Char letter);
+
+ typedef bool Result;
+ Result Success() { return true; }
+ Result Failure() { return false; }
+ };
+
+ /**
+ * A helper function for FSM determining and all determine-like algorithms
+ * like scanners' agglutination.
+ *
+ * Given an indirectly specified automaton (through Task::Initial() and Task::Next()
+ * functions, see above), performs a breadth-first traversal, finding and enumerating
+ * all effectively reachable states. Then passes all found states and transitions
+ * between them back to the task.
+ *
+ * Initial state is always placed at zero position.
+ *
+ * Please note that the function does not take care of any payload (including final flags);
+ * it is the task's responsibility to agglutinate them properly.
+ *
+ * Returns task.Success() if everything was done; task.Failure() if the maximum limit of state count was reached.
+ */
+ template<class Task>
+ typename Task::Result Determine(Task& task, size_t maxSize)
+ {
+ typedef typename Task::State State;
+ typedef typename Task::InvStates InvStates;
typedef TDeque< TVector<size_t> > TransitionTable;
-
+
TVector<State> states;
- InvStates invstates;
- TransitionTable transitions;
+ InvStates invstates;
+ TransitionTable transitions;
TVector<size_t> stateIndices;
-
- states.push_back(task.Initial());
- invstates.insert(typename InvStates::value_type(states[0], 0));
-
- for (size_t stateIdx = 0; stateIdx < states.size(); ++stateIdx) {
- if (!task.IsRequired(states[stateIdx]))
- continue;
- TransitionTable::value_type row(task.Letters().Size());
+
+ states.push_back(task.Initial());
+ invstates.insert(typename InvStates::value_type(states[0], 0));
+
+ for (size_t stateIdx = 0; stateIdx < states.size(); ++stateIdx) {
+ if (!task.IsRequired(states[stateIdx]))
+ continue;
+ TransitionTable::value_type row(task.Letters().Size());
for (auto&& letter : task.Letters()) {
State newState = task.Next(states[stateIdx], letter.first);
auto i = invstates.find(newState);
- if (i == invstates.end()) {
- if (!maxSize--)
- return task.Failure();
- i = invstates.insert(typename InvStates::value_type(newState, states.size())).first;
- states.push_back(newState);
- }
+ if (i == invstates.end()) {
+ if (!maxSize--)
+ return task.Failure();
+ i = invstates.insert(typename InvStates::value_type(newState, states.size())).first;
+ states.push_back(newState);
+ }
row[letter.second.first] = i->second;
- }
- transitions.push_back(row);
- stateIndices.push_back(stateIdx);
- }
-
+ }
+ transitions.push_back(row);
+ stateIndices.push_back(stateIdx);
+ }
+
TVector<Char> invletters(task.Letters().Size());
for (auto&& letter : task.Letters())
invletters[letter.second.first] = letter.first;
-
- task.AcceptStates(states);
- size_t from = 0;
- for (TransitionTable::iterator i = transitions.begin(), ie = transitions.end(); i != ie; ++i, ++from) {
+
+ task.AcceptStates(states);
+ size_t from = 0;
+ for (TransitionTable::iterator i = transitions.begin(), ie = transitions.end(); i != ie; ++i, ++from) {
TVector<Char>::iterator l = invletters.begin();
- for (TransitionTable::value_type::iterator j = i->begin(), je = i->end(); j != je; ++j, ++l)
- task.Connect(stateIndices[from], *j, *l);
- }
- return task.Success();
- }
+ for (TransitionTable::value_type::iterator j = i->begin(), je = i->end(); j != je; ++j, ++l)
+ task.Connect(stateIndices[from], *j, *l);
+ }
+ return task.Success();
+ }
// Faster transition table representation for determined FSM
typedef TVector<size_t> DeterminedTransitions;
- }
-}
-
-#endif
+ }
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/easy.cpp b/contrib/libs/pire/pire/easy.cpp
index bcb56c693b..61e4384fab 100644
--- a/contrib/libs/pire/pire/easy.cpp
+++ b/contrib/libs/pire/pire/easy.cpp
@@ -1,33 +1,33 @@
-/*
- * easy.cpp -- static variables for Pire Easy facilities.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * easy.cpp -- static variables for Pire Easy facilities.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-#include "easy.h"
-
-namespace Pire {
-
-const Option<const Encoding&> UTF8(&Pire::Encodings::Utf8);
-const Option<const Encoding&> LATIN1(&Pire::Encodings::Latin1);
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+#include "easy.h"
+
+namespace Pire {
+
+const Option<const Encoding&> UTF8(&Pire::Encodings::Utf8);
+const Option<const Encoding&> LATIN1(&Pire::Encodings::Latin1);
+
const Option<Feature::Ptr> I(&Pire::Features::CaseInsensitive);
const Option<Feature::Ptr> ANDNOT(&Pire::Features::AndNotSupport);
-
-}
+
+}
diff --git a/contrib/libs/pire/pire/easy.h b/contrib/libs/pire/pire/easy.h
index c70e965353..a784252c5f 100644
--- a/contrib/libs/pire/pire/easy.h
+++ b/contrib/libs/pire/pire/easy.h
@@ -1,249 +1,249 @@
-/*
- * easy.h -- Pire Easy facilities.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * easy.h -- Pire Easy facilities.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-/**
- * For those who never reads documentation, does not need any mysterious features
- * there is a fast and easy way to start using Pire.
- *
- * Just type:
- *
- * Pire::Regexp sc("pattern of (my regexp)*", Pire::UTF8 | Pire::I);
- * if (sc.Matches("pattern of my regexp"))
- * std::cout << "Hooray!" << std::endl;
- *
- * Or, to go more crazy:
- *
- * if ("pattern of my regexp" ==~ sc)
- * std::cout << "What a perversion..." << std::endl;
- *
- * Scanner's constructor takes a pattern and a "bitwise ORed" combination of "flags".
- * Available "flags" are:
- * I - case insensitivity;
- * ANDNOT - support for additional operations (& and ~) inside the pattern;
- * UTF8 - treat pattern input sequence as UTF-8 (surprise!)
- * LATIN1 - guess what?
- *
- * (In fact, those are not "flags" and not "bitwise ORed". See code for details.)
- */
-
-#ifndef PIRE_EASY_H_INCLUDED
-#define PIRE_EASY_H_INCLUDED
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+/**
+ * For those who never read documentation and do not need any mysterious features,
+ * there is a fast and easy way to start using Pire.
+ *
+ * Just type:
+ *
+ * Pire::Regexp sc("pattern of (my regexp)*", Pire::UTF8 | Pire::I);
+ * if (sc.Matches("pattern of my regexp"))
+ * std::cout << "Hooray!" << std::endl;
+ *
+ * Or, to go more crazy:
+ *
+ * if ("pattern of my regexp" ==~ sc)
+ * std::cout << "What a perversion..." << std::endl;
+ *
+ * Regexp's constructor takes a pattern and a "bitwise ORed" combination of "flags".
+ * Available "flags" are:
+ * I - case insensitivity;
+ * ANDNOT - support for additional operations (& and ~) inside the pattern;
+ * UTF8 - treat pattern input sequence as UTF-8 (surprise!)
+ * LATIN1 - guess what?
+ *
+ * (In fact, those are not "flags" and not "bitwise ORed". See code for details.)
+ */
+
+#ifndef PIRE_EASY_H_INCLUDED
+#define PIRE_EASY_H_INCLUDED
+
#include <iterator>
#include <contrib/libs/pire/pire/stub/stl.h>
-#include "pire.h"
-#include "vbitset.h"
-
-namespace Pire {
-
-template<class Arg> class Option;
-
-class Options {
-public:
- Options(): m_encoding(&Pire::Encodings::Latin1()) {}
- ~Options() { Clear(); }
-
- void Add(const Pire::Encoding& encoding) { m_encoding = &encoding; }
+#include "pire.h"
+#include "vbitset.h"
+
+namespace Pire {
+
+template<class Arg> class Option;
+
+class Options {
+public:
+ Options(): m_encoding(&Pire::Encodings::Latin1()) {}
+ ~Options() { Clear(); }
+
+ void Add(const Pire::Encoding& encoding) { m_encoding = &encoding; }
void Add(Feature::Ptr&& feature) { m_features.push_back(std::move(feature)); }
-
- struct Proxy {
- Options* const o;
- /*implicit*/ Proxy(Options* opts): o(opts) {}
- };
- operator Proxy() { return Proxy(this); }
-
- Options(Options& o): m_encoding(o.m_encoding) { m_features.swap(o.m_features); }
+
+ struct Proxy {
+ Options* const o;
+ /*implicit*/ Proxy(Options* opts): o(opts) {}
+ };
+ operator Proxy() { return Proxy(this); }
+
+ Options(Options& o): m_encoding(o.m_encoding) { m_features.swap(o.m_features); }
Options& operator = (Options& o) { m_encoding = o.m_encoding; m_features = std::move(o.m_features); o.Clear(); return *this; }
-
- Options(Proxy p): m_encoding(p.o->m_encoding) { m_features.swap(p.o->m_features); }
+
+ Options(Proxy p): m_encoding(p.o->m_encoding) { m_features.swap(p.o->m_features); }
Options& operator = (Proxy p) { m_encoding = p.o->m_encoding; m_features = std::move(p.o->m_features); p.o->Clear(); return *this; }
-
- void Apply(Lexer& lexer)
- {
- lexer.SetEncoding(*m_encoding);
+
+ void Apply(Lexer& lexer)
+ {
+ lexer.SetEncoding(*m_encoding);
for (auto&& i : m_features) {
lexer.AddFeature(i);
i = 0;
- }
- m_features.clear();
- }
-
- template<class ArgT>
- /*implicit*/ Options(const Option<ArgT>& option);
-
- const Pire::Encoding& Encoding() const { return *m_encoding; }
-
-private:
- const Pire::Encoding* m_encoding;
+ }
+ m_features.clear();
+ }
+
+ template<class ArgT>
+ /*implicit*/ Options(const Option<ArgT>& option);
+
+ const Pire::Encoding& Encoding() const { return *m_encoding; }
+
+private:
+ const Pire::Encoding* m_encoding;
TVector<Feature::Ptr> m_features;
-
- void Clear()
- {
- m_features.clear();
- }
-};
-
-template<class Arg>
-class Option {
-public:
- typedef Arg (*Ctor)();
-
- Option(Ctor ctor): m_ctor(ctor) {}
-
- friend Options operator | (Options::Proxy options, const Option<Arg>& self)
- {
- Options ret(options);
- ret.Add((*self.m_ctor)());
- return ret;
- }
-
- template<class Arg2>
- friend Options operator | (const Option<Arg2>& a, const Option<Arg>& b)
- {
- return Options() | a | b;
- }
-
-private:
- Ctor m_ctor;
-};
-
-
-extern const Option<const Encoding&> UTF8;
-extern const Option<const Encoding&> LATIN1;
-
+
+ void Clear()
+ {
+ m_features.clear();
+ }
+};
+
+template<class Arg>
+class Option {
+public:
+ typedef Arg (*Ctor)();
+
+ Option(Ctor ctor): m_ctor(ctor) {}
+
+ friend Options operator | (Options::Proxy options, const Option<Arg>& self)
+ {
+ Options ret(options);
+ ret.Add((*self.m_ctor)());
+ return ret;
+ }
+
+ template<class Arg2>
+ friend Options operator | (const Option<Arg2>& a, const Option<Arg>& b)
+ {
+ return Options() | a | b;
+ }
+
+private:
+ Ctor m_ctor;
+};
+
+
+extern const Option<const Encoding&> UTF8;
+extern const Option<const Encoding&> LATIN1;
+
extern const Option<Feature::Ptr> I;
extern const Option<Feature::Ptr> ANDNOT;
-
-
-class Regexp {
-public:
- template<class Pattern>
- explicit Regexp(Pattern pattern, Options options = Options())
- {
- Init(PatternBounds(pattern), options);
- }
-
- template<class Pattern, class Arg>
- Regexp(Pattern pattern, Option<Arg> option)
- {
- Init(PatternBounds(pattern), Options() | option);
- }
-
- explicit Regexp(Scanner sc): m_scanner(sc) {}
- explicit Regexp(SlowScanner ssc): m_slow(ssc) {}
-
+
+
+class Regexp {
+public:
+ template<class Pattern>
+ explicit Regexp(Pattern pattern, Options options = Options())
+ {
+ Init(PatternBounds(pattern), options);
+ }
+
+ template<class Pattern, class Arg>
+ Regexp(Pattern pattern, Option<Arg> option)
+ {
+ Init(PatternBounds(pattern), Options() | option);
+ }
+
+ explicit Regexp(Scanner sc): m_scanner(sc) {}
+ explicit Regexp(SlowScanner ssc): m_slow(ssc) {}
+
bool Matches(TStringBuf buf) const
- {
- if (!m_scanner.Empty())
+ {
+ if (!m_scanner.Empty())
return Runner(m_scanner).Begin().Run(buf).End();
- else
+ else
return Runner(m_slow).Begin().Run(buf).End();
- }
+ }
bool Matches(const char* begin, const char* end) const
- {
+ {
return Matches(TStringBuf(begin, end));
- }
-
- /// A helper class allowing '==~' operator for regexps
- class MatchProxy {
- public:
- MatchProxy(const Regexp& re): m_re(&re) {}
- friend bool operator == (const char* str, const MatchProxy& re) { return re.m_re->Matches(str); }
- friend bool operator == (const ystring& str, const MatchProxy& re) { return re.m_re->Matches(str); }
-
- private:
- const Regexp* m_re;
- };
- MatchProxy operator ~() const { return MatchProxy(*this); }
-
-private:
- Scanner m_scanner;
- SlowScanner m_slow;
-
- ypair<const char*, const char*> PatternBounds(const ystring& pattern)
- {
- static const char c = 0;
- return pattern.empty() ? ymake_pair(&c, &c) : ymake_pair(pattern.c_str(), pattern.c_str() + pattern.size());
- }
-
- ypair<const char*, const char*> PatternBounds(const char* pattern)
- {
- return ymake_pair(pattern, pattern + strlen(pattern));
- }
-
- void Init(ypair<const char*, const char*> rawPattern, Options options)
- {
+ }
+
+ /// A helper class allowing '==~' operator for regexps
+ class MatchProxy {
+ public:
+ MatchProxy(const Regexp& re): m_re(&re) {}
+ friend bool operator == (const char* str, const MatchProxy& re) { return re.m_re->Matches(str); }
+ friend bool operator == (const ystring& str, const MatchProxy& re) { return re.m_re->Matches(str); }
+
+ private:
+ const Regexp* m_re;
+ };
+ MatchProxy operator ~() const { return MatchProxy(*this); }
+
+private:
+ Scanner m_scanner;
+ SlowScanner m_slow;
+
+ ypair<const char*, const char*> PatternBounds(const ystring& pattern)
+ {
+ static const char c = 0;
+ return pattern.empty() ? ymake_pair(&c, &c) : ymake_pair(pattern.c_str(), pattern.c_str() + pattern.size());
+ }
+
+ ypair<const char*, const char*> PatternBounds(const char* pattern)
+ {
+ return ymake_pair(pattern, pattern + strlen(pattern));
+ }
+
+ void Init(ypair<const char*, const char*> rawPattern, Options options)
+ {
TVector<wchar32> pattern;
- options.Encoding().FromLocal(rawPattern.first, rawPattern.second, std::back_inserter(pattern));
-
- Lexer lexer(pattern);
- options.Apply(lexer);
- Fsm fsm = lexer.Parse();
-
- if (!BeginsWithCircumflex(fsm))
- fsm.PrependAnything();
- fsm.AppendAnything();
-
- if (fsm.Determine())
- m_scanner = fsm.Compile<Scanner>();
- else
- m_slow = fsm.Compile<SlowScanner>();
- }
-
- static bool BeginsWithCircumflex(const Fsm& fsm)
- {
- typedef Fsm::StatesSet Set;
+ options.Encoding().FromLocal(rawPattern.first, rawPattern.second, std::back_inserter(pattern));
+
+ Lexer lexer(pattern);
+ options.Apply(lexer);
+ Fsm fsm = lexer.Parse();
+
+ if (!BeginsWithCircumflex(fsm))
+ fsm.PrependAnything();
+ fsm.AppendAnything();
+
+ if (fsm.Determine())
+ m_scanner = fsm.Compile<Scanner>();
+ else
+ m_slow = fsm.Compile<SlowScanner>();
+ }
+
+ static bool BeginsWithCircumflex(const Fsm& fsm)
+ {
+ typedef Fsm::StatesSet Set;
TDeque<size_t> queue;
- BitSet handled(fsm.Size());
-
- queue.push_back(fsm.Initial());
- handled.Set(fsm.Initial());
-
- while (!queue.empty()) {
- Set s = fsm.Destinations(queue.front(), SpecialChar::Epsilon);
+ BitSet handled(fsm.Size());
+
+ queue.push_back(fsm.Initial());
+ handled.Set(fsm.Initial());
+
+ while (!queue.empty()) {
+ Set s = fsm.Destinations(queue.front(), SpecialChar::Epsilon);
for (auto&& i : s) {
if (!handled.Test(i)) {
handled.Set(i);
queue.push_back(i);
- }
- }
-
+ }
+ }
+
TSet<Char> lets = fsm.OutgoingLetters(queue.front());
- lets.erase(SpecialChar::Epsilon);
- lets.erase(SpecialChar::BeginMark);
- if (!lets.empty())
- return false;
-
- queue.pop_front();
- }
-
- return true;
- }
-};
-
-};
-
-#endif
+ lets.erase(SpecialChar::Epsilon);
+ lets.erase(SpecialChar::BeginMark);
+ if (!lets.empty())
+ return false;
+
+ queue.pop_front();
+ }
+
+ return true;
+ }
+};
+
+};
+
+#endif
diff --git a/contrib/libs/pire/pire/encoding.cpp b/contrib/libs/pire/pire/encoding.cpp
index 842e2b534d..37ea1225bb 100644
--- a/contrib/libs/pire/pire/encoding.cpp
+++ b/contrib/libs/pire/pire/encoding.cpp
@@ -1,134 +1,134 @@
-/*
- * encoding.cpp -- implementation of the encodings shipped with Pire.
+/*
+ * encoding.cpp -- implementation of the encodings shipped with Pire.
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <stdexcept>
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <stdexcept>
#include <util/charset/utf8.h>
-#include <utility>
+#include <utility>
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/stub/utf8.h>
#include <contrib/libs/pire/pire/stub/singleton.h>
-#include "encoding.h"
-#include "fsm.h"
-
-
-namespace Pire {
-
-namespace {
-
- class Latin1: public Encoding {
- public:
+#include "encoding.h"
+#include "fsm.h"
+
+
+namespace Pire {
+
+namespace {
+
+ class Latin1: public Encoding {
+ public:
Latin1() : Encoding() {}
- wchar32 FromLocal(const char*& begin, const char* end) const
- {
- if (begin == end)
- throw Error("EOF reached in Pire::Latin1::fromLocal()");
- else if (static_cast<unsigned char>(*begin) >= 0x80)
- throw Error("Pire::Latin1::fromLocal(): wrong character encountered (>=0x80)");
- else
- return (wchar32) *begin++;
- }
-
- ystring ToLocal(wchar32 ch) const
- {
- if (ch < 0x80)
- return ystring(1, (char) ch);
- else
- return ystring();
- }
-
- void AppendDot(Fsm& fsm) const { fsm.AppendDot(); }
- };
-
- namespace UtfRanges {
-
- static const size_t MaxLen = 4;
+ wchar32 FromLocal(const char*& begin, const char* end) const
+ {
+ if (begin == end)
+ throw Error("EOF reached in Pire::Latin1::fromLocal()");
+ else if (static_cast<unsigned char>(*begin) >= 0x80)
+ throw Error("Pire::Latin1::fromLocal(): wrong character encountered (>=0x80)");
+ else
+ return (wchar32) *begin++;
+ }
+
+ ystring ToLocal(wchar32 ch) const
+ {
+ if (ch < 0x80)
+ return ystring(1, (char) ch);
+ else
+ return ystring();
+ }
+
+ void AppendDot(Fsm& fsm) const { fsm.AppendDot(); }
+ };
+
+ namespace UtfRanges {
+
+ static const size_t MaxLen = 4;
static const size_t First[MaxLen][2] = {
{0x00, 0x80},
{0xC0, 0xE0},
{0xE0, 0xF0},
{0xF0, 0xF8}
- };
+ };
static const size_t Next[2] = {0x80, 0xC0};
- }
+ }
+
-
- class Utf8: public Encoding {
- public:
+ class Utf8: public Encoding {
+ public:
Utf8() : Encoding() {}
- wchar32 FromLocal(const char*& begin, const char* end) const
- {
- wchar32 rune;
- size_t len;
+ wchar32 FromLocal(const char*& begin, const char* end) const
+ {
+ wchar32 rune;
+ size_t len;
if (SafeReadUTF8Char(rune, len, reinterpret_cast<const unsigned char*>(begin), reinterpret_cast<const unsigned char*>(end)) != RECODE_OK)
- throw Error("Error reading UTF8 sequence");
- begin += len;
- return rune;
- }
-
- ystring ToLocal(wchar32 c) const
- {
+ throw Error("Error reading UTF8 sequence");
+ begin += len;
+ return rune;
+ }
+
+ ystring ToLocal(wchar32 c) const
+ {
ystring ret(UTF8RuneLenByUCS(c), ' ');
- size_t len;
- unsigned char* p = (unsigned char*) &*ret.begin();
+ size_t len;
+ unsigned char* p = (unsigned char*) &*ret.begin();
if (SafeWriteUTF8Char(c, len, p, p + ret.size()) != RECODE_OK)
Y_ASSERT(!"Pire::UTF8::toLocal(): Internal error");
- return ret;
- }
-
- void AppendDot(Fsm& fsm) const
- {
- size_t last = fsm.Resize(fsm.Size() + UtfRanges::MaxLen);
- for (size_t i = 0; i < UtfRanges::MaxLen; ++i)
+ return ret;
+ }
+
+ void AppendDot(Fsm& fsm) const
+ {
+ size_t last = fsm.Resize(fsm.Size() + UtfRanges::MaxLen);
+ for (size_t i = 0; i < UtfRanges::MaxLen; ++i)
for (size_t letter = UtfRanges::First[i][0]; letter < UtfRanges::First[i][1]; ++letter)
- fsm.ConnectFinal(fsm.Size() - i - 1, letter);
- for (size_t i = 0; i < UtfRanges::MaxLen - 1; ++i)
+ fsm.ConnectFinal(fsm.Size() - i - 1, letter);
+ for (size_t i = 0; i < UtfRanges::MaxLen - 1; ++i)
for (size_t letter = UtfRanges::Next[0]; letter < UtfRanges::Next[1]; ++letter)
- fsm.Connect(last + i, last + i + 1, letter);
- fsm.ClearFinal();
- fsm.SetFinal(fsm.Size() - 1, true);
- fsm.SetIsDetermined(false);
- }
- };
-}
-
-namespace Encodings {
-
+ fsm.Connect(last + i, last + i + 1, letter);
+ fsm.ClearFinal();
+ fsm.SetFinal(fsm.Size() - 1, true);
+ fsm.SetIsDetermined(false);
+ }
+ };
+}
+
+namespace Encodings {
+
const Encoding& Utf8()
{
static const Pire::Utf8 utf8;
return utf8;
}
-
+
const Encoding& Latin1()
{
static const Pire::Latin1 latin1;
return latin1;
}
-}
-
-}
+}
+
+}
diff --git a/contrib/libs/pire/pire/encoding.h b/contrib/libs/pire/pire/encoding.h
index b2c8bb9b41..b4117afa45 100644
--- a/contrib/libs/pire/pire/encoding.h
+++ b/contrib/libs/pire/pire/encoding.h
@@ -1,71 +1,71 @@
-/*
- * encoding.h -- the interface of Encoding.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * encoding.h -- the interface of Encoding.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_ENCODING_H
-#define PIRE_ENCODING_H
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_ENCODING_H
+#define PIRE_ENCODING_H
+
+
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/stub/stl.h>
-
-namespace Pire {
-
-class Fsm;
-
-class Encoding {
-public:
- virtual ~Encoding() {}
-
- /// Should read bytes from @p begin and return the corresponding Unicode
- /// character, advancing @p begin.
- virtual wchar32 FromLocal(const char*& begin, const char* end) const = 0;
-
- /// Opposite to FromLocal(), transforms given Unicode character into
- /// the string in the encoding.
- virtual ystring ToLocal(wchar32 c) const = 0;
-
- /// Given the FSM, should append the representation of a dot in the ecoding
- /// to that FSM.
- virtual void AppendDot(Fsm&) const = 0;
-
- template<class OutputIter>
- OutputIter FromLocal(const char* begin, const char* end, OutputIter iter) const
- {
- while (begin != end) {
- *iter = FromLocal(begin, end);
- ++iter;
- }
- return iter;
- }
-};
-
-namespace Encodings {
- const Encoding& Latin1();
- const Encoding& Utf8();
-
-};
-
-
-};
-
-#endif
+
+namespace Pire {
+
+class Fsm;
+
+class Encoding {
+public:
+ virtual ~Encoding() {}
+
+ /// Should read bytes from @p begin and return the corresponding Unicode
+ /// character, advancing @p begin.
+ virtual wchar32 FromLocal(const char*& begin, const char* end) const = 0;
+
+ /// Opposite to FromLocal(), transforms given Unicode character into
+ /// the string in the encoding.
+ virtual ystring ToLocal(wchar32 c) const = 0;
+
+ /// Given the FSM, should append the representation of a dot in the ecoding
+ /// to that FSM.
+ virtual void AppendDot(Fsm&) const = 0;
+
+ template<class OutputIter>
+ OutputIter FromLocal(const char* begin, const char* end, OutputIter iter) const
+ {
+ while (begin != end) {
+ *iter = FromLocal(begin, end);
+ ++iter;
+ }
+ return iter;
+ }
+};
+
+namespace Encodings {
+ const Encoding& Latin1();
+ const Encoding& Utf8();
+
+};
+
+
+};
+
+#endif
diff --git a/contrib/libs/pire/pire/extra.h b/contrib/libs/pire/pire/extra.h
index 2e4358acdd..373607838d 100644
--- a/contrib/libs/pire/pire/extra.h
+++ b/contrib/libs/pire/pire/extra.h
@@ -1,33 +1,33 @@
-/*
- * extra.h -- a single include file, which enables additional features,
- * unnecessary for major part of users.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * extra.h -- a single include file, which enables additional features,
+ * unnecessary for major part of users.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_EXTRA_H
-#define PIRE_EXTRA_H
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_EXTRA_H
+#define PIRE_EXTRA_H
+
+
#include <contrib/libs/pire/pire/extra/capture.h>
#include <contrib/libs/pire/pire/extra/count.h>
#include <contrib/libs/pire/pire/extra/glyphs.h>
-
-#endif
+
+#endif
diff --git a/contrib/libs/pire/pire/extra/capture.cpp b/contrib/libs/pire/pire/extra/capture.cpp
index fb4cdf6d81..ea9e287f00 100644
--- a/contrib/libs/pire/pire/extra/capture.cpp
+++ b/contrib/libs/pire/pire/extra/capture.cpp
@@ -1,48 +1,48 @@
-/*
- * capture.cpp -- a helper for compiling CapturingScanner
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * capture.cpp -- a helper for compiling CapturingScanner
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <stdexcept>
-
-#include "capture.h"
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <stdexcept>
-namespace Pire {
-
-namespace {
- class CaptureImpl: public Feature {
- public:
- CaptureImpl(size_t pos)
- : State(0)
- , Pos(pos)
- , Level(0)
+#include "capture.h"
+
+namespace Pire {
+
+namespace {
+ class CaptureImpl: public Feature {
+ public:
+ CaptureImpl(size_t pos)
+ : State(0)
+ , Pos(pos)
+ , Level(0)
, StateRepetition(NoRepetition)
- {}
-
+ {}
+
bool Accepts(wchar32 c) const { return c == '(' || c == '+' || c == '*' || c == '?' || c == '{'; }
- Term Lex()
- {
+ Term Lex()
+ {
wchar32 c = GetChar();
if (!Accepts(c))
- Error("How did we get here?!..");
+ Error("How did we get here?!..");
if (c != '(') {
wchar32 next = PeekChar();
if (next == '?') {
@@ -53,13 +53,13 @@ namespace {
StateRepetition = GreedyRepetition;
}
else if (State == 0 && Pos > 1)
- --Pos;
- else if (State == 0 && Pos == 1) {
- State = 1;
- Level = 0;
- } else if (State == 1) {
- ++Level;
- }
+ --Pos;
+ else if (State == 0 && Pos == 1) {
+ State = 1;
+ Level = 0;
+ } else if (State == 1) {
+ ++Level;
+ }
if (c == '(')
return Term(TokenTypes::Open);
else if (c == '+')
@@ -72,24 +72,24 @@ namespace {
UngetChar(c);
return Term(0);
}
- }
-
- void Parenthesized(Fsm& fsm)
- {
+ }
+
+ void Parenthesized(Fsm& fsm)
+ {
if (StateRepetition != NoRepetition) {
bool greedy = (StateRepetition == GreedyRepetition);
SetRepetitionMark(fsm, greedy);
StateRepetition = NoRepetition;
} else if (State == 1 && Level == 0) {
- SetCaptureMark(fsm);
- State = 2;
- } else if (State == 1 && Level > 0)
- --Level;
- }
- private:
- unsigned State;
- size_t Pos;
- size_t Level;
+ SetCaptureMark(fsm);
+ State = 2;
+ } else if (State == 1 && Level > 0)
+ --Level;
+ }
+ private:
+ unsigned State;
+ size_t Pos;
+ size_t Level;
RepetitionTypes StateRepetition;
void SetRepetitionMark(Fsm& fsm, bool greedy)
@@ -108,28 +108,28 @@ namespace {
fsm.SetIsDetermined(false);
}
- void SetCaptureMark(Fsm& fsm)
- {
- fsm.Resize(fsm.Size() + 2);
- fsm.Connect(fsm.Size() - 2, fsm.Initial());
- fsm.ConnectFinal(fsm.Size() - 1);
-
- fsm.SetOutput(fsm.Size() - 2, fsm.Initial(), CapturingScanner::BeginCapture);
- for (size_t state = 0; state < fsm.Size() - 2; ++state)
- if (fsm.IsFinal(state))
- fsm.SetOutput(state, fsm.Size() - 1, CapturingScanner::EndCapture);
-
- fsm.SetInitial(fsm.Size() - 2);
- fsm.ClearFinal();
- fsm.SetFinal(fsm.Size() - 1, true);
- fsm.SetIsDetermined(false);
- }
-
- void FinishBuild() {}
- };
-}
-
-namespace Features {
+ void SetCaptureMark(Fsm& fsm)
+ {
+ fsm.Resize(fsm.Size() + 2);
+ fsm.Connect(fsm.Size() - 2, fsm.Initial());
+ fsm.ConnectFinal(fsm.Size() - 1);
+
+ fsm.SetOutput(fsm.Size() - 2, fsm.Initial(), CapturingScanner::BeginCapture);
+ for (size_t state = 0; state < fsm.Size() - 2; ++state)
+ if (fsm.IsFinal(state))
+ fsm.SetOutput(state, fsm.Size() - 1, CapturingScanner::EndCapture);
+
+ fsm.SetInitial(fsm.Size() - 2);
+ fsm.ClearFinal();
+ fsm.SetFinal(fsm.Size() - 1, true);
+ fsm.SetIsDetermined(false);
+ }
+
+ void FinishBuild() {}
+ };
+}
+
+namespace Features {
Feature::Ptr Capture(size_t pos) { return Feature::Ptr(new CaptureImpl(pos)); }
-};
-}
+};
+}
diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h
index 8399914a67..1c7ada9b56 100644
--- a/contrib/libs/pire/pire/extra/capture.h
+++ b/contrib/libs/pire/pire/extra/capture.h
@@ -1,30 +1,30 @@
-/*
- * capture.h -- definition of CapturingScanner
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * capture.h -- definition of CapturingScanner
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_EXTRA_CAPTURE_H
-#define PIRE_EXTRA_CAPTURE_H
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_EXTRA_CAPTURE_H
+#define PIRE_EXTRA_CAPTURE_H
+
+
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/scanners/loaded.h>
#include <contrib/libs/pire/pire/scanners/multi.h>
@@ -32,77 +32,77 @@
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/re_lexer.h>
#include <contrib/libs/pire/pire/run.h>
-
+
#include <array>
-namespace Pire {
-
-/**
-* A capturing scanner.
-* Requires source FSM to be deterministic, matches input string
-* against a single regexp (taking O(strlen(str)) time) and
-* captures a substring between a single pair of parentheses.
-*
-* Requires regexp pattern to satisfy certain conditions
-* (I still do not know exactly what they are :) )
-*/
-class CapturingScanner: public LoadedScanner {
-public:
- enum {
- NoAction = 0,
- BeginCapture = 1,
- EndCapture = 2,
-
- FinalFlag = 1
- };
-
- class State {
- public:
- bool Captured() const { return (m_begin != npos) && (m_end != npos); }
- size_t Begin() const { return m_begin; }
- size_t End() const { return m_end; }
- private:
- static const size_t npos = static_cast<size_t>(-1);
- size_t m_state;
- size_t m_begin;
- size_t m_end;
- size_t m_counter;
- friend class CapturingScanner;
-
-#ifdef PIRE_DEBUG
- friend yostream& operator << (yostream& s, const State& state)
- {
- s << state.m_state;
- if (state.m_begin != State::npos || state.m_end != npos) {
- s << " [";
- if (state.m_begin != State::npos)
- s << 'b';
- if (state.m_end != State::npos)
- s << 'e';
- s << "]";
- }
- return s;
- }
-#endif
- };
-
- void Initialize(State& state) const
- {
- state.m_state = m.initial;
- state.m_begin = state.m_end = State::npos;
- state.m_counter = 0;
- }
-
- void TakeAction(State& s, Action a) const
- {
+namespace Pire {
+
+/**
+* A capturing scanner.
+* Requires source FSM to be deterministic, matches input string
+* against a single regexp (taking O(strlen(str)) time) and
+* captures a substring between a single pair of parentheses.
+*
+* Requires regexp pattern to satisfy certain conditions
+* (I still do not know exactly what they are :) )
+*/
+class CapturingScanner: public LoadedScanner {
+public:
+ enum {
+ NoAction = 0,
+ BeginCapture = 1,
+ EndCapture = 2,
+
+ FinalFlag = 1
+ };
+
+ class State {
+ public:
+ bool Captured() const { return (m_begin != npos) && (m_end != npos); }
+ size_t Begin() const { return m_begin; }
+ size_t End() const { return m_end; }
+ private:
+ static const size_t npos = static_cast<size_t>(-1);
+ size_t m_state;
+ size_t m_begin;
+ size_t m_end;
+ size_t m_counter;
+ friend class CapturingScanner;
+
+#ifdef PIRE_DEBUG
+ friend yostream& operator << (yostream& s, const State& state)
+ {
+ s << state.m_state;
+ if (state.m_begin != State::npos || state.m_end != npos) {
+ s << " [";
+ if (state.m_begin != State::npos)
+ s << 'b';
+ if (state.m_end != State::npos)
+ s << 'e';
+ s << "]";
+ }
+ return s;
+ }
+#endif
+ };
+
+ void Initialize(State& state) const
+ {
+ state.m_state = m.initial;
+ state.m_begin = state.m_end = State::npos;
+ state.m_counter = 0;
+ }
+
+ void TakeAction(State& s, Action a) const
+ {
if ((a & BeginCapture) && !s.Captured())
s.m_begin = s.m_counter - 1;
else if (a & EndCapture) {
if (s.m_end == State::npos)
s.m_end = s.m_counter - 1;
}
- }
-
+ }
+
Char Translate(Char ch) const
{
return m_letters[static_cast<size_t>(ch)];
@@ -117,47 +117,47 @@ public:
return x.action;
}
- Action Next(State& s, Char c) const
- {
+ Action Next(State& s, Char c) const
+ {
return NextTranslated(s, Translate(c));
- }
-
- Action Next(const State& current, State& n, Char c) const
- {
- n = current;
- return Next(n, c);
- }
-
- bool CanStop(const State& s) const
- {
- return Final(s);
- }
-
- bool Final(const State& s) const { return m_tags[(reinterpret_cast<Transition*>(s.m_state) - m_jumps) / m.lettersCount] & FinalFlag; }
-
- bool Dead(const State&) const { return false; }
-
- CapturingScanner() {}
- CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {}
+ }
+
+ Action Next(const State& current, State& n, Char c) const
+ {
+ n = current;
+ return Next(n, c);
+ }
+
+ bool CanStop(const State& s) const
+ {
+ return Final(s);
+ }
+
+ bool Final(const State& s) const { return m_tags[(reinterpret_cast<Transition*>(s.m_state) - m_jumps) / m.lettersCount] & FinalFlag; }
+
+ bool Dead(const State&) const { return false; }
+
+ CapturingScanner() {}
+ CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {}
explicit CapturingScanner(Fsm& fsm, size_t distance = 0)
- {
+ {
if (distance) {
fsm = CreateApproxFsm(fsm, distance);
}
- fsm.Canonize();
- Init(fsm.Size(), fsm.Letters(), fsm.Initial());
- BuildScanner(fsm, *this);
- }
-
- void Swap(CapturingScanner& s) { LoadedScanner::Swap(s); }
- CapturingScanner& operator = (const CapturingScanner& s) { CapturingScanner(s).Swap(*this); return *this; }
-
- size_t StateIndex(const State& s) const { return StateIdx(s.m_state); }
-
-private:
-
- friend void BuildScanner<CapturingScanner>(const Fsm&, CapturingScanner&);
-};
+ fsm.Canonize();
+ Init(fsm.Size(), fsm.Letters(), fsm.Initial());
+ BuildScanner(fsm, *this);
+ }
+
+ void Swap(CapturingScanner& s) { LoadedScanner::Swap(s); }
+ CapturingScanner& operator = (const CapturingScanner& s) { CapturingScanner(s).Swap(*this); return *this; }
+
+ size_t StateIndex(const State& s) const { return StateIdx(s.m_state); }
+
+private:
+
+ friend void BuildScanner<CapturingScanner>(const Fsm&, CapturingScanner&);
+};
enum RepetitionTypes { // They are sorted by their priorities
NonGreedyRepetition,
@@ -582,11 +582,11 @@ public:
}
};
-namespace Features {
+namespace Features {
Feature::Ptr Capture(size_t pos);
-}
-
-}
-
-
-#endif
+}
+
+}
+
+
+#endif
diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp
index 468ff61d92..f79dba506c 100644
--- a/contrib/libs/pire/pire/extra/count.cpp
+++ b/contrib/libs/pire/pire/extra/count.cpp
@@ -1,26 +1,26 @@
-/*
- * count.cpp -- CountingScanner compiling routine
+/*
+ * count.cpp -- CountingScanner compiling routine
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
#include "count.h"
#include <contrib/libs/pire/pire/fsm.h>
@@ -31,8 +31,8 @@
#include <contrib/libs/pire/pire/stub/stl.h>
#include <tuple>
-
-namespace Pire {
+
+namespace Pire {
namespace Impl {
@@ -740,103 +740,103 @@ void CountingFsm::SwapTaskOutputs(CountingFsmTask& task) {
}
-namespace {
- Pire::Fsm FsmForDot() { Pire::Fsm f; f.AppendDot(); return f; }
- Pire::Fsm FsmForChar(Pire::Char c) { Pire::Fsm f; f.AppendSpecial(c); return f; }
-}
-
-CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep)
-{
- Fsm res = re;
- res.Surround();
- Fsm sep_re = ((sep & ~res) /* | Fsm()*/) + re;
- sep_re.Determine();
-
- Fsm dup = sep_re;
- for (size_t i = 0; i < dup.Size(); ++i)
- dup.SetTag(i, Matched);
- size_t oldsize = sep_re.Size();
- sep_re.Import(dup);
- for (Fsm::FinalTable::const_iterator i = sep_re.Finals().begin(), ie = sep_re.Finals().end(); i != ie; ++i)
- if (*i < oldsize)
- sep_re.Connect(*i, oldsize + *i);
-
- sep_re |= (FsmForDot() | FsmForChar(Pire::BeginMark) | FsmForChar(Pire::EndMark));
-
- // Make a full Cartesian product of two sep_res
- sep_re.Determine();
- sep_re.Unsparse();
+namespace {
+ Pire::Fsm FsmForDot() { Pire::Fsm f; f.AppendDot(); return f; }
+ Pire::Fsm FsmForChar(Pire::Char c) { Pire::Fsm f; f.AppendSpecial(c); return f; }
+}
+
+CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep)
+{
+ Fsm res = re;
+ res.Surround();
+ Fsm sep_re = ((sep & ~res) /* | Fsm()*/) + re;
+ sep_re.Determine();
+
+ Fsm dup = sep_re;
+ for (size_t i = 0; i < dup.Size(); ++i)
+ dup.SetTag(i, Matched);
+ size_t oldsize = sep_re.Size();
+ sep_re.Import(dup);
+ for (Fsm::FinalTable::const_iterator i = sep_re.Finals().begin(), ie = sep_re.Finals().end(); i != ie; ++i)
+ if (*i < oldsize)
+ sep_re.Connect(*i, oldsize + *i);
+
+ sep_re |= (FsmForDot() | FsmForChar(Pire::BeginMark) | FsmForChar(Pire::EndMark));
+
+ // Make a full Cartesian product of two sep_res
+ sep_re.Determine();
+ sep_re.Unsparse();
TSet<size_t> dead = sep_re.DeadStates();
-
- PIRE_IFDEBUG(Cdbg << "=== Original FSM ===" << Endl << sep_re << ">>> " << sep_re.Size() << " states, dead: [" << Join(dead.begin(), dead.end(), ", ") << "]" << Endl);
-
- Fsm sq;
-
- typedef ypair<size_t, size_t> NewState;
+
+ PIRE_IFDEBUG(Cdbg << "=== Original FSM ===" << Endl << sep_re << ">>> " << sep_re.Size() << " states, dead: [" << Join(dead.begin(), dead.end(), ", ") << "]" << Endl);
+
+ Fsm sq;
+
+ typedef ypair<size_t, size_t> NewState;
TVector<NewState> states;
TMap<NewState, size_t> invstates;
-
- states.push_back(NewState(sep_re.Initial(), sep_re.Initial()));
- invstates.insert(ymake_pair(states.back(), states.size() - 1));
-
- // TODO: this loop reminds me a general determination task...
- for (size_t curstate = 0; curstate < states.size(); ++curstate) {
-
- unsigned long tag = sep_re.Tag(states[curstate].first);
- if (tag)
- sq.SetTag(curstate, tag);
- sq.SetFinal(curstate, sep_re.IsFinal(states[curstate].first));
-
- PIRE_IFDEBUG(Cdbg << "State " << curstate << " = (" << states[curstate].first << ", " << states[curstate].second << ")" << Endl);
- for (Fsm::LettersTbl::ConstIterator lit = sep_re.Letters().Begin(), lie = sep_re.Letters().End(); lit != lie; ++lit) {
-
- Char letter = lit->first;
-
- const Fsm::StatesSet& mr = sep_re.Destinations(states[curstate].first, letter);
- const Fsm::StatesSet& br = sep_re.Destinations(states[curstate].second, letter);
-
- if (mr.size() != 1)
+
+ states.push_back(NewState(sep_re.Initial(), sep_re.Initial()));
+ invstates.insert(ymake_pair(states.back(), states.size() - 1));
+
+ // TODO: this loop reminds me a general determination task...
+ for (size_t curstate = 0; curstate < states.size(); ++curstate) {
+
+ unsigned long tag = sep_re.Tag(states[curstate].first);
+ if (tag)
+ sq.SetTag(curstate, tag);
+ sq.SetFinal(curstate, sep_re.IsFinal(states[curstate].first));
+
+ PIRE_IFDEBUG(Cdbg << "State " << curstate << " = (" << states[curstate].first << ", " << states[curstate].second << ")" << Endl);
+ for (Fsm::LettersTbl::ConstIterator lit = sep_re.Letters().Begin(), lie = sep_re.Letters().End(); lit != lie; ++lit) {
+
+ Char letter = lit->first;
+
+ const Fsm::StatesSet& mr = sep_re.Destinations(states[curstate].first, letter);
+ const Fsm::StatesSet& br = sep_re.Destinations(states[curstate].second, letter);
+
+ if (mr.size() != 1)
Y_ASSERT(!"Wrong transition size for main");
- if (br.size() != 1)
+ if (br.size() != 1)
Y_ASSERT(!"Wrong transition size for backup");
-
- NewState ns(*mr.begin(), *br.begin());
+
+ NewState ns(*mr.begin(), *br.begin());
PIRE_IFDEBUG(NewState savedNs = ns);
- unsigned long outputs = 0;
-
- PIRE_IFDEBUG(ystring dbgout);
- if (dead.find(ns.first) != dead.end()) {
- PIRE_IFDEBUG(dbgout = ((sep_re.Tag(ns.first) & Matched) ? ", ++cur" : ", max <- cur"));
- outputs = DeadFlag | (sep_re.Tag(ns.first) & Matched);
- ns.first = ns.second;
- }
- if (sep_re.IsFinal(ns.first) || (sep_re.IsFinal(ns.second) && !(sep_re.Tag(ns.first) & Matched)))
- ns.second = sep_re.Initial();
-
- PIRE_IFDEBUG(if (ns != savedNs) Cdbg << "Diverted transition to (" << savedNs.first << ", " << savedNs.second << ") on " << (char) letter << " to (" << ns.first << ", " << ns.second << ")" << dbgout << Endl);
-
+ unsigned long outputs = 0;
+
+ PIRE_IFDEBUG(ystring dbgout);
+ if (dead.find(ns.first) != dead.end()) {
+ PIRE_IFDEBUG(dbgout = ((sep_re.Tag(ns.first) & Matched) ? ", ++cur" : ", max <- cur"));
+ outputs = DeadFlag | (sep_re.Tag(ns.first) & Matched);
+ ns.first = ns.second;
+ }
+ if (sep_re.IsFinal(ns.first) || (sep_re.IsFinal(ns.second) && !(sep_re.Tag(ns.first) & Matched)))
+ ns.second = sep_re.Initial();
+
+ PIRE_IFDEBUG(if (ns != savedNs) Cdbg << "Diverted transition to (" << savedNs.first << ", " << savedNs.second << ") on " << (char) letter << " to (" << ns.first << ", " << ns.second << ")" << dbgout << Endl);
+
TMap<NewState, size_t>::iterator nsi = invstates.find(ns);
- if (nsi == invstates.end()) {
- PIRE_IFDEBUG(Cdbg << "New state " << states.size() << " = (" << ns.first << ", " << ns.second << ")" << Endl);
- states.push_back(ns);
- nsi = invstates.insert(ymake_pair(states.back(), states.size() - 1)).first;
- sq.Resize(states.size());
- }
-
+ if (nsi == invstates.end()) {
+ PIRE_IFDEBUG(Cdbg << "New state " << states.size() << " = (" << ns.first << ", " << ns.second << ")" << Endl);
+ states.push_back(ns);
+ nsi = invstates.insert(ymake_pair(states.back(), states.size() - 1)).first;
+ sq.Resize(states.size());
+ }
+
for (TVector<Char>::const_iterator li = lit->second.second.begin(), le = lit->second.second.end(); li != le; ++li)
- sq.Connect(curstate, nsi->second, *li);
- if (outputs)
- sq.SetOutput(curstate, nsi->second, outputs);
- }
- }
-
- sq.Determine();
-
- PIRE_IFDEBUG(Cdbg << "=== FSM ===" << Endl << sq << Endl);
- Init(sq.Size(), sq.Letters(), sq.Initial(), 1);
- BuildScanner(sq, *this);
-}
-
+ sq.Connect(curstate, nsi->second, *li);
+ if (outputs)
+ sq.SetOutput(curstate, nsi->second, outputs);
+ }
+ }
+
+ sq.Determine();
+
+ PIRE_IFDEBUG(Cdbg << "=== FSM ===" << Endl << sq << Endl);
+ Init(sq.Size(), sq.Letters(), sq.Initial(), 1);
+ BuildScanner(sq, *this);
+}
+
namespace Impl {
template <class AdvancedScanner>
AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) {
@@ -848,7 +848,7 @@ AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool*
if (simple) {
*simple = countingFsm.Simple();
}
-
+
const auto& determined = countingFsm.Determined();
const auto& letters = countingFsm.Letters();
@@ -877,11 +877,11 @@ NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm&
}
-namespace Impl {
-
+namespace Impl {
+
template<class Scanner>
class CountingScannerGlueTask: public ScannerGlueCommon<Scanner> {
-public:
+public:
using typename ScannerGlueCommon<Scanner>::State;
using TAction = typename Scanner::Action;
using InternalState = typename Scanner::InternalState;
@@ -889,36 +889,36 @@ public:
CountingScannerGlueTask(const Scanner& lhs, const Scanner& rhs)
: ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters))
- {
- }
+ {
+ }
void AcceptStates(const TVector<State>& states)
- {
- States = states;
+ {
+ States = states;
this->SetSc(THolder<Scanner>(new Scanner));
this->Sc().Init(states.size(), this->Letters(), 0, this->Lhs().RegexpsCount() + this->Rhs().RegexpsCount());
- for (size_t i = 0; i < states.size(); ++i)
+ for (size_t i = 0; i < states.size(); ++i)
this->Sc().SetTag(i, this->Lhs().m_tags[this->Lhs().StateIdx(states[i].first)] | (this->Rhs().m_tags[this->Rhs().StateIdx(states[i].second)] << 3));
- }
+ }
- void Connect(size_t from, size_t to, Char letter)
- {
+ void Connect(size_t from, size_t to, Char letter)
+ {
this->Sc().SetJump(from, letter, to,
Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount()));
- }
+ }
protected:
TVector<State> States;
TAction Action(const Scanner& sc, InternalState state, Char letter) const
- {
+ {
size_t state_index = sc.StateIdx(state);
size_t transition_index = sc.TransitionIndex(state_index, letter);
const auto& tr = sc.m_jumps[transition_index];
return tr.action;
- }
-};
-
+ }
+};
+
class NoGlueLimitCountingScannerGlueTask : public CountingScannerGlueTask<NoGlueLimitCountingScanner> {
public:
using ActionIndex = NoGlueLimitCountingScanner::ActionIndex;
@@ -980,18 +980,18 @@ private:
};
-}
+}
-CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */)
-{
+CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */)
+{
if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
return CountingScanner();
}
static constexpr size_t DefMaxSize = 250000;
Impl::CountingScannerGlueTask<CountingScanner> task(lhs, rhs);
- return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
-}
-
+ return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
+}
+
AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScanner& lhs, const AdvancedCountingScanner& rhs, size_t maxSize /* = 0 */)
{
if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
@@ -1000,7 +1000,7 @@ AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScan
static constexpr size_t DefMaxSize = 250000;
Impl::CountingScannerGlueTask<AdvancedCountingScanner> task(lhs, rhs);
return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
-}
+}
NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */)
{
diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h
index bd1526b98d..deaa4c2314 100644
--- a/contrib/libs/pire/pire/extra/count.h
+++ b/contrib/libs/pire/pire/extra/count.h
@@ -1,38 +1,38 @@
-/*
- * count.h -- definition of the counting scanner
+/*
+ * count.h -- definition of the counting scanner
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_EXTRA_COUNT_H
-#define PIRE_EXTRA_COUNT_H
-
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_EXTRA_COUNT_H
+#define PIRE_EXTRA_COUNT_H
+
#include <contrib/libs/pire/pire/scanners/loaded.h>
#include <contrib/libs/pire/pire/fsm.h>
-
+
#include <algorithm>
-namespace Pire {
-class Fsm;
+namespace Pire {
+class Fsm;
-namespace Impl {
+namespace Impl {
template<class T>
class ScannerGlueCommon;
@@ -43,8 +43,8 @@ namespace Impl {
template <class AdvancedScanner>
AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple);
-};
-
+};
+
template<size_t I>
class IncrementPerformer {
public:
@@ -110,38 +110,38 @@ public:
}
};
-/**
- * A scanner which counts occurences of the
- * given regexp separated by another regexp
- * in input text.
- */
+/**
+ * A scanner which counts occurences of the
+ * given regexp separated by another regexp
+ * in input text.
+ */
template<class DerivedScanner, class State>
class BaseCountingScanner: public LoadedScanner {
-public:
- enum {
- IncrementAction = 1,
- ResetAction = 2,
-
- FinalFlag = 0,
- DeadFlag = 1,
- };
-
- void Initialize(State& state) const
- {
- state.m_state = m.initial;
- memset(&state.m_current, 0, sizeof(state.m_current));
- memset(&state.m_total, 0, sizeof(state.m_total));
- state.m_updatedMask = 0;
- }
-
+public:
+ enum {
+ IncrementAction = 1,
+ ResetAction = 2,
+
+ FinalFlag = 0,
+ DeadFlag = 1,
+ };
+
+ void Initialize(State& state) const
+ {
+ state.m_state = m.initial;
+ memset(&state.m_current, 0, sizeof(state.m_current));
+ memset(&state.m_total, 0, sizeof(state.m_total));
+ state.m_updatedMask = 0;
+ }
+
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
void TakeAction(State& s, Action a) const
{
static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a);
}
- bool CanStop(const State&) const { return false; }
-
+ bool CanStop(const State&) const { return false; }
+
Char Translate(Char ch) const
{
return m_letters[static_cast<size_t>(ch)];
@@ -154,55 +154,55 @@ public:
return x.action;
}
- Action Next(State& s, Char c) const
- {
+ Action Next(State& s, Char c) const
+ {
return NextTranslated(s, Translate(c));
- }
-
- Action Next(const State& current, State& n, Char c) const
- {
- n = current;
- return Next(n, c);
- }
-
- bool Final(const State& /*state*/) const { return false; }
-
- bool Dead(const State&) const { return false; }
-
+ }
+
+ Action Next(const State& current, State& n, Char c) const
+ {
+ n = current;
+ return Next(n, c);
+ }
+
+ bool Final(const State& /*state*/) const { return false; }
+
+ bool Dead(const State&) const { return false; }
+
using LoadedScanner::Swap;
- size_t StateIndex(const State& s) const { return StateIdx(s.m_state); }
-
+ size_t StateIndex(const State& s) const { return StateIdx(s.m_state); }
+
protected:
- using LoadedScanner::Init;
+ using LoadedScanner::Init;
using LoadedScanner::InternalState;
-
+
template<size_t ActualReCount>
- void PerformIncrement(State& s, Action mask) const
- {
- if (mask) {
+ void PerformIncrement(State& s, Action mask) const
+ {
+ if (mask) {
IncrementPerformer<ActualReCount>::Do(s, mask);
- s.m_updatedMask |= ((size_t)mask) << MAX_RE_COUNT;
- }
- }
-
+ s.m_updatedMask |= ((size_t)mask) << MAX_RE_COUNT;
+ }
+ }
+
template<size_t ActualReCount>
- void PerformReset(State& s, Action mask) const
- {
- mask &= s.m_updatedMask;
- if (mask) {
+ void PerformReset(State& s, Action mask) const
+ {
+ mask &= s.m_updatedMask;
+ if (mask) {
ResetPerformer<ActualReCount>::Do(s, mask);
s.m_updatedMask &= (Action)~mask;
- }
- }
-
- void Next(InternalState& s, Char c) const
- {
+ }
+ }
+
+ void Next(InternalState& s, Char c) const
+ {
Transition x = reinterpret_cast<const Transition*>(s)[Translate(c)];
- s += SignExtend(x.shift);
- }
+ s += SignExtend(x.shift);
+ }
};
-
+
template <size_t MAX_RE_COUNT>
class CountingState {
public:
@@ -258,21 +258,21 @@ public:
}
private:
- Action RemapAction(Action action)
- {
- if (action == (Matched | DeadFlag))
- return 1;
- else if (action == DeadFlag)
- return 1 << MAX_RE_COUNT;
- else
- return 0;
- }
-
- friend void BuildScanner<CountingScanner>(const Fsm&, CountingScanner&);
- friend class Impl::ScannerGlueCommon<CountingScanner>;
+ Action RemapAction(Action action)
+ {
+ if (action == (Matched | DeadFlag))
+ return 1;
+ else if (action == DeadFlag)
+ return 1 << MAX_RE_COUNT;
+ else
+ return 0;
+ }
+
+ friend void BuildScanner<CountingScanner>(const Fsm&, CountingScanner&);
+ friend class Impl::ScannerGlueCommon<CountingScanner>;
friend class Impl::CountingScannerGlueTask<CountingScanner>;
-};
-
+};
+
class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> {
public:
using State = CountingState<MAX_RE_COUNT>;
@@ -329,10 +329,10 @@ public:
++m_current[regexp_id];
m_total[regexp_id] = ymax(m_total[regexp_id], m_current[regexp_id]);
}
-
+
template<size_t I>
friend class IncrementPerformer;
-
+
template<size_t I>
friend class ResetPerformer;
@@ -352,7 +352,7 @@ private:
s << state.m_current[i] << '/' << state.m_total[i] << ' ';
return s << ')';
}
-#endif
+#endif
};
diff --git a/contrib/libs/pire/pire/extra/glyphs.cpp b/contrib/libs/pire/pire/extra/glyphs.cpp
index a14d2baa56..9bf7d1bd65 100644
--- a/contrib/libs/pire/pire/extra/glyphs.cpp
+++ b/contrib/libs/pire/pire/extra/glyphs.cpp
@@ -1,144 +1,144 @@
-/*
- * glyphs.cpp -- implementation for the GlueSimilarGlyphs feature.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * glyphs.cpp -- implementation for the GlueSimilarGlyphs feature.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <algorithm>
-#include <map>
-#include <list>
-#include <set>
-#include <vector>
-#include <utility>
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <algorithm>
+#include <map>
+#include <list>
+#include <set>
+#include <vector>
+#include <utility>
+
#include <contrib/libs/pire/pire/stub/singleton.h>
#include <contrib/libs/pire/pire/stub/noncopyable.h>
#include <contrib/libs/pire/pire/stub/utf8.h>
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/re_lexer.h>
-namespace Pire {
-
-namespace {
-
- /*
- * A class providing a function which returns a character
- * whose glyph resembles that of given char, if any;
- * otherwise returns given char itself.
- */
- class GlyphTable {
- private:
+namespace Pire {
+
+namespace {
+
+ /*
+ * A class providing a function which returns a character
+ * whose glyph resembles that of given char, if any;
+ * otherwise returns given char itself.
+ */
+ class GlyphTable {
+ private:
TList< TVector<wchar32> > m_classes;
TMap<wchar32, TVector<wchar32>*> m_map;
-
- struct GlyphClass {
+
+ struct GlyphClass {
TVector<wchar32>* m_class;
TMap<wchar32, TVector<wchar32>*> *m_map;
-
- GlyphClass& operator << (wchar32 c)
- {
- m_class->push_back(c);
- m_map->insert(ymake_pair(c, m_class));
- return *this;
- }
- };
-
- GlyphClass Class()
- {
- GlyphClass cl;
+
+ GlyphClass& operator << (wchar32 c)
+ {
+ m_class->push_back(c);
+ m_map->insert(ymake_pair(c, m_class));
+ return *this;
+ }
+ };
+
+ GlyphClass Class()
+ {
+ GlyphClass cl;
m_classes.push_back(TVector<wchar32>());
- cl.m_class = &m_classes.back();
- cl.m_map = &m_map;
- return cl;
- }
-
- public:
-
+ cl.m_class = &m_classes.back();
+ cl.m_map = &m_map;
+ return cl;
+ }
+
+ public:
+
const TVector<wchar32>& Klass(wchar32 x) const
- {
+ {
TMap<wchar32, TVector<wchar32>*>::const_iterator i = m_map.find(x);
- if (i != m_map.end())
- return *i->second;
- else
+ if (i != m_map.end())
+ return *i->second;
+ else
return DefaultValue< TVector<wchar32> >();
- }
-
- GlyphTable()
- {
- Class() << 'A' << 0x0410;
- Class() << 'B' << 0x0412;
- Class() << 'C' << 0x0421;
- Class() << 'E' << 0x0415 << 0x0401;
- Class() << 'H' << 0x041D;
- Class() << 'K' << 0x041A;
- Class() << 'M' << 0x041C;
- Class() << 'O' << 0x041E;
- Class() << 'P' << 0x0420;
- Class() << 'T' << 0x0422;
- Class() << 'X' << 0x0425;
-
- Class() << 'a' << 0x0430;
- Class() << 'c' << 0x0441;
- Class() << 'e' << 0x0435 << 0x0451;
- Class() << 'm' << 0x0442;
- Class() << 'o' << 0x043E;
- Class() << 'p' << 0x0440;
- Class() << 'u' << 0x0438;
- Class() << 'x' << 0x0445;
- Class() << 'y' << 0x0443;
- }
- };
-
- class GlueSimilarGlyphsImpl: public Feature {
- public:
- GlueSimilarGlyphsImpl(): m_table(Singleton<GlyphTable>()) {}
- int Priority() const { return 9; }
-
- void Alter(Term& t)
- {
- if (t.Value().IsA<Term::CharacterRange>()) {
- const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>();
- typedef Term::CharacterRange::first_type CharSet;
- const CharSet& old = range.first;
- CharSet altered;
+ }
+
+ GlyphTable()
+ {
+ Class() << 'A' << 0x0410;
+ Class() << 'B' << 0x0412;
+ Class() << 'C' << 0x0421;
+ Class() << 'E' << 0x0415 << 0x0401;
+ Class() << 'H' << 0x041D;
+ Class() << 'K' << 0x041A;
+ Class() << 'M' << 0x041C;
+ Class() << 'O' << 0x041E;
+ Class() << 'P' << 0x0420;
+ Class() << 'T' << 0x0422;
+ Class() << 'X' << 0x0425;
+
+ Class() << 'a' << 0x0430;
+ Class() << 'c' << 0x0441;
+ Class() << 'e' << 0x0435 << 0x0451;
+ Class() << 'm' << 0x0442;
+ Class() << 'o' << 0x043E;
+ Class() << 'p' << 0x0440;
+ Class() << 'u' << 0x0438;
+ Class() << 'x' << 0x0445;
+ Class() << 'y' << 0x0443;
+ }
+ };
+
+ class GlueSimilarGlyphsImpl: public Feature {
+ public:
+ GlueSimilarGlyphsImpl(): m_table(Singleton<GlyphTable>()) {}
+ int Priority() const { return 9; }
+
+ void Alter(Term& t)
+ {
+ if (t.Value().IsA<Term::CharacterRange>()) {
+ const Term::CharacterRange& range = t.Value().As<Term::CharacterRange>();
+ typedef Term::CharacterRange::first_type CharSet;
+ const CharSet& old = range.first;
+ CharSet altered;
for (auto&& i : old) {
const TVector<wchar32>* klass = 0;
if (i.size() == 1 && !(klass = &m_table->Klass(i[0]))->empty())
for (auto&& j : *klass)
altered.insert(Term::String(1, j));
- else
+ else
altered.insert(i);
- }
-
- t = Term(t.Type(), Term::CharacterRange(altered, range.second));
- }
- }
-
- private:
- GlyphTable* m_table;
- };
-}
-
-namespace Features {
+ }
+
+ t = Term(t.Type(), Term::CharacterRange(altered, range.second));
+ }
+ }
+
+ private:
+ GlyphTable* m_table;
+ };
+}
+
+namespace Features {
Feature::Ptr GlueSimilarGlyphs() { return Feature::Ptr(new GlueSimilarGlyphsImpl); }
-}
-
-}
-
+}
+
+}
+
diff --git a/contrib/libs/pire/pire/extra/glyphs.h b/contrib/libs/pire/pire/extra/glyphs.h
index 678b9e15c4..07c4276951 100644
--- a/contrib/libs/pire/pire/extra/glyphs.h
+++ b/contrib/libs/pire/pire/extra/glyphs.h
@@ -1,41 +1,41 @@
-/*
- * glyphs.h -- declaration of the GlueSimilarGlyphs feature.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * glyphs.h -- declaration of the GlueSimilarGlyphs feature.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_EXTRA_GLYPHS_H
-#define PIRE_EXTRA_GLYPHS_H
-
-
-namespace Pire {
-class Feature;
-namespace Features {
-
- /**
- * A feature which tells Pire not to distinguish latin
- * and cyrillic letters having identical shapes
- * (e.g. latin A and cyrillic A).
- */
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_EXTRA_GLYPHS_H
+#define PIRE_EXTRA_GLYPHS_H
+
+
+namespace Pire {
+class Feature;
+namespace Features {
+
+ /**
+ * A feature which tells Pire not to distinguish latin
+ * and cyrillic letters having identical shapes
+ * (e.g. latin A and cyrillic A).
+ */
Feature::Ptr GlueSimilarGlyphs();
-}
-}
-
-#endif
+}
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp
index 984d708dfa..f2216b3aba 100644
--- a/contrib/libs/pire/pire/fsm.cpp
+++ b/contrib/libs/pire/pire/fsm.cpp
@@ -1,114 +1,114 @@
-/*
- * fsm.cpp -- the implementation of the FSM class.
+/*
+ * fsm.cpp -- the implementation of the FSM class.
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <algorithm>
-#include <functional>
-#include <stdexcept>
-#include <iostream>
-#include <iterator>
-#include <numeric>
-#include <queue>
-#include <utility>
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <algorithm>
+#include <functional>
+#include <stdexcept>
+#include <iostream>
+#include <iterator>
+#include <numeric>
+#include <queue>
+#include <utility>
#include <iostream>
#include <stdio.h>
#include <contrib/libs/pire/pire/stub/lexical_cast.h>
-#include "fsm.h"
-#include "vbitset.h"
-#include "partition.h"
-#include "determine.h"
+#include "fsm.h"
+#include "vbitset.h"
+#include "partition.h"
+#include "determine.h"
#include "minimize.h"
-#include "platform.h"
+#include "platform.h"
+
+namespace Pire {
-namespace Pire {
-
-ystring CharDump(Char c)
-{
- char buf[8];
+ystring CharDump(Char c)
+{
+ char buf[8];
if (c == '"')
return ystring("\\\"");
else if (c == '[' || c == ']' || c == '-' || c == '^') {
snprintf(buf, sizeof(buf)-1, "\\\\%c", c);
return ystring(buf);
} else if (c >= 32 && c < 127)
- return ystring(1, static_cast<char>(c));
- else if (c == '\n')
+ return ystring(1, static_cast<char>(c));
+ else if (c == '\n')
return ystring("\\\\n");
- else if (c == '\t')
+ else if (c == '\t')
return ystring("\\\\t");
- else if (c == '\r')
+ else if (c == '\r')
return ystring("\\\\r");
- else if (c < 256) {
+ else if (c < 256) {
snprintf(buf, sizeof(buf)-1, "\\\\%03o", static_cast<int>(c));
- return ystring(buf);
- } else if (c == Epsilon)
- return ystring("<Epsilon>");
- else if (c == BeginMark)
- return ystring("<Begin>");
- else if (c == EndMark)
- return ystring("<End>");
- else
- return ystring("<?" "?" "?>");
-}
-
-void Fsm::DumpState(yostream& s, size_t state) const
-{
- // Fill in a 'row': Q -> exp(V) (for current state)
+ return ystring(buf);
+ } else if (c == Epsilon)
+ return ystring("<Epsilon>");
+ else if (c == BeginMark)
+ return ystring("<Begin>");
+ else if (c == EndMark)
+ return ystring("<End>");
+ else
+ return ystring("<?" "?" "?>");
+}
+
+void Fsm::DumpState(yostream& s, size_t state) const
+{
+ // Fill in a 'row': Q -> exp(V) (for current state)
TVector< ybitset<MaxChar> > row(Size());
for (auto&& transition : m_transitions[state])
for (auto&& transitionState : transition.second) {
if (transitionState >= Size()) {
std::cerr << "WTF?! Transition from " << state << " on letter " << transition.first << " leads to non-existing state " << transitionState << "\n";
Y_ASSERT(false);
- }
+ }
if (Letters().Contains(transition.first)) {
const TVector<Char>& letters = Letters().Klass(Letters().Representative(transition.first));
for (auto&& letter : letters)
row[transitionState].set(letter);
- } else
+ } else
row[transitionState].set(transition.first);
- }
-
+ }
+
bool statePrinted = false;
- // Display each destination state
+ // Display each destination state
for (auto rit = row.begin(), rie = row.end(); rit != rie; ++rit) {
- unsigned begin = 0, end = 0;
-
+ unsigned begin = 0, end = 0;
+
ystring delimiter;
ystring label;
- if (rit->test(Epsilon)) {
+ if (rit->test(Epsilon)) {
label += delimiter + CharDump(Epsilon);
delimiter = " ";
- }
- if (rit->test(BeginMark)) {
+ }
+ if (rit->test(BeginMark)) {
label += delimiter + CharDump(BeginMark);
delimiter = " ";
- }
- if (rit->test(EndMark)) {
+ }
+ if (rit->test(EndMark)) {
label += delimiter + CharDump(EndMark);
delimiter = " ";
- }
+ }
unsigned count = 0;
for (unsigned i = 0; i < 256; ++i)
if (rit->test(i))
@@ -130,13 +130,13 @@ void Fsm::DumpState(yostream& s, size_t state) const
label += CharDump(begin) + "-" + (CharDump(end-1));
delimiter = " ";
}
- }
+ }
label += "]";
delimiter = " ";
} else if (count == 256) {
label += delimiter + ".";
delimiter = " ";
- }
+ }
if (!label.empty()) {
if (!statePrinted) {
s << " " << state << "[shape=\"" << (IsFinal(state) ? "double" : "") << "circle\",label=\"" << state;
@@ -149,490 +149,490 @@ void Fsm::DumpState(yostream& s, size_t state) const
statePrinted = true;
}
s << " " << state << " -> " << std::distance(row.begin(), rit) << "[label=\"" << label;
-
- // Display outputs
+
+ // Display outputs
auto oit = outputs.find(state);
- if (oit != outputs.end()) {
+ if (oit != outputs.end()) {
auto oit2 = oit->second.find(std::distance(row.begin(), rit));
- if (oit2 == oit->second.end())
- ;
- else {
+ if (oit2 == oit->second.end())
+ ;
+ else {
TVector<int> payload;
- for (unsigned i = 0; i < sizeof(oit2->second) * 8; ++i)
- if (oit2->second & (1ul << i))
- payload.push_back(i);
- if (!payload.empty())
+ for (unsigned i = 0; i < sizeof(oit2->second) * 8; ++i)
+ if (oit2->second & (1ul << i))
+ payload.push_back(i);
+ if (!payload.empty())
s << " (outputs: " << Join(payload.begin(), payload.end(), ", ") << ")";
- }
- }
+ }
+ }
s << "\"]\n";
- }
- }
+ }
+ }
if (statePrinted)
s << '\n';
-}
-
+}
+
void Fsm::DumpTo(yostream& s, const ystring& name) const
-{
+{
s << "digraph {\n \"initial\"[shape=\"plaintext\",label=\"" << name << "\"]\n\n";
- for (size_t state = 0; state < Size(); ++state) {
- DumpState(s, state);
- }
+ for (size_t state = 0; state < Size(); ++state) {
+ DumpState(s, state);
+ }
s << "}\n\n";
-}
-
-yostream& operator << (yostream& s, const Fsm& fsm) { fsm.DumpTo(s); return s; }
-
-
-namespace {
- template<class Vector> void resizeVector(Vector& v, size_t s) { v.resize(s); }
-}
-
-Fsm::Fsm():
- m_transitions(1),
- initial(0),
- letters(m_transitions),
- m_sparsed(false),
- determined(false),
- isAlternative(false)
-{
- m_final.insert(0);
-}
-
-Fsm Fsm::MakeFalse()
-{
- Fsm f;
- f.SetFinal(0, false);
- return f;
-}
-
-Char Fsm::Translate(Char c) const
-{
- if (!m_sparsed || c == Epsilon)
- return c;
- else
- return Letters().Representative(c);
-}
-
-bool Fsm::Connected(size_t from, size_t to, Char c) const
-{
+}
+
+yostream& operator << (yostream& s, const Fsm& fsm) { fsm.DumpTo(s); return s; }
+
+
+namespace {
+ template<class Vector> void resizeVector(Vector& v, size_t s) { v.resize(s); }
+}
+
+Fsm::Fsm():
+ m_transitions(1),
+ initial(0),
+ letters(m_transitions),
+ m_sparsed(false),
+ determined(false),
+ isAlternative(false)
+{
+ m_final.insert(0);
+}
+
+Fsm Fsm::MakeFalse()
+{
+ Fsm f;
+ f.SetFinal(0, false);
+ return f;
+}
+
+Char Fsm::Translate(Char c) const
+{
+ if (!m_sparsed || c == Epsilon)
+ return c;
+ else
+ return Letters().Representative(c);
+}
+
+bool Fsm::Connected(size_t from, size_t to, Char c) const
+{
auto it = m_transitions[from].find(Translate(c));
- return (it != m_transitions[from].end() && it->second.find(to) != it->second.end());
-}
-
-bool Fsm::Connected(size_t from, size_t to) const
-{
+ return (it != m_transitions[from].end() && it->second.find(to) != it->second.end());
+}
+
+bool Fsm::Connected(size_t from, size_t to) const
+{
for (auto i = m_transitions[from].begin(), ie = m_transitions[from].end(); i != ie; ++i)
- if (i->second.find(to) != i->second.end())
- return true;
- return false;
-}
-
-const Fsm::StatesSet& Fsm::Destinations(size_t from, Char c) const
-{
+ if (i->second.find(to) != i->second.end())
+ return true;
+ return false;
+}
+
+const Fsm::StatesSet& Fsm::Destinations(size_t from, Char c) const
+{
auto i = m_transitions[from].find(Translate(c));
- return (i != m_transitions[from].end()) ? i->second : DefaultValue<StatesSet>();
-}
-
+ return (i != m_transitions[from].end()) ? i->second : DefaultValue<StatesSet>();
+}
+
TSet<Char> Fsm::OutgoingLetters(size_t state) const
-{
+{
TSet<Char> ret;
for (auto&& i : m_transitions[state])
ret.insert(i.first);
- return ret;
-}
-
-size_t Fsm::Resize(size_t newSize)
-{
- size_t ret = Size();
- m_transitions.resize(newSize);
- return ret;
-}
-
-void Fsm::Swap(Fsm& fsm)
-{
- DoSwap(m_transitions, fsm.m_transitions);
- DoSwap(initial, fsm.initial);
- DoSwap(m_final, fsm.m_final);
- DoSwap(letters, fsm.letters);
- DoSwap(determined, fsm.determined);
- DoSwap(outputs, fsm.outputs);
- DoSwap(tags, fsm.tags);
- DoSwap(isAlternative, fsm.isAlternative);
-}
-
-void Fsm::SetFinal(size_t state, bool final)
-{
- if (final)
- m_final.insert(state);
- else
- m_final.erase(state);
-}
-
-Fsm& Fsm::AppendDot()
-{
- Resize(Size() + 1);
- for (size_t letter = 0; letter != (1 << (sizeof(char)*8)); ++letter)
- ConnectFinal(Size() - 1, letter);
- ClearFinal();
- SetFinal(Size() - 1, true);
- determined = false;
- return *this;
-}
-
-Fsm& Fsm::Append(char c)
-{
- Resize(Size() + 1);
- ConnectFinal(Size() - 1, static_cast<unsigned char>(c));
- ClearFinal();
- SetFinal(Size() - 1, true);
- determined = false;
- return *this;
-}
-
-Fsm& Fsm::Append(const ystring& str)
-{
+ return ret;
+}
+
+size_t Fsm::Resize(size_t newSize) // Resizes the transition table to newSize rows; returns the value Size() had before the call.
+{
+ size_t ret = Size(); // captured before the table is resized
+ m_transitions.resize(newSize);
+ return ret;
+}
+
+void Fsm::Swap(Fsm& fsm) // Exchanges the members listed below between this automaton and fsm.
+{
+ DoSwap(m_transitions, fsm.m_transitions);
+ DoSwap(initial, fsm.initial);
+ DoSwap(m_final, fsm.m_final);
+ DoSwap(letters, fsm.letters);
+ DoSwap(determined, fsm.determined);
+ DoSwap(outputs, fsm.outputs);
+ DoSwap(tags, fsm.tags);
+ DoSwap(isAlternative, fsm.isAlternative);
+} // NOTE(review): m_sparsed is not exchanged above — confirm that this is intentional.
+
+void Fsm::SetFinal(size_t state, bool final) // Adds state to the final set when final is true, removes it otherwise.
+{
+ if (final)
+ m_final.insert(state);
+ else
+ m_final.erase(state);
+}
+
+Fsm& Fsm::AppendDot() // Appends a new state reached from every current final state on each letter in [0, 1 << (sizeof(char)*8)); returns *this.
+{
+ Resize(Size() + 1); // the new state gets index Size() - 1
+ for (size_t letter = 0; letter != (1 << (sizeof(char)*8)); ++letter) // one transition per possible char value
+ ConnectFinal(Size() - 1, letter);
+ ClearFinal(); // defined elsewhere; presumably empties the final set — confirm
+ SetFinal(Size() - 1, true); // the appended state is marked final
+ determined = false;
+ return *this;
+}
+
+Fsm& Fsm::Append(char c) // Appends a new state reached from every current final state on letter c and marks it final; returns *this.
+{
+ Resize(Size() + 1); // the new state gets index Size() - 1
+ ConnectFinal(Size() - 1, static_cast<unsigned char>(c)); // c is converted through unsigned char before being used as the transition letter
+ ClearFinal(); // defined elsewhere; presumably empties the final set — confirm
+ SetFinal(Size() - 1, true);
+ determined = false;
+ return *this;
+}
+
+Fsm& Fsm::Append(const ystring& str)
+{
for (auto&& i : str)
Append(i);
- return *this;
-}
-
-Fsm& Fsm::AppendSpecial(Char c)
-{
- Resize(Size() + 1);
- ConnectFinal(Size() - 1, c);
- ClearFinal();
- SetFinal(Size() - 1, true);
- determined = false;
- return *this;
-}
-
+ return *this;
+}
+
+Fsm& Fsm::AppendSpecial(Char c) // Appends a new state reached from every current final state on letter c, which is passed on without any cast; returns *this.
+{
+ Resize(Size() + 1); // the new state gets index Size() - 1
+ ConnectFinal(Size() - 1, c);
+ ClearFinal(); // defined elsewhere; presumably empties the final set — confirm
+ SetFinal(Size() - 1, true);
+ determined = false;
+ return *this;
+}
+
Fsm& Fsm::AppendStrings(const TVector<ystring>& strings)
-{
+{
for (auto&& i : strings)
if (i.empty())
- throw Error("None of strings passed to appendStrings() can be empty");
-
- Resize(Size() + 1);
- size_t end = Size() - 1;
-
- // A local transitions table: (oldstate, char) -> newstate.
- // Valid for all letters in given strings except final ones,
- // which are always connected to the end state.
-
- // NB: since each FSM contains at least one state,
- // state #0 cannot appear in LTRs. Thus we can use this
- // criterion to test whether a transition has been created or not.
- typedef ypair<size_t, char> Transition;
+ throw Error("None of strings passed to appendStrings() can be empty");
+
+ Resize(Size() + 1);
+ size_t end = Size() - 1;
+
+ // A local transitions table: (oldstate, char) -> newstate.
+ // Valid for all letters in given strings except final ones,
+ // which are always connected to the end state.
+
+ // NB: since each FSM contains at least one state,
+ // state #0 cannot appear in LTRs. Thus we can use this
+ // criterion to test whether a transition has been created or not.
+ typedef ypair<size_t, char> Transition;
TMap<char, size_t> startLtr;
TMap<Transition, size_t> ltr;
-
- // The presence of a transition in this set indicates that
- // the transition already points somewhere (either to end
- // or somewhere else). Another attempt to create such a transition
- // will clear the `determined` flag.
+
+ // The presence of a transition in this set indicates that
+ // the transition already points somewhere (either to end
+ // or somewhere else). Another attempt to create such a transition
+ // will clear the `determined` flag.
TSet<Transition> usedTransitions;
TSet<char> usedFirsts;
-
+
for (const auto& str : strings) {
- if (str.size() > 1) {
-
- // First letter: all previously final states are connected to the new state
- size_t& firstJump = startLtr[str[0]];
- if (!firstJump) {
- firstJump = Resize(Size() + 1);
- ConnectFinal(firstJump, static_cast<unsigned char>(str[0]));
- determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end());
- }
-
- // All other letters except last one
- size_t state = firstJump;
+ if (str.size() > 1) {
+
+ // First letter: all previously final states are connected to the new state
+ size_t& firstJump = startLtr[str[0]];
+ if (!firstJump) {
+ firstJump = Resize(Size() + 1);
+ ConnectFinal(firstJump, static_cast<unsigned char>(str[0]));
+ determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end());
+ }
+
+ // All other letters except last one
+ size_t state = firstJump;
for (auto cit = str.begin() + 1, cie = str.end() - 1; cit != cie; ++cit) {
- size_t& newState = ltr[ymake_pair(state, *cit)];
- if (!newState) {
- newState = Resize(Size() + 1);
- Connect(state, newState, static_cast<unsigned char>(*cit));
- determined = determined && (usedTransitions.find(ymake_pair(state, *cit)) != usedTransitions.end());
- }
- state = newState;
- }
-
- // The last letter: connect the current state to end
- unsigned char last = static_cast<unsigned char>(*(str.end() - 1));
- Connect(state, end, last);
- determined = determined && (usedTransitions.find(ymake_pair(state, last)) != usedTransitions.end());
-
- } else {
- // The single letter: connect all the previously final states to end
- ConnectFinal(end, static_cast<unsigned char>(str[0]));
- determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end());
- }
- }
-
- ClearFinal();
- SetFinal(end, true);
- return *this;
-}
-
-void Fsm::Import(const Fsm& rhs)
-{
-// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "Importing");
-// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Left-hand side ===\n" << *this);
-// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Right-hand side ===\n" << rhs);
-
- size_t oldsize = Resize(Size() + rhs.Size());
-
+ size_t& newState = ltr[ymake_pair(state, *cit)];
+ if (!newState) {
+ newState = Resize(Size() + 1);
+ Connect(state, newState, static_cast<unsigned char>(*cit));
+ determined = determined && (usedTransitions.find(ymake_pair(state, *cit)) != usedTransitions.end());
+ }
+ state = newState;
+ }
+
+ // The last letter: connect the current state to end
+ unsigned char last = static_cast<unsigned char>(*(str.end() - 1));
+ Connect(state, end, last);
+ determined = determined && (usedTransitions.find(ymake_pair(state, last)) != usedTransitions.end());
+
+ } else {
+ // The single letter: connect all the previously final states to end
+ ConnectFinal(end, static_cast<unsigned char>(str[0]));
+ determined = determined && (usedFirsts.find(str[0]) != usedFirsts.end());
+ }
+ }
+
+ ClearFinal();
+ SetFinal(end, true);
+ return *this;
+}
+
+void Fsm::Import(const Fsm& rhs)
+{
+// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "Importing");
+// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Left-hand side ===\n" << *this);
+// PIRE_IFDEBUG(LOG_DEBUG("fsm") << "=== Right-hand side ===\n" << rhs);
+
+ size_t oldsize = Resize(Size() + rhs.Size());
+
for (auto&& outer : m_transitions) {
for (auto&& letter : letters) {
auto targets = outer.find(letter.first);
if (targets == outer.end())
- continue;
+ continue;
for (auto&& character : letter.second.second)
if (character != letter.first)
outer.insert(ymake_pair(character, targets->second));
- }
- }
-
+ }
+ }
+
auto dest = m_transitions.begin() + oldsize;
for (auto outer = rhs.m_transitions.begin(), outerEnd = rhs.m_transitions.end(); outer != outerEnd; ++outer, ++dest) {
for (auto&& inner : *outer) {
TSet<size_t> targets;
std::transform(inner.second.begin(), inner.second.end(), std::inserter(targets, targets.begin()),
- std::bind2nd(std::plus<size_t>(), oldsize));
+ std::bind2nd(std::plus<size_t>(), oldsize));
dest->insert(ymake_pair(inner.first, targets));
- }
-
+ }
+
for (auto&& letter : rhs.letters) {
auto targets = dest->find(letter.first);
if (targets == dest->end())
- continue;
+ continue;
for (auto&& character : letter.second.second)
if (character != letter.first)
dest->insert(ymake_pair(character, targets->second));
- }
- }
-
- // Import outputs
+ }
+ }
+
+ // Import outputs
for (auto&& output : rhs.outputs) {
auto& dest = outputs[output.first + oldsize];
for (auto&& element : output.second)
dest.insert(ymake_pair(element.first + oldsize, element.second));
- }
-
- // Import tags
+ }
+
+ // Import tags
for (auto&& tag : rhs.tags)
tags.insert(ymake_pair(tag.first + oldsize, tag.second));
-
- letters = LettersTbl(LettersEquality(m_transitions));
-}
-
-void Fsm::Connect(size_t from, size_t to, Char c /* = Epsilon */)
-{
- m_transitions[from][c].insert(to);
- ClearHints();
-}
-
-void Fsm::ConnectFinal(size_t to, Char c /* = Epsilon */)
-{
+
+ letters = LettersTbl(LettersEquality(m_transitions));
+}
+
+void Fsm::Connect(size_t from, size_t to, Char c /* = Epsilon */) // Adds a transition from --c--> to; per the inline note c defaults to Epsilon.
+{
+ m_transitions[from][c].insert(to); // add `to` to the target set kept for letter c in row `from`
+ ClearHints();
+}
+
+void Fsm::ConnectFinal(size_t to, Char c /* = Epsilon */)
+{
for (auto&& final : m_final)
Connect(final, to, c);
- ClearHints();
-}
-
-void Fsm::Disconnect(size_t from, size_t to, Char c)
-{
+ ClearHints();
+}
+
+void Fsm::Disconnect(size_t from, size_t to, Char c)
+{
auto i = m_transitions[from].find(c);
- if (i != m_transitions[from].end())
- i->second.erase(to);
- ClearHints();
-}
-
-void Fsm::Disconnect(size_t from, size_t to)
-{
+ if (i != m_transitions[from].end())
+ i->second.erase(to);
+ ClearHints();
+}
+
+void Fsm::Disconnect(size_t from, size_t to)
+{
for (auto&& i : m_transitions[from])
i.second.erase(to);
- ClearHints();
-}
-
-unsigned long Fsm::Output(size_t from, size_t to) const
-{
+ ClearHints();
+}
+
+unsigned long Fsm::Output(size_t from, size_t to) const
+{
auto i = outputs.find(from);
- if (i == outputs.end())
- return 0;
+ if (i == outputs.end())
+ return 0;
auto j = i->second.find(to);
- if (j == i->second.end())
- return 0;
- else
- return j->second;
-}
-
-Fsm& Fsm::operator += (const Fsm& rhs)
-{
- size_t lhsSize = Size();
- Import(rhs);
-
- const TransitionRow& row = m_transitions[lhsSize + rhs.initial];
-
+ if (j == i->second.end())
+ return 0;
+ else
+ return j->second;
+}
+
+Fsm& Fsm::operator += (const Fsm& rhs)
+{
+ size_t lhsSize = Size();
+ Import(rhs);
+
+ const TransitionRow& row = m_transitions[lhsSize + rhs.initial];
+
for (auto&& outer : row)
for (auto&& inner : outer.second)
ConnectFinal(inner, outer.first);
-
+
auto out = rhs.outputs.find(rhs.initial);
- if (out != rhs.outputs.end())
+ if (out != rhs.outputs.end())
for (auto&& toAndOutput : out->second) {
for (auto&& final : m_final)
outputs[final].insert(ymake_pair(toAndOutput.first + lhsSize, toAndOutput.second));
- }
-
- ClearFinal();
+ }
+
+ ClearFinal();
for (auto&& letter : rhs.m_final)
SetFinal(letter + lhsSize, true);
- determined = false;
-
- ClearHints();
- PIRE_IFDEBUG(Cdbg << "=== After addition ===" << Endl << *this << Endl);
-
- return *this;
-}
-
-Fsm& Fsm::operator |= (const Fsm& rhs)
-{
- size_t lhsSize = Size();
-
- Import(rhs);
+ determined = false;
+
+ ClearHints();
+ PIRE_IFDEBUG(Cdbg << "=== After addition ===" << Endl << *this << Endl);
+
+ return *this;
+}
+
+Fsm& Fsm::operator |= (const Fsm& rhs)
+{
+ size_t lhsSize = Size();
+
+ Import(rhs);
for (auto&& final : rhs.m_final)
m_final.insert(final + lhsSize);
- if (!isAlternative && !rhs.isAlternative) {
- Resize(Size() + 1);
- Connect(Size() - 1, initial);
- Connect(Size() - 1, lhsSize + rhs.initial);
- initial = Size() - 1;
- } else if (isAlternative && !rhs.isAlternative) {
- Connect(initial, lhsSize + rhs.initial, Epsilon);
- } else if (!isAlternative && rhs.isAlternative) {
- Connect(lhsSize + rhs.initial, initial, Epsilon);
- initial = rhs.initial + lhsSize;
- } else if (isAlternative && rhs.isAlternative) {
- const StatesSet& tos = rhs.Destinations(rhs.initial, Epsilon);
+ if (!isAlternative && !rhs.isAlternative) {
+ Resize(Size() + 1);
+ Connect(Size() - 1, initial);
+ Connect(Size() - 1, lhsSize + rhs.initial);
+ initial = Size() - 1;
+ } else if (isAlternative && !rhs.isAlternative) {
+ Connect(initial, lhsSize + rhs.initial, Epsilon);
+ } else if (!isAlternative && rhs.isAlternative) {
+ Connect(lhsSize + rhs.initial, initial, Epsilon);
+ initial = rhs.initial + lhsSize;
+ } else if (isAlternative && rhs.isAlternative) {
+ const StatesSet& tos = rhs.Destinations(rhs.initial, Epsilon);
for (auto&& to : tos) {
Connect(initial, to + lhsSize, Epsilon);
Disconnect(rhs.initial + lhsSize, to + lhsSize, Epsilon);
- }
- }
-
- determined = false;
- isAlternative = true;
- return *this;
-}
-
-Fsm& Fsm::operator &= (const Fsm& rhs)
-{
- Fsm rhs2(rhs);
- Complement();
- rhs2.Complement();
- *this |= rhs2;
- Complement();
- return *this;
-}
-
-Fsm& Fsm::Iterate()
-{
- PIRE_IFDEBUG(Cdbg << "Iterating:" << Endl << *this << Endl);
- Resize(Size() + 2);
-
- Connect(Size() - 2, Size() - 1);
- Connect(Size() - 2, initial);
- ConnectFinal(initial);
- ConnectFinal(Size() - 1);
-
- ClearFinal();
- SetFinal(Size() - 1, true);
- initial = Size() - 2;
-
- determined = false;
-
- PIRE_IFDEBUG(Cdbg << "Iterated:" << Endl << *this << Endl);
- return *this;
-}
-
-Fsm& Fsm::Complement()
-{
- if (!Determine())
- throw Error("Regexp pattern too complicated");
- Minimize();
- Resize(Size() + 1);
- for (size_t i = 0; i < Size(); ++i)
- if (!IsFinal(i))
- Connect(i, Size() - 1);
- ClearFinal();
- SetFinal(Size() - 1, true);
- determined = false;
-
- return *this;
-}
-
+ }
+ }
+
+ determined = false;
+ isAlternative = true;
+ return *this;
+}
+
+Fsm& Fsm::operator &= (const Fsm& rhs) // Intersection computed as Complement(Complement(*this) | Complement(rhs)); returns *this.
+{
+ Fsm rhs2(rhs); // work on a copy so that rhs itself is not modified
+ Complement(); // Complement() throws Error when Determine() fails
+ rhs2.Complement();
+ *this |= rhs2;
+ Complement();
+ return *this;
+}
+
+Fsm& Fsm::Iterate() // Adds a new initial and a new final state, wired so that the original automaton can be bypassed or re-entered from its final states; returns *this.
+{
+ PIRE_IFDEBUG(Cdbg << "Iterating:" << Endl << *this << Endl);
+ Resize(Size() + 2); // Size() - 2 becomes the new initial state, Size() - 1 the new final state
+
+ Connect(Size() - 2, Size() - 1); // new initial -> new final
+ Connect(Size() - 2, initial); // new initial -> old initial
+ ConnectFinal(initial); // every old final state -> old initial
+ ConnectFinal(Size() - 1); // every old final state -> new final
+
+ ClearFinal(); // defined elsewhere; presumably empties the final set — confirm
+ SetFinal(Size() - 1, true);
+ initial = Size() - 2;
+
+ determined = false;
+
+ PIRE_IFDEBUG(Cdbg << "Iterated:" << Endl << *this << Endl);
+ return *this;
+}
+
+Fsm& Fsm::Complement() // After Determine() and Minimize(), connects every non-final state to a newly added state and marks that state final; returns *this.
+{
+ if (!Determine())
+ throw Error("Regexp pattern too complicated"); // raised when Determine() reports failure
+ Minimize();
+ Resize(Size() + 1); // the new state gets index Size() - 1
+ for (size_t i = 0; i < Size(); ++i) // Size() is read after Resize(), so the new state is visited as well
+ if (!IsFinal(i))
+ Connect(i, Size() - 1); // no letter given, so Connect() uses its default
+ ClearFinal(); // defined elsewhere; presumably empties the final set — confirm
+ SetFinal(Size() - 1, true);
+ determined = false;
+
+ return *this;
+}
+
Fsm Fsm::operator *(size_t count) const // Returns a default-constructed Fsm onto which *this has been appended count times with operator +=.
+{
+ Fsm ret;
+ while (count--)
+ ret += *this;
+ return ret; // for count == 0 this is the unmodified default Fsm
+}
+
+void Fsm::MakePrefix() // After RemoveDeadEnds(), marks every state with a non-empty transition row as final.
+{
+ RemoveDeadEnds();
+ for (size_t i = 0; i < Size(); ++i)
+ if (!m_transitions[i].empty()) // the row for state i has at least one entry
+ m_final.insert(i);
+ ClearHints();
+}
+
+void Fsm::MakeSuffix() // Connects the initial state to every other state, using Connect() with its default letter.
+{
+ for (size_t i = 0; i < Size(); ++i)
+ if (i != initial) // the initial state is not connected to itself
+ Connect(initial, i);
+ ClearHints();
+}
+
+Fsm& Fsm::Reverse()
{
- Fsm ret;
- while (count--)
- ret += *this;
- return ret;
-}
+ Fsm out;
+ out.Resize(Size() + 1);
+ out.letters = Letters();
-void Fsm::MakePrefix()
-{
- RemoveDeadEnds();
- for (size_t i = 0; i < Size(); ++i)
- if (!m_transitions[i].empty())
- m_final.insert(i);
- ClearHints();
-}
-
-void Fsm::MakeSuffix()
-{
- for (size_t i = 0; i < Size(); ++i)
- if (i != initial)
- Connect(initial, i);
- ClearHints();
-}
-
-Fsm& Fsm::Reverse()
-{
- Fsm out;
- out.Resize(Size() + 1);
- out.letters = Letters();
-
- // Invert transitions
- for (size_t from = 0; from < Size(); ++from)
+ // Invert transitions
+ for (size_t from = 0; from < Size(); ++from)
for (auto&& i : m_transitions[from])
for (auto&& j : i.second)
out.Connect(j, from, i.first);
- // Invert initial and final states
+ // Invert initial and final states
out.m_final.clear();
- out.SetFinal(initial, true);
+ out.SetFinal(initial, true);
for (auto i : m_final)
out.Connect(Size(), i, Epsilon);
- out.SetInitial(Size());
+ out.SetInitial(Size());
- // Invert outputs
+ // Invert outputs
for (auto&& i : outputs)
for (auto&& j : i.second)
out.SetOutput(j.first, i.first, j.second);
- // Preserve tags (although their semantics are usually heavily broken at this point)
- out.tags = tags;
-
- // Apply
- Swap(out);
- return *this;
-}
+ // Preserve tags (although their semantics are usually heavily broken at this point)
+ out.tags = tags;
+ // Apply
+ Swap(out);
+ return *this;
+}
+
TSet<size_t> Fsm::DeadStates() const
-{
+{
TSet<size_t> res;
for (int invert = 0; invert <= 1; ++invert) {
@@ -649,26 +649,26 @@ TSet<size_t> Fsm::DeadStates() const
digraph.Connect(j - m_transitions.begin(), *toSt, 0);
}
}
- }
- }
-
+ }
+ }
+
TVector<bool> unchecked(Size(), true);
TVector<bool> useless(Size(), true);
TDeque<size_t> queue;
-
+
// Put all final (or initial) states into queue, marking them useful
for (size_t i = 0; i < Size(); ++i)
if ((invert && IsFinal(i)) || (!invert && Initial() == i)) {
useless[i] = false;
queue.push_back(i);
}
-
+
// Do the breadth-first search, marking all states
// from which already marked states are reachable
while (!queue.empty()) {
size_t to = queue.front();
queue.pop_front();
-
+
// All the states that are connected to this state in the transition matrix are useful
const StatesSet& connections = (digraph.m_transitions[to])[0];
for (auto&& fr : connections) {
@@ -677,310 +677,310 @@ TSet<size_t> Fsm::DeadStates() const
useless[fr] = false;
queue.push_back(fr);
}
- }
+ }
// Now we consider this state checked
unchecked[to] = false;
- }
-
+ }
+
for (size_t i = 0; i < Size(); ++i) {
if (useless[i]) {
res.insert(i);
}
- }
- }
-
- return res;
-}
-
-void Fsm::RemoveDeadEnds()
-{
- PIRE_IFDEBUG(Cdbg << "Removing dead ends on:" << Endl << *this << Endl);
-
+ }
+ }
+
+ return res;
+}
+
+void Fsm::RemoveDeadEnds()
+{
+ PIRE_IFDEBUG(Cdbg << "Removing dead ends on:" << Endl << *this << Endl);
+
TSet<size_t> dead = DeadStates();
- // Erase all useless states
+ // Erase all useless states
for (auto&& i : dead) {
PIRE_IFDEBUG(Cdbg << "Removing useless state " << i << Endl);
m_transitions[i].clear();
for (auto&& j : m_transitions)
for (auto&& k : j)
k.second.erase(i);
- }
- ClearHints();
-
- PIRE_IFDEBUG(Cdbg << "Result:" << Endl << *this << Endl);
-}
-
-// This method is one step of Epsilon-connection removal algorithm.
-// It merges transitions, tags, and outputs of 'to' state into 'from' state
-void Fsm::MergeEpsilonConnection(size_t from, size_t to)
-{
- unsigned long frEpsOutput = 0;
- bool fsEpsOutputExists = false;
-
- // Is there an output for 'from'->'to' transition?
- if (outputs.find(from) != outputs.end() && outputs[from].find(to) != outputs[from].end()) {
- frEpsOutput = outputs[from][to];
- fsEpsOutputExists = true;
- }
-
- // Merge transitions from 'to' state into transitions from 'from' state
+ }
+ ClearHints();
+
+ PIRE_IFDEBUG(Cdbg << "Result:" << Endl << *this << Endl);
+}
+
+// This method is one step of Epsilon-connection removal algorithm.
+// It merges transitions, tags, and outputs of 'to' state into 'from' state
+void Fsm::MergeEpsilonConnection(size_t from, size_t to)
+{
+ unsigned long frEpsOutput = 0;
+ bool fsEpsOutputExists = false;
+
+ // Is there an output for 'from'->'to' transition?
+ if (outputs.find(from) != outputs.end() && outputs[from].find(to) != outputs[from].end()) {
+ frEpsOutput = outputs[from][to];
+ fsEpsOutputExists = true;
+ }
+
+ // Merge transitions from 'to' state into transitions from 'from' state
for (auto&& transition : m_transitions[to]) {
TSet<size_t> connStates;
std::copy(transition.second.begin(), transition.second.end(),
std::inserter(m_transitions[from][transition.first], m_transitions[from][transition.first].end()));
-
- // If there is an output of the 'from'->'to' connection it has to be set to all
- // new connections that were merged from 'to' state
- if (fsEpsOutputExists) {
- // Compute the set of states that are reachable from 'to' state
+
+ // If there is an output of the 'from'->'to' connection it has to be set to all
+ // new connections that were merged from 'to' state
+ if (fsEpsOutputExists) {
+ // Compute the set of states that are reachable from 'to' state
std::copy(transition.second.begin(), transition.second.end(), std::inserter(connStates, connStates.end()));
-
- // For each of these states add an output equal to the Epsilon-connection output
+
+ // For each of these states add an output equal to the Epsilon-connection output
for (auto&& newConnSt : connStates) {
outputs[from][newConnSt] |= frEpsOutput;
- }
- }
- }
-
- // Mark 'from' state final if 'to' state is final
- if (IsFinal(to))
- SetFinal(from, true);
-
- // Combine tags
+ }
+ }
+ }
+
+ // Mark 'from' state final if 'to' state is final
+ if (IsFinal(to))
+ SetFinal(from, true);
+
+ // Combine tags
auto ti = tags.find(to);
- if (ti != tags.end())
- tags[from] |= ti->second;
-
- // Merge all 'to' into 'from' outputs:
- // outputs[from][i] |= (outputs[from][to] | outputs[to][i])
+ if (ti != tags.end())
+ tags[from] |= ti->second;
+
+ // Merge all 'to' into 'from' outputs:
+ // outputs[from][i] |= (outputs[from][to] | outputs[to][i])
auto toOit = outputs.find(to);
- if (toOit != outputs.end()) {
+ if (toOit != outputs.end()) {
for (auto&& output : toOit->second) {
outputs[from][output.first] |= (frEpsOutput | output.second);
- }
- }
-}
-
-// Assuming an epsilon transition is possible from 'from' to 'thru',
-// finds all states which are Epsilon-reachable from 'thru' and connects
-// them directly to 'from' with Epsilon transition having proper output.
-// Updates inverse map of epsilon transitions as well.
+ }
+ }
+}
+
+// Assuming an epsilon transition is possible from 'from' to 'thru',
+// finds all states which are Epsilon-reachable from 'thru' and connects
+// them directly to 'from' with Epsilon transition having proper output.
+// Updates inverse map of epsilon transitions as well.
void Fsm::ShortCutEpsilon(size_t from, size_t thru, TVector< TSet<size_t> >& inveps)
-{
- PIRE_IFDEBUG(Cdbg << "In Fsm::ShortCutEpsilon(" << from << ", " << thru << ")\n");
- const StatesSet& to = Destinations(thru, Epsilon);
- Outputs::iterator outIt = outputs.find(from);
- unsigned long fromThruOut = Output(from, thru);
+{
+ PIRE_IFDEBUG(Cdbg << "In Fsm::ShortCutEpsilon(" << from << ", " << thru << ")\n");
+ const StatesSet& to = Destinations(thru, Epsilon);
+ Outputs::iterator outIt = outputs.find(from);
+ unsigned long fromThruOut = Output(from, thru);
for (auto&& toElement : to) {
PIRE_IFDEBUG(Cdbg << "Epsilon connecting " << from << " --> " << thru << " --> " << toElement << "\n");
Connect(from, toElement, Epsilon);
inveps[toElement].insert(from);
- if (outIt != outputs.end())
+ if (outIt != outputs.end())
outIt->second[toElement] |= (fromThruOut | Output(thru, toElement));
}
-}
-
-// Removes all Epsilon-connections by iterating through states and merging the effects
-// of each Epsilon-connection from the 'to' state into the 'from' state
-void Fsm::RemoveEpsilons()
-{
- Unsparse();
-
- // Build inverse map of epsilon transitions
+}
+
+// Removes all Epsilon-connections by iterating through states and merging the effects
+// of each Epsilon-connection from the 'to' state into the 'from' state
+void Fsm::RemoveEpsilons()
+{
+ Unsparse();
+
+ // Build inverse map of epsilon transitions
TVector< TSet<size_t> > inveps(Size()); // We have to use TSet<> here since we want it sorted
- for (size_t from = 0; from != Size(); ++from) {
- const StatesSet& tos = Destinations(from, Epsilon);
+ for (size_t from = 0; from != Size(); ++from) {
+ const StatesSet& tos = Destinations(from, Epsilon);
for (auto&& to : tos)
inveps[to].insert(from);
- }
+ }
- // Make a transitive closure of all epsilon transitions (Floyd-Warshall algorithm)
- // (if there exists an epsilon-path between two states, epsilon-connect them directly)
- for (size_t thru = 0; thru != Size(); ++thru)
+ // Make a transitive closure of all epsilon transitions (Floyd-Warshall algorithm)
+ // (if there exists an epsilon-path between two states, epsilon-connect them directly)
+ for (size_t thru = 0; thru != Size(); ++thru)
for (auto&& from : inveps[thru])
- // inveps[thru] may be altered during the loop body, hence we cannot cache inveps[thru].end()
+ // inveps[thru] may be altered during the loop body, hence we cannot cache inveps[thru].end()
if (from != thru)
ShortCutEpsilon(from, thru, inveps);
- PIRE_IFDEBUG(Cdbg << "=== After epsilons shortcut\n" << *this << Endl);
+ PIRE_IFDEBUG(Cdbg << "=== After epsilons shortcut\n" << *this << Endl);
- // Iterate through all epsilon-connected state pairs, merging states together
- for (size_t from = 0; from != Size(); ++from) {
- const StatesSet& to = Destinations(from, Epsilon);
+ // Iterate through all epsilon-connected state pairs, merging states together
+ for (size_t from = 0; from != Size(); ++from) {
+ const StatesSet& to = Destinations(from, Epsilon);
for (auto&& toElement : to)
if (toElement != from)
MergeEpsilonConnection(from, toElement); // it's a NOP if to == from, so don't waste time
- }
+ }
- PIRE_IFDEBUG(Cdbg << "=== After epsilons merged\n" << *this << Endl);
+ PIRE_IFDEBUG(Cdbg << "=== After epsilons merged\n" << *this << Endl);
- // Drop all epsilon transitions
+ // Drop all epsilon transitions
for (auto&& i : m_transitions)
i.erase(Epsilon);
- Sparse();
- ClearHints();
-}
-
-bool Fsm::LettersEquality::operator()(Char a, Char b) const
-{
+ Sparse();
+ ClearHints();
+}
+
+bool Fsm::LettersEquality::operator()(Char a, Char b) const
+{
for (auto&& outer : *m_tbl) {
auto ia = outer.find(a);
auto ib = outer.find(b);
if (ia == outer.end() && ib == outer.end())
- continue;
+ continue;
else if (ia == outer.end() || ib == outer.end() || ia->second != ib->second) {
- return false;
- }
- }
- return true;
-}
-
+ return false;
+ }
+ }
+ return true;
+}
+
void Fsm::Sparse(bool needEpsilons /* = false */)
-{
- letters = LettersTbl(LettersEquality(m_transitions));
- for (unsigned letter = 0; letter < MaxChar; ++letter)
+{
+ letters = LettersTbl(LettersEquality(m_transitions));
+ for (unsigned letter = 0; letter < MaxChar; ++letter)
if (letter != Epsilon || needEpsilons)
- letters.Append(letter);
-
- m_sparsed = true;
- PIRE_IFDEBUG(Cdbg << "Letter classes = " << letters << Endl);
-}
-
-void Fsm::Unsparse()
-{
+ letters.Append(letter);
+
+ m_sparsed = true;
+ PIRE_IFDEBUG(Cdbg << "Letter classes = " << letters << Endl);
+}
+
+void Fsm::Unsparse()
+{
for (auto&& letter : letters)
for (auto&& i : m_transitions)
for (auto&& j : letter.second.second)
i[j] = i[letter.first];
- m_sparsed = false;
-}
-
-// Returns the set of 'terminal states': those final states which have
-// a transition to themselves on every letter.
+ m_sparsed = false;
+}
+
+// Returns the set of 'terminal states': those final states which have
+// a transition to themselves on every letter.
TSet<size_t> Fsm::TerminalStates() const
-{
+{
TSet<size_t> terminals;
for (auto&& final : m_final) {
- bool ok = true;
+ bool ok = true;
for (auto&& letter : letters) {
auto dests = m_transitions[final].find(letter.first);
ok = ok && (dests != m_transitions[final].end() && dests->second.find(final) != dests->second.end());
- }
- if (ok)
+ }
+ if (ok)
terminals.insert(final);
- }
- return terminals;
-}
-
-namespace Impl {
-class FsmDetermineTask {
-public:
+ }
+ return terminals;
+}
+
+namespace Impl {
+class FsmDetermineTask {
+public:
typedef TVector<size_t> State;
- typedef Fsm::LettersTbl LettersTbl;
+ typedef Fsm::LettersTbl LettersTbl;
typedef TMap<State, size_t> InvStates;
- FsmDetermineTask(const Fsm& fsm)
- : mFsm(fsm)
- , mTerminals(fsm.TerminalStates())
- {
- PIRE_IFDEBUG(Cdbg << "Terminal states: [" << Join(mTerminals.begin(), mTerminals.end(), ", ") << "]" << Endl);
- }
- const LettersTbl& Letters() const { return mFsm.letters; }
-
- State Initial() const { return State(1, mFsm.initial); }
- bool IsRequired(const State& state) const
- {
+ FsmDetermineTask(const Fsm& fsm)
+ : mFsm(fsm)
+ , mTerminals(fsm.TerminalStates())
+ {
+ PIRE_IFDEBUG(Cdbg << "Terminal states: [" << Join(mTerminals.begin(), mTerminals.end(), ", ") << "]" << Endl);
+ }
+ const LettersTbl& Letters() const { return mFsm.letters; }
+
+ State Initial() const { return State(1, mFsm.initial); }
+ bool IsRequired(const State& state) const
+ {
for (auto&& i : state)
if (mTerminals.find(i) != mTerminals.end())
- return false;
- return true;
- }
-
- State Next(const State& state, Char letter) const
- {
- State next;
- next.reserve(20);
+ return false;
+ return true;
+ }
+
+ State Next(const State& state, Char letter) const
+ {
+ State next;
+ next.reserve(20);
for (auto&& from : state) {
const auto& part = mFsm.Destinations(from, letter);
- std::copy(part.begin(), part.end(), std::back_inserter(next));
- }
-
- std::sort(next.begin(), next.end());
- next.erase(std::unique(next.begin(), next.end()), next.end());
- PIRE_IFDEBUG(Cdbg << "Returning transition [" << Join(state.begin(), state.end(), ", ") << "] --" << letter
- << "--> [" << Join(next.begin(), next.end(), ", ") << "]" << Endl);
- return next;
- }
+ std::copy(part.begin(), part.end(), std::back_inserter(next));
+ }
+
+ std::sort(next.begin(), next.end());
+ next.erase(std::unique(next.begin(), next.end()), next.end());
+ PIRE_IFDEBUG(Cdbg << "Returning transition [" << Join(state.begin(), state.end(), ", ") << "] --" << letter
+ << "--> [" << Join(next.begin(), next.end(), ", ") << "]" << Endl);
+ return next;
+ }
void AcceptStates(const TVector<State>& states)
- {
- mNewFsm.Resize(states.size());
- mNewFsm.initial = 0;
- mNewFsm.determined = true;
- mNewFsm.letters = Letters();
- mNewFsm.m_final.clear();
- for (size_t ns = 0; ns < states.size(); ++ns) {
- PIRE_IFDEBUG(Cdbg << "State " << ns << " = [" << Join(states[ns].begin(), states[ns].end(), ", ") << "]" << Endl);
+ {
+ mNewFsm.Resize(states.size());
+ mNewFsm.initial = 0;
+ mNewFsm.determined = true;
+ mNewFsm.letters = Letters();
+ mNewFsm.m_final.clear();
+ for (size_t ns = 0; ns < states.size(); ++ns) {
+ PIRE_IFDEBUG(Cdbg << "State " << ns << " = [" << Join(states[ns].begin(), states[ns].end(), ", ") << "]" << Endl);
for (auto&& j : states[ns]) {
- // If it was a terminal state, connect it to itself
+ // If it was a terminal state, connect it to itself
if (mTerminals.find(j) != mTerminals.end()) {
for (auto&& letter : Letters())
mNewFsm.Connect(ns, ns, letter.first);
- mNewTerminals.insert(ns);
+ mNewTerminals.insert(ns);
PIRE_IFDEBUG(Cdbg << "State " << ns << " becomes terminal because of old state " << j << Endl);
- }
- }
+ }
+ }
for (auto&& j : states[ns]) {
- // If any state contained in this one is marked final, mark the new state final as well
+ // If any state contained in this one is marked final, mark the new state final as well
if (mFsm.IsFinal(j)) {
PIRE_IFDEBUG(Cdbg << "State " << ns << " becomes final because of old state " << j << Endl);
- mNewFsm.SetFinal(ns, true);
- if (mFsm.tags.empty())
- // We've got no tags and already know that the state is final,
- // hence we're done with this state and have nothing more to do.
- break;
- }
-
- // Bitwise OR all tags in states
+ mNewFsm.SetFinal(ns, true);
+ if (mFsm.tags.empty())
+ // We've got no tags and already know that the state is final,
+ // hence we're done with this state and have nothing more to do.
+ break;
+ }
+
+ // Bitwise OR all tags in states
auto ti = mFsm.tags.find(j);
- if (ti != mFsm.tags.end()) {
+ if (ti != mFsm.tags.end()) {
PIRE_IFDEBUG(Cdbg << "State " << ns << " carries tag " << ti->second << " because of old state " << j << Endl);
- mNewFsm.tags[ns] |= ti->second;
- }
- }
- }
- // For each old state, prepare a list of new state it is contained in
+ mNewFsm.tags[ns] |= ti->second;
+ }
+ }
+ }
+ // For each old state, prepare a list of new state it is contained in
typedef TMap< size_t, TVector<size_t> > Old2New;
- Old2New old2new;
- for (size_t ns = 0; ns < states.size(); ++ns)
+ Old2New old2new;
+ for (size_t ns = 0; ns < states.size(); ++ns)
for (auto&& j : states[ns])
old2new[j].push_back(ns);
- // Copy all outputs
+ // Copy all outputs
for (auto&& i : mFsm.outputs) {
for (auto&& j : i.second) {
auto from = old2new.find(i.first);
auto to = old2new.find(j.first);
- if (from != old2new.end() && to != old2new.end()) {
+ if (from != old2new.end() && to != old2new.end()) {
for (auto&& k : from->second)
for (auto&& l : to->second)
mNewFsm.outputs[k][l] |= j.second;
- }
- }
- }
- PIRE_IFDEBUG(Cdbg << "New terminals = [" << Join(mNewTerminals.begin(), mNewTerminals.end(), ",") << "]" << Endl);
- }
-
- void Connect(size_t from, size_t to, Char letter)
- {
- PIRE_IFDEBUG(Cdbg << "Connecting " << from << " --" << letter << "--> " << to << Endl);
+ }
+ }
+ }
+ PIRE_IFDEBUG(Cdbg << "New terminals = [" << Join(mNewTerminals.begin(), mNewTerminals.end(), ",") << "]" << Endl);
+ }
+
+ void Connect(size_t from, size_t to, Char letter)
+ {
+ PIRE_IFDEBUG(Cdbg << "Connecting " << from << " --" << letter << "--> " << to << Endl);
Y_ASSERT(mNewTerminals.find(from) == mNewTerminals.end());
- mNewFsm.Connect(from, to, letter);
- }
- typedef bool Result;
+ mNewFsm.Connect(from, to, letter);
+ }
+ typedef bool Result;
Result Success() {
Fsm::Outputs oldOutputs;
@@ -1003,40 +1003,40 @@ public:
return true;
}
- Result Failure() { return false; }
+ Result Failure() { return false; }
- Fsm& Output() { return mNewFsm; }
-private:
- const Fsm& mFsm;
- Fsm mNewFsm;
+ Fsm& Output() { return mNewFsm; }
+private:
+ const Fsm& mFsm;
+ Fsm mNewFsm;
TSet<size_t> mTerminals;
TSet<size_t> mNewTerminals;
-};
-}
-
-bool Fsm::Determine(size_t maxsize /* = 0 */)
-{
- static const unsigned MaxSize = 200000;
- if (determined)
- return true;
-
- PIRE_IFDEBUG(Cdbg << "=== Initial ===" << Endl << *this << Endl);
-
- RemoveEpsilons();
- PIRE_IFDEBUG(Cdbg << "=== After all epsilons removed" << Endl << *this << Endl);
-
- Impl::FsmDetermineTask task(*this);
- if (Pire::Impl::Determine(task, maxsize ? maxsize : MaxSize)) {
- task.Output().Swap(*this);
- PIRE_IFDEBUG(Cdbg << "=== Determined ===" << Endl << *this << Endl);
- return true;
- } else
- return false;
-}
-
+};
+}
+
+bool Fsm::Determine(size_t maxsize /* = 0 */)
+{
+ static const unsigned MaxSize = 200000;
+ if (determined)
+ return true;
+
+ PIRE_IFDEBUG(Cdbg << "=== Initial ===" << Endl << *this << Endl);
+
+ RemoveEpsilons();
+ PIRE_IFDEBUG(Cdbg << "=== After all epsilons removed" << Endl << *this << Endl);
+
+ Impl::FsmDetermineTask task(*this);
+ if (Pire::Impl::Determine(task, maxsize ? maxsize : MaxSize)) {
+ task.Output().Swap(*this);
+ PIRE_IFDEBUG(Cdbg << "=== Determined ===" << Endl << *this << Endl);
+ return true;
+ } else
+ return false;
+}
+
namespace Impl {
class FsmMinimizeTask {
-public:
+public:
explicit FsmMinimizeTask(const Fsm& fsm)
: mFsm(fsm)
, reversedTransitions(fsm.Size())
@@ -1044,7 +1044,7 @@ public:
, Classes(0)
{
Y_ASSERT(mFsm.IsDetermined());
-
+
TMap<bool, size_t> FinalStateClassMap;
for (size_t state = 0; state < mFsm.Size(); ++state) {
@@ -1068,7 +1068,7 @@ public:
}
}
}
-
+
TVector<size_t>& GetStateClass() { return StateClass; }
size_t& GetClassesNumber() { return Classes; }
@@ -1080,22 +1080,22 @@ public:
bool IsDetermined() const {
return mFsm.IsDetermined();
}
-
+
size_t Size() const {
return mFsm.Size();
- }
-
+ }
+
const TVector<size_t>& Previous(size_t state, size_t letter) const {
return reversedTransitions[state][letter];
- }
-
+ }
+
void AcceptStates() {
mNewFsm.Resize(Classes);
mNewFsm.letters = mFsm.letters;
mNewFsm.determined = mFsm.determined;
mNewFsm.m_sparsed = mFsm.m_sparsed;
mNewFsm.SetFinal(0, false);
-
+
// Unite equality classes into new states
size_t fromIdx = 0;
for (auto from = mFsm.m_transitions.begin(), fromEnd = mFsm.m_transitions.end(); from != fromEnd; ++from, ++fromIdx) {
@@ -1109,36 +1109,36 @@ public:
mNewFsm.SetFinal(dest, true);
PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " becomes final because of old state " << fromIdx << Endl);
}
-
+
// Append tags
auto ti = mFsm.tags.find(fromIdx);
if (ti != mFsm.tags.end()) {
mNewFsm.tags[dest] |= ti->second;
PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " carries tag " << ti->second << " because of old state " << fromIdx << Endl);
}
- }
+ }
mNewFsm.initial = StateClass[mFsm.initial];
// Restore outputs
for (auto&& output : mFsm.outputs)
for (auto&& output2 : output.second)
mNewFsm.outputs[StateClass[output.first]].insert(ymake_pair(StateClass[output2.first], output2.second));
- }
-
+ }
+
typedef bool Result;
-
+
Result Success() {
return true;
}
-
+
Result Failure() {
return false;
}
-
+
Fsm& Output() {
return mNewFsm;
- }
-
+ }
+
private:
const Fsm& mFsm;
Fsm mNewFsm;
@@ -1147,89 +1147,89 @@ private:
size_t Classes;
};
}
-
+
void Fsm::Minimize()
{
// Minimization algorithm is only applicable to a determined FSM.
Y_ASSERT(determined);
-
+
Impl::FsmMinimizeTask task{*this};
if (Pire::Impl::Minimize(task)) {
task.Output().Swap(*this);
- }
-}
-
-Fsm& Fsm::Canonize(size_t maxSize /* = 0 */)
-{
- if (!IsDetermined()) {
+ }
+}
+
+Fsm& Fsm::Canonize(size_t maxSize /* = 0 */)
+{
+ if (!IsDetermined()) {
if (!Determine(maxSize))
- throw Error("regexp pattern too complicated");
- }
- Minimize();
- return *this;
-}
-
-void Fsm::PrependAnything()
-{
- size_t newstate = Size();
- Resize(Size() + 1);
- for (size_t letter = 0; letter < MaxChar; ++letter)
- Connect(newstate, newstate, letter);
-
- Connect(newstate, initial);
- initial = newstate;
-
- determined = false;
-}
-
-void Fsm::AppendAnything()
-{
- size_t newstate = Size();
- Resize(Size() + 1);
- for (size_t letter = 0; letter < MaxChar; ++letter)
- Connect(newstate, newstate, letter);
-
- ConnectFinal(newstate);
- ClearFinal();
- SetFinal(newstate, 1);
-
- determined = false;
-}
-
-Fsm& Fsm::Surround()
-{
- PrependAnything();
- AppendAnything();
- return *this;
-}
-
-void Fsm::Divert(size_t from, size_t to, size_t dest)
-{
- if (to == dest)
- return;
-
- // Assign the output
+ throw Error("regexp pattern too complicated");
+ }
+ Minimize();
+ return *this;
+}
+
+void Fsm::PrependAnything()
+{
+ size_t newstate = Size();
+ Resize(Size() + 1);
+ for (size_t letter = 0; letter < MaxChar; ++letter)
+ Connect(newstate, newstate, letter);
+
+ Connect(newstate, initial);
+ initial = newstate;
+
+ determined = false;
+}
+
+void Fsm::AppendAnything()
+{
+ size_t newstate = Size();
+ Resize(Size() + 1);
+ for (size_t letter = 0; letter < MaxChar; ++letter)
+ Connect(newstate, newstate, letter);
+
+ ConnectFinal(newstate);
+ ClearFinal();
+ SetFinal(newstate, 1);
+
+ determined = false;
+}
+
+Fsm& Fsm::Surround()
+{
+ PrependAnything();
+ AppendAnything();
+ return *this;
+}
+
+void Fsm::Divert(size_t from, size_t to, size_t dest)
+{
+ if (to == dest)
+ return;
+
+ // Assign the output
auto oi = outputs.find(from);
- if (oi != outputs.end()) {
+ if (oi != outputs.end()) {
auto oi2 = oi->second.find(to);
- if (oi2 != oi->second.end()) {
- unsigned long output = oi2->second;
- oi->second.erase(oi2);
- oi->second.insert(ymake_pair(dest, output));
- }
- }
-
- // Assign the transition
+ if (oi2 != oi->second.end()) {
+ unsigned long output = oi2->second;
+ oi->second.erase(oi2);
+ oi->second.insert(ymake_pair(dest, output));
+ }
+ }
+
+ // Assign the transition
for (auto&& i : m_transitions[from]) {
auto di = i.second.find(to);
if (di != i.second.end()) {
i.second.erase(di);
i.second.insert(dest);
- }
- }
-
- ClearHints();
-}
-
-
-}
+ }
+ }
+
+ ClearHints();
+}
+
+
+}
diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h
index 4dad06ca06..d25d1764e3 100644
--- a/contrib/libs/pire/pire/fsm.h
+++ b/contrib/libs/pire/pire/fsm.h
@@ -1,283 +1,283 @@
-/*
- * fsm.h -- the definition of the FSM class.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * fsm.h -- the definition of the FSM class.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_FSM_H
-#define PIRE_FSM_H
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_FSM_H
+#define PIRE_FSM_H
+
+
#include <contrib/libs/pire/pire/stub/stl.h>
-#include "partition.h"
-#include "defs.h"
-
-namespace Pire {
-
- namespace Impl {
- class FsmDetermineTask;
+#include "partition.h"
+#include "defs.h"
+
+namespace Pire {
+
+ namespace Impl {
+ class FsmDetermineTask;
class FsmMinimizeTask;
class HalfFinalDetermineTask;
- }
-
- /// A Flying Spaghetti Monster... no, just a Finite State Machine.
- class Fsm {
- public:
- typedef ybitset<MaxChar> Charset;
-
- Fsm();
- void Swap(Fsm& fsm);
-
- static Fsm MakeFalse();
-
- /// Current number of states
- size_t Size() const { return m_transitions.size(); }
-
- Fsm& Append(char c);
- Fsm& Append(const ystring& str);
- Fsm& AppendSpecial(Char c);
-
- /// Efficiently appends a union of passed strings to FSM.
- /// Used for ranges (e.g. [a-z]), character classes (e.g. \w, \d)
- /// and case-insensitive comparison of multibyte characters,
- /// when one string represents a lowercase variant of a character,
- /// while another string represents its uppercase variant.
+ }
+
+ /// A Flying Spaghetti Monster... no, just a Finite State Machine.
+ class Fsm {
+ public:
+ typedef ybitset<MaxChar> Charset;
+
+ Fsm();
+ void Swap(Fsm& fsm);
+
+ static Fsm MakeFalse();
+
+ /// Current number of states
+ size_t Size() const { return m_transitions.size(); }
+
+ Fsm& Append(char c);
+ Fsm& Append(const ystring& str);
+ Fsm& AppendSpecial(Char c);
+
+ /// Efficiently appends a union of passed strings to FSM.
+ /// Used for ranges (e.g. [a-z]), character classes (e.g. \w, \d)
+ /// and case-insensitive comparison of multibyte characters,
+ /// when one string represents a lowercase variant of a character,
+ /// while another string represents its uppercase variant.
Fsm& AppendStrings(const TVector<ystring>& strings);
-
- /// Appends a part matching a single byte (any).
- Fsm& AppendDot();
-
- /// Appends and prepends the FSM with the iterated dot (see above).
- Fsm& Surround(); // returns *this
- Fsm Surrounded() const { Fsm copy(*this); copy.Surround(); return copy; }
-
- Fsm& operator += (const Fsm& rhs); ///< Concatenation
- Fsm& operator |= (const Fsm& rhs); ///< Alternation
- Fsm& operator &= (const Fsm& rhs); ///< Conjunction
- Fsm& Iterate(); ///< Klene star
- Fsm& Complement(); ///< Complementation
- Fsm& operator *= (size_t count) { *this = *this * count; return *this; }
-
- Fsm operator + (const Fsm& rhs) const { Fsm a(*this); return a += rhs; }
- Fsm operator | (const Fsm& rhs) const { Fsm a(*this); return a |= rhs; }
- Fsm operator & (const Fsm& rhs) const { Fsm a(*this); return a &= rhs; }
- Fsm operator * () const { Fsm a(*this); return a.Iterate(); }
- Fsm operator ~ () const { Fsm a(*this); return a.Complement(); }
+
+ /// Appends a part matching a single byte (any).
+ Fsm& AppendDot();
+
+ /// Appends and prepends the FSM with the iterated dot (see above).
+ Fsm& Surround(); // returns *this
+ Fsm Surrounded() const { Fsm copy(*this); copy.Surround(); return copy; }
+
+ Fsm& operator += (const Fsm& rhs); ///< Concatenation
+ Fsm& operator |= (const Fsm& rhs); ///< Alternation
+ Fsm& operator &= (const Fsm& rhs); ///< Conjunction
+ Fsm& Iterate(); ///< Klene star
+ Fsm& Complement(); ///< Complementation
+ Fsm& operator *= (size_t count) { *this = *this * count; return *this; }
+
+ Fsm operator + (const Fsm& rhs) const { Fsm a(*this); return a += rhs; }
+ Fsm operator | (const Fsm& rhs) const { Fsm a(*this); return a |= rhs; }
+ Fsm operator & (const Fsm& rhs) const { Fsm a(*this); return a &= rhs; }
+ Fsm operator * () const { Fsm a(*this); return a.Iterate(); }
+ Fsm operator ~ () const { Fsm a(*this); return a.Complement(); }
Fsm operator * (size_t count) const;
-
- // === Raw FSM construction ===
-
- /// Connects two states with given transition
- void Connect(size_t from, size_t to, Char c = Epsilon);
-
- /// Removes given character from the specified transition.
- void Disconnect(size_t from, size_t to, Char c);
-
- /// Completely removes given transition
- void Disconnect(size_t from, size_t to);
-
+
+ // === Raw FSM construction ===
+
+ /// Connects two states with given transition
+ void Connect(size_t from, size_t to, Char c = Epsilon);
+
+ /// Removes given character from the specified transition.
+ void Disconnect(size_t from, size_t to, Char c);
+
+ /// Completely removes given transition
+ void Disconnect(size_t from, size_t to);
+
/// Creates an FSM which matches any prefix of any word current FSM matches.
- void MakePrefix();
-
- /// Creates an FSM which matches any suffix of any word current FSM matches.
- void MakeSuffix();
-
- /// Does the one way part of Surround().
- void PrependAnything();
- void AppendAnything();
-
- /// Creates an FSM which matches reversed strings matched by current FSM.
- Fsm& Reverse();
-
- /// Returns a set of states from which no final states are reachable
+ void MakePrefix();
+
+ /// Creates an FSM which matches any suffix of any word current FSM matches.
+ void MakeSuffix();
+
+ /// Does the one way part of Surround().
+ void PrependAnything();
+ void AppendAnything();
+
+ /// Creates an FSM which matches reversed strings matched by current FSM.
+ Fsm& Reverse();
+
+ /// Returns a set of states from which no final states are reachable
TSet<size_t> DeadStates() const;
-
- /// Removes all dead end paths from FSM
- void RemoveDeadEnds();
-
- /// Determines and minimizes the FSM if neccessary. Returns *this.
- Fsm& Canonize(size_t maxSize = 0);
-
- template<class Scanner>
+
+ /// Removes all dead end paths from FSM
+ void RemoveDeadEnds();
+
+ /// Determines and minimizes the FSM if neccessary. Returns *this.
+ Fsm& Canonize(size_t maxSize = 0);
+
+ template<class Scanner>
Scanner Compile(size_t distance = 0);
-
- void DumpState(yostream& s, size_t state) const;
+
+ void DumpState(yostream& s, size_t state) const;
void DumpTo(yostream& s, const ystring& name = "") const;
-
+
typedef TSet<size_t> StatesSet;
typedef TMap<size_t, StatesSet> TransitionRow;
typedef TVector<TransitionRow> TransitionTable;
-
- struct LettersEquality {
- LettersEquality(const Fsm::TransitionTable& tbl): m_tbl(&tbl) {}
- bool operator()(Char a, Char b) const;
- private:
- const Fsm::TransitionTable* m_tbl;
- };
-
+
+ struct LettersEquality {
+ LettersEquality(const Fsm::TransitionTable& tbl): m_tbl(&tbl) {}
+ bool operator()(Char a, Char b) const;
+ private:
+ const Fsm::TransitionTable* m_tbl;
+ };
+
typedef TSet<size_t> FinalTable;
- typedef Partition<Char, LettersEquality> LettersTbl;
-
-
- /*
- * A very low level FSM building interface.
- *
- * It is generally unwise to call any of these functions unless you are building
- * your own scanner, your own ecoding or exaclty know what you are doing.
- */
- unsigned long Tag(size_t state) const { Tags::const_iterator i = tags.find(state); return (i == tags.end()) ? 0 : i->second; }
- void SetTag(size_t state, unsigned long tag) { tags[state] = tag; }
-
- unsigned long Output(size_t from, size_t to) const;
- void SetOutput(size_t from, size_t to, unsigned long output) { outputs[from][to] = output; }
- void ClearOutputs() { outputs.clear(); }
-
- const FinalTable& Finals() const { return m_final; }
- bool IsFinal(size_t state) const { return m_final.find(state) != m_final.end(); }
- void SetFinal(size_t size, bool final);
- void ClearFinal() { m_final.clear(); }
-
- /// Removes all espilon transitions from the FSM. Does not change the FSMs language.
- void RemoveEpsilons();
-
- /// Resize FSM to newSize states. Returns old size.
- size_t Resize(size_t newSize);
-
- /// Imports foreign transition table
- void Import(const Fsm& rhs);
-
- /// Connects all final state with given state
- void ConnectFinal(size_t to, Char c = Epsilon);
-
- /// Diverts all transition between two given states to @p dest, preserving outputs
- void Divert(size_t from, size_t to, size_t dest);
-
- /// Checks whether two states are connected using given letter.
- bool Connected(size_t from, size_t to, Char c) const;
-
- /// Returns a set of letters on which a transition from the specified state exists
+ typedef Partition<Char, LettersEquality> LettersTbl;
+
+
+ /*
+ * A very low level FSM building interface.
+ *
+ * It is generally unwise to call any of these functions unless you are building
+ * your own scanner, your own ecoding or exaclty know what you are doing.
+ */
+ unsigned long Tag(size_t state) const { Tags::const_iterator i = tags.find(state); return (i == tags.end()) ? 0 : i->second; }
+ void SetTag(size_t state, unsigned long tag) { tags[state] = tag; }
+
+ unsigned long Output(size_t from, size_t to) const;
+ void SetOutput(size_t from, size_t to, unsigned long output) { outputs[from][to] = output; }
+ void ClearOutputs() { outputs.clear(); }
+
+ const FinalTable& Finals() const { return m_final; }
+ bool IsFinal(size_t state) const { return m_final.find(state) != m_final.end(); }
+ void SetFinal(size_t size, bool final);
+ void ClearFinal() { m_final.clear(); }
+
+ /// Removes all espilon transitions from the FSM. Does not change the FSMs language.
+ void RemoveEpsilons();
+
+ /// Resize FSM to newSize states. Returns old size.
+ size_t Resize(size_t newSize);
+
+ /// Imports foreign transition table
+ void Import(const Fsm& rhs);
+
+ /// Connects all final state with given state
+ void ConnectFinal(size_t to, Char c = Epsilon);
+
+ /// Diverts all transition between two given states to @p dest, preserving outputs
+ void Divert(size_t from, size_t to, size_t dest);
+
+ /// Checks whether two states are connected using given letter.
+ bool Connected(size_t from, size_t to, Char c) const;
+
+ /// Returns a set of letters on which a transition from the specified state exists
TSet<Char> OutgoingLetters(size_t state) const;
-
- /// Returns a set of states where a transition from the given state using the given letter is possible
- const StatesSet& Destinations(size_t from, Char letter) const;
-
- /// Checks whether two states are connected using any letter.
- bool Connected(size_t from, size_t to) const;
- size_t Initial() const { return initial; }
- void SetInitial(size_t init) { initial = init; }
-
- const LettersTbl& Letters() const { return letters; }
-
- /// Determines the FSM.
- /// Breaks FSM invariant of having a single final state, so high-level FSM building
- /// functions (i.e. Append(), operator+(), etc...) no longer can be applied to the FSM
- /// until the invariants have been manually restored.
- /// return value: successful?
- bool Determine(size_t maxsize = 0);
- bool IsDetermined() const { return determined; }
- void SetIsDetermined(bool det) { determined = det; }
-
- /// Minimizes amount of states in the regexp.
- /// Requires a determined FSM.
- void Minimize();
-
-
- /// Builds letters equivalence classes
+
+ /// Returns a set of states where a transition from the given state using the given letter is possible
+ const StatesSet& Destinations(size_t from, Char letter) const;
+
+ /// Checks whether two states are connected using any letter.
+ bool Connected(size_t from, size_t to) const;
+ size_t Initial() const { return initial; }
+ void SetInitial(size_t init) { initial = init; }
+
+ const LettersTbl& Letters() const { return letters; }
+
+ /// Determines the FSM.
+ /// Breaks FSM invariant of having a single final state, so high-level FSM building
+ /// functions (i.e. Append(), operator+(), etc...) no longer can be applied to the FSM
+ /// until the invariants have been manually restored.
+ /// return value: successful?
+ bool Determine(size_t maxsize = 0);
+ bool IsDetermined() const { return determined; }
+ void SetIsDetermined(bool det) { determined = det; }
+
+ /// Minimizes amount of states in the regexp.
+ /// Requires a determined FSM.
+ void Minimize();
+
+
+ /// Builds letters equivalence classes
void Sparse(bool needEpsilons = false);
-
- /// Unpacks all letters equivalence classs back into transitions table
- void Unsparse();
-
- private:
-
- /// Transitions table :: Q x V -> exp(Q)
- TransitionTable m_transitions;
-
- /// Initial state
- size_t initial;
-
- /// Final states.
- FinalTable m_final;
-
- LettersTbl letters;
-
- /// Does 'letters' make sense?
- bool m_sparsed;
-
- /// Is the FSM already determined?
- bool determined;
-
- /// Output
+
+ /// Unpacks all letters equivalence classs back into transitions table
+ void Unsparse();
+
+ private:
+
+ /// Transitions table :: Q x V -> exp(Q)
+ TransitionTable m_transitions;
+
+ /// Initial state
+ size_t initial;
+
+ /// Final states.
+ FinalTable m_final;
+
+ LettersTbl letters;
+
+ /// Does 'letters' make sense?
+ bool m_sparsed;
+
+ /// Is the FSM already determined?
+ bool determined;
+
+ /// Output
typedef TMap< size_t, TMap<size_t, unsigned long> > Outputs;
- Outputs outputs;
-
+ Outputs outputs;
+
typedef TMap<size_t, unsigned long> Tags;
- Tags tags;
-
- /// Heuristics hit: true iff this FSM is a union of two other FSMs
- bool isAlternative;
-
+ Tags tags;
+
+ /// Heuristics hit: true iff this FSM is a union of two other FSMs
+ bool isAlternative;
+
void ShortCutEpsilon(size_t from, size_t thru, TVector< TSet<size_t> >& inveps); ///< internal
- void MergeEpsilonConnection(size_t from, size_t to); ///< internal
-
+ void MergeEpsilonConnection(size_t from, size_t to); ///< internal
+
TSet<size_t> TerminalStates() const;
-
- Char Translate(Char c) const;
-
- void ClearHints() { isAlternative = false; }
-
- friend class Impl::FsmDetermineTask;
+
+ Char Translate(Char c) const;
+
+ void ClearHints() { isAlternative = false; }
+
+ friend class Impl::FsmDetermineTask;
friend class Impl::FsmMinimizeTask;
friend class Impl::HalfFinalDetermineTask;
- };
-
- template<class Scanner>
+ };
+
+ template<class Scanner>
void BuildScanner(const Fsm& fsm, Scanner& r)
- {
+ {
TSet<size_t> dead;
- if (Scanner::DeadFlag)
- dead = fsm.DeadStates();
-
- for (size_t state = 0; state < fsm.Size(); ++state)
- r.SetTag(state, typename Scanner::Tag(fsm.Tag(state)
- | (fsm.IsFinal(state) ? Scanner::FinalFlag : 0)
- | ((dead.find(state) != dead.end()) ? Scanner::DeadFlag : 0)));
-
- for (size_t from = 0; from != fsm.Size(); ++from)
- for (Fsm::LettersTbl::ConstIterator lit = fsm.Letters().Begin(), lie = fsm.Letters().End(); lit != lie; ++lit) {
- const Fsm::StatesSet& tos = fsm.Destinations(from, lit->first);
- for (Fsm::StatesSet::const_iterator to = tos.begin(), toEnd = tos.end(); to != toEnd; ++to)
- r.SetJump(from, lit->first, *to, r.RemapAction(fsm.Output(from, *to)));
- }
-
- r.FinishBuild();
- }
-
- template<class Scanner>
+ if (Scanner::DeadFlag)
+ dead = fsm.DeadStates();
+
+ for (size_t state = 0; state < fsm.Size(); ++state)
+ r.SetTag(state, typename Scanner::Tag(fsm.Tag(state)
+ | (fsm.IsFinal(state) ? Scanner::FinalFlag : 0)
+ | ((dead.find(state) != dead.end()) ? Scanner::DeadFlag : 0)));
+
+ for (size_t from = 0; from != fsm.Size(); ++from)
+ for (Fsm::LettersTbl::ConstIterator lit = fsm.Letters().Begin(), lie = fsm.Letters().End(); lit != lie; ++lit) {
+ const Fsm::StatesSet& tos = fsm.Destinations(from, lit->first);
+ for (Fsm::StatesSet::const_iterator to = tos.begin(), toEnd = tos.end(); to != toEnd; ++to)
+ r.SetJump(from, lit->first, *to, r.RemapAction(fsm.Output(from, *to)));
+ }
+
+ r.FinishBuild();
+ }
+
+ template<class Scanner>
inline Scanner Fsm::Compile(size_t distance)
- {
+ {
return Scanner(*this, distance);
- }
-
- yostream& operator << (yostream&, const Fsm&);
-}
-
-#endif
+ }
+
+ yostream& operator << (yostream&, const Fsm&);
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/fwd.h b/contrib/libs/pire/pire/fwd.h
index c2b5870b05..aa6eb6b051 100644
--- a/contrib/libs/pire/pire/fwd.h
+++ b/contrib/libs/pire/pire/fwd.h
@@ -1,42 +1,42 @@
-/*
- * fwd.h -- forward declarations of Pire classes
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * fwd.h -- forward declarations of Pire classes
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_FWD_H
-#define PIRE_FWD_H
-
-
-namespace Pire {
-
- class Scanner;
- class MultiScanner;
- class SlowScanner;
- class CapturingScanner;
- class CountingScanner;
-
- class Fsm;
-
- class Lexer;
- class Encoding;
-}
-
-#endif
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_FWD_H
+#define PIRE_FWD_H
+
+
+namespace Pire {
+
+ class Scanner;
+ class MultiScanner;
+ class SlowScanner;
+ class CapturingScanner;
+ class CountingScanner;
+
+ class Fsm;
+
+ class Lexer;
+ class Encoding;
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/glue.h b/contrib/libs/pire/pire/glue.h
index bac086f2f0..fb34c6cfa8 100644
--- a/contrib/libs/pire/pire/glue.h
+++ b/contrib/libs/pire/pire/glue.h
@@ -1,166 +1,166 @@
-/*
- * glue.h -- scanner agglutination task, which can be used as
- * a parameter to Determine().
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * glue.h -- scanner agglutination task, which can be used as
+ * a parameter to Determine().
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_GLUE_H
-#define PIRE_GLUE_H
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_GLUE_H
+#define PIRE_GLUE_H
+
+
#include <contrib/libs/pire/pire/stub/stl.h>
-#include "partition.h"
-
-namespace Pire {
-namespace Impl {
-
-template <class Scanner>
-class LettersEquality: public ybinary_function<Char, Char, bool> {
-public:
- LettersEquality(typename Scanner::Letter* lhs, typename Scanner::Letter* rhs): m_lhs(lhs), m_rhs(rhs) {}
-
- bool operator()(Char a, Char b) const
- {
- return m_lhs[a] == m_lhs[b] && m_rhs[a] == m_rhs[b];
- }
-
-private:
- typename Scanner::Letter* m_lhs;
- typename Scanner::Letter* m_rhs;
-};
-
-// This lookup table is used instead of std::map.
-// The key idea is to specify size which is a power of 2 in order to use >> and | instead of
-// divisions and remainders.
-// NB: it mimics limited std::map<> behaviour, hence stl-like method names and typedefs.
-template <size_t N, class State>
-class GluedStateLookupTable {
-public:
- static const size_t MaxSize = N;
- typedef ypair<State, State> key_type;
- typedef size_t mapped_type;
- typedef ypair<key_type, mapped_type> value_type;
- typedef value_type* iterator;
- typedef const value_type* const_iterator;
-
- GluedStateLookupTable()
- : mMap(new value_type[N])
- , mFilled(N, false)
- {}
-
+#include "partition.h"
+
+namespace Pire {
+namespace Impl {
+
+template <class Scanner>
+class LettersEquality: public ybinary_function<Char, Char, bool> {
+public:
+ LettersEquality(typename Scanner::Letter* lhs, typename Scanner::Letter* rhs): m_lhs(lhs), m_rhs(rhs) {}
+
+ bool operator()(Char a, Char b) const
+ {
+ return m_lhs[a] == m_lhs[b] && m_rhs[a] == m_rhs[b];
+ }
+
+private:
+ typename Scanner::Letter* m_lhs;
+ typename Scanner::Letter* m_rhs;
+};
+
+// This lookup table is used instead of std::map.
+// The key idea is to specify size which is a power of 2 in order to use >> and | instead of
+// divisions and remainders.
+// NB: it mimics limited std::map<> behaviour, hence stl-like method names and typedefs.
+template <size_t N, class State>
+class GluedStateLookupTable {
+public:
+ static const size_t MaxSize = N;
+ typedef ypair<State, State> key_type;
+ typedef size_t mapped_type;
+ typedef ypair<key_type, mapped_type> value_type;
+ typedef value_type* iterator;
+ typedef const value_type* const_iterator;
+
+ GluedStateLookupTable()
+ : mMap(new value_type[N])
+ , mFilled(N, false)
+ {}
+
~GluedStateLookupTable() = default;
-
- const_iterator end() const {
+
+ const_iterator end() const {
return mMap.Get() + MaxSize;
- }
- // Note that in fact mMap is sparsed and traditional [begin,end)
- // traversal is unavailable; hence no begin() method here.
- // end() is only valid for comparing with find() result.
- const_iterator find(const key_type& st) const {
- size_t ind = Search(st);
+ }
+ // Note that in fact mMap is sparsed and traditional [begin,end)
+ // traversal is unavailable; hence no begin() method here.
+ // end() is only valid for comparing with find() result.
+ const_iterator find(const key_type& st) const {
+ size_t ind = Search(st);
return mFilled[ind] ? (mMap.Get() + ind) : end();
- }
-
- ypair<iterator, bool> insert(const value_type& v) {
- size_t ind = Search(v.first);
- if (!mFilled[ind]) {
+ }
+
+ ypair<iterator, bool> insert(const value_type& v) {
+ size_t ind = Search(v.first);
+ if (!mFilled[ind]) {
mMap[ind] = v;
- mFilled[ind] = true;
+ mFilled[ind] = true;
return ymake_pair(mMap.Get() + ind, true);
- } else
+ } else
return ymake_pair(mMap.Get() + ind, false);
- }
-
-private:
- size_t Search(const key_type& st) const {
- size_t startInd = (Hash(st) % N);
- for (size_t ind = startInd; ind != (startInd + N - 1) % N; ind = (ind + 1) % N) {
- if (!mFilled[ind] || mMap[ind].first == st) {
- return ind;
- }
- }
+ }
+
+private:
+ size_t Search(const key_type& st) const {
+ size_t startInd = (Hash(st) % N);
+ for (size_t ind = startInd; ind != (startInd + N - 1) % N; ind = (ind + 1) % N) {
+ if (!mFilled[ind] || mMap[ind].first == st) {
+ return ind;
+ }
+ }
return (size_t)-1;
- }
-
- static size_t Hash(const key_type& st) {
- return size_t((st.first >> 2) ^ (st.second >> 4) ^ (st.second << 10));
- }
-
+ }
+
+ static size_t Hash(const key_type& st) {
+ return size_t((st.first >> 2) ^ (st.second >> 4) ^ (st.second << 10));
+ }
+
TArrayHolder<value_type> mMap;
TVector<bool> mFilled;
-
- // Noncopyable
- GluedStateLookupTable(const GluedStateLookupTable&);
- GluedStateLookupTable& operator = (const GluedStateLookupTable&);
-};
-
-template<class Scanner>
-class ScannerGlueCommon {
-public:
- typedef Partition< Char, Impl::LettersEquality<Scanner> > LettersTbl;
-
- typedef ypair<typename Scanner::InternalState, typename Scanner::InternalState> State;
- ScannerGlueCommon(const Scanner& lhs, const Scanner& rhs, const LettersTbl& letters)
- : m_lhs(lhs)
- , m_rhs(rhs)
- , m_letters(letters)
- {
- // Form a new letters partition
- for (unsigned ch = 0; ch < MaxChar; ++ch)
- if (ch != Epsilon)
- m_letters.Append(ch);
- }
-
- const LettersTbl& Letters() const { return m_letters; }
-
- const Scanner& Lhs() const { return m_lhs; }
- const Scanner& Rhs() const { return m_rhs; }
-
- State Initial() const { return State(Lhs().m.initial, Rhs().m.initial); }
-
- State Next(State state, Char letter) const
- {
- Lhs().Next(state.first, letter);
- Rhs().Next(state.second, letter);
- return state;
- }
-
- bool IsRequired(const State& /*state*/) const { return true; }
-
- typedef Scanner Result;
- const Scanner& Success() const { return *m_result; }
- Scanner Failure() const { return Scanner(); }
-
-protected:
- Scanner& Sc() { return *m_result; }
+
+ // Noncopyable
+ GluedStateLookupTable(const GluedStateLookupTable&);
+ GluedStateLookupTable& operator = (const GluedStateLookupTable&);
+};
+
+template<class Scanner>
+class ScannerGlueCommon {
+public:
+ typedef Partition< Char, Impl::LettersEquality<Scanner> > LettersTbl;
+
+ typedef ypair<typename Scanner::InternalState, typename Scanner::InternalState> State;
+ ScannerGlueCommon(const Scanner& lhs, const Scanner& rhs, const LettersTbl& letters)
+ : m_lhs(lhs)
+ , m_rhs(rhs)
+ , m_letters(letters)
+ {
+ // Form a new letters partition
+ for (unsigned ch = 0; ch < MaxChar; ++ch)
+ if (ch != Epsilon)
+ m_letters.Append(ch);
+ }
+
+ const LettersTbl& Letters() const { return m_letters; }
+
+ const Scanner& Lhs() const { return m_lhs; }
+ const Scanner& Rhs() const { return m_rhs; }
+
+ State Initial() const { return State(Lhs().m.initial, Rhs().m.initial); }
+
+ State Next(State state, Char letter) const
+ {
+ Lhs().Next(state.first, letter);
+ Rhs().Next(state.second, letter);
+ return state;
+ }
+
+ bool IsRequired(const State& /*state*/) const { return true; }
+
+ typedef Scanner Result;
+ const Scanner& Success() const { return *m_result; }
+ Scanner Failure() const { return Scanner(); }
+
+protected:
+ Scanner& Sc() { return *m_result; }
void SetSc(THolder<Scanner>&& sc) { m_result = std::move(sc); }
-
-private:
- const Scanner& m_lhs;
- const Scanner& m_rhs;
- LettersTbl m_letters;
+
+private:
+ const Scanner& m_lhs;
+ const Scanner& m_rhs;
+ LettersTbl m_letters;
THolder<Scanner> m_result;
-};
-
-}
-}
-
-#endif
+};
+
+}
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/inline.l b/contrib/libs/pire/pire/inline.l
index a4d2e1a836..67f6d80584 100644
--- a/contrib/libs/pire/pire/inline.l
+++ b/contrib/libs/pire/pire/inline.l
@@ -1,31 +1,31 @@
-%{ // -*- mode: c++ -*-
-
-/*
- * inline.lpp -- a tool for inlining Pire regexps into your C++ code
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-#include <stdio.h>
-#include <vector>
-#include <string>
-#include <stdexcept>
+%{ // -*- mode: c++ -*-
+
+/*
+ * inline.lpp -- a tool for inlining Pire regexps into your C++ code
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+#include <stdio.h>
+#include <vector>
+#include <string>
+#include <stdexcept>
#include <contrib/libs/pire/pire/stub/hacks.h>
#include <contrib/libs/pire/pire/stub/lexical_cast.h>
@@ -35,238 +35,238 @@
#include "pire.h"
-ystring filename = "";
-int line = 1;
+ystring filename = "";
+int line = 1;
TVector<ystring> args;
-
-#ifdef _WIN32
-#if _MCS_VER >= 1600
-static int isatty(int) { return 0; }
-#endif
-#endif
-
-class Die {
-public:
- Die() {
- Msg = filename.empty() ? "pire_inline" : (filename + ":" + ToString(line) + ":");
- }
-
-
- template<class T>
- Die& operator << (const T& t) {
- Msg += ToString(t);
- return *this;
- }
-
-
- ~Die() {
- fprintf(stderr, "%s\n", Msg.c_str());
- exit(1);
- }
-private:
- ystring Msg;
-};
-Die DieHelper() {
- return Die();
-}
-
-void putChar(char c) { putc(c, yyout); }
-void suppressChar(char) {}
-void eatComment(void (*action)(char));
-
-#define YY_FATAL_ERROR(msg) DieHelper() << msg
-%}
-%x Regexp
-%%
-
-
-<INITIAL>"/*" { eatComment(putChar); }
-<Regexp>"/*" { eatComment(suppressChar); }
-<INITIAL>"//".*\n { ++line; fprintf(yyout, "%s", yytext); }
-<Regexp>"//".*\n { ++line; }
-"\""([^\"]|\\.)*"\"" { fprintf(yyout, "%s", yytext); }
-\n { ++line; putc('\n', yyout); }
-
-
-<INITIAL>"PIRE_REGEXP"[:space:]*"(" { BEGIN(Regexp); args.clear(); args.push_back(ystring()); }
-<Regexp>"\""([^\"]|\\.)*"\"" {
- ystring& s = args.back();
- const char* p;
- for (p = yytext + 1; *p && p[1]; ++p) {
- if (*p == '\\') {
- ++p;
- if (!*p)
- Die() << "string ends with a backslash";
- else if (*p == '\'' || *p == '\"' || *p == '\\')
- s.push_back(*p);
- else if (*p == 'n')
- s.push_back('\n');
- else if (*p == 't')
- s.push_back('\t');
- else if (isdigit(*p)) {
- const char* beg = p;
- while (isdigit(*p))
- ++p;
- s.push_back(strtol(ystring(beg, p).c_str(), 0, 8));
- } else if (*p == 'x') {
- const char* beg = p;
- while (isdigit(*p) || (*p > 'a' && *p <= 'f') || (*p > 'A' && *p < 'F'))
- ++p;
- s.push_back(strtol(ystring(beg, p).c_str(), 0, 16));
- } else
- Die() << "unknown escape sequence (\\" << *p << ")";
- } else
- s.push_back(*p);
- }
- if (!*p)
- Die() << "string ends with a backslash";
-}
-<Regexp>[ \t] {}
-<Regexp>\n { ++line; }
-<Regexp>"," { args.push_back(ystring()); }
-<Regexp>")" {
-
- if (args.size() & 1 || args.empty())
- Die() << "Usage: PIRE_REGEXP(\"regexp1\", \"flags1\" [, \"regexp2\", \"flags2\" [,...] ])";
-
- bool first = true;
- Pire::Scanner sc;
- ystring pattern;
+
+#ifdef _WIN32
+#if _MCS_VER >= 1600
+static int isatty(int) { return 0; }
+#endif
+#endif
+
+class Die {
+public:
+ Die() {
+ Msg = filename.empty() ? "pire_inline" : (filename + ":" + ToString(line) + ":");
+ }
+
+
+ template<class T>
+ Die& operator << (const T& t) {
+ Msg += ToString(t);
+ return *this;
+ }
+
+
+ ~Die() {
+ fprintf(stderr, "%s\n", Msg.c_str());
+ exit(1);
+ }
+private:
+ ystring Msg;
+};
+Die DieHelper() {
+ return Die();
+}
+
+void putChar(char c) { putc(c, yyout); }
+void suppressChar(char) {}
+void eatComment(void (*action)(char));
+
+#define YY_FATAL_ERROR(msg) DieHelper() << msg
+%}
+%x Regexp
+%%
+
+
+<INITIAL>"/*" { eatComment(putChar); }
+<Regexp>"/*" { eatComment(suppressChar); }
+<INITIAL>"//".*\n { ++line; fprintf(yyout, "%s", yytext); }
+<Regexp>"//".*\n { ++line; }
+"\""([^\"]|\\.)*"\"" { fprintf(yyout, "%s", yytext); }
+\n { ++line; putc('\n', yyout); }
+
+
+<INITIAL>"PIRE_REGEXP"[:space:]*"(" { BEGIN(Regexp); args.clear(); args.push_back(ystring()); }
+<Regexp>"\""([^\"]|\\.)*"\"" {
+ ystring& s = args.back();
+ const char* p;
+ for (p = yytext + 1; *p && p[1]; ++p) {
+ if (*p == '\\') {
+ ++p;
+ if (!*p)
+ Die() << "string ends with a backslash";
+ else if (*p == '\'' || *p == '\"' || *p == '\\')
+ s.push_back(*p);
+ else if (*p == 'n')
+ s.push_back('\n');
+ else if (*p == 't')
+ s.push_back('\t');
+ else if (isdigit(*p)) {
+ const char* beg = p;
+ while (isdigit(*p))
+ ++p;
+ s.push_back(strtol(ystring(beg, p).c_str(), 0, 8));
+ } else if (*p == 'x') {
+ const char* beg = p;
+ while (isdigit(*p) || (*p > 'a' && *p <= 'f') || (*p > 'A' && *p < 'F'))
+ ++p;
+ s.push_back(strtol(ystring(beg, p).c_str(), 0, 16));
+ } else
+ Die() << "unknown escape sequence (\\" << *p << ")";
+ } else
+ s.push_back(*p);
+ }
+ if (!*p)
+ Die() << "string ends with a backslash";
+}
+<Regexp>[ \t] {}
+<Regexp>\n { ++line; }
+<Regexp>"," { args.push_back(ystring()); }
+<Regexp>")" {
+
+ if (args.size() & 1 || args.empty())
+ Die() << "Usage: PIRE_REGEXP(\"regexp1\", \"flags1\" [, \"regexp2\", \"flags2\" [,...] ])";
+
+ bool first = true;
+ Pire::Scanner sc;
+ ystring pattern;
for (auto i = args.begin(), ie = args.end(); i != ie; i += 2) {
-
- Pire::Lexer lexer(i->c_str(), i->c_str() + i->size());
- bool surround = false;
- bool greedy = false;
+
+ Pire::Lexer lexer(i->c_str(), i->c_str() + i->size());
+ bool surround = false;
+ bool greedy = false;
bool reverse = false;
- for (const char* option = (i+1)->c_str(); *option; ++option) {
- if (*option == 'i')
- lexer.AddFeature(Pire::Features::CaseInsensitive());
- else if (*option == 'u')
- lexer.SetEncoding(Pire::Encodings::Utf8());
- else if (*option == 's')
- surround = true;
- else if (*option == 'a')
- lexer.AddFeature(Pire::Features::AndNotSupport());
- else if (*option == 'g')
- greedy = true;
+ for (const char* option = (i+1)->c_str(); *option; ++option) {
+ if (*option == 'i')
+ lexer.AddFeature(Pire::Features::CaseInsensitive());
+ else if (*option == 'u')
+ lexer.SetEncoding(Pire::Encodings::Utf8());
+ else if (*option == 's')
+ surround = true;
+ else if (*option == 'a')
+ lexer.AddFeature(Pire::Features::AndNotSupport());
+ else if (*option == 'g')
+ greedy = true;
else if (*option == 'r')
reverse = true;
- else
- Die() << "unknown option " << *option << "";
- }
-
- Pire::Fsm fsm;
- try {
- fsm = lexer.Parse();
- }
- catch (std::exception& e) {
- Die() << "" << filename << ":" << line << ": " << e.what() << "";
- }
+ else
+ Die() << "unknown option " << *option << "";
+ }
+
+ Pire::Fsm fsm;
+ try {
+ fsm = lexer.Parse();
+ }
+ catch (std::exception& e) {
+ Die() << "" << filename << ":" << line << ": " << e.what() << "";
+ }
if (reverse)
fsm.Reverse();
- if (greedy && surround)
- Die() << "greedy and surround options are incompatible";
- if (greedy)
- fsm = ~fsm.Surrounded() + fsm;
- else if (surround)
- fsm.Surround();
-
- Pire::Scanner tsc(fsm);
- if (first) {
- pattern = *i;
- first = false;
- tsc.Swap(sc);
- } else {
- sc = Pire::Scanner::Glue(sc, tsc);
- pattern += " | ";
- pattern += *i;
- }
- }
-
- BufferOutput buf;
- AlignedOutput stream(&buf);
- Save(&stream, sc);
-
- fprintf(yyout, "Pire::MmappedScanner<Pire::Scanner>(PIRE_LITERAL( // %s \n \"", pattern.c_str());
- size_t pos = 5;
+ if (greedy && surround)
+ Die() << "greedy and surround options are incompatible";
+ if (greedy)
+ fsm = ~fsm.Surrounded() + fsm;
+ else if (surround)
+ fsm.Surround();
+
+ Pire::Scanner tsc(fsm);
+ if (first) {
+ pattern = *i;
+ first = false;
+ tsc.Swap(sc);
+ } else {
+ sc = Pire::Scanner::Glue(sc, tsc);
+ pattern += " | ";
+ pattern += *i;
+ }
+ }
+
+ BufferOutput buf;
+ AlignedOutput stream(&buf);
+ Save(&stream, sc);
+
+ fprintf(yyout, "Pire::MmappedScanner<Pire::Scanner>(PIRE_LITERAL( // %s \n \"", pattern.c_str());
+ size_t pos = 5;
for (auto i = buf.Buffer().Begin(), ie = buf.Buffer().End(); i != ie; ++i) {
- pos += fprintf(yyout, "\\x%02X", static_cast<unsigned char>(*i));
- if (pos >= 78) {
- fprintf(yyout, "\"\n \"");
- pos = 5;
- }
- }
- fprintf(yyout, "\"), %u)\n#line %d \"%s\"\n",
- (unsigned int) buf.Buffer().Size(), line, filename.c_str());
- BEGIN(INITIAL);
-}
-<INITIAL>. { putc(*yytext, yyout); }
-
-
-
-
-%%
-
-void eatComment(void (*action)(char))
-{
- int c;
- action('/'); action('*');
- for (;;) {
- while ((c = yyinput()) != EOF && c != '*') {
- if (c == '\n')
- ++line;
- action(c);
- }
- if (c == '*') {
- action(c);
- while ((c = yyinput()) == '*')
- action(c);
- if (c == '/') {
- action(c);
- break;
- }
- }
- if (c == EOF)
- Die() << "EOF in comment";
- }
-}
-
-int yywrap() { return 1; }
-
-
-int main(int argc, char** argv)
-{
- // Suppress warnings
- static_cast<void>(&yy_fatal_error);
- static_cast<void>(&yyunput);
-
-
- try {
- const char* outfile = 0;
- if (argc >= 3 && !strcmp(argv[1], "-o")) {
- outfile = argv[2];
- argv += 2, argc -= 2;
- }
- if (argc == 2)
- filename = ystring(argv[1]);
- else if (argc > 2)
- Die() << "usage: pire_inline [-o outfile] [infile]";
-
- yyin = stdin, yyout = stdout;
- if (outfile && (yyout = fopen(outfile, "w")) == NULL)
- Die() << "cannot open file " << outfile << " for writing";
- if (!filename.empty()) {
- if ((yyin = fopen(filename.c_str(), "r")) == NULL)
- Die() << "cannot open file " << filename.c_str() << "\n";
- } else
- filename = "(stdin)";
-
-
- yylex();
- return 0;
- }
- catch (std::exception& e) {
- fprintf(stderr, "%s\n", e.what());
- return 1;
- }
-}
+ pos += fprintf(yyout, "\\x%02X", static_cast<unsigned char>(*i));
+ if (pos >= 78) {
+ fprintf(yyout, "\"\n \"");
+ pos = 5;
+ }
+ }
+ fprintf(yyout, "\"), %u)\n#line %d \"%s\"\n",
+ (unsigned int) buf.Buffer().Size(), line, filename.c_str());
+ BEGIN(INITIAL);
+}
+<INITIAL>. { putc(*yytext, yyout); }
+
+
+
+
+%%
+
+void eatComment(void (*action)(char))
+{
+ int c;
+ action('/'); action('*');
+ for (;;) {
+ while ((c = yyinput()) != EOF && c != '*') {
+ if (c == '\n')
+ ++line;
+ action(c);
+ }
+ if (c == '*') {
+ action(c);
+ while ((c = yyinput()) == '*')
+ action(c);
+ if (c == '/') {
+ action(c);
+ break;
+ }
+ }
+ if (c == EOF)
+ Die() << "EOF in comment";
+ }
+}
+
+int yywrap() { return 1; }
+
+
+int main(int argc, char** argv)
+{
+ // Suppress warnings
+ static_cast<void>(&yy_fatal_error);
+ static_cast<void>(&yyunput);
+
+
+ try {
+ const char* outfile = 0;
+ if (argc >= 3 && !strcmp(argv[1], "-o")) {
+ outfile = argv[2];
+ argv += 2, argc -= 2;
+ }
+ if (argc == 2)
+ filename = ystring(argv[1]);
+ else if (argc > 2)
+ Die() << "usage: pire_inline [-o outfile] [infile]";
+
+ yyin = stdin, yyout = stdout;
+ if (outfile && (yyout = fopen(outfile, "w")) == NULL)
+ Die() << "cannot open file " << outfile << " for writing";
+ if (!filename.empty()) {
+ if ((yyin = fopen(filename.c_str(), "r")) == NULL)
+ Die() << "cannot open file " << filename.c_str() << "\n";
+ } else
+ filename = "(stdin)";
+
+
+ yylex();
+ return 0;
+ }
+ catch (std::exception& e) {
+ fprintf(stderr, "%s\n", e.what());
+ return 1;
+ }
+}
diff --git a/contrib/libs/pire/pire/partition.h b/contrib/libs/pire/pire/partition.h
index 85a9af8863..c41cf5c335 100644
--- a/contrib/libs/pire/pire/partition.h
+++ b/contrib/libs/pire/pire/partition.h
@@ -1,193 +1,193 @@
-/*
- * partition.h -- a disjoint set of pairwise equivalent items
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * partition.h -- a disjoint set of pairwise equivalent items
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_PARTITION_H
-#define PIRE_PARTITION_H
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_PARTITION_H
+#define PIRE_PARTITION_H
+
+
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/singleton.h>
-
-namespace Pire {
-
-/*
-* A class which forms a disjoint set of pairwise equivalent items,
-* depending on given equivalence relation.
-*/
-template<class T, class Eq>
-class Partition {
-private:
+
+namespace Pire {
+
+/*
+* A class which forms a disjoint set of pairwise equivalent items,
+* depending on given equivalence relation.
+*/
+template<class T, class Eq>
+class Partition {
+private:
typedef TMap< T, ypair< size_t, TVector<T> > > Set;
-
-public:
- Partition(const Eq& eq)
- : m_eq(eq)
- , m_maxidx(0)
- {
- }
-
- /// Appends a new item into partition, creating new equivalience class if neccessary.
- void Append(const T& t) {
- DoAppend(m_set, t);
- }
-
- typedef typename Set::const_iterator ConstIterator;
-
- ConstIterator Begin() const {
- return m_set.begin();
- }
+
+public:
+ Partition(const Eq& eq)
+ : m_eq(eq)
+ , m_maxidx(0)
+ {
+ }
+
+ /// Appends a new item into partition, creating new equivalience class if neccessary.
+ void Append(const T& t) {
+ DoAppend(m_set, t);
+ }
+
+ typedef typename Set::const_iterator ConstIterator;
+
+ ConstIterator Begin() const {
+ return m_set.begin();
+ }
ConstIterator begin() const {
return m_set.begin();
}
- ConstIterator End() const {
- return m_set.end();
- }
+ ConstIterator End() const {
+ return m_set.end();
+ }
ConstIterator end() const {
return m_set.end();
}
- size_t Size() const {
- return m_set.size();
- }
- bool Empty() const {
- return m_set.empty();
- }
-
- /// Returns an item equal to @p t. It is guaranteed that:
- /// - representative(a) equals representative(b) iff a is equivalent to b;
- /// - representative(a) is equivalent to a.
- const T& Representative(const T& t) const
- {
+ size_t Size() const {
+ return m_set.size();
+ }
+ bool Empty() const {
+ return m_set.empty();
+ }
+
+ /// Returns an item equal to @p t. It is guaranteed that:
+ /// - representative(a) equals representative(b) iff a is equivalent to b;
+ /// - representative(a) is equivalent to a.
+ const T& Representative(const T& t) const
+ {
auto it = m_inv.find(t);
- if (it != m_inv.end())
- return it->second;
- else
- return DefaultValue<T>();
- }
-
- bool Contains(const T& t) const
- {
- return m_inv.find(t) != m_inv.end();
- }
-
- /// Returns an index of set containing @p t. It is guaranteed that:
- /// - index(a) equals index(b) iff a is equivalent to b;
- /// - 0 <= index(a) < size().
- size_t Index(const T& t) const
- {
+ if (it != m_inv.end())
+ return it->second;
+ else
+ return DefaultValue<T>();
+ }
+
+ bool Contains(const T& t) const
+ {
+ return m_inv.find(t) != m_inv.end();
+ }
+
+ /// Returns an index of set containing @p t. It is guaranteed that:
+ /// - index(a) equals index(b) iff a is equivalent to b;
+ /// - 0 <= index(a) < size().
+ size_t Index(const T& t) const
+ {
auto it = m_inv.find(t);
- if (it == m_inv.end())
- throw Error("Partition::index(): attempted to obtain an index of nonexistent item");
+ if (it == m_inv.end())
+ throw Error("Partition::index(): attempted to obtain an index of nonexistent item");
auto it2 = m_set.find(it->second);
Y_ASSERT(it2 != m_set.end());
- return it2->second.first;
- }
- /// Returns the whole equivalence class of @p t (i.e. item @p i
- /// is returned iff representative(i) == representative(t)).
+ return it2->second.first;
+ }
+ /// Returns the whole equivalence class of @p t (i.e. item @p i
+ /// is returned iff representative(i) == representative(t)).
const TVector<T>& Klass(const T& t) const
- {
+ {
auto it = m_inv.find(t);
- if (it == m_inv.end())
- throw Error("Partition::index(): attempted to obtain an index of nonexistent item");
+ if (it == m_inv.end())
+ throw Error("Partition::index(): attempted to obtain an index of nonexistent item");
auto it2 = m_set.find(it->second);
Y_ASSERT(it2 != m_set.end());
- return it2->second.second;
- }
-
- bool operator == (const Partition& rhs) const { return m_set == rhs.m_set; }
- bool operator != (const Partition& rhs) const { return !(*this == rhs); }
-
- /// Splits the current sets into smaller ones, using given equivalence relation.
- /// Requires given relation imply previous one (set either in ctor or
- /// in preceeding calls to split()), but performs faster.
- /// Replaces previous relation with given one.
- void Split(const Eq& eq)
- {
- m_eq = eq;
-
+ return it2->second.second;
+ }
+
+ bool operator == (const Partition& rhs) const { return m_set == rhs.m_set; }
+ bool operator != (const Partition& rhs) const { return !(*this == rhs); }
+
+ /// Splits the current sets into smaller ones, using given equivalence relation.
+ /// Requires given relation imply previous one (set either in ctor or
+ /// in preceeding calls to split()), but performs faster.
+ /// Replaces previous relation with given one.
+ void Split(const Eq& eq)
+ {
+ m_eq = eq;
+
for (auto&& element : m_set)
if (element.second.second.size() > 1) {
TVector<T>& v = element.second.second;
auto bound = std::partition(v.begin(), v.end(), std::bind2nd(m_eq, v[0]));
- if (bound == v.end())
- continue;
-
- Set delta;
+ if (bound == v.end())
+ continue;
+
+ Set delta;
for (auto it = bound, ie = v.end(); it != ie; ++it)
- DoAppend(delta, *it);
-
- v.erase(bound, v.end());
- m_set.insert(delta.begin(), delta.end());
- }
- }
-
-private:
- Eq m_eq;
- Set m_set;
+ DoAppend(delta, *it);
+
+ v.erase(bound, v.end());
+ m_set.insert(delta.begin(), delta.end());
+ }
+ }
+
+private:
+ Eq m_eq;
+ Set m_set;
TMap<T, T> m_inv;
- size_t m_maxidx;
-
- void DoAppend(Set& set, const T& t)
- {
+ size_t m_maxidx;
+
+ void DoAppend(Set& set, const T& t)
+ {
auto it = set.begin();
auto end = set.end();
- for (; it != end; ++it)
- if (m_eq(it->first, t)) {
- it->second.second.push_back(t);
- m_inv[t] = it->first;
- break;
- }
-
- if (it == end) {
- // Begin new set
+ for (; it != end; ++it)
+ if (m_eq(it->first, t)) {
+ it->second.second.push_back(t);
+ m_inv[t] = it->first;
+ break;
+ }
+
+ if (it == end) {
+ // Begin new set
TVector<T> v(1, t);
- set.insert(ymake_pair(t, ymake_pair(m_maxidx++, v)));
- m_inv[t] = t;
- }
- }
-};
-
-// Mainly for debugging
-template<class T, class Eq>
-yostream& operator << (yostream& stream, const Partition<T, Eq>& partition)
-{
- stream << "Partition {\n";
+ set.insert(ymake_pair(t, ymake_pair(m_maxidx++, v)));
+ m_inv[t] = t;
+ }
+ }
+};
+
+// Mainly for debugging
+template<class T, class Eq>
+yostream& operator << (yostream& stream, const Partition<T, Eq>& partition)
+{
+ stream << "Partition {\n";
for (auto&& partitionElement : partition) {
stream << " Class " << partitionElement.second.first << " \"" << partitionElement.first << "\" { ";
- bool first = false;
+ bool first = false;
for (auto&& element : partitionElement.second.second) {
- if (first)
- stream << ", ";
- else
- first = true;
+ if (first)
+ stream << ", ";
+ else
+ first = true;
stream << element;
- }
- stream << " }\n";
- }
- stream << "}";
- return stream;
-}
-
-}
-
-
-#endif
+ }
+ stream << " }\n";
+ }
+ stream << "}";
+ return stream;
+}
+
+}
+
+
+#endif
diff --git a/contrib/libs/pire/pire/pire.h b/contrib/libs/pire/pire/pire.h
index 12eb84ccb6..d4d3acd92d 100644
--- a/contrib/libs/pire/pire/pire.h
+++ b/contrib/libs/pire/pire/pire.h
@@ -1,38 +1,38 @@
-/*
- * pire.h -- a single include file for end-users
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * pire.h -- a single include file for end-users
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_PIRE_H
-#define PIRE_PIRE_H
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_PIRE_H
+#define PIRE_PIRE_H
+
#include <contrib/libs/pire/pire/scanners/multi.h>
#include <contrib/libs/pire/pire/scanners/half_final.h>
#include <contrib/libs/pire/pire/scanners/simple.h>
#include <contrib/libs/pire/pire/scanners/slow.h>
#include <contrib/libs/pire/pire/scanners/pair.h>
-
-#include "re_lexer.h"
-#include "fsm.h"
-#include "encoding.h"
-#include "run.h"
-
-#endif
+
+#include "re_lexer.h"
+#include "fsm.h"
+#include "encoding.h"
+#include "run.h"
+
+#endif
diff --git a/contrib/libs/pire/pire/platform.h b/contrib/libs/pire/pire/platform.h
index 54ded6b387..c0504b7ce3 100644
--- a/contrib/libs/pire/pire/platform.h
+++ b/contrib/libs/pire/pire/platform.h
@@ -1,47 +1,47 @@
-/*
- * platform.h -- hardware and OS specific stuff
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-#ifndef PIRE_PLATFORM_H_INCLUDED
-#define PIRE_PLATFORM_H_INCLUDED
-
+/*
+ * platform.h -- hardware and OS specific stuff
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+#ifndef PIRE_PLATFORM_H_INCLUDED
+#define PIRE_PLATFORM_H_INCLUDED
+
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/static_assert.h>
-
+
#ifndef PIRE_FORCED_INLINE
-#ifdef __GNUC__
+#ifdef __GNUC__
#define PIRE_FORCED_INLINE inline __attribute__((__always_inline__))
-#elif _MSC_VER
+#elif _MSC_VER
#define PIRE_FORCED_INLINE __forceinline
-#else
+#else
#define PIRE_FORCED_INLINE inline
-#endif
-#endif
-
-#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
-#define PIRE_HOT_FUNCTION __attribute__ ((hot))
-#else
-#define PIRE_HOT_FUNCTION
-#endif
-
+#endif
+#endif
+
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+#define PIRE_HOT_FUNCTION __attribute__ ((hot))
+#else
+#define PIRE_HOT_FUNCTION
+#endif
+
#ifndef PIRE_LIKELY
#ifdef __GNUC__
#define PIRE_LIKELY(x) (__builtin_expect((x), 1))
@@ -58,27 +58,27 @@
#endif
#endif
-#ifdef _MSC_VER
-#include <stdio.h>
-#include <stdarg.h>
-
-namespace Pire {
-
-#if _MCS_VER >= 1600
-#ifdef _WIN64
-typedef i64 ssize_t;
-#else
-typedef i32 ssize_t;
-#endif
-#endif
-
-inline int snprintf(char *str, size_t size, const char *format, ...)
-{
+#ifdef _MSC_VER
+#include <stdio.h>
+#include <stdarg.h>
+
+namespace Pire {
+
+#if _MCS_VER >= 1600
+#ifdef _WIN64
+typedef i64 ssize_t;
+#else
+typedef i32 ssize_t;
+#endif
+#endif
+
+inline int snprintf(char *str, size_t size, const char *format, ...)
+{
va_list argptr;
va_start(argptr, format);
int i = _vsnprintf(str, size-1, format, argptr);
va_end(argptr);
-
+
// A workaround for some bug
if (i < 0) {
str[size - 1] = '\x00';
@@ -87,14 +87,14 @@ inline int snprintf(char *str, size_t size, const char *format, ...)
str[i] = '\x00';
}
return i;
-}
-
-}
-#endif
-
-namespace Pire {
-namespace Impl {
-
+}
+
+}
+#endif
+
+namespace Pire {
+namespace Impl {
+
// A portable way to define a constant like `(size_t)0101010101010101ull' without any warnings.
template<unsigned Pos, unsigned char Byte>
struct DoGenerateConst {
@@ -112,11 +112,11 @@ struct GenerateConst {
};
-// Common implementation of mask comparison logic suitable for
-// any instruction set
-struct BasicInstructionSet {
+// Common implementation of mask comparison logic suitable for
+// any instruction set
+struct BasicInstructionSet {
typedef size_t Vector;
-
+
// Check bytes in the chunk against bytes in the mask
static inline Vector CheckBytes(Vector mask, Vector chunk)
{
@@ -125,66 +125,66 @@ struct BasicInstructionSet {
size_t mc = chunk ^ mask;
return ((mc - mask0x01) & ~mc & mask0x80);
}
-
+
static inline Vector Or(Vector mask1, Vector mask2) { return (mask1 | mask2); }
-
+
static inline bool IsAnySet(Vector mask) { return (mask != 0); }
-};
-
-}}
-
-#if defined(__SSE2__)
-#include <emmintrin.h>
-
-namespace Pire {
-namespace Impl {
-
-// SSE2-optimized mask comparison logic
-struct AvailSSE2 {
+};
+
+}}
+
+#if defined(__SSE2__)
+#include <emmintrin.h>
+
+namespace Pire {
+namespace Impl {
+
+// SSE2-optimized mask comparison logic
+struct AvailSSE2 {
typedef __m128i Vector;
-
+
static inline Vector CheckBytes(Vector mask, Vector chunk)
{
return _mm_cmpeq_epi8(mask, chunk);
}
-
+
static inline Vector Or(Vector mask1, Vector mask2)
{
return _mm_or_si128(mask1, mask2);
}
-
+
static inline bool IsAnySet(Vector mask)
{
return _mm_movemask_epi8(mask);
}
-};
-
-typedef AvailSSE2 AvailInstructionSet;
-
-inline AvailSSE2::Vector ToLittleEndian(AvailSSE2::Vector x) { return x; }
-
-}}
-
-#elif defined(__MMX__)
-#include <mmintrin.h>
-
-namespace Pire {
-namespace Impl {
-
-// MMX-optimized mask comparison logic
-struct AvailMMX {
+};
+
+typedef AvailSSE2 AvailInstructionSet;
+
+inline AvailSSE2::Vector ToLittleEndian(AvailSSE2::Vector x) { return x; }
+
+}}
+
+#elif defined(__MMX__)
+#include <mmintrin.h>
+
+namespace Pire {
+namespace Impl {
+
+// MMX-optimized mask comparison logic
+struct AvailMMX {
typedef __m64 Vector;
-
+
static inline Vector CheckBytes(Vector mask, Vector chunk)
{
return _mm_cmpeq_pi8(mask, chunk);
}
-
+
static inline Vector Or(Vector mask1, Vector mask2)
{
return _mm_or_si64(mask1, mask2);
}
-
+
static inline bool IsAnySet(Vector mask)
{
union {
@@ -194,68 +194,68 @@ struct AvailMMX {
mmxMask = mask;
return ui64Mask;
}
-};
-
-typedef AvailMMX AvailInstructionSet;
-
-inline AvailMMX::Vector ToLittleEndian(AvailMMX::Vector x) { return x; }
-
-}}
-
-#else // no SSE and MMX
-
-namespace Pire {
-namespace Impl {
-
-typedef BasicInstructionSet AvailInstructionSet;
-
-}}
-
-#endif
-
-namespace Pire {
-namespace Impl {
-
-typedef AvailInstructionSet::Vector Word;
-
-inline Word CheckBytes(Word mask, Word chunk) { return AvailInstructionSet::CheckBytes(mask, chunk); }
-
-inline Word Or(Word mask1, Word mask2) { return AvailInstructionSet::Or(mask1, mask2); }
-
-inline bool IsAnySet(Word mask) { return AvailInstructionSet::IsAnySet(mask); }
-
-// MaxSizeWord type is largest integer type supported by the plaform including
-// all possible SSE extensions that are are known for this platform (even if these
-// extensions are not available at compile time)
-// It is used for alignments and save/load data structures to produce data format
-// compatible between all platforms with the same endianness and pointer size
-template <size_t Size> struct MaxWordSizeHelper;
-
-// Maximum size of SSE register is 128 bit on x86 and x86_64
-template <>
-struct MaxWordSizeHelper<16> {
+};
+
+typedef AvailMMX AvailInstructionSet;
+
+inline AvailMMX::Vector ToLittleEndian(AvailMMX::Vector x) { return x; }
+
+}}
+
+#else // no SSE and MMX
+
+namespace Pire {
+namespace Impl {
+
+typedef BasicInstructionSet AvailInstructionSet;
+
+}}
+
+#endif
+
+namespace Pire {
+namespace Impl {
+
+typedef AvailInstructionSet::Vector Word;
+
+inline Word CheckBytes(Word mask, Word chunk) { return AvailInstructionSet::CheckBytes(mask, chunk); }
+
+inline Word Or(Word mask1, Word mask2) { return AvailInstructionSet::Or(mask1, mask2); }
+
+inline bool IsAnySet(Word mask) { return AvailInstructionSet::IsAnySet(mask); }
+
+// MaxSizeWord type is largest integer type supported by the plaform including
+// all possible SSE extensions that are are known for this platform (even if these
+// extensions are not available at compile time)
+// It is used for alignments and save/load data structures to produce data format
+// compatible between all platforms with the same endianness and pointer size
+template <size_t Size> struct MaxWordSizeHelper;
+
+// Maximum size of SSE register is 128 bit on x86 and x86_64
+template <>
+struct MaxWordSizeHelper<16> {
struct MaxSizeWord {
char val[16];
};
-};
-
-typedef MaxWordSizeHelper<16>::MaxSizeWord MaxSizeWord;
-
-// MaxSizeWord size should be a multiple of size_t size and a multipe of Word size
-PIRE_STATIC_ASSERT(
+};
+
+typedef MaxWordSizeHelper<16>::MaxSizeWord MaxSizeWord;
+
+// MaxSizeWord size should be a multiple of size_t size and a multipe of Word size
+PIRE_STATIC_ASSERT(
(sizeof(MaxSizeWord) % sizeof(size_t) == 0) &&
(sizeof(MaxSizeWord) % sizeof(Word) == 0));
-
-inline size_t FillSizeT(char c)
-{
+
+inline size_t FillSizeT(char c)
+{
size_t w = c;
w &= 0x0ff;
for (size_t i = 8; i != sizeof(size_t)*8; i <<= 1)
w = (w << i) | w;
return w;
-}
-
-}}
-
-#endif
-
+}
+
+}}
+
+#endif
+
diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp
index 132fbeb039..c2258dd759 100644
--- a/contrib/libs/pire/pire/re_lexer.cpp
+++ b/contrib/libs/pire/pire/re_lexer.cpp
@@ -1,28 +1,28 @@
-/*
- * re_lexer.cpp -- implementation of Lexer class
+/*
+ * re_lexer.cpp -- implementation of Lexer class
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <ctype.h>
-#include <stdexcept>
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <ctype.h>
+#include <stdexcept>
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/utf8.h>
@@ -32,24 +32,24 @@
#include "re_lexer.h"
#include "re_parser.h"
#include "read_unicode.h"
+
-
-namespace Pire {
-
-namespace Impl {
+namespace Pire {
+
+namespace Impl {
int yre_parse(Pire::Lexer& lexer);
-}
-
-Term Term::Character(wchar32 c) { Term::CharacterRange cr; cr.first.insert(Term::String(1, c)); cr.second = false; return Term(TokenTypes::Letters, cr); }
-Term Term::Repetition(int lower, int upper) { return Term(TokenTypes::Count, RepetitionCount(lower, upper)); }
-Term Term::Dot() { return Term(TokenTypes::Dot, DotTag()); }
-Term Term::BeginMark() { return Term(TokenTypes::BeginMark, BeginTag()); }
-Term Term::EndMark() { return Term(TokenTypes::EndMark, EndTag()); }
-
+}
+
+Term Term::Character(wchar32 c) { Term::CharacterRange cr; cr.first.insert(Term::String(1, c)); cr.second = false; return Term(TokenTypes::Letters, cr); }
+Term Term::Repetition(int lower, int upper) { return Term(TokenTypes::Count, RepetitionCount(lower, upper)); }
+Term Term::Dot() { return Term(TokenTypes::Dot, DotTag()); }
+Term Term::BeginMark() { return Term(TokenTypes::BeginMark, BeginTag()); }
+Term Term::EndMark() { return Term(TokenTypes::EndMark, EndTag()); }
+
Lexer::~Lexer() = default;
-
-wchar32 Lexer::GetChar()
-{
+
+wchar32 Lexer::GetChar()
+{
if (m_input.empty())
return End;
else if (m_input.front() == '\\') {
@@ -64,23 +64,23 @@ wchar32 Lexer::GetChar()
m_input.pop_front();
return ch;
}
-}
-
-wchar32 Lexer::PeekChar()
-{
+}
+
+wchar32 Lexer::PeekChar()
+{
if (m_input.empty())
return End;
else
return m_input.front();
-}
-
-void Lexer::UngetChar(wchar32 c)
-{
+}
+
+void Lexer::UngetChar(wchar32 c)
+{
if (c != End)
m_input.push_front(c);
-}
-
-namespace {
+}
+
+namespace {
class CompareFeaturesByPriority: public ybinary_function<const Feature::Ptr&, const Feature::Ptr&, bool> {
public:
bool operator()(const Feature::Ptr& a, const Feature::Ptr& b) const
@@ -88,15 +88,15 @@ namespace {
return a->Priority() < b->Priority();
}
};
-}
-
+}
+
Lexer& Lexer::AddFeature(Feature::Ptr& feature)
-{
+{
feature->m_lexer = this;
m_features.insert(LowerBound(m_features.begin(), m_features.end(), feature, CompareFeaturesByPriority()), std::move(feature));
return *this;
-}
-
+}
+
Lexer& Lexer::AddFeature(Feature::Ptr&& feature)
{
feature->m_lexer = this;
@@ -104,8 +104,8 @@ Lexer& Lexer::AddFeature(Feature::Ptr&& feature)
return *this;
}
-Term Lexer::DoLex()
-{
+Term Lexer::DoLex()
+{
static const char* controls = "|().*+?^$\\";
for (;;) {
UngetChar(GetChar());
@@ -120,7 +120,7 @@ Term Lexer::DoLex()
}
}
ch = GetChar();
-
+
if (ch == '|')
return Term(TokenTypes::Or);
else if (ch == '(') {
@@ -144,15 +144,15 @@ Term Lexer::DoLex()
else
return Term::Character(ch);
}
-}
-
-Term Lexer::Lex()
-{
+}
+
+Term Lexer::Lex()
+{
Term t = DoLex();
-
+
for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i)
(*i)->Alter(t);
-
+
if (t.Value().IsA<Term::CharacterRange>()) {
const auto& chars = t.Value().As<Term::CharacterRange>();
//std::cerr << "lex: type " << t.type() << "; chars = { " << join(chars.first.begin(), chars.first.end(), ", ") << " }" << std::endl;
@@ -186,25 +186,25 @@ Term Lexer::Lex()
else if (type == TokenTypes::End)
type = 0;
return Term(type, t.Value());
-}
+}
-void Lexer::Parenthesized(Fsm& fsm)
-{
+void Lexer::Parenthesized(Fsm& fsm)
+{
for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i)
(*i)->Parenthesized(fsm);
-}
-
-wchar32 Feature::CorrectChar(wchar32 c, const char* controls)
-{
+}
+
+wchar32 Feature::CorrectChar(wchar32 c, const char* controls)
+{
bool ctrl = (strchr(controls, c & 0xFF) != 0);
if ((c & ControlMask) == Control && ctrl)
return c & ~ControlMask;
if (c <= 0xFF && ctrl)
return c | Control;
return c;
-}
-
-namespace {
+}
+
+namespace {
class EnableUnicodeSequencesImpl : public UnicodeReader {
public:
bool Accepts(wchar32 c) const {
@@ -219,7 +219,7 @@ namespace {
class CharacterRangeReader: public UnicodeReader {
public:
bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); }
-
+
Term Lex()
{
static const char* controls = "^[]-\\";
@@ -227,14 +227,14 @@ namespace {
wchar32 ch = CorrectChar(GetChar(), controls);
if (ch == '[' || ch == ']')
return Term::Character(ch);
-
+
Term::CharacterRange cs;
ch = CorrectChar(GetChar(), controls);
if (ch == (Control | '^')) {
cs.second = true;
ch = CorrectChar(GetChar(), controls);
}
-
+
bool firstUnicode;
wchar32 unicodeSymbol = 0;
@@ -281,15 +281,15 @@ namespace {
}
if (ch == End)
Error("Unexpected end of pattern");
-
+
return Term(TokenTypes::Letters, cs);
}
};
-
+
class RepetitionCountReader: public Feature {
public:
bool Accepts(wchar32 c) const { return c == '{' || c == (Control | '{') || c == (Control | '}'); }
-
+
Term Lex()
{
wchar32 ch = GetChar();
@@ -297,17 +297,17 @@ namespace {
return Term::Character(ch & ~ControlMask);
ch = GetChar();
int lower = 0, upper = 0;
-
+
if (!is_digit(ch))
Error("Wrong repetition count");
-
+
for (; is_digit(ch); ch = GetChar())
lower = lower * 10 + (ch - '0');
if (ch == '}')
return Term::Repetition(lower, lower);
else if (ch != ',')
Error("Wrong repetition count");
-
+
ch = GetChar();
if (ch == '}')
return Term::Repetition(lower, Inf);
@@ -315,13 +315,13 @@ namespace {
Error("Wrong repetition count");
for (; is_digit(ch); ch = GetChar())
upper = upper * 10 + (ch - '0');
-
+
if (ch != '}')
Error("Wrong repetition count");
return Term::Repetition(lower, upper);
}
};
-
+
class CaseInsensitiveImpl: public Feature {
public:
void Alter(Term& t)
@@ -363,30 +363,30 @@ namespace {
}
}
};
-}
-
-namespace Features {
+}
+
+namespace Features {
Feature::Ptr CaseInsensitive() { return Feature::Ptr(new CaseInsensitiveImpl); }
Feature::Ptr CharClasses();
Feature::Ptr AndNotSupport() { return Feature::Ptr(new AndNotSupportImpl); }
-};
-
-void Lexer::InstallDefaultFeatures()
-{
+};
+
+void Lexer::InstallDefaultFeatures()
+{
AddFeature(Feature::Ptr(new CharacterRangeReader));
AddFeature(Feature::Ptr(new RepetitionCountReader));
AddFeature(Features::CharClasses());
AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl));
-}
-
-Fsm Lexer::Parse()
-{
+}
+
+Fsm Lexer::Parse()
+{
if (!Impl::yre_parse(*this))
return m_retval.As<Fsm>();
else {
Error("Syntax error in regexp");
return Fsm(); // Make compiler happy
}
-}
-
-}
+}
+
+}
diff --git a/contrib/libs/pire/pire/re_lexer.h b/contrib/libs/pire/pire/re_lexer.h
index 5591c16d34..e397a38d5c 100644
--- a/contrib/libs/pire/pire/re_lexer.h
+++ b/contrib/libs/pire/pire/re_lexer.h
@@ -1,244 +1,244 @@
-/*
- * re_lexer.h -- definition required for parsing regexps
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * re_lexer.h -- definition required for parsing regexps
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_RE_LEXER_H
-#define PIRE_RE_LEXER_H
-
-
-#include <vector>
-#include <stack>
-#include <set>
-#include <utility>
-#include <stdexcept>
-#include <utility>
-#include <string.h>
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_RE_LEXER_H
+#define PIRE_RE_LEXER_H
+
+
+#include <vector>
+#include <stack>
+#include <set>
+#include <utility>
+#include <stdexcept>
+#include <utility>
+#include <string.h>
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/stub/stl.h>
-
+
#include "encoding.h"
#include "any.h"
-namespace Pire {
-
-namespace Consts {
-enum { Inf = -1 };
-
-static const wchar32 Control = 0xF000;
-static const wchar32 ControlMask = 0xFF00;
-static const wchar32 End = Control | 0xFF;
-};
-
-using namespace Consts;
-
-namespace TokenTypes {
-enum {
- None = 0,
- Letters,
- Count,
- Dot,
- Open,
- Close,
- Or,
- And,
- Not,
- BeginMark,
- EndMark,
- End
-};
-}
-
-/**
-* A single terminal character in regexp pattern.
-* Consists of a type (a character, a repetition count, an opening parenthesis, etc...)
-* and optional value.
-*/
-class Term {
-public:
+namespace Pire {
+
+namespace Consts {
+enum { Inf = -1 };
+
+static const wchar32 Control = 0xF000;
+static const wchar32 ControlMask = 0xFF00;
+static const wchar32 End = Control | 0xFF;
+};
+
+using namespace Consts;
+
+namespace TokenTypes {
+enum {
+ None = 0,
+ Letters,
+ Count,
+ Dot,
+ Open,
+ Close,
+ Or,
+ And,
+ Not,
+ BeginMark,
+ EndMark,
+ End
+};
+}
+
+/**
+* A single terminal character in regexp pattern.
+* Consists of a type (a character, a repetition count, an opening parenthesis, etc...)
+* and optional value.
+*/
+class Term {
+public:
typedef TVector<wchar32> String;
typedef TSet<String> Strings;
-
- typedef ypair<int, int> RepetitionCount;
- typedef ypair<Strings, bool> CharacterRange;
-
- struct DotTag {};
- struct BeginTag {};
- struct EndTag {};
-
- Term(int type): m_type(type) {}
- template<class T> Term(int type, T t): m_type(type), m_value(t) {}
- Term(int type, const Any& value): m_type(type), m_value(value) {}
-
- static Term Character(wchar32 c);
- static Term Repetition(int lower, int upper);
- static Term Dot();
- static Term BeginMark();
- static Term EndMark();
-
- int Type() const { return m_type; }
- const Any& Value() const { return m_value; }
-private:
- int m_type;
- Any m_value;
-};
-
-class Feature;
-
-/**
-* A class performing regexp pattern parsing.
-*/
-class Lexer {
-public:
- // One-size-fits-all constructor set.
- Lexer()
- : m_encoding(&Encodings::Latin1())
- { InstallDefaultFeatures(); }
-
- explicit Lexer(const char* str)
- : m_encoding(&Encodings::Latin1())
- {
- InstallDefaultFeatures();
- Assign(str, str + strlen(str));
- }
- template<class T> explicit Lexer(const T& t)
- : m_encoding(&Encodings::Latin1())
- {
- InstallDefaultFeatures();
- Assign(t.begin(), t.end());
- }
-
- template<class Iter> Lexer(Iter begin, Iter end)
- : m_encoding(&Encodings::Latin1())
- {
- InstallDefaultFeatures();
- Assign(begin, end);
- }
- ~Lexer();
-
- template<class Iter> void Assign(Iter begin, Iter end)
- {
- m_input.clear();
- std::copy(begin, end, std::back_inserter(m_input));
- }
-
- /// The main lexer function. Extracts and returns the next term in input sequence.
- Term Lex();
- /// Installs an additional lexer feature.
+
+ typedef ypair<int, int> RepetitionCount;
+ typedef ypair<Strings, bool> CharacterRange;
+
+ struct DotTag {};
+ struct BeginTag {};
+ struct EndTag {};
+
+ Term(int type): m_type(type) {}
+ template<class T> Term(int type, T t): m_type(type), m_value(t) {}
+ Term(int type, const Any& value): m_type(type), m_value(value) {}
+
+ static Term Character(wchar32 c);
+ static Term Repetition(int lower, int upper);
+ static Term Dot();
+ static Term BeginMark();
+ static Term EndMark();
+
+ int Type() const { return m_type; }
+ const Any& Value() const { return m_value; }
+private:
+ int m_type;
+ Any m_value;
+};
+
+class Feature;
+
+/**
+* A class performing regexp pattern parsing.
+*/
+class Lexer {
+public:
+ // One-size-fits-all constructor set.
+ Lexer()
+ : m_encoding(&Encodings::Latin1())
+ { InstallDefaultFeatures(); }
+
+ explicit Lexer(const char* str)
+ : m_encoding(&Encodings::Latin1())
+ {
+ InstallDefaultFeatures();
+ Assign(str, str + strlen(str));
+ }
+ template<class T> explicit Lexer(const T& t)
+ : m_encoding(&Encodings::Latin1())
+ {
+ InstallDefaultFeatures();
+ Assign(t.begin(), t.end());
+ }
+
+ template<class Iter> Lexer(Iter begin, Iter end)
+ : m_encoding(&Encodings::Latin1())
+ {
+ InstallDefaultFeatures();
+ Assign(begin, end);
+ }
+ ~Lexer();
+
+ template<class Iter> void Assign(Iter begin, Iter end)
+ {
+ m_input.clear();
+ std::copy(begin, end, std::back_inserter(m_input));
+ }
+
+ /// The main lexer function. Extracts and returns the next term in input sequence.
+ Term Lex();
+ /// Installs an additional lexer feature.
/// We declare both lvalue and rvalue reference types to fix some linker errors.
Lexer& AddFeature(THolder<Feature>& a);
Lexer& AddFeature(THolder<Feature>&& a);
-
- const Pire::Encoding& Encoding() const { return *m_encoding; }
- Lexer& SetEncoding(const Pire::Encoding& encoding) { m_encoding = &encoding; return *this; }
+
+ const Pire::Encoding& Encoding() const { return *m_encoding; }
+ Lexer& SetEncoding(const Pire::Encoding& encoding) { m_encoding = &encoding; return *this; }
void SetError(const char* msg) { errmsg = msg; }
void SetError(ystring msg) { errmsg = msg; }
ystring& GetError() { return errmsg; }
-
- Any& Retval() { return m_retval; }
-
- Fsm Parse();
-
- void Parenthesized(Fsm& fsm);
-
-private:
- Term DoLex();
-
- wchar32 GetChar();
- wchar32 PeekChar();
- void UngetChar(wchar32 c);
-
- void Error(const char* msg) { throw Pire::Error(msg); }
-
- void InstallDefaultFeatures();
-
+
+ Any& Retval() { return m_retval; }
+
+ Fsm Parse();
+
+ void Parenthesized(Fsm& fsm);
+
+private:
+ Term DoLex();
+
+ wchar32 GetChar();
+ wchar32 PeekChar();
+ void UngetChar(wchar32 c);
+
+ void Error(const char* msg) { throw Pire::Error(msg); }
+
+ void InstallDefaultFeatures();
+
TDeque<wchar32> m_input;
- const Pire::Encoding* m_encoding;
+ const Pire::Encoding* m_encoding;
TVector<THolder<Feature>> m_features;
- Any m_retval;
+ Any m_retval;
ystring errmsg;
-
- friend class Feature;
-
- Lexer(const Lexer&);
- Lexer& operator = (const Lexer&);
-};
-
-/**
-* A basic class for Pire customization.
-* Features can be installed in the lexer and alter its behaviour.
-*/
-class Feature {
-public:
+
+ friend class Feature;
+
+ Lexer(const Lexer&);
+ Lexer& operator = (const Lexer&);
+};
+
+/**
+* A basic class for Pire customization.
+* Features can be installed in the lexer and alter its behaviour.
+*/
+class Feature {
+public:
/// Precedence of features. The less the priority, the earlier
- /// will Lex() be called, and the later will Alter() and Parenthesized() be called.
- virtual int Priority() const { return 50; }
-
- /// Lexer will call this function to check whether the feature
- /// wants to handle the next part of the input sequence in its
- /// specific way. If it does not, features Lex() will not be called.
- virtual bool Accepts(wchar32 /*c*/) const { return false; }
- /// Should eat up some part of the input sequence, handle it
- /// somehow and produce a terminal.
- virtual Term Lex() { return Term(0); }
-
- /// This function recieves a shiny new terminal, and the feature
- /// has a chance to hack it somehow if it wants.
- virtual void Alter(Term&) {}
- /// This function recieves a parenthesized part of a pattern, and the feature
- /// has a chance to hack it somehow if it wants (its the way to implement
- /// those perl-style (?@#$%:..) clauses).
- virtual void Parenthesized(Fsm&) {}
-
+ /// will Lex() be called, and the later will Alter() and Parenthesized() be called.
+ virtual int Priority() const { return 50; }
+
+ /// Lexer will call this function to check whether the feature
+ /// wants to handle the next part of the input sequence in its
+ /// specific way. If it does not, features Lex() will not be called.
+ virtual bool Accepts(wchar32 /*c*/) const { return false; }
+ /// Should eat up some part of the input sequence, handle it
+ /// somehow and produce a terminal.
+ virtual Term Lex() { return Term(0); }
+
+ /// This function recieves a shiny new terminal, and the feature
+ /// has a chance to hack it somehow if it wants.
+ virtual void Alter(Term&) {}
+ /// This function recieves a parenthesized part of a pattern, and the feature
+ /// has a chance to hack it somehow if it wants (its the way to implement
+ /// those perl-style (?@#$%:..) clauses).
+ virtual void Parenthesized(Fsm&) {}
+
using Ptr = THolder<Feature>;
-
+
virtual ~Feature() = default;
-protected:
-
- // These functions are exposed versions of the corresponding lexer functions.
- const Pire::Encoding& Encoding() const { return m_lexer->Encoding(); }
- wchar32 GetChar() { return m_lexer->GetChar(); }
- wchar32 PeekChar() { return m_lexer->PeekChar(); }
- void UngetChar(wchar32 c) { m_lexer->UngetChar(c); }
- wchar32 CorrectChar(wchar32 c, const char* controls);
- void Error(const char* msg) { m_lexer->Error(msg); }
-
-private:
- friend class Lexer;
- Lexer* m_lexer;
-};
-
-namespace Features {
- /// Disables case sensitivity
+protected:
+
+ // These functions are exposed versions of the corresponding lexer functions.
+ const Pire::Encoding& Encoding() const { return m_lexer->Encoding(); }
+ wchar32 GetChar() { return m_lexer->GetChar(); }
+ wchar32 PeekChar() { return m_lexer->PeekChar(); }
+ void UngetChar(wchar32 c) { m_lexer->UngetChar(c); }
+ wchar32 CorrectChar(wchar32 c, const char* controls);
+ void Error(const char* msg) { m_lexer->Error(msg); }
+
+private:
+ friend class Lexer;
+ Lexer* m_lexer;
+};
+
+namespace Features {
+ /// Disables case sensitivity
Feature::Ptr CaseInsensitive();
-
- /**
- * Adds two more operations:
- * (pattern1)&(pattern2) -- matches those strings which match both /pattern1/ and /pattern2/;
- * ~(pattern) -- matches those strings which do not match /pattern/.
- */
+
+ /**
+ * Adds two more operations:
+ * (pattern1)&(pattern2) -- matches those strings which match both /pattern1/ and /pattern2/;
+ * ~(pattern) -- matches those strings which do not match /pattern/.
+ */
Feature::Ptr AndNotSupport();
-}
-
-}
-
-#endif
+}
+
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y
index dbad88e287..292c275ebd 100644
--- a/contrib/libs/pire/pire/re_parser.y
+++ b/contrib/libs/pire/pire/re_parser.y
@@ -1,80 +1,80 @@
-%{ // -*- mode: c++ -*-
-
-/*
- * re_parser.ypp -- the main regexp parsing routine
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+%{ // -*- mode: c++ -*-
+
+/*
+ * re_parser.ypp -- the main regexp parsing routine
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifdef _MSC_VER
-// Disable yacc warnings
-#pragma warning(disable: 4060) // switch contains no 'case' or 'default' statements
-#pragma warning(disable: 4065) // switch contains 'default' but no 'case' statements
-#pragma warning(disable: 4102) // unreferenced label 'yyerrlabl'
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifdef _MSC_VER
+// Disable yacc warnings
+#pragma warning(disable: 4060) // switch contains no 'case' or 'default' statements
+#pragma warning(disable: 4065) // switch contains 'default' but no 'case' statements
+#pragma warning(disable: 4102) // unreferenced label 'yyerrlabl'
#pragma warning(disable: 4702) // unreachable code
-#endif
-
-#ifdef __GNUC__
-#pragma GCC diagnostic ignored "-Wuninitialized" // 'yylval' may be used uninitialized
-#endif
-
-#include <stdexcept>
-
+#endif
+
+#ifdef __GNUC__
+#pragma GCC diagnostic ignored "-Wuninitialized" // 'yylval' may be used uninitialized
+#endif
+
+#include <stdexcept>
+
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/re_lexer.h>
#include <contrib/libs/pire/pire/any.h>
#include <contrib/libs/pire/pire/stub/stl.h>
-
-#define YYSTYPE Any*
-#define YYSTYPE_IS_TRIVIAL 0
-
-namespace {
-
-using namespace Pire;
-using Pire::Fsm;
-using Pire::Encoding;
-
-int yylex(YYSTYPE*, Lexer&);
+
+#define YYSTYPE Any*
+#define YYSTYPE_IS_TRIVIAL 0
+
+namespace {
+
+using namespace Pire;
+using Pire::Fsm;
+using Pire::Encoding;
+
+int yylex(YYSTYPE*, Lexer&);
void yyerror(Pire::Lexer&, const char*);
-
-Fsm& ConvertToFSM(const Encoding& encoding, Any* any);
-void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr);
-
-%}
-
+
+Fsm& ConvertToFSM(const Encoding& encoding, Any* any);
+void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr);
+
+%}
+
%parse-param { Pire::Lexer& rlex }
%lex-param { Pire::Lexer& rlex }
%pure-parser
-
-// Terminal declarations
-%term YRE_LETTERS
-%term YRE_COUNT
-%term YRE_DOT
-%term YRE_AND
-%term YRE_NOT
-
+
+// Terminal declarations
+%term YRE_LETTERS
+%term YRE_COUNT
+%term YRE_DOT
+%term YRE_AND
+%term YRE_NOT
+
%destructor { delete $$; } <>
-%%
-
-regexp
+%%
+
+regexp
: alternative
{
ConvertToFSM(rlex.Encoding(), $1);
@@ -83,23 +83,23 @@ regexp
$$ = nullptr;
}
;
-
-alternative
+
/* Alternation. The left operand's value is reused as the result ($$ = $1) and
 * the right operand's Fsm is merged into it with operator|=. The '|' token
 * value ($2) and the right operand ($3) are deleted once merged. */
alternative
    : conjunction
    | alternative '|' conjunction { ConvertToFSM(rlex.Encoding(), ($$ = $1)) |= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; }
    ;
-
-conjunction
+
/* Conjunction (YRE_AND). The left operand's value is reused as the result
 * ($$ = $1) and the right operand's Fsm is merged into it with operator&=.
 * The YRE_AND token value ($2) and the right operand ($3) are deleted. */
conjunction
    : negation
    | conjunction YRE_AND negation { ConvertToFSM(rlex.Encoding(), ($$ = $1)) &= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; }
    ;
-
-negation
+
/* Negation (YRE_NOT). The operand becomes the result ($$ = $2), is converted
 * to an Fsm in place and complemented; the YRE_NOT token value ($1) is deleted. */
negation
    : concatenation
    | YRE_NOT concatenation { ConvertToFSM(rlex.Encoding(), ($$ = $2)).Complement(); delete $1; }
    ;
-
-concatenation
+
+concatenation
: { $$ = new Any(Fsm()); }
| concatenation iteration
{
@@ -113,8 +113,8 @@ concatenation
delete $2;
}
;
-
-iteration
+
+iteration
: term
| term YRE_COUNT
{
@@ -122,8 +122,8 @@ iteration
$$ = new Any(orig);
Fsm& cur = $$->As<Fsm>();
const Term::RepetitionCount& repc = $2->As<Term::RepetitionCount>();
-
-
+
+
if (repc.first == 0 && repc.second == 1) {
Fsm empty;
cur |= empty;
@@ -144,19 +144,19 @@ iteration
delete $2;
}
;
-
-term
+
/* Primary expressions. The first four alternatives have no action, so bison's
 * default action ($$ = $1) passes the token's value through unchanged.
 * For a parenthesized group the inner alternative becomes the result, the
 * lexer's Parenthesized() hook is invoked on its Fsm, and the values of both
 * parenthesis tokens are deleted. */
term
    : YRE_LETTERS
    | YRE_DOT
    | '^'
    | '$'
    | '(' alternative ')' { $$ = $2; rlex.Parenthesized($$->As<Fsm>()); delete $1; delete $3; }
    ;
-
-%%
-
-int yylex(YYSTYPE* lval, Pire::Lexer& rlex)
-{
+
+%%
+
+int yylex(YYSTYPE* lval, Pire::Lexer& rlex)
+{
try {
Pire::Term term = rlex.Lex();
if (!term.Value().Empty())
@@ -168,18 +168,18 @@ int yylex(YYSTYPE* lval, Pire::Lexer& rlex)
rlex.SetError(e.what());
return 0;
}
-}
-
+}
+
/// Bison error callback: stores the parser's message in the lexer, unless the
/// lexer already holds an error (the first recorded error wins).
void yyerror(Pire::Lexer& rlex, const char* str)
{
    const bool alreadyReported = rlex.GetError().length() != 0;
    if (alreadyReported)
        return;

    ystring message("Regexp parse error: ");
    message.append(str);
    rlex.SetError(message);
}
+
+void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr)
+{
TVector<ystring> strings;
-
+
for (auto&& i : cr.first) {
ystring s;
for (auto&& j : i) {
@@ -199,16 +199,16 @@ void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& c
a = Fsm::MakeFalse();
else
a.AppendStrings(strings);
-}
-
-Fsm& ConvertToFSM(const Encoding& encoding, Any* any)
-{
+}
+
+Fsm& ConvertToFSM(const Encoding& encoding, Any* any)
+{
if (any->IsA<Fsm>())
return any->As<Fsm>();
-
+
Any ret = Fsm();
Fsm& a = ret.As<Fsm>();
-
+
if (any->IsA<Term::DotTag>()) {
encoding.AppendDot(a);
} else if (any->IsA<Term::BeginTag>()) {
@@ -229,11 +229,11 @@ Fsm& ConvertToFSM(const Encoding& encoding, Any* any)
}
any->Swap(ret);
return a;
-}
-
-}
-
-namespace Pire {
+}
+
+}
+
+namespace Pire {
namespace Impl {
int yre_parse(Pire::Lexer& rlex)
{
@@ -244,4 +244,4 @@ namespace Pire {
return rc;
}
}
-}
+}
diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h
index f6e1ff734d..a2f3a2fc8b 100644
--- a/contrib/libs/pire/pire/run.h
+++ b/contrib/libs/pire/pire/run.h
@@ -1,113 +1,113 @@
-/*
- * run.h -- routines for running scanners on strings.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * run.h -- routines for running scanners on strings.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_RE_SCANNER_H
-#define PIRE_RE_SCANNER_H
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_RE_SCANNER_H
+#define PIRE_RE_SCANNER_H
+
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/memstreams.h>
#include <contrib/libs/pire/pire/scanners/pair.h>
#include "platform.h"
-#include "defs.h"
-
+#include "defs.h"
+
#include <string>
-namespace Pire {
-
- template<class Scanner>
- struct StDumper {
- StDumper(const Scanner& sc, typename Scanner::State st): m_sc(&sc), m_st(st) {}
- void Dump(yostream& stream) const { stream << m_sc->StateIndex(m_st) << (m_sc->Final(m_st) ? " [final]" : ""); }
- private:
- const Scanner* m_sc;
- typename Scanner::State m_st;
- };
-
- template<class Scanner> StDumper<Scanner> StDump(const Scanner& sc, typename Scanner::State st) { return StDumper<Scanner>(sc, st); }
- template<class Scanner> yostream& operator << (yostream& stream, const StDumper<Scanner>& stdump) { stdump.Dump(stream); return stream; }
-}
-
-namespace Pire {
-
-template<class Scanner>
+namespace Pire {
+
+ template<class Scanner>
+ struct StDumper {
+ StDumper(const Scanner& sc, typename Scanner::State st): m_sc(&sc), m_st(st) {}
+ void Dump(yostream& stream) const { stream << m_sc->StateIndex(m_st) << (m_sc->Final(m_st) ? " [final]" : ""); }
+ private:
+ const Scanner* m_sc;
+ typename Scanner::State m_st;
+ };
+
+ template<class Scanner> StDumper<Scanner> StDump(const Scanner& sc, typename Scanner::State st) { return StDumper<Scanner>(sc, st); }
+ template<class Scanner> yostream& operator << (yostream& stream, const StDumper<Scanner>& stdump) { stdump.Dump(stream); return stream; }
+}
+
+namespace Pire {
+
/// Advances @p state by a single input unit @p ch: first asks the scanner for
/// the transition (Next), then hands the action it returned back to the
/// scanner (TakeAction). @p ch must be below MaxCharUnaligned (checked with
/// Y_ASSERT).
template<class Scanner>
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
void Step(const Scanner& scanner, typename Scanner::State& state, Char ch)
{
    Y_ASSERT(ch < MaxCharUnaligned);
    // Next() must run before TakeAction(): the action comes from the transition.
    typename Scanner::Action a = scanner.Next(state, ch);
    scanner.TakeAction(state, a);
}
+
+namespace Impl {
+
+ enum Action { Continue, Stop };
+
+ template<class Scanner>
+ struct RunPred {
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action operator()(const Scanner&, const typename Scanner::State&, const char*) const { return Continue; }
- };
+ Action operator()(const Scanner&, const typename Scanner::State&, const char*) const { return Continue; }
+ };
- template<class Scanner>
- struct ShortestPrefixPred {
- explicit ShortestPrefixPred(const char*& pos): m_pos(&pos) {}
-
+ template<class Scanner>
+ struct ShortestPrefixPred {
+ explicit ShortestPrefixPred(const char*& pos): m_pos(&pos) {}
+
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const
- {
- if (sc.Final(st)) {
- *m_pos = pos;
- return Stop;
- } else {
+ Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const
+ {
+ if (sc.Final(st)) {
+ *m_pos = pos;
+ return Stop;
+ } else {
return (sc.Dead(st) ? Stop : Continue);
- }
- }
- private:
- const char** m_pos;
- };
+ }
+ }
+ private:
+ const char** m_pos;
+ };
- template<class Scanner>
- struct LongestPrefixPred {
- explicit LongestPrefixPred(const char*& pos): m_pos(&pos) {}
+ template<class Scanner>
+ struct LongestPrefixPred {
+ explicit LongestPrefixPred(const char*& pos): m_pos(&pos) {}
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const
- {
- if (sc.Final(st))
- *m_pos = pos;
- return (sc.Dead(st) ? Stop : Continue);
- }
- private:
- const char** m_pos;
- };
-
-}
-
-#ifndef PIRE_DEBUG
-
-namespace Impl {
-
+ Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const
+ {
+ if (sc.Final(st))
+ *m_pos = pos;
+ return (sc.Dead(st) ? Stop : Continue);
+ }
+ private:
+ const char** m_pos;
+ };
+
+}
+
+#ifndef PIRE_DEBUG
+
+namespace Impl {
+
template<class Scanner, class Pred>
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action SafeRunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred)
@@ -128,168 +128,168 @@ namespace Impl {
return Continue;
}
- /// Effectively runs a scanner on a short data chunk, fit completely into one machine word.
- template<class Scanner, class Pred>
+ /// Effectively runs a scanner on a short data chunk, fit completely into one machine word.
+ template<class Scanner, class Pred>
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action RunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred)
- {
+ Action RunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred)
+ {
Y_ASSERT(pos <= sizeof(size_t));
Y_ASSERT(size <= sizeof(size_t));
Y_ASSERT(pos + size <= sizeof(size_t));
-
+
if (PIRE_UNLIKELY(size == 0))
return Continue;
- size_t chunk = Impl::ToLittleEndian(*p) >> 8*pos;
- const char* ptr = (const char*) p + pos + size + 1;
-
- for (size_t i = size; i != 0; --i) {
- Step(scanner, state, chunk & 0xFF);
- if (pred(scanner, state, ptr - i) == Stop)
- return Stop;
- chunk >>= 8;
- }
- return Continue;
- }
-
- template<class Scanner>
- struct AlignedRunner {
-
- // Generic version for LongestPrefix()/ShortestPrefix() impelementations
- template<class Pred>
- static inline PIRE_HOT_FUNCTION
- Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, Pred stop)
- {
- typename Scanner::State st = state;
- Action ret = Continue;
- for (; begin != end && (ret = RunChunk(scanner, st, begin, 0, sizeof(void*), stop)) == Continue; ++begin)
- ;
- state = st;
- return ret;
- }
-
- // A special version for Run() impelementation that skips predicate checks
- static inline PIRE_HOT_FUNCTION
- Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, RunPred<Scanner>)
- {
- typename Scanner::State st = state;
- for (; begin != end; ++begin) {
- size_t chunk = *begin;
- for (size_t i = sizeof(chunk); i != 0; --i) {
- Step(scanner, st, chunk & 0xFF);
- chunk >>= 8;
- }
- }
- state = st;
- return Continue;
- }
- };
-
- /// The main function: runs a scanner through given memory range.
- template<class Scanner, class Pred>
+ size_t chunk = Impl::ToLittleEndian(*p) >> 8*pos;
+ const char* ptr = (const char*) p + pos + size + 1;
+
+ for (size_t i = size; i != 0; --i) {
+ Step(scanner, state, chunk & 0xFF);
+ if (pred(scanner, state, ptr - i) == Stop)
+ return Stop;
+ chunk >>= 8;
+ }
+ return Continue;
+ }
+
+ template<class Scanner>
+ struct AlignedRunner {
+
+ // Generic version for LongestPrefix()/ShortestPrefix() impelementations
+ template<class Pred>
+ static inline PIRE_HOT_FUNCTION
+ Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, Pred stop)
+ {
+ typename Scanner::State st = state;
+ Action ret = Continue;
+ for (; begin != end && (ret = RunChunk(scanner, st, begin, 0, sizeof(void*), stop)) == Continue; ++begin)
+ ;
+ state = st;
+ return ret;
+ }
+
+ // A special version for Run() impelementation that skips predicate checks
+ static inline PIRE_HOT_FUNCTION
+ Action RunAligned(const Scanner& scanner, typename Scanner::State& state, const size_t* begin, const size_t* end, RunPred<Scanner>)
+ {
+ typename Scanner::State st = state;
+ for (; begin != end; ++begin) {
+ size_t chunk = *begin;
+ for (size_t i = sizeof(chunk); i != 0; --i) {
+ Step(scanner, st, chunk & 0xFF);
+ chunk >>= 8;
+ }
+ }
+ state = st;
+ return Continue;
+ }
+ };
+
    /// The main function: runs a scanner through given memory range.
    ///
    /// The range is processed as an optional unaligned head, a run of whole
    /// aligned machine words and an optional unaligned tail. @p pred is consulted
    /// by the chunk runners after each character; as soon as one of them reports
    /// Stop the current state is written back to @p st and the function returns.
    template<class Scanner, class Pred>
    inline void DoRun(const Scanner& scanner, typename Scanner::State& st, TStringBuf str, Pred pred)
    {

        // Addresses of the words containing str.begin() and str.end(),
        // i.e. both pointers rounded down to a multiple of sizeof(size_t).
        const size_t* head = reinterpret_cast<const size_t*>((reinterpret_cast<uintptr_t>(str.begin())) & ~(sizeof(size_t)-1));
        const size_t* tail = reinterpret_cast<const size_t*>((reinterpret_cast<uintptr_t>(str.end())) & ~(sizeof(size_t)-1));

        size_t headSize = (sizeof(size_t) - (str.begin() - (const char*)head)); // The distance from @p begin to the end of the word containing @p begin
        size_t tailSize = str.end() - (const char*) tail; // The distance from the beginning of the word containing @p end to the @p end

        Y_ASSERT(headSize >= 1 && headSize <= sizeof(size_t));
        Y_ASSERT(tailSize < sizeof(size_t));

        // The whole string lies inside a single word: one chunk run suffices.
        if (head == tail) {
            Impl::SafeRunChunk(scanner, st, head, sizeof(size_t) - headSize, str.end() - str.begin(), pred);
            return;
        }

        // st is passed by reference to this function. If we use it directly on each step the compiler will have to
        // update it in memory because of pointer aliasing assumptions. Copying it into a local var allows the
        // compiler to store it in a register. This saves some instructions and cycles
        typename Scanner::State state = st;

        // Unaligned head: consume the bytes up to the next word boundary.
        if (str.begin() != (const char*) head) {
            if (Impl::RunChunk(scanner, state, head, sizeof(size_t) - headSize, headSize, pred) == Stop) {
                st = state;
                return;
            }
            ++head;
        }

        // Whole words between the head and the tail.
        if (Impl::AlignedRunner<Scanner>::RunAligned(scanner, state, head, tail, pred) == Stop) {
            st = state;
            return;
        }

        // Unaligned tail: the remaining bytes of the last, partially used word.
        if (tailSize)
            Impl::SafeRunChunk(scanner, state, tail, 0, tailSize, pred);

        st = state;
    }
+
+}
+
+/// Runs two scanners through given memory range simultaneously.
+/// This is several percent faster than running them independently.
+template<class Scanner1, class Scanner2>
inline void Run(const Scanner1& scanner1, const Scanner2& scanner2, typename Scanner1::State& state1, typename Scanner2::State& state2, TStringBuf str)
-{
- typedef ScannerPair<Scanner1, Scanner2> Scanners;
- Scanners pair(scanner1, scanner2);
- typename Scanners::State states(state1, state2);
+{
+ typedef ScannerPair<Scanner1, Scanner2> Scanners;
+ Scanners pair(scanner1, scanner2);
+ typename Scanners::State states(state1, state2);
Run(pair, states, str);
- state1 = states.first;
- state2 = states.second;
-}
-
-#else
-
-namespace Impl {
- /// A debug version of all Run() methods.
- template<class Scanner, class Pred>
- inline void DoRun(const Scanner& scanner, typename Scanner::State& state, const char* begin, const char* end, Pred pred)
- {
- Cdbg << "Running regexp on string " << ystring(begin, ymin(end - begin, static_cast<ptrdiff_t>(100u))) << Endl;
- Cdbg << "Initial state " << StDump(scanner, state) << Endl;
-
- if (pred(scanner, state, begin) == Stop) {
- Cdbg << " exiting" << Endl;
- return;
- }
-
- for (; begin != end; ++begin) {
- Step(scanner, state, (unsigned char)*begin);
- Cdbg << *begin << " => state " << StDump(scanner, state) << Endl;
- if (pred(scanner, state, begin + 1) == Stop) {
- Cdbg << " exiting" << Endl;
- return;
- }
- }
- }
-}
-
-#endif
+ state1 = states.first;
+ state2 = states.second;
+}
+
+#else
+
+namespace Impl {
+ /// A debug version of all Run() methods.
+ template<class Scanner, class Pred>
+ inline void DoRun(const Scanner& scanner, typename Scanner::State& state, const char* begin, const char* end, Pred pred)
+ {
+ Cdbg << "Running regexp on string " << ystring(begin, ymin(end - begin, static_cast<ptrdiff_t>(100u))) << Endl;
+ Cdbg << "Initial state " << StDump(scanner, state) << Endl;
+
+ if (pred(scanner, state, begin) == Stop) {
+ Cdbg << " exiting" << Endl;
+ return;
+ }
+
+ for (; begin != end; ++begin) {
+ Step(scanner, state, (unsigned char)*begin);
+ Cdbg << *begin << " => state " << StDump(scanner, state) << Endl;
+ if (pred(scanner, state, begin + 1) == Stop) {
+ Cdbg << " exiting" << Endl;
+ return;
+ }
+ }
+ }
+}
+
+#endif
-template<class Scanner>
+template<class Scanner>
void Run(const Scanner& sc, typename Scanner::State& st, TStringBuf str)
{
Impl::DoRun(sc, st, str, Impl::RunPred<Scanner>());
}
template<class Scanner>
-void Run(const Scanner& sc, typename Scanner::State& st, const char* begin, const char* end)
-{
+void Run(const Scanner& sc, typename Scanner::State& st, const char* begin, const char* end)
+{
Run(sc, st, TStringBuf(begin, end));
-}
-
+}
+
/// Returns default constructed string_view{} if there is no matching prefix
/// Returns str.substr(0, 0) if matching prefix is empty
-template<class Scanner>
+template<class Scanner>
std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false)
-{
- typename Scanner::State st;
- sc.Initialize(st);
+{
+ typename Scanner::State st;
+ sc.Initialize(st);
if (throughBeginMark)
Pire::Step(sc, st, BeginMark);
const char* pos = (sc.Final(st) ? str.data() : nullptr);
@@ -300,11 +300,11 @@ std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool thr
pos = str.data() + str.size();
}
return pos ? str.substr(0, pos - str.data()) : std::string_view{};
-}
-
-template<class Scanner>
+}
+
/// Pointer-pair overload of LongestPrefix(): returns a pointer just past the
/// longest matching prefix of [begin, end). The result is computed as
/// data() + size() of the string_view overload's result, so it is a null
/// pointer when that overload returns a default-constructed string_view.
template<class Scanner>
const char* LongestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false)
{
    const std::string_view input(begin, end - begin);
    const std::string_view matched = LongestPrefix(sc, input, throughBeginMark, throughEndMark);
    return matched.data() + matched.size();
}
@@ -314,11 +314,11 @@ const char* LongestPrefix(const Scanner& sc, const char* begin, const char* end,
template<class Scanner>
std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false)
{
- typename Scanner::State st;
- sc.Initialize(st);
+ typename Scanner::State st;
+ sc.Initialize(st);
if (throughBeginMark)
Pire::Step(sc, st, BeginMark);
- if (sc.Final(st))
+ if (sc.Final(st))
return str.substr(0, 0);
const char* pos = nullptr;
Impl::DoRun(sc, st, str, Impl::ShortestPrefixPred<Scanner>(pos));
@@ -328,8 +328,8 @@ std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool th
pos = str.data() + str.size();
}
return pos ? str.substr(0, pos - str.data()) : std::string_view{};
-}
-
+}
+
template<class Scanner>
const char* ShortestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false)
{
@@ -338,30 +338,30 @@ const char* ShortestPrefix(const Scanner& sc, const char* begin, const char* end
}
-/// The same as above, but scans string in reverse direction
-/// (consider using Fsm::Reverse() for using in this function).
+/// The same as above, but scans string in reverse direction
+/// (consider using Fsm::Reverse() for using in this function).
/// Returns default constructed string_view{} if there is no matching suffix
/// Returns str.substr(str.size(), 0) if matching suffix is empty
-template<class Scanner>
+template<class Scanner>
inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false)
-{
- typename Scanner::State state;
- scanner.Initialize(state);
+{
+ typename Scanner::State state;
+ scanner.Initialize(state);
if (throughEndMark)
Step(scanner, state, EndMark);
PIRE_IFDEBUG(Cdbg << "Running LongestSuffix on string " << ystring(str) << Endl);
- PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
-
+ PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
+
std::string_view suffix{};
auto begin = str.data() + str.size();
while (begin != str.data() && !scanner.Dead(state)) {
- if (scanner.Final(state))
+ if (scanner.Final(state))
suffix = str.substr(begin - str.data());
--begin;
Step(scanner, state, (unsigned char)*begin);
PIRE_IFDEBUG(Cdbg << *begin << " => state " << StDump(scanner, state) << Endl);
- }
- if (scanner.Final(state))
+ }
+ if (scanner.Final(state))
suffix = str.substr(begin - str.data());
if (throughBeginMark) {
Step(scanner, state, BeginMark);
@@ -369,97 +369,97 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s
suffix = str.substr(begin - str.data());
}
return suffix;
-}
-
+}
+
/// Reverse-pointer overload of LongestSuffix(). The scanned text is the
/// @p rbegin - @p rend characters starting at @p rend + 1. Returns the pointer
/// one before the start of the matched suffix, or nullptr when the string_view
/// overload reports no match.
template<class Scanner>
inline const char* LongestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false)
{
    const std::string_view input(rend + 1, rbegin - rend);
    const std::string_view matched = LongestSuffix(scanner, input, throughEndMark, throughBeginMark);
    if (matched.data() == nullptr)
        return nullptr;
    return matched.data() - 1;
}
-/// The same as above, but scans string in reverse direction
+/// The same as above, but scans string in reverse direction
/// Returns default constructed string_view{} if there is no matching suffix
/// Returns str.substr(str.size(), 0) if matching suffix is empty
-template<class Scanner>
+template<class Scanner>
inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false)
-{
+{
auto begin = str.data() + str.size();
- typename Scanner::State state;
- scanner.Initialize(state);
+ typename Scanner::State state;
+ scanner.Initialize(state);
if (throughEndMark)
Step(scanner, state, EndMark);
PIRE_IFDEBUG(Cdbg << "Running ShortestSuffix on string " << ystring(str) << Endl);
PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
-
+
while (begin != str.data() && !scanner.Final(state) && !scanner.Dead(state)) {
--begin;
scanner.Next(state, (unsigned char)*begin);
PIRE_IFDEBUG(Cdbg << *rbegin << " => state " << StDump(scanner, state) << Endl);
- }
+ }
if (throughBeginMark)
Step(scanner, state, BeginMark);
return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{};
-}
-
+}
+
/// Reverse-pointer overload of ShortestSuffix(). The scanned text is the
/// @p rbegin - @p rend characters starting at @p rend + 1. Returns the pointer
/// one before the start of the matched suffix, or nullptr when the string_view
/// overload reports no match.
template<class Scanner>
inline const char* ShortestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false)
{
    const std::string_view input(rend + 1, rbegin - rend);
    const std::string_view matched = ShortestSuffix(scanner, input, throughEndMark, throughBeginMark);
    if (matched.data() == nullptr)
        return nullptr;
    return matched.data() - 1;
}
-
-
-template<class Scanner>
-class RunHelper {
-public:
- RunHelper(const Scanner& sc, typename Scanner::State st): Sc(&sc), St(st) {}
- explicit RunHelper(const Scanner& sc): Sc(&sc) { Sc->Initialize(St); }
-
- RunHelper<Scanner>& Step(Char letter) { Pire::Step(*Sc, St, letter); return *this; }
+
+
+template<class Scanner>
+class RunHelper {
+public:
+ RunHelper(const Scanner& sc, typename Scanner::State st): Sc(&sc), St(st) {}
+ explicit RunHelper(const Scanner& sc): Sc(&sc) { Sc->Initialize(St); }
+
+ RunHelper<Scanner>& Step(Char letter) { Pire::Step(*Sc, St, letter); return *this; }
RunHelper<Scanner>& Run(TStringBuf str) { Pire::Run(*Sc, St, str); return *this; }
RunHelper<Scanner>& Run(const char* begin, const char* end) { return Run(TStringBuf(begin, end)); }
RunHelper<Scanner>& Run(const char* begin, size_t size) { return Run(TStringBuf(begin, begin + size)); }
- RunHelper<Scanner>& Begin() { return Step(BeginMark); }
- RunHelper<Scanner>& End() { return Step(EndMark); }
-
- const typename Scanner::State& State() const { return St; }
- struct Tag {};
- operator const Tag*() const { return Sc->Final(St) ? (const Tag*) this : 0; }
- bool operator ! () const { return !Sc->Final(St); }
-
-private:
- const Scanner* Sc;
- typename Scanner::State St;
-};
-
-template<class Scanner>
-RunHelper<Scanner> Runner(const Scanner& sc) { return RunHelper<Scanner>(sc); }
-
-template<class Scanner>
-RunHelper<Scanner> Runner(const Scanner& sc, typename Scanner::State st) { return RunHelper<Scanner>(sc, st); }
-
-
-/// Provided for testing purposes and convinience
-template<class Scanner>
+ RunHelper<Scanner>& Begin() { return Step(BeginMark); }
+ RunHelper<Scanner>& End() { return Step(EndMark); }
+
+ const typename Scanner::State& State() const { return St; }
+ struct Tag {};
+ operator const Tag*() const { return Sc->Final(St) ? (const Tag*) this : 0; }
+ bool operator ! () const { return !Sc->Final(St); }
+
+private:
+ const Scanner* Sc;
+ typename Scanner::State St;
+};
+
+template<class Scanner>
+RunHelper<Scanner> Runner(const Scanner& sc) { return RunHelper<Scanner>(sc); }
+
+template<class Scanner>
+RunHelper<Scanner> Runner(const Scanner& sc, typename Scanner::State st) { return RunHelper<Scanner>(sc, st); }
+
+
+/// Provided for testing purposes and convinience
+template<class Scanner>
bool Matches(const Scanner& scanner, TStringBuf str)
{
return Runner(scanner).Run(str);
}
template<class Scanner>
-bool Matches(const Scanner& scanner, const char* begin, const char* end)
-{
+bool Matches(const Scanner& scanner, const char* begin, const char* end)
+{
return Runner(scanner).Run(TStringBuf(begin, end));
-}
-
-/// Constructs an inline scanner in one statement
-template<class Scanner>
-Scanner MmappedScanner(const char* ptr, size_t size)
-{
- Scanner s;
- s.Mmap(ptr, size);
- return s;
-}
-
-}
-
-#endif
+}
+
/// Constructs an inline scanner in one statement: default-constructs a Scanner,
/// calls its Mmap() on the @p size bytes at @p ptr and returns it by value.
template<class Scanner>
Scanner MmappedScanner(const char* ptr, size_t size)
{
    Scanner mapped;
    mapped.Mmap(ptr, size);
    return mapped;
}
+
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp
index 3956e3c6ed..22fcccf665 100644
--- a/contrib/libs/pire/pire/scanner_io.cpp
+++ b/contrib/libs/pire/pire/scanner_io.cpp
@@ -1,26 +1,26 @@
-/*
- * scanner_io.cpp -- scanner serialization and deserialization
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * scanner_io.cpp -- scanner serialization and deserialization
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/saveload.h>
#include <contrib/libs/pire/pire/scanners/common.h>
@@ -28,75 +28,75 @@
#include <contrib/libs/pire/pire/scanners/simple.h>
#include <contrib/libs/pire/pire/scanners/loaded.h>
-#include "align.h"
-
-namespace Pire {
-
-void SimpleScanner::Save(yostream* s) const
-{
+#include "align.h"
+
+namespace Pire {
+
/// Serializes the scanner to @p s. Written in this order, each item followed
/// by an Impl::AlignSave() call (presumably padding the stream to an aligned
/// boundary -- confirm in align.h):
///   1. a Header tagged ScannerIOTypes::SimpleScanner carrying sizeof(m);
///   2. a copy of the locals `m` whose `initial` field has the address of
///      m_transitions subtracted (Load() adds the new address back);
///   3. the Empty() flag;
///   4. for a non-empty scanner only, the BufSize() bytes of m_buffer.
void SimpleScanner::Save(yostream* s) const
{
    SavePodType(s, Header(ScannerIOTypes::SimpleScanner, sizeof(m)));
    Impl::AlignSave(s, sizeof(Header));
    Locals mc = m;
    // Store `initial` relative to the transition table instead of as an address.
    mc.initial -= reinterpret_cast<size_t>(m_transitions);
    SavePodType(s, mc);
    Impl::AlignSave(s, sizeof(mc));
    SavePodType(s, Empty());
    Impl::AlignSave(s, sizeof(Empty()));
    if (!Empty()) {
        Y_ASSERT(m_buffer);
        Impl::AlignedSaveArray(s, m_buffer.Get(), BufSize());
    }
}
+
/// Deserializes a scanner from @p s, reading the items in the order Save()
/// writes them. Everything is loaded into a temporary scanner that is swapped
/// into *this only at the very end.
void SimpleScanner::Load(yistream* s)
{
    SimpleScanner sc;
    Impl::ValidateHeader(s, ScannerIOTypes::SimpleScanner, sizeof(sc.m));
    LoadPodType(s, sc.m);
    Impl::AlignLoad(s, sizeof(sc.m));
    bool empty;
    LoadPodType(s, empty);
    Impl::AlignLoad(s, sizeof(empty));
    if (empty) {
        sc.Alias(Null());
    } else {
        // Allocate a fresh buffer of the size implied by the loaded locals,
        // fill it from the stream and let Markup() set up the scanner's
        // internal pointers on top of it.
        sc.m_buffer = BufferType(new char[sc.BufSize()]);
        Impl::AlignedLoadArray(s, sc.m_buffer.Get(), sc.BufSize());
        sc.Markup(sc.m_buffer.Get());
        // Undo the rebasing done by Save(): `initial` was stored relative to
        // the transition table, so add the new table address back.
        sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions);
    }
    Swap(sc);
}
+
+void SlowScanner::Save(yostream* s) const
+{
SavePodType(s, Header(ScannerIOTypes::SlowScanner, sizeof(m)));
- Impl::AlignSave(s, sizeof(Header));
- SavePodType(s, m);
- Impl::AlignSave(s, sizeof(m));
- SavePodType(s, Empty());
- Impl::AlignSave(s, sizeof(Empty()));
- if (!Empty()) {
+ Impl::AlignSave(s, sizeof(Header));
+ SavePodType(s, m);
+ Impl::AlignSave(s, sizeof(m));
+ SavePodType(s, Empty());
+ Impl::AlignSave(s, sizeof(Empty()));
+ if (!Empty()) {
Y_ASSERT(!m_vec.empty());
- Impl::AlignedSaveArray(s, m_letters, MaxChar);
- Impl::AlignedSaveArray(s, m_finals, m.statesCount);
-
- size_t c = 0;
- SavePodType<size_t>(s, 0);
+ Impl::AlignedSaveArray(s, m_letters, MaxChar);
+ Impl::AlignedSaveArray(s, m_finals, m.statesCount);
+
+ size_t c = 0;
+ SavePodType<size_t>(s, 0);
for (auto&& i : m_vec) {
size_t n = c + i.size();
- SavePodType(s, n);
- c = n;
- }
- Impl::AlignSave(s, (m_vec.size() + 1) * sizeof(size_t));
-
- size_t size = 0;
+ SavePodType(s, n);
+ c = n;
+ }
+ Impl::AlignSave(s, (m_vec.size() + 1) * sizeof(size_t));
+
+ size_t size = 0;
for (auto&& i : m_vec)
if (!i.empty()) {
SavePodArray(s, &(i)[0], i.size());
size += sizeof(unsigned) * i.size();
- }
- Impl::AlignSave(s, size);
+ }
+ Impl::AlignSave(s, size);
if (need_actions) {
size_t pos = 0;
for (TVector< TVector< Action > >::const_iterator i = m_actionsvec.begin(), ie = m_actionsvec.end(); i != ie; ++i)
@@ -106,55 +106,55 @@ void SlowScanner::Save(yostream* s) const
}
Impl::AlignSave(s, pos);
}
- }
-}
-
-void SlowScanner::Load(yistream* s)
-{
- SlowScanner sc;
+ }
+}
+
+void SlowScanner::Load(yistream* s)
+{
+ SlowScanner sc;
Impl::ValidateHeader(s, ScannerIOTypes::SlowScanner, sizeof(sc.m));
- LoadPodType(s, sc.m);
- Impl::AlignLoad(s, sizeof(sc.m));
- bool empty;
- LoadPodType(s, empty);
- Impl::AlignLoad(s, sizeof(empty));
+ LoadPodType(s, sc.m);
+ Impl::AlignLoad(s, sizeof(sc.m));
+ bool empty;
+ LoadPodType(s, empty);
+ Impl::AlignLoad(s, sizeof(empty));
sc.need_actions = need_actions;
- if (empty) {
- sc.Alias(Null());
- } else {
- sc.m_vec.resize(sc.m.lettersCount * sc.m.statesCount);
+ if (empty) {
+ sc.Alias(Null());
+ } else {
+ sc.m_vec.resize(sc.m.lettersCount * sc.m.statesCount);
if (sc.need_actions)
sc.m_actionsvec.resize(sc.m.lettersCount * sc.m.statesCount);
- sc.m_vecptr = &sc.m_vec;
-
- sc.alloc(sc.m_letters, MaxChar);
- Impl::AlignedLoadArray(s, sc.m_letters, MaxChar);
-
- sc.alloc(sc.m_finals, sc.m.statesCount);
- Impl::AlignedLoadArray(s, sc.m_finals, sc.m.statesCount);
-
- size_t c;
- LoadPodType(s, c);
+ sc.m_vecptr = &sc.m_vec;
+
+ sc.alloc(sc.m_letters, MaxChar);
+ Impl::AlignedLoadArray(s, sc.m_letters, MaxChar);
+
+ sc.alloc(sc.m_finals, sc.m.statesCount);
+ Impl::AlignedLoadArray(s, sc.m_finals, sc.m.statesCount);
+
+ size_t c;
+ LoadPodType(s, c);
auto act = sc.m_actionsvec.begin();
for (auto&& i : sc.m_vec) {
- size_t n;
- LoadPodType(s, n);
+ size_t n;
+ LoadPodType(s, n);
i.resize(n - c);
if (sc.need_actions) {
act->resize(n - c);
++act;
}
- c = n;
- }
- Impl::AlignLoad(s, (m_vec.size() + 1) * sizeof(size_t));
-
- size_t size = 0;
+ c = n;
+ }
+ Impl::AlignLoad(s, (m_vec.size() + 1) * sizeof(size_t));
+
+ size_t size = 0;
for (auto&& i : sc.m_vec)
if (!i.empty()) {
LoadPodArray(s, &(i)[0], i.size());
size += sizeof(unsigned) * i.size();
- }
- Impl::AlignLoad(s, size);
+ }
+ Impl::AlignLoad(s, size);
size_t actSize = 0;
if (sc.need_actions) {
for (auto&& i : sc.m_actionsvec) {
@@ -165,53 +165,53 @@ void SlowScanner::Load(yistream* s)
}
Impl::AlignLoad(s, actSize);
}
- }
- Swap(sc);
-}
-
+ }
+ Swap(sc);
+}
+
void LoadedScanner::Save(yostream* s) const {
Save(s, ScannerIOTypes::LoadedScanner);
}
void LoadedScanner::Save(yostream* s, ui32 type) const
-{
+{
Y_ASSERT(type == ScannerIOTypes::LoadedScanner || type == ScannerIOTypes::NoGlueLimitCountingScanner);
SavePodType(s, Header(type, sizeof(m)));
- Impl::AlignSave(s, sizeof(Header));
- Locals mc = m;
- mc.initial -= reinterpret_cast<size_t>(m_jumps);
- SavePodType(s, mc);
- Impl::AlignSave(s, sizeof(mc));
-
- Impl::AlignedSaveArray(s, m_letters, MaxChar);
- Impl::AlignedSaveArray(s, m_jumps, m.statesCount * m.lettersCount);
- Impl::AlignedSaveArray(s, m_tags, m.statesCount);
-}
-
+ Impl::AlignSave(s, sizeof(Header));
+ Locals mc = m;
+ mc.initial -= reinterpret_cast<size_t>(m_jumps);
+ SavePodType(s, mc);
+ Impl::AlignSave(s, sizeof(mc));
+
+ Impl::AlignedSaveArray(s, m_letters, MaxChar);
+ Impl::AlignedSaveArray(s, m_jumps, m.statesCount * m.lettersCount);
+ Impl::AlignedSaveArray(s, m_tags, m.statesCount);
+}
+
void LoadedScanner::Load(yistream* s) {
Load(s, nullptr);
}
void LoadedScanner::Load(yistream* s, ui32* type)
-{
- LoadedScanner sc;
+{
+ LoadedScanner sc;
Header header = Impl::ValidateHeader(s, ScannerIOTypes::LoadedScanner, sizeof(sc.m));
if (type) {
*type = header.Type;
}
- LoadPodType(s, sc.m);
- Impl::AlignLoad(s, sizeof(sc.m));
+ LoadPodType(s, sc.m);
+ Impl::AlignLoad(s, sizeof(sc.m));
sc.m_buffer = BufferType(new char[sc.BufSize()]);
sc.Markup(sc.m_buffer.Get());
- Impl::AlignedLoadArray(s, sc.m_letters, MaxChar);
- Impl::AlignedLoadArray(s, sc.m_jumps, sc.m.statesCount * sc.m.lettersCount);
+ Impl::AlignedLoadArray(s, sc.m_letters, MaxChar);
+ Impl::AlignedLoadArray(s, sc.m_jumps, sc.m.statesCount * sc.m.lettersCount);
if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
TVector<Action> actions(sc.m.statesCount * sc.m.lettersCount);
Impl::AlignedLoadArray(s, actions.data(), actions.size());
}
- Impl::AlignedLoadArray(s, sc.m_tags, sc.m.statesCount);
- sc.m.initial += reinterpret_cast<size_t>(sc.m_jumps);
- Swap(sc);
-}
-
-}
+ Impl::AlignedLoadArray(s, sc.m_tags, sc.m.statesCount);
+ sc.m.initial += reinterpret_cast<size_t>(sc.m_jumps);
+ Swap(sc);
+}
+
+}
diff --git a/contrib/libs/pire/pire/scanners/common.h b/contrib/libs/pire/pire/scanners/common.h
index de5ea0af7b..4d03c1e4bc 100644
--- a/contrib/libs/pire/pire/scanners/common.h
+++ b/contrib/libs/pire/pire/scanners/common.h
@@ -1,35 +1,35 @@
-/*
- * common.h -- common declaration for Pire scanners
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * common.h -- common declaration for Pire scanners
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-#ifndef PIRE_SCANNERS_COMMON_H_INCLUDED
-#define PIRE_SCANNERS_COMMON_H_INCLUDED
-
-#include <stdlib.h>
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+#ifndef PIRE_SCANNERS_COMMON_H_INCLUDED
+#define PIRE_SCANNERS_COMMON_H_INCLUDED
+
+#include <stdlib.h>
#include <contrib/libs/pire/pire/align.h>
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/defs.h>
#include <contrib/libs/pire/pire/platform.h>
-
-namespace Pire {
+
+namespace Pire {
namespace ScannerIOTypes {
enum {
NoScanner = 0,
@@ -40,84 +40,84 @@ namespace Pire {
NoGlueLimitCountingScanner = 5,
};
}
-
- struct Header {
- ui32 Magic;
- ui32 Version;
- ui32 PtrSize;
- ui32 MaxWordSize;
- ui32 Type;
- ui32 HdrSize;
-
- static const ui32 MAGIC = 0x45524950; // "PIRE" on litte-endian
+
+ struct Header {
+ ui32 Magic;
+ ui32 Version;
+ ui32 PtrSize;
+ ui32 MaxWordSize;
+ ui32 Type;
+ ui32 HdrSize;
+
+ static const ui32 MAGIC = 0x45524950; // "PIRE" on litte-endian
static const ui32 RE_VERSION = 7; // Should be incremented each time when the format of serialized scanner changes
static const ui32 RE_VERSION_WITH_MACTIONS = 6; // LoadedScanner with m_actions, which is ignored
-
- explicit Header(ui32 type, size_t hdrsize)
- : Magic(MAGIC)
- , Version(RE_VERSION)
- , PtrSize(sizeof(void*))
- , MaxWordSize(sizeof(Impl::MaxSizeWord))
- , Type(type)
+
+ explicit Header(ui32 type, size_t hdrsize)
+ : Magic(MAGIC)
+ , Version(RE_VERSION)
+ , PtrSize(sizeof(void*))
+ , MaxWordSize(sizeof(Impl::MaxSizeWord))
+ , Type(type)
, HdrSize((ui32)hdrsize)
- {}
-
- void Validate(ui32 type, size_t hdrsize) const
- {
- if (Magic != MAGIC || PtrSize != sizeof(void*) || MaxWordSize != sizeof(Impl::MaxSizeWord))
- throw Error("Serialized regexp incompatible with your system");
+ {}
+
+ void Validate(ui32 type, size_t hdrsize) const
+ {
+ if (Magic != MAGIC || PtrSize != sizeof(void*) || MaxWordSize != sizeof(Impl::MaxSizeWord))
+ throw Error("Serialized regexp incompatible with your system");
if (Version != RE_VERSION && Version != RE_VERSION_WITH_MACTIONS)
- throw Error("You are trying to used an incompatible version of a serialized regexp");
+ throw Error("You are trying to used an incompatible version of a serialized regexp");
if (type != ScannerIOTypes::NoScanner && type != Type &&
!(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) {
- throw Error("Serialized regexp incompatible with your system");
+ throw Error("Serialized regexp incompatible with your system");
}
if (hdrsize != 0 && HdrSize != hdrsize)
throw Error("Serialized regexp incompatible with your system");
- }
- };
-
- namespace Impl {
- inline const void* AdvancePtr(const size_t*& ptr, size_t& size, size_t delta)
- {
- ptr = (const size_t*) ((const char*) ptr + delta);
- size -= delta;
- return (const void*) ptr;
- }
-
- template<class T>
- inline void MapPtr(T*& field, size_t count, const size_t*& p, size_t& size)
- {
- if (size < count * sizeof(*field))
- throw Error("EOF reached while mapping Pire::SlowScanner");
- field = (T*) p;
- Impl::AdvancePtr(p, size, count * sizeof(*field));
- Impl::AlignPtr(p, size);
- }
-
- inline void CheckAlign(const void* ptr, size_t bound = sizeof(size_t))
- {
- if (!IsAligned(ptr, bound))
- throw Error("Tried to mmap scanner at misaligned address");
- }
-
+ }
+ };
+
+ namespace Impl {
+ inline const void* AdvancePtr(const size_t*& ptr, size_t& size, size_t delta)
+ {
+ ptr = (const size_t*) ((const char*) ptr + delta);
+ size -= delta;
+ return (const void*) ptr;
+ }
+
+ template<class T>
+ inline void MapPtr(T*& field, size_t count, const size_t*& p, size_t& size)
+ {
+ if (size < count * sizeof(*field))
+ throw Error("EOF reached while mapping Pire::SlowScanner");
+ field = (T*) p;
+ Impl::AdvancePtr(p, size, count * sizeof(*field));
+ Impl::AlignPtr(p, size);
+ }
+
+ inline void CheckAlign(const void* ptr, size_t bound = sizeof(size_t))
+ {
+ if (!IsAligned(ptr, bound))
+ throw Error("Tried to mmap scanner at misaligned address");
+ }
+
inline Header ValidateHeader(const size_t*& ptr, size_t& size, ui32 type, size_t hdrsize)
- {
- const Header* hdr;
- MapPtr(hdr, 1, ptr, size);
- hdr->Validate(type, hdrsize);
+ {
+ const Header* hdr;
+ MapPtr(hdr, 1, ptr, size);
+ hdr->Validate(type, hdrsize);
return *hdr;
- }
-
+ }
+
inline Header ValidateHeader(yistream* s, ui32 type, size_t hdrsize)
- {
+ {
Header hdr(ScannerIOTypes::NoScanner, 0);
- LoadPodType(s, hdr);
- AlignLoad(s, sizeof(hdr));
- hdr.Validate(type, hdrsize);
+ LoadPodType(s, hdr);
+ AlignLoad(s, sizeof(hdr));
+ hdr.Validate(type, hdrsize);
return hdr;
- }
- }
-}
-
-#endif
+ }
+ }
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index 120dc403b7..7d5d6a50d7 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -1,108 +1,108 @@
-/*
- * loaded.h -- a definition of the LoadedScanner
+/*
+ * loaded.h -- a definition of the LoadedScanner
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_SCANNERS_LOADED_H
-#define PIRE_SCANNERS_LOADED_H
-
-#include <string.h>
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_SCANNERS_LOADED_H
+#define PIRE_SCANNERS_LOADED_H
+
+#include <string.h>
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/partition.h>
-#include "common.h"
-
-#ifdef PIRE_DEBUG
-#include <iostream>
-#endif
-
-namespace Pire {
-
-/**
-* A loaded scanner -- the deterministic scanner having actions
-* associated with states and transitions
-*
-* Not a complete scanner itself (hence abstract), this class provides
-* infrastructure for regexp-based algorithms (e.g. counts or captures),
-* supporting major part of scanner construction, (de)serialization,
-* mmap()-ing, etc.
-*
-* It is a good idea to override copy ctor, operator= and swap()
-* in subclasses to avoid mixing different scanner types in these methods.
-* Also please note that subclasses should not have any data members of thier own.
-*/
-class LoadedScanner {
-public:
- typedef ui8 Letter;
- typedef ui32 Action;
- typedef ui8 Tag;
-
- typedef size_t InternalState;
-
- union Transition {
- size_t raw; // alignment hint for compiler
- struct {
- ui32 shift;
- Action action;
- };
- };
-
- // Override in subclass, if neccessary
+#include "common.h"
+
+#ifdef PIRE_DEBUG
+#include <iostream>
+#endif
+
+namespace Pire {
+
+/**
+* A loaded scanner -- the deterministic scanner having actions
+* associated with states and transitions
+*
+* Not a complete scanner itself (hence abstract), this class provides
+* infrastructure for regexp-based algorithms (e.g. counts or captures),
+* supporting major part of scanner construction, (de)serialization,
+* mmap()-ing, etc.
+*
+* It is a good idea to override copy ctor, operator= and swap()
+* in subclasses to avoid mixing different scanner types in these methods.
+* Also please note that subclasses should not have any data members of thier own.
+*/
+class LoadedScanner {
+public:
+ typedef ui8 Letter;
+ typedef ui32 Action;
+ typedef ui8 Tag;
+
+ typedef size_t InternalState;
+
+ union Transition {
+ size_t raw; // alignment hint for compiler
+ struct {
+ ui32 shift;
+ Action action;
+ };
+ };
+
+ // Override in subclass, if neccessary
enum {
- FinalFlag = 0,
- DeadFlag = 0
- };
-
+ FinalFlag = 0,
+ DeadFlag = 0
+ };
+
static const size_t MAX_RE_COUNT = 16;
protected:
- LoadedScanner() { Alias(Null()); }
+ LoadedScanner() { Alias(Null()); }
- LoadedScanner(const LoadedScanner& s): m(s.m)
- {
- if (s.m_buffer) {
+ LoadedScanner(const LoadedScanner& s): m(s.m)
+ {
+ if (s.m_buffer) {
m_buffer = BufferType(new char [BufSize()]);
memcpy(m_buffer.Get(), s.m_buffer.Get(), BufSize());
Markup(m_buffer.Get());
- m.initial = (InternalState)m_jumps + (s.m.initial - (InternalState)s.m_jumps);
- } else {
- Alias(s);
- }
- }
-
- void Swap(LoadedScanner& s)
- {
- DoSwap(m_buffer, s.m_buffer);
- DoSwap(m.statesCount, s.m.statesCount);
- DoSwap(m.lettersCount, s.m.lettersCount);
- DoSwap(m.regexpsCount, s.m.regexpsCount);
- DoSwap(m.initial, s.m.initial);
- DoSwap(m_letters, s.m_letters);
- DoSwap(m_jumps, s.m_jumps);
- DoSwap(m_tags, s.m_tags);
- }
-
- LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; }
+ m.initial = (InternalState)m_jumps + (s.m.initial - (InternalState)s.m_jumps);
+ } else {
+ Alias(s);
+ }
+ }
+
+ void Swap(LoadedScanner& s)
+ {
+ DoSwap(m_buffer, s.m_buffer);
+ DoSwap(m.statesCount, s.m.statesCount);
+ DoSwap(m.lettersCount, s.m.lettersCount);
+ DoSwap(m.regexpsCount, s.m.regexpsCount);
+ DoSwap(m.initial, s.m.initial);
+ DoSwap(m_letters, s.m_letters);
+ DoSwap(m_jumps, s.m_jumps);
+ DoSwap(m_tags, s.m_tags);
+ }
+
+ LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; }
LoadedScanner (LoadedScanner&& other) : LoadedScanner() {
Swap(other);
}
@@ -110,14 +110,14 @@ protected:
Swap(other);
return *this;
}
-
-public:
- size_t Size() const { return m.statesCount; }
-
- bool Empty() const { return m_jumps == Null().m_jumps; }
-
- size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; }
-
+
+public:
+ size_t Size() const { return m.statesCount; }
+
+ bool Empty() const { return m_jumps == Null().m_jumps; }
+
+ size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; }
+
size_t LettersCount() const { return m.lettersCount; }
const void* Mmap(const void* ptr, size_t size) {
@@ -125,93 +125,93 @@ public:
}
const void* Mmap(const void* ptr, size_t size, ui32* type)
- {
- Impl::CheckAlign(ptr);
- LoadedScanner s;
- const size_t* p = reinterpret_cast<const size_t*>(ptr);
+ {
+ Impl::CheckAlign(ptr);
+ LoadedScanner s;
+ const size_t* p = reinterpret_cast<const size_t*>(ptr);
Header header = Impl::ValidateHeader(p, size, ScannerIOTypes::LoadedScanner, sizeof(s.m));
if (type) {
*type = header.Type;
}
+
+ Locals* locals;
+ Impl::MapPtr(locals, 1, p, size);
+ memcpy(&s.m, locals, sizeof(s.m));
- Locals* locals;
- Impl::MapPtr(locals, 1, p, size);
- memcpy(&s.m, locals, sizeof(s.m));
-
- Impl::MapPtr(s.m_letters, MaxChar, p, size);
- Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size);
+ Impl::MapPtr(s.m_letters, MaxChar, p, size);
+ Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size);
if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
Action* actions = 0;
Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size);
}
- Impl::MapPtr(s.m_tags, s.m.statesCount, p, size);
-
- s.m.initial += reinterpret_cast<size_t>(s.m_jumps);
- Swap(s);
-
- return (const void*) p;
- }
-
+ Impl::MapPtr(s.m_tags, s.m.statesCount, p, size);
+
+ s.m.initial += reinterpret_cast<size_t>(s.m_jumps);
+ Swap(s);
+
+ return (const void*) p;
+ }
+
void Save(yostream*, ui32 type) const;
- void Save(yostream*) const;
+ void Save(yostream*) const;
void Load(yistream*, ui32* type);
- void Load(yistream*);
-
- template<class Eq>
- void Init(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1)
- {
- m.statesCount = states;
- m.lettersCount = letters.Size();
- m.regexpsCount = regexpsCount;
+ void Load(yistream*);
+
+ template<class Eq>
+ void Init(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1)
+ {
+ m.statesCount = states;
+ m.lettersCount = letters.Size();
+ m.regexpsCount = regexpsCount;
m_buffer = BufferType(new char[BufSize()]);
memset(m_buffer.Get(), 0, BufSize());
Markup(m_buffer.Get());
-
- m.initial = reinterpret_cast<size_t>(m_jumps + startState * m.lettersCount);
-
- // Build letter translation table
+
+ m.initial = reinterpret_cast<size_t>(m_jumps + startState * m.lettersCount);
+
+ // Build letter translation table
Fill(m_letters, m_letters + MaxChar, 0);
for (auto&& letter : letters)
for (auto&& character : letter.second.second)
m_letters[character] = letter.second.first;
- }
-
+ }
+
size_t StateSize() const
{
return m.lettersCount * sizeof(*m_jumps);
}
-
+
size_t TransitionIndex(size_t state, Char c) const
{
return state * m.lettersCount + m_letters[c];
}
- void SetJump(size_t oldState, Char c, size_t newState, Action action)
- {
+ void SetJump(size_t oldState, Char c, size_t newState, Action action)
+ {
Y_ASSERT(m_buffer);
Y_ASSERT(oldState < m.statesCount);
Y_ASSERT(newState < m.statesCount);
-
+
size_t shift = (newState - oldState) * StateSize();
- Transition tr;
+ Transition tr;
tr.shift = (ui32)shift;
- tr.action = action;
+ tr.action = action;
m_jumps[TransitionIndex(oldState, c)] = tr;
- }
-
- Action RemapAction(Action action) { return action; }
-
+ }
+
+ Action RemapAction(Action action) { return action; }
+
void SetInitial(size_t state) { Y_ASSERT(m_buffer); m.initial = reinterpret_cast<size_t>(m_jumps + state * m.lettersCount); }
void SetTag(size_t state, Tag tag) { Y_ASSERT(m_buffer); m_tags[state] = tag; }
- void FinishBuild() {}
-
- size_t StateIdx(InternalState s) const
- {
- return (reinterpret_cast<Transition*>(s) - m_jumps) / m.lettersCount;
- }
-
- i64 SignExtend(i32 i) const { return i; }
-
+ void FinishBuild() {}
+
+ size_t StateIdx(InternalState s) const
+ {
+ return (reinterpret_cast<Transition*>(s) - m_jumps) / m.lettersCount;
+ }
+
+ i64 SignExtend(i32 i) const { return i; }
+
size_t BufSize() const
{
return
@@ -221,74 +221,74 @@ public:
;
}
-protected:
-
+protected:
+
static const Action IncrementMask = (1 << MAX_RE_COUNT) - 1;
static const Action ResetMask = IncrementMask << MAX_RE_COUNT;
-
- // TODO: maybe, put fields in private section and provide data accessors
-
- struct Locals {
- ui32 statesCount;
- ui32 lettersCount;
- ui32 regexpsCount;
- size_t initial;
- } m;
-
+
+ // TODO: maybe, put fields in private section and provide data accessors
+
+ struct Locals {
+ ui32 statesCount;
+ ui32 lettersCount;
+ ui32 regexpsCount;
+ size_t initial;
+ } m;
+
using BufferType = TArrayHolder<char>;
BufferType m_buffer;
-
- Letter* m_letters;
- Transition* m_jumps;
- Tag* m_tags;
-
- virtual ~LoadedScanner();
-
-private:
+
+ Letter* m_letters;
+ Transition* m_jumps;
+ Tag* m_tags;
+
+ virtual ~LoadedScanner();
+
+private:
explicit LoadedScanner(Fsm& fsm, size_t distance = 0)
- {
+ {
if (distance) {
fsm = CreateApproxFsm(fsm, distance);
}
- fsm.Canonize();
- Init(fsm.Size(), fsm.Letters(), fsm.Initial());
- BuildScanner(fsm, *this);
- }
-
- inline static const LoadedScanner& Null()
- {
- static const LoadedScanner n = Fsm::MakeFalse().Compile<LoadedScanner>();
- return n;
- }
-
- void Markup(void* buf)
- {
- m_letters = reinterpret_cast<Letter*>(buf);
- m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar);
+ fsm.Canonize();
+ Init(fsm.Size(), fsm.Letters(), fsm.Initial());
+ BuildScanner(fsm, *this);
+ }
+
+ inline static const LoadedScanner& Null()
+ {
+ static const LoadedScanner n = Fsm::MakeFalse().Compile<LoadedScanner>();
+ return n;
+ }
+
+ void Markup(void* buf)
+ {
+ m_letters = reinterpret_cast<Letter*>(buf);
+ m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar);
m_tags = reinterpret_cast<Tag*>(m_jumps + m.statesCount * m.lettersCount);
- }
-
- void Alias(const LoadedScanner& s)
- {
- memcpy(&m, &s.m, sizeof(m));
- m_buffer = 0;
- m_letters = s.m_letters;
- m_jumps = s.m_jumps;
- m_tags = s.m_tags;
- }
-
- template<class Eq>
- LoadedScanner(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1)
- {
- Init(states, letters, startState, regexpsCount);
- }
-
+ }
+
+ void Alias(const LoadedScanner& s)
+ {
+ memcpy(&m, &s.m, sizeof(m));
+ m_buffer = 0;
+ m_letters = s.m_letters;
+ m_jumps = s.m_jumps;
+ m_tags = s.m_tags;
+ }
+
+ template<class Eq>
+ LoadedScanner(size_t states, const Partition<Char, Eq>& letters, size_t startState, size_t regexpsCount = 1)
+ {
+ Init(states, letters, startState, regexpsCount);
+ }
+
friend class Fsm;
-};
+};
inline LoadedScanner::~LoadedScanner() = default;
-
-}
-
-
-#endif
+
+}
+
+
+#endif
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index 29679e416e..8b6c537836 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -1,31 +1,31 @@
-/*
- * multi.h -- definition of the Scanner
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_SCANNERS_MULTI_H
-#define PIRE_SCANNERS_MULTI_H
-
+/*
+ * multi.h -- definition of the Scanner
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_SCANNERS_MULTI_H
+#define PIRE_SCANNERS_MULTI_H
+
#include <cstring>
-#include <string.h>
+#include <string.h>
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/partition.h>
@@ -38,1094 +38,1094 @@
#include <contrib/libs/pire/pire/stub/saveload.h>
#include <contrib/libs/pire/pire/stub/lexical_cast.h>
-#include "common.h"
-
-namespace Pire {
-
-namespace Impl {
-
- inline static ssize_t SignExtend(i32 i) { return i; }
- template<class T>
- class ScannerGlueCommon;
-
- template<class T>
- class ScannerGlueTask;
-
- // This strategy allows to mmap() saved representation of a scanner. This is achieved by
- // storing shifts instead of addresses in the transition table.
- struct Relocatable {
- static const size_t Signature = 1;
- // Please note that Transition size is hardcoded as 32 bits.
- // This limits size of transition table to 4G, but compresses
- // it twice compared to 64-bit transitions. In future Transition
- // can be made a template parameter if this is a concern.
- typedef ui32 Transition;
-
- typedef const void* RetvalForMmap;
-
- static size_t Go(size_t state, Transition shift) { return state + SignExtend(shift); }
- static Transition Diff(size_t from, size_t to) { return static_cast<Transition>(to - from); }
- };
-
- // With this strategy the transition table stores addresses. This makes the scanner faster
- // compared to mmap()-ed
- struct Nonrelocatable {
- static const size_t Signature = 2;
- typedef size_t Transition;
-
- // Generates a compile-time error if Scanner<Nonrelocatable>::Mmap()
- // (which is unsupported) is mistakenly called
- typedef struct {} RetvalForMmap;
-
- static size_t Go(size_t /*state*/, Transition shift) { return shift; }
- static Transition Diff(size_t /*from*/, size_t to) { return to; }
- };
-
-
+#include "common.h"
+
+namespace Pire {
+
+namespace Impl {
+
+ inline static ssize_t SignExtend(i32 i) { return i; }
+ template<class T>
+ class ScannerGlueCommon;
+
+ template<class T>
+ class ScannerGlueTask;
+
+ // This strategy allows to mmap() saved representation of a scanner. This is achieved by
+ // storing shifts instead of addresses in the transition table.
+ struct Relocatable {
+ static const size_t Signature = 1;
+ // Please note that Transition size is hardcoded as 32 bits.
+ // This limits size of transition table to 4G, but compresses
+ // it twice compared to 64-bit transitions. In future Transition
+ // can be made a template parameter if this is a concern.
+ typedef ui32 Transition;
+
+ typedef const void* RetvalForMmap;
+
+ static size_t Go(size_t state, Transition shift) { return state + SignExtend(shift); }
+ static Transition Diff(size_t from, size_t to) { return static_cast<Transition>(to - from); }
+ };
+
+ // With this strategy the transition table stores addresses. This makes the scanner faster
+ // compared to mmap()-ed
+ struct Nonrelocatable {
+ static const size_t Signature = 2;
+ typedef size_t Transition;
+
+ // Generates a compile-time error if Scanner<Nonrelocatable>::Mmap()
+ // (which is unsupported) is mistakenly called
+ typedef struct {} RetvalForMmap;
+
+ static size_t Go(size_t /*state*/, Transition shift) { return shift; }
+ static Transition Diff(size_t /*from*/, size_t to) { return to; }
+ };
+
+
// Scanner implementation parametrized by
-// - transition table representation strategy
-// - strategy for fast forwarding through memory ranges
-template<class Relocation, class Shortcutting>
-class Scanner {
-protected:
- enum {
- FinalFlag = 1,
- DeadFlag = 2,
- Flags = FinalFlag | DeadFlag
- };
-
- static const size_t End = static_cast<size_t>(-1);
-
-public:
- typedef typename Relocation::Transition Transition;
-
- typedef ui16 Letter;
- typedef ui32 Action;
- typedef ui8 Tag;
-
- /// Some properties of the particular state.
- struct CommonRowHeader {
- size_t Flags; ///< Holds FinalFlag, DeadFlag, etc...
-
- CommonRowHeader(): Flags(0) {}
-
- template <class OtherCommonRowHeader>
- CommonRowHeader& operator =(const OtherCommonRowHeader& other)
- {
- Flags = other.Flags;
- return *this;
- }
- };
-
- typedef typename Shortcutting::template ExtendedRowHeader<Scanner> ScannerRowHeader;
-
- Scanner() { Alias(Null()); }
+// - transition table representation strategy
+// - strategy for fast forwarding through memory ranges
+template<class Relocation, class Shortcutting>
+class Scanner {
+protected:
+ enum {
+ FinalFlag = 1,
+ DeadFlag = 2,
+ Flags = FinalFlag | DeadFlag
+ };
+
+ static const size_t End = static_cast<size_t>(-1);
+
+public:
+ typedef typename Relocation::Transition Transition;
+
+ typedef ui16 Letter;
+ typedef ui32 Action;
+ typedef ui8 Tag;
+
+ /// Some properties of the particular state.
+ struct CommonRowHeader {
+ size_t Flags; ///< Holds FinalFlag, DeadFlag, etc...
+
+ CommonRowHeader(): Flags(0) {}
+
+ template <class OtherCommonRowHeader>
+ CommonRowHeader& operator =(const OtherCommonRowHeader& other)
+ {
+ Flags = other.Flags;
+ return *this;
+ }
+ };
+
+ typedef typename Shortcutting::template ExtendedRowHeader<Scanner> ScannerRowHeader;
+
+ Scanner() { Alias(Null()); }
explicit Scanner(Fsm& fsm, size_t distance = 0)
- {
+ {
if (distance) {
fsm = CreateApproxFsm(fsm, distance);
}
- fsm.Canonize();
- Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1);
- BuildScanner(fsm, *this);
- }
-
-
- size_t Size() const { return m.statesCount; }
- bool Empty() const { return m_transitions == Null().m_transitions; }
-
- typedef size_t State;
-
- size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; }
- size_t LettersCount() const { return m.lettersCount; }
-
- /// Checks whether specified state is in any of the final sets
- bool Final(const State& state) const { return (Header(state).Common.Flags & FinalFlag) != 0; }
-
- /// Checks whether specified state is 'dead' (i.e. scanner will never
- /// reach any final state from current one)
- bool Dead(const State& state) const { return (Header(state).Common.Flags & DeadFlag) != 0; }
-
- ypair<const size_t*, const size_t*> AcceptedRegexps(const State& state) const
- {
- size_t idx = (state - reinterpret_cast<size_t>(m_transitions)) /
- (RowSize() * sizeof(Transition));
- const size_t* b = m_final + m_finalIndex[idx];
- const size_t* e = b;
- while (*e != End)
- ++e;
- return ymake_pair(b, e);
- }
-
- /// Returns an initial state for this scanner
- void Initialize(State& state) const { state = m.initial; }
-
+ fsm.Canonize();
+ Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1);
+ BuildScanner(fsm, *this);
+ }
+
+
+ size_t Size() const { return m.statesCount; }
+ bool Empty() const { return m_transitions == Null().m_transitions; }
+
+ typedef size_t State;
+
+ size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; }
+ size_t LettersCount() const { return m.lettersCount; }
+
+ /// Checks whether specified state is in any of the final sets
+ bool Final(const State& state) const { return (Header(state).Common.Flags & FinalFlag) != 0; }
+
+ /// Checks whether specified state is 'dead' (i.e. scanner will never
+ /// reach any final state from current one)
+ bool Dead(const State& state) const { return (Header(state).Common.Flags & DeadFlag) != 0; }
+
+ ypair<const size_t*, const size_t*> AcceptedRegexps(const State& state) const
+ {
+ size_t idx = (state - reinterpret_cast<size_t>(m_transitions)) /
+ (RowSize() * sizeof(Transition));
+ const size_t* b = m_final + m_finalIndex[idx];
+ const size_t* e = b;
+ while (*e != End)
+ ++e;
+ return ymake_pair(b, e);
+ }
+
+ /// Returns an initial state for this scanner
+ void Initialize(State& state) const { state = m.initial; }
+
Char Translate(Char ch) const
- {
+ {
return m_letters[static_cast<size_t>(ch)];
}
/// Handles one letter
Action NextTranslated(State& state, Char letter) const
{
- PIRE_IFDEBUG(
+ PIRE_IFDEBUG(
Y_ASSERT(state >= (size_t)m_transitions);
Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size()));
Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0);
- );
-
+ );
+
state = Relocation::Go(state, reinterpret_cast<const Transition*>(state)[letter]);
-
- PIRE_IFDEBUG(
+
+ PIRE_IFDEBUG(
Y_ASSERT(state >= (size_t)m_transitions);
Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size()));
Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0);
- );
-
- return 0;
- }
-
+ );
+
+ return 0;
+ }
+
/// Handles one character
Action Next(State& state, Char c) const
{
return NextTranslated(state, Translate(c));
}
- void TakeAction(State&, Action) const {}
-
+ void TakeAction(State&, Action) const {}
+
Scanner(const Scanner& s): m(s.m)
- {
- if (!s.m_buffer) {
- // Empty or mmap()-ed scanner
- Alias(s);
- } else {
- // In-memory scanner
- DeepCopy(s);
- }
- }
-
+ {
+ if (!s.m_buffer) {
+ // Empty or mmap()-ed scanner
+ Alias(s);
+ } else {
+ // In-memory scanner
+ DeepCopy(s);
+ }
+ }
+
Scanner(Scanner&& s)
{
Alias(Null());
Swap(s);
}
- template<class AnotherRelocation>
+ template<class AnotherRelocation>
Scanner(const Scanner<AnotherRelocation, Shortcutting>& s)
- {
- if (s.Empty())
- Alias(Null());
- else
- DeepCopy(s);
- }
-
- void Swap(Scanner& s)
- {
+ {
+ if (s.Empty())
+ Alias(Null());
+ else
+ DeepCopy(s);
+ }
+
+ void Swap(Scanner& s)
+ {
Y_ASSERT(m.relocationSignature == s.m.relocationSignature);
Y_ASSERT(m.shortcuttingSignature == s.m.shortcuttingSignature);
- DoSwap(m_buffer, s.m_buffer);
- DoSwap(m.statesCount, s.m.statesCount);
- DoSwap(m.lettersCount, s.m.lettersCount);
- DoSwap(m.regexpsCount, s.m.regexpsCount);
- DoSwap(m.initial, s.m.initial);
- DoSwap(m_letters, s.m_letters);
- DoSwap(m.finalTableSize, s.m.finalTableSize);
- DoSwap(m_final, s.m_final);
- DoSwap(m_finalIndex, s.m_finalIndex);
- DoSwap(m_transitions, s.m_transitions);
- }
-
- Scanner& operator = (const Scanner& s) { Scanner(s).Swap(*this); return *this; }
-
- /*
- * Constructs the scanner from mmap()-ed memory range, returning a pointer
- * to unconsumed part of the buffer.
- */
- typename Relocation::RetvalForMmap Mmap(const void* ptr, size_t size)
- {
- Impl::CheckAlign(ptr, sizeof(size_t));
- Scanner s;
-
- const size_t* p = reinterpret_cast<const size_t*>(ptr);
+ DoSwap(m_buffer, s.m_buffer);
+ DoSwap(m.statesCount, s.m.statesCount);
+ DoSwap(m.lettersCount, s.m.lettersCount);
+ DoSwap(m.regexpsCount, s.m.regexpsCount);
+ DoSwap(m.initial, s.m.initial);
+ DoSwap(m_letters, s.m_letters);
+ DoSwap(m.finalTableSize, s.m.finalTableSize);
+ DoSwap(m_final, s.m_final);
+ DoSwap(m_finalIndex, s.m_finalIndex);
+ DoSwap(m_transitions, s.m_transitions);
+ }
+
+ Scanner& operator = (const Scanner& s) { Scanner(s).Swap(*this); return *this; }
+
+ /*
+ * Constructs the scanner from mmap()-ed memory range, returning a pointer
+ * to unconsumed part of the buffer.
+ */
+ typename Relocation::RetvalForMmap Mmap(const void* ptr, size_t size)
+ {
+ Impl::CheckAlign(ptr, sizeof(size_t));
+ Scanner s;
+
+ const size_t* p = reinterpret_cast<const size_t*>(ptr);
Impl::ValidateHeader(p, size, ScannerIOTypes::Scanner, sizeof(m));
- if (size < sizeof(s.m))
- throw Error("EOF reached while mapping Pire::Scanner");
-
- memcpy(&s.m, p, sizeof(s.m));
- if (s.m.relocationSignature != Relocation::Signature)
- throw Error("Type mismatch while mmapping Pire::Scanner");
- Impl::AdvancePtr(p, size, sizeof(s.m));
- Impl::AlignPtr(p, size);
-
- if (Shortcutting::Signature != s.m.shortcuttingSignature)
- throw Error("This scanner has different shortcutting type");
-
- bool empty = *((const bool*) p);
- Impl::AdvancePtr(p, size, sizeof(empty));
- Impl::AlignPtr(p, size);
-
- if (empty)
- s.Alias(Null());
- else {
- if (size < s.BufSize())
- throw Error("EOF reached while mapping NPire::Scanner");
- s.Markup(const_cast<size_t*>(p));
- Impl::AdvancePtr(p, size, s.BufSize());
- s.m.initial += reinterpret_cast<size_t>(s.m_transitions);
- }
-
- Swap(s);
- return Impl::AlignPtr(p, size);
- }
-
- size_t StateIndex(State s) const
- {
- return (s - reinterpret_cast<size_t>(m_transitions)) / (RowSize() * sizeof(Transition));
- }
-
- /**
- * Agglutinates two scanners together, producing a larger scanner.
- * Checkig a string against that scanner effectively checks them against both agglutinated regexps
- * (detailed information about matched regexps can be obtained with AcceptedRegexps()).
- *
- * Returns default-constructed scanner in case of failure
- * (consult Scanner::Empty() to find out whether the operation was successful).
- */
- static Scanner Glue(const Scanner& a, const Scanner& b, size_t maxSize = 0);
-
- // Returns the size of the memory buffer used (or required) by scanner.
- size_t BufSize() const
- {
- return AlignUp(
- MaxChar * sizeof(Letter) // Letters translation table
- + m.finalTableSize * sizeof(size_t) // Final table
- + m.statesCount * sizeof(size_t) // Final index
- + RowSize() * m.statesCount * sizeof(Transition), // Transitions table
- sizeof(size_t));
- }
-
- void Save(yostream*) const;
- void Load(yistream*);
-
- ScannerRowHeader& Header(State s) { return *(ScannerRowHeader*) s; }
- const ScannerRowHeader& Header(State s) const { return *(const ScannerRowHeader*) s; }
-
+ if (size < sizeof(s.m))
+ throw Error("EOF reached while mapping Pire::Scanner");
+
+ memcpy(&s.m, p, sizeof(s.m));
+ if (s.m.relocationSignature != Relocation::Signature)
+ throw Error("Type mismatch while mmapping Pire::Scanner");
+ Impl::AdvancePtr(p, size, sizeof(s.m));
+ Impl::AlignPtr(p, size);
+
+ if (Shortcutting::Signature != s.m.shortcuttingSignature)
+ throw Error("This scanner has different shortcutting type");
+
+ bool empty = *((const bool*) p);
+ Impl::AdvancePtr(p, size, sizeof(empty));
+ Impl::AlignPtr(p, size);
+
+ if (empty)
+ s.Alias(Null());
+ else {
+ if (size < s.BufSize())
+ throw Error("EOF reached while mapping NPire::Scanner");
+ s.Markup(const_cast<size_t*>(p));
+ Impl::AdvancePtr(p, size, s.BufSize());
+ s.m.initial += reinterpret_cast<size_t>(s.m_transitions);
+ }
+
+ Swap(s);
+ return Impl::AlignPtr(p, size);
+ }
+
+ size_t StateIndex(State s) const
+ {
+ return (s - reinterpret_cast<size_t>(m_transitions)) / (RowSize() * sizeof(Transition));
+ }
+
+ /**
+ * Agglutinates two scanners together, producing a larger scanner.
+ * Checkig a string against that scanner effectively checks them against both agglutinated regexps
+ * (detailed information about matched regexps can be obtained with AcceptedRegexps()).
+ *
+ * Returns default-constructed scanner in case of failure
+ * (consult Scanner::Empty() to find out whether the operation was successful).
+ */
+ static Scanner Glue(const Scanner& a, const Scanner& b, size_t maxSize = 0);
+
+ // Returns the size of the memory buffer used (or required) by scanner.
+ size_t BufSize() const
+ {
+ return AlignUp(
+ MaxChar * sizeof(Letter) // Letters translation table
+ + m.finalTableSize * sizeof(size_t) // Final table
+ + m.statesCount * sizeof(size_t) // Final index
+ + RowSize() * m.statesCount * sizeof(Transition), // Transitions table
+ sizeof(size_t));
+ }
+
+ void Save(yostream*) const;
+ void Load(yistream*);
+
+ ScannerRowHeader& Header(State s) { return *(ScannerRowHeader*) s; }
+ const ScannerRowHeader& Header(State s) const { return *(const ScannerRowHeader*) s; }
+
protected:
-
- struct Locals {
- ui32 statesCount;
- ui32 lettersCount;
- ui32 regexpsCount;
- size_t initial;
- ui32 finalTableSize;
- size_t relocationSignature;
- size_t shortcuttingSignature;
- } m;
-
+
+ struct Locals {
+ ui32 statesCount;
+ ui32 lettersCount;
+ ui32 regexpsCount;
+ size_t initial;
+ ui32 finalTableSize;
+ size_t relocationSignature;
+ size_t shortcuttingSignature;
+ } m;
+
using BufferType = TArrayHolder<char>;
BufferType m_buffer;
- Letter* m_letters;
-
- size_t* m_final;
- size_t* m_finalIndex;
-
- Transition* m_transitions;
-
- inline static const Scanner& Null()
- {
- static const Scanner n = Fsm::MakeFalse().Compile< Scanner<Relocation, Shortcutting> >();
+ Letter* m_letters;
+
+ size_t* m_final;
+ size_t* m_finalIndex;
+
+ Transition* m_transitions;
+
+ inline static const Scanner& Null()
+ {
+ static const Scanner n = Fsm::MakeFalse().Compile< Scanner<Relocation, Shortcutting> >();
return n;
- }
-
- // Returns transition row size in Transition's. Row size_in bytes should be a multiple of sizeof(MaxSizeWord)
- size_t RowSize() const { return AlignUp(m.lettersCount + HEADER_SIZE, sizeof(MaxSizeWord)/sizeof(Transition)); }
-
- static const size_t HEADER_SIZE = sizeof(ScannerRowHeader) / sizeof(Transition);
- PIRE_STATIC_ASSERT(sizeof(ScannerRowHeader) % sizeof(Transition) == 0);
-
- template<class Eq>
- void Init(size_t states, const Partition<Char, Eq>& letters, size_t finalStatesCount, size_t startState, size_t regexpsCount = 1)
- {
+ }
+
+ // Returns transition row size in Transition's. Row size_in bytes should be a multiple of sizeof(MaxSizeWord)
+ size_t RowSize() const { return AlignUp(m.lettersCount + HEADER_SIZE, sizeof(MaxSizeWord)/sizeof(Transition)); }
+
+ static const size_t HEADER_SIZE = sizeof(ScannerRowHeader) / sizeof(Transition);
+ PIRE_STATIC_ASSERT(sizeof(ScannerRowHeader) % sizeof(Transition) == 0);
+
+ template<class Eq>
+ void Init(size_t states, const Partition<Char, Eq>& letters, size_t finalStatesCount, size_t startState, size_t regexpsCount = 1)
+ {
std::memset(&m, 0, sizeof(m));
- m.relocationSignature = Relocation::Signature;
- m.shortcuttingSignature = Shortcutting::Signature;
- m.statesCount = states;
- m.lettersCount = letters.Size();
- m.regexpsCount = regexpsCount;
- m.finalTableSize = finalStatesCount + states;
-
+ m.relocationSignature = Relocation::Signature;
+ m.shortcuttingSignature = Shortcutting::Signature;
+ m.statesCount = states;
+ m.lettersCount = letters.Size();
+ m.regexpsCount = regexpsCount;
+ m.finalTableSize = finalStatesCount + states;
+
m_buffer = BufferType(new char[BufSize() + sizeof(size_t)]);
memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t));
Markup(AlignUp(m_buffer.Get(), sizeof(size_t)));
-
- for (size_t i = 0; i != Size(); ++i)
- Header(IndexToState(i)) = ScannerRowHeader();
-
- m.initial = reinterpret_cast<size_t>(m_transitions + startState * RowSize());
-
- // Build letter translation table
+
+ for (size_t i = 0; i != Size(); ++i)
+ Header(IndexToState(i)) = ScannerRowHeader();
+
+ m.initial = reinterpret_cast<size_t>(m_transitions + startState * RowSize());
+
+ // Build letter translation table
for (auto&& letter : letters)
for (auto&& character : letter.second.second)
m_letters[character] = letter.second.first + HEADER_SIZE;
- }
-
- /*
- * Initializes pointers depending on buffer start, letters and states count
- */
- void Markup(void* ptr)
- {
- Impl::CheckAlign(ptr, sizeof(size_t));
- m_letters = reinterpret_cast<Letter*>(ptr);
- m_final = reinterpret_cast<size_t*>(m_letters + MaxChar);
- m_finalIndex = reinterpret_cast<size_t*>(m_final + m.finalTableSize);
- m_transitions = reinterpret_cast<Transition*>(m_finalIndex + m.statesCount);
- }
-
- // Makes a shallow ("weak") copy of the given scanner.
- // The copied scanner does not maintain lifetime of the original's entrails.
- void Alias(const Scanner<Relocation, Shortcutting>& s)
- {
- memcpy(&m, &s.m, sizeof(m));
+ }
+
+ /*
+ * Initializes pointers depending on buffer start, letters and states count
+ */
+ void Markup(void* ptr)
+ {
+ Impl::CheckAlign(ptr, sizeof(size_t));
+ m_letters = reinterpret_cast<Letter*>(ptr);
+ m_final = reinterpret_cast<size_t*>(m_letters + MaxChar);
+ m_finalIndex = reinterpret_cast<size_t*>(m_final + m.finalTableSize);
+ m_transitions = reinterpret_cast<Transition*>(m_finalIndex + m.statesCount);
+ }
+
+ // Makes a shallow ("weak") copy of the given scanner.
+ // The copied scanner does not maintain lifetime of the original's entrails.
+ void Alias(const Scanner<Relocation, Shortcutting>& s)
+ {
+ memcpy(&m, &s.m, sizeof(m));
m_buffer.Reset();
- m_letters = s.m_letters;
- m_final = s.m_final;
- m_finalIndex = s.m_finalIndex;
- m_transitions = s.m_transitions;
- }
-
- template<class AnotherRelocation>
- void DeepCopy(const Scanner<AnotherRelocation, Shortcutting>& s)
- {
- // Don't want memory leaks, but we cannot free the buffer because there might be aliased instances
+ m_letters = s.m_letters;
+ m_final = s.m_final;
+ m_finalIndex = s.m_finalIndex;
+ m_transitions = s.m_transitions;
+ }
+
+ template<class AnotherRelocation>
+ void DeepCopy(const Scanner<AnotherRelocation, Shortcutting>& s)
+ {
+ // Don't want memory leaks, but we cannot free the buffer because there might be aliased instances
Y_ASSERT(m_buffer == nullptr);
-
- // Ensure that specializations of Scanner across different Relocations do not touch its Locals
+
+ // Ensure that specializations of Scanner across different Relocations do not touch its Locals
static_assert(sizeof(m) == sizeof(s.m), "sizeof(m) == sizeof(s.m)");
- memcpy(&m, &s.m, sizeof(s.m));
- m.relocationSignature = Relocation::Signature;
- m.shortcuttingSignature = Shortcutting::Signature;
+ memcpy(&m, &s.m, sizeof(s.m));
+ m.relocationSignature = Relocation::Signature;
+ m.shortcuttingSignature = Shortcutting::Signature;
m_buffer = BufferType(new char[BufSize() + sizeof(size_t)]);
std::memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t));
Markup(AlignUp(m_buffer.Get(), sizeof(size_t)));
-
- // Values in letter-to-leterclass table take into account row header size
- for (size_t c = 0; c < MaxChar; ++c) {
- m_letters[c] = s.m_letters[c] - s.HEADER_SIZE + HEADER_SIZE;
+
+ // Values in letter-to-leterclass table take into account row header size
+ for (size_t c = 0; c < MaxChar; ++c) {
+ m_letters[c] = s.m_letters[c] - s.HEADER_SIZE + HEADER_SIZE;
Y_ASSERT(c == Epsilon || m_letters[c] >= HEADER_SIZE);
Y_ASSERT(c == Epsilon || m_letters[c] < RowSize());
- }
- memcpy(m_final, s.m_final, m.finalTableSize * sizeof(*m_final));
- memcpy(m_finalIndex, s.m_finalIndex, m.statesCount * sizeof(*m_finalIndex));
-
- m.initial = IndexToState(s.StateIndex(s.m.initial));
-
- for (size_t st = 0; st != m.statesCount; ++st) {
- size_t oldstate = s.IndexToState(st);
- size_t newstate = IndexToState(st);
- Header(newstate) = s.Header(oldstate);
- const typename Scanner<AnotherRelocation, Shortcutting>::Transition* os
- = reinterpret_cast<const typename Scanner<AnotherRelocation, Shortcutting>::Transition*>(oldstate);
- Transition* ns = reinterpret_cast<Transition*>(newstate);
-
- for (size_t let = 0; let != LettersCount(); ++let) {
- size_t destIndex = s.StateIndex(AnotherRelocation::Go(oldstate, os[let + s.HEADER_SIZE]));
- Transition tr = Relocation::Diff(newstate, IndexToState(destIndex));
- ns[let + HEADER_SIZE] = tr;
+ }
+ memcpy(m_final, s.m_final, m.finalTableSize * sizeof(*m_final));
+ memcpy(m_finalIndex, s.m_finalIndex, m.statesCount * sizeof(*m_finalIndex));
+
+ m.initial = IndexToState(s.StateIndex(s.m.initial));
+
+ for (size_t st = 0; st != m.statesCount; ++st) {
+ size_t oldstate = s.IndexToState(st);
+ size_t newstate = IndexToState(st);
+ Header(newstate) = s.Header(oldstate);
+ const typename Scanner<AnotherRelocation, Shortcutting>::Transition* os
+ = reinterpret_cast<const typename Scanner<AnotherRelocation, Shortcutting>::Transition*>(oldstate);
+ Transition* ns = reinterpret_cast<Transition*>(newstate);
+
+ for (size_t let = 0; let != LettersCount(); ++let) {
+ size_t destIndex = s.StateIndex(AnotherRelocation::Go(oldstate, os[let + s.HEADER_SIZE]));
+ Transition tr = Relocation::Diff(newstate, IndexToState(destIndex));
+ ns[let + HEADER_SIZE] = tr;
Y_ASSERT(Relocation::Go(newstate, tr) >= (size_t)m_transitions);
Y_ASSERT(Relocation::Go(newstate, tr) < (size_t)(m_transitions + RowSize()*Size()));
- }
- }
- }
-
-
- size_t IndexToState(size_t stateIndex) const
- {
- return reinterpret_cast<size_t>(m_transitions + stateIndex * RowSize());
- }
-
- void SetJump(size_t oldState, Char c, size_t newState, unsigned long /*payload*/ = 0)
- {
+ }
+ }
+ }
+
+
+ size_t IndexToState(size_t stateIndex) const
+ {
+ return reinterpret_cast<size_t>(m_transitions + stateIndex * RowSize());
+ }
+
+ void SetJump(size_t oldState, Char c, size_t newState, unsigned long /*payload*/ = 0)
+ {
Y_ASSERT(m_buffer);
Y_ASSERT(oldState < m.statesCount);
Y_ASSERT(newState < m.statesCount);
-
- m_transitions[oldState * RowSize() + m_letters[c]]
- = Relocation::Diff(IndexToState(oldState), IndexToState(newState));
- }
-
- unsigned long RemapAction(unsigned long action) { return action; }
-
- void SetInitial(size_t state)
- {
+
+ m_transitions[oldState * RowSize() + m_letters[c]]
+ = Relocation::Diff(IndexToState(oldState), IndexToState(newState));
+ }
+
+ unsigned long RemapAction(unsigned long action) { return action; }
+
+ void SetInitial(size_t state)
+ {
Y_ASSERT(m_buffer);
- m.initial = IndexToState(state);
- }
-
- void SetTag(size_t state, size_t value)
- {
+ m.initial = IndexToState(state);
+ }
+
+ void SetTag(size_t state, size_t value)
+ {
Y_ASSERT(m_buffer);
- Header(IndexToState(state)).Common.Flags = value;
- }
-
- // Fill shortcut masks for all the states
- void BuildShortcuts()
- {
+ Header(IndexToState(state)).Common.Flags = value;
+ }
+
+ // Fill shortcut masks for all the states
+ void BuildShortcuts()
+ {
Y_ASSERT(m_buffer);
-
- // Build the mapping from letter classes to characters
+
+ // Build the mapping from letter classes to characters
TVector< TVector<char> > letters(RowSize());
- for (unsigned ch = 0; ch != 1 << (sizeof(char)*8); ++ch)
- letters[m_letters[ch]].push_back(ch);
-
- // Loop through all states in the transition table and
- // check if it is possible to setup shortcuts
- for (size_t i = 0; i != Size(); ++i) {
- State st = IndexToState(i);
- ScannerRowHeader& header = Header(st);
- Shortcutting::SetNoExit(header);
- size_t ind = 0;
- size_t let = HEADER_SIZE;
- for (; let != LettersCount() + HEADER_SIZE; ++let) {
- // Check if the transition is not the same state
- if (Relocation::Go(st, reinterpret_cast<const Transition*>(st)[let]) != st) {
- if (ind + letters[let].size() > Shortcutting::ExitMaskCount)
- break;
- // For each character setup a mask
+ for (unsigned ch = 0; ch != 1 << (sizeof(char)*8); ++ch)
+ letters[m_letters[ch]].push_back(ch);
+
+ // Loop through all states in the transition table and
+ // check if it is possible to setup shortcuts
+ for (size_t i = 0; i != Size(); ++i) {
+ State st = IndexToState(i);
+ ScannerRowHeader& header = Header(st);
+ Shortcutting::SetNoExit(header);
+ size_t ind = 0;
+ size_t let = HEADER_SIZE;
+ for (; let != LettersCount() + HEADER_SIZE; ++let) {
+ // Check if the transition is not the same state
+ if (Relocation::Go(st, reinterpret_cast<const Transition*>(st)[let]) != st) {
+ if (ind + letters[let].size() > Shortcutting::ExitMaskCount)
+ break;
+ // For each character setup a mask
for (auto&& character : letters[let]) {
Shortcutting::SetMask(header, ind, character);
- ++ind;
- }
- }
- }
-
- if (let != LettersCount() + HEADER_SIZE) {
- // Not enough space in ExitMasks, so reset all masks (which leads to bypassing the optimization)
- Shortcutting::SetNoShortcut(header);
- }
- // Fill the rest of the shortcut masks with the last used mask
- Shortcutting::FinishMasks(header, ind);
- }
- }
-
- // Fills final states table and builds shortcuts if possible
- void FinishBuild()
- {
+ ++ind;
+ }
+ }
+ }
+
+ if (let != LettersCount() + HEADER_SIZE) {
+ // Not enough space in ExitMasks, so reset all masks (which leads to bypassing the optimization)
+ Shortcutting::SetNoShortcut(header);
+ }
+ // Fill the rest of the shortcut masks with the last used mask
+ Shortcutting::FinishMasks(header, ind);
+ }
+ }
+
+ // Fills final states table and builds shortcuts if possible
+ void FinishBuild()
+ {
Y_ASSERT(m_buffer);
auto finalWriter = m_final;
- for (size_t state = 0; state != Size(); ++state) {
+ for (size_t state = 0; state != Size(); ++state) {
m_finalIndex[state] = finalWriter - m_final;
- if (Header(IndexToState(state)).Common.Flags & FinalFlag)
+ if (Header(IndexToState(state)).Common.Flags & FinalFlag)
*finalWriter++ = 0;
*finalWriter++ = static_cast<size_t>(-1);
- }
- BuildShortcuts();
- }
-
- size_t AcceptedRegexpsCount(size_t idx) const
- {
- const size_t* b = m_final + m_finalIndex[idx];
- const size_t* e = b;
- while (*e != End)
- ++e;
- return e - b;
- }
-
- template <class Scanner>
- friend void Pire::BuildScanner(const Fsm&, Scanner&);
-
- typedef State InternalState; // Needed for agglutination
- friend class ScannerGlueCommon<Scanner>;
- friend class ScannerGlueTask<Scanner>;
-
- template<class AnotherRelocation, class AnotherShortcutting>
- friend class Scanner;
-
- friend struct ScannerSaver;
-
-#ifndef PIRE_DEBUG
- friend struct AlignedRunner< Scanner<Relocation, Shortcutting> >;
-#endif
-};
-
-// Helper class for Save/Load partial specialization
-struct ScannerSaver {
- template<class Shortcutting>
- static void SaveScanner(const Scanner<Relocatable, Shortcutting>& scanner, yostream* s)
- {
- typedef Scanner<Relocatable, Shortcutting> ScannerType;
-
- typename ScannerType::Locals mc = scanner.m;
- mc.initial -= reinterpret_cast<size_t>(scanner.m_transitions);
+ }
+ BuildShortcuts();
+ }
+
+ size_t AcceptedRegexpsCount(size_t idx) const
+ {
+ const size_t* b = m_final + m_finalIndex[idx];
+ const size_t* e = b;
+ while (*e != End)
+ ++e;
+ return e - b;
+ }
+
+ template <class Scanner>
+ friend void Pire::BuildScanner(const Fsm&, Scanner&);
+
+ typedef State InternalState; // Needed for agglutination
+ friend class ScannerGlueCommon<Scanner>;
+ friend class ScannerGlueTask<Scanner>;
+
+ template<class AnotherRelocation, class AnotherShortcutting>
+ friend class Scanner;
+
+ friend struct ScannerSaver;
+
+#ifndef PIRE_DEBUG
+ friend struct AlignedRunner< Scanner<Relocation, Shortcutting> >;
+#endif
+};
+
+// Helper class for Save/Load partial specialization
+struct ScannerSaver {
+ template<class Shortcutting>
+ static void SaveScanner(const Scanner<Relocatable, Shortcutting>& scanner, yostream* s)
+ {
+ typedef Scanner<Relocatable, Shortcutting> ScannerType;
+
+ typename ScannerType::Locals mc = scanner.m;
+ mc.initial -= reinterpret_cast<size_t>(scanner.m_transitions);
SavePodType(s, Pire::Header(ScannerIOTypes::Scanner, sizeof(mc)));
- Impl::AlignSave(s, sizeof(Pire::Header));
- SavePodType(s, mc);
- Impl::AlignSave(s, sizeof(mc));
- SavePodType(s, scanner.Empty());
- Impl::AlignSave(s, sizeof(scanner.Empty()));
- if (!scanner.Empty())
+ Impl::AlignSave(s, sizeof(Pire::Header));
+ SavePodType(s, mc);
+ Impl::AlignSave(s, sizeof(mc));
+ SavePodType(s, scanner.Empty());
+ Impl::AlignSave(s, sizeof(scanner.Empty()));
+ if (!scanner.Empty())
Impl::AlignedSaveArray(s, scanner.m_buffer.Get(), scanner.BufSize());
- }
-
- template<class Shortcutting>
- static void LoadScanner(Scanner<Relocatable, Shortcutting>& scanner, yistream* s)
- {
- typedef Scanner<Relocatable, Shortcutting> ScannerType;
-
- Scanner<Relocatable, Shortcutting> sc;
+ }
+
+ template<class Shortcutting>
+ static void LoadScanner(Scanner<Relocatable, Shortcutting>& scanner, yistream* s)
+ {
+ typedef Scanner<Relocatable, Shortcutting> ScannerType;
+
+ Scanner<Relocatable, Shortcutting> sc;
Impl::ValidateHeader(s, ScannerIOTypes::Scanner, sizeof(sc.m));
- LoadPodType(s, sc.m);
- Impl::AlignLoad(s, sizeof(sc.m));
- if (Shortcutting::Signature != sc.m.shortcuttingSignature)
- throw Error("This scanner has different shortcutting type");
- bool empty;
- LoadPodType(s, empty);
- Impl::AlignLoad(s, sizeof(empty));
-
- if (empty) {
- sc.Alias(ScannerType::Null());
- } else {
+ LoadPodType(s, sc.m);
+ Impl::AlignLoad(s, sizeof(sc.m));
+ if (Shortcutting::Signature != sc.m.shortcuttingSignature)
+ throw Error("This scanner has different shortcutting type");
+ bool empty;
+ LoadPodType(s, empty);
+ Impl::AlignLoad(s, sizeof(empty));
+
+ if (empty) {
+ sc.Alias(ScannerType::Null());
+ } else {
sc.m_buffer = TArrayHolder<char>(new char[sc.BufSize()]);
Impl::AlignedLoadArray(s, sc.m_buffer.Get(), sc.BufSize());
sc.Markup(sc.m_buffer.Get());
- sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions);
- }
- scanner.Swap(sc);
- }
-
- // TODO: implement more effective serialization
- // of nonrelocatable scanner if necessary
-
- template<class Shortcutting>
- static void SaveScanner(const Scanner<Nonrelocatable, Shortcutting>& scanner, yostream* s)
- {
- Scanner<Relocatable, Shortcutting>(scanner).Save(s);
- }
-
- template<class Shortcutting>
- static void LoadScanner(Scanner<Nonrelocatable, Shortcutting>& scanner, yistream* s)
- {
- Scanner<Relocatable, Shortcutting> rs;
- rs.Load(s);
- Scanner<Nonrelocatable, Shortcutting>(rs).Swap(scanner);
- }
-};
-
-
-template<class Relocation, class Shortcutting>
-void Scanner<Relocation, Shortcutting>::Save(yostream* s) const
-{
- ScannerSaver::SaveScanner(*this, s);
-}
-
-template<class Relocation, class Shortcutting>
-void Scanner<Relocation, Shortcutting>::Load(yistream* s)
-{
- ScannerSaver::LoadScanner(*this, s);
-}
-
-// Shortcutting policy that checks state exit masks
-template <size_t MaskCount>
-class ExitMasks {
-private:
- enum {
- NO_SHORTCUT_MASK = 1, // the state doesn't have shortcuts
- NO_EXIT_MASK = 2 // the state has only transtions to itself (we can stop the scan)
- };
-
- template<class ScannerRowHeader, unsigned N>
- struct MaskCheckerBase {
+ sc.m.initial += reinterpret_cast<size_t>(sc.m_transitions);
+ }
+ scanner.Swap(sc);
+ }
+
+ // TODO: implement more effective serialization
+ // of nonrelocatable scanner if necessary
+
+ template<class Shortcutting>
+ static void SaveScanner(const Scanner<Nonrelocatable, Shortcutting>& scanner, yostream* s)
+ {
+ Scanner<Relocatable, Shortcutting>(scanner).Save(s);
+ }
+
+ template<class Shortcutting>
+ static void LoadScanner(Scanner<Nonrelocatable, Shortcutting>& scanner, yistream* s)
+ {
+ Scanner<Relocatable, Shortcutting> rs;
+ rs.Load(s);
+ Scanner<Nonrelocatable, Shortcutting>(rs).Swap(scanner);
+ }
+};
+
+
+template<class Relocation, class Shortcutting>
+void Scanner<Relocation, Shortcutting>::Save(yostream* s) const
+{
+ ScannerSaver::SaveScanner(*this, s);
+}
+
+template<class Relocation, class Shortcutting>
+void Scanner<Relocation, Shortcutting>::Load(yistream* s)
+{
+ ScannerSaver::LoadScanner(*this, s);
+}
+
+// Shortcutting policy that checks state exit masks
+template <size_t MaskCount>
+class ExitMasks {
+private:
+ enum {
+ NO_SHORTCUT_MASK = 1, // the state doesn't have shortcuts
+ NO_EXIT_MASK = 2 // the state has only transtions to itself (we can stop the scan)
+ };
+
+ template<class ScannerRowHeader, unsigned N>
+ struct MaskCheckerBase {
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- bool Check(const ScannerRowHeader& hdr, size_t alignOffset, Word chunk)
- {
- Word mask = CheckBytes(hdr.Mask(N, alignOffset), chunk);
- for (int i = N-1; i >= 0; --i) {
- mask = Or(mask, CheckBytes(hdr.Mask(i, alignOffset), chunk));
- }
- return !IsAnySet(mask);
- }
+ bool Check(const ScannerRowHeader& hdr, size_t alignOffset, Word chunk)
+ {
+ Word mask = CheckBytes(hdr.Mask(N, alignOffset), chunk);
+ for (int i = N-1; i >= 0; --i) {
+ mask = Or(mask, CheckBytes(hdr.Mask(i, alignOffset), chunk));
+ }
+ return !IsAnySet(mask);
+ }
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
- {
- for (; begin != end && Check(hdr, alignOffset, ToLittleEndian(*begin)); ++begin) {}
- return begin;
- }
- };
-
- template<class ScannerRowHeader, unsigned N, unsigned Nmax>
- struct MaskChecker : MaskCheckerBase<ScannerRowHeader, N> {
- typedef MaskCheckerBase<ScannerRowHeader, N> Base;
- typedef MaskChecker<ScannerRowHeader, N+1, Nmax> Next;
+ const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
+ {
+ for (; begin != end && Check(hdr, alignOffset, ToLittleEndian(*begin)); ++begin) {}
+ return begin;
+ }
+ };
+
+ template<class ScannerRowHeader, unsigned N, unsigned Nmax>
+ struct MaskChecker : MaskCheckerBase<ScannerRowHeader, N> {
+ typedef MaskCheckerBase<ScannerRowHeader, N> Base;
+ typedef MaskChecker<ScannerRowHeader, N+1, Nmax> Next;
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
- {
- if (hdr.Mask(N) == hdr.Mask(N + 1))
- return Base::DoRun(hdr, alignOffset, begin, end);
- else
- return Next::Run(hdr, alignOffset, begin, end);
- }
- };
-
- template<class ScannerRowHeader, unsigned N>
- struct MaskChecker<ScannerRowHeader, N, N> : MaskCheckerBase<ScannerRowHeader, N> {
- typedef MaskCheckerBase<ScannerRowHeader, N> Base;
+ const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
+ {
+ if (hdr.Mask(N) == hdr.Mask(N + 1))
+ return Base::DoRun(hdr, alignOffset, begin, end);
+ else
+ return Next::Run(hdr, alignOffset, begin, end);
+ }
+ };
+
+ template<class ScannerRowHeader, unsigned N>
+ struct MaskChecker<ScannerRowHeader, N, N> : MaskCheckerBase<ScannerRowHeader, N> {
+ typedef MaskCheckerBase<ScannerRowHeader, N> Base;
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
- {
- return Base::DoRun(hdr, alignOffset, begin, end);
- }
+ const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
+ {
+ return Base::DoRun(hdr, alignOffset, begin, end);
+ }
};
-
- // Compares the ExitMask[0] value without SSE reads which seems to be more optimal
- template <class Relocation>
+
+ // Compares the ExitMask[0] value without SSE reads which seems to be more optimal
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- bool CheckFirstMask(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t val)
- {
- return (scanner.Header(state).Mask(0) == val);
- }
-
-public:
-
- static const size_t ExitMaskCount = MaskCount;
- static const size_t Signature = 0x2000 + MaskCount;
-
- template <class Scanner>
- struct ExtendedRowHeader {
- private:
- /// In order to allow transition table to be aligned at sizeof(size_t) instead of
- /// sizeof(Word) and still be able to read Masks at Word-aligned addresses each mask
- /// occupies 2x space and only properly aligned part of it is read
- enum {
- SizeTInMaxSizeWord = sizeof(MaxSizeWord) / sizeof(size_t),
- MaskSizeInSizeT = 2 * SizeTInMaxSizeWord,
- };
-
+ bool CheckFirstMask(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t val)
+ {
+ return (scanner.Header(state).Mask(0) == val);
+ }
+
+public:
+
+ static const size_t ExitMaskCount = MaskCount;
+ static const size_t Signature = 0x2000 + MaskCount;
+
+ template <class Scanner>
+ struct ExtendedRowHeader {
+ private:
+ /// In order to allow transition table to be aligned at sizeof(size_t) instead of
+ /// sizeof(Word) and still be able to read Masks at Word-aligned addresses each mask
+ /// occupies 2x space and only properly aligned part of it is read
+ enum {
+ SizeTInMaxSizeWord = sizeof(MaxSizeWord) / sizeof(size_t),
+ MaskSizeInSizeT = 2 * SizeTInMaxSizeWord,
+ };
+
public:
- static const size_t ExitMaskCount = MaskCount;
-
- inline
- const Word& Mask(size_t i, size_t alignOffset) const
- {
+ static const size_t ExitMaskCount = MaskCount;
+
+ inline
+ const Word& Mask(size_t i, size_t alignOffset) const
+ {
Y_ASSERT(i < ExitMaskCount);
Y_ASSERT(alignOffset < SizeTInMaxSizeWord);
- const Word* p = (const Word*)(ExitMasksArray + alignOffset + MaskSizeInSizeT * i);
+ const Word* p = (const Word*)(ExitMasksArray + alignOffset + MaskSizeInSizeT * i);
Y_ASSERT(IsAligned(p, sizeof(Word)));
- return *p;
- }
+ return *p;
+ }
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- size_t Mask(size_t i) const
- {
+ size_t Mask(size_t i) const
+ {
Y_ASSERT(i < ExitMaskCount);
- return ExitMasksArray[MaskSizeInSizeT*i];
- }
-
- void SetMask(size_t i, size_t val)
- {
- for (size_t j = 0; j < MaskSizeInSizeT; ++j)
- ExitMasksArray[MaskSizeInSizeT*i + j] = val;
- }
-
- ExtendedRowHeader()
- {
- for (size_t i = 0; i < ExitMaskCount; ++i)
- SetMask(i, NO_SHORTCUT_MASK);
- }
-
- template <class OtherScanner>
- ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other)
- {
- PIRE_STATIC_ASSERT(ExitMaskCount == ExtendedRowHeader<OtherScanner>::ExitMaskCount);
- Common = other.Common;
- for (size_t i = 0; i < ExitMaskCount; ++i)
- SetMask(i, other.Mask(i));
- return *this;
- }
-
- private:
- /// If this state loops for all letters except particular set
- /// (common thing when matching something like /.*[Aa]/),
- /// each ExitMask contains that letter in each byte of size_t.
- ///
- /// These masks are most commonly used for fast forwarding through parts
- /// of the string matching /.*/ somewhere in the middle regexp.
- size_t ExitMasksArray[ExitMaskCount * MaskSizeInSizeT];
-
- public:
- typename Scanner::CommonRowHeader Common;
- };
-
- template <class Header>
- static void SetNoExit(Header& header)
- {
- header.SetMask(0, NO_EXIT_MASK);
- }
-
- template <class Header>
- static void SetNoShortcut(Header& header)
- {
- header.SetMask(0, NO_SHORTCUT_MASK);
- }
-
- template <class Header>
- static void SetMask(Header& header, size_t ind, char c)
- {
- header.SetMask(ind, FillSizeT(c));
- }
-
- template <class Header>
- static void FinishMasks(Header& header, size_t ind)
- {
- if (ind == 0)
- ind = 1;
- // Fill the rest of the shortcut masks with the last used mask
- size_t lastMask = header.Mask(ind - 1);
- while (ind != ExitMaskCount) {
- header.SetMask(ind, lastMask);
- ++ind;
- }
- }
-
- template <class Relocation>
+ return ExitMasksArray[MaskSizeInSizeT*i];
+ }
+
+ void SetMask(size_t i, size_t val)
+ {
+ for (size_t j = 0; j < MaskSizeInSizeT; ++j)
+ ExitMasksArray[MaskSizeInSizeT*i + j] = val;
+ }
+
+ ExtendedRowHeader()
+ {
+ for (size_t i = 0; i < ExitMaskCount; ++i)
+ SetMask(i, NO_SHORTCUT_MASK);
+ }
+
+ template <class OtherScanner>
+ ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other)
+ {
+ PIRE_STATIC_ASSERT(ExitMaskCount == ExtendedRowHeader<OtherScanner>::ExitMaskCount);
+ Common = other.Common;
+ for (size_t i = 0; i < ExitMaskCount; ++i)
+ SetMask(i, other.Mask(i));
+ return *this;
+ }
+
+ private:
+ /// If this state loops for all letters except particular set
+ /// (common thing when matching something like /.*[Aa]/),
+ /// each ExitMask contains that letter in each byte of size_t.
+ ///
+ /// These masks are most commonly used for fast forwarding through parts
+ /// of the string matching /.*/ somewhere in the middle of the regexp.
+ size_t ExitMasksArray[ExitMaskCount * MaskSizeInSizeT];
+
+ public:
+ typename Scanner::CommonRowHeader Common;
+ };
+
+ template <class Header>
+ static void SetNoExit(Header& header)
+ {
+ header.SetMask(0, NO_EXIT_MASK);
+ }
+
+ template <class Header>
+ static void SetNoShortcut(Header& header)
+ {
+ header.SetMask(0, NO_SHORTCUT_MASK);
+ }
+
+ template <class Header>
+ static void SetMask(Header& header, size_t ind, char c)
+ {
+ header.SetMask(ind, FillSizeT(c));
+ }
+
+ template <class Header>
+ static void FinishMasks(Header& header, size_t ind)
+ {
+ if (ind == 0)
+ ind = 1;
+ // Fill the rest of the shortcut masks with the last used mask
+ size_t lastMask = header.Mask(ind - 1);
+ while (ind != ExitMaskCount) {
+ header.SetMask(ind, lastMask);
+ ++ind;
+ }
+ }
+
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- bool NoExit(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state)
- {
- return CheckFirstMask(scanner, state, NO_EXIT_MASK);
- }
-
- template <class Relocation>
+ bool NoExit(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state)
+ {
+ return CheckFirstMask(scanner, state, NO_EXIT_MASK);
+ }
+
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- bool NoShortcut(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state)
- {
- return CheckFirstMask(scanner, state, NO_SHORTCUT_MASK);
- }
-
- template <class Relocation>
+ bool NoShortcut(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state)
+ {
+ return CheckFirstMask(scanner, state, NO_SHORTCUT_MASK);
+ }
+
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- const Word* Run(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t alignOffset, const Word* begin, const Word* end)
- {
- return MaskChecker<typename Scanner<Relocation, ExitMasks<MaskCount> >::ScannerRowHeader, 0, MaskCount - 1>::Run(scanner.Header(state), alignOffset, begin, end);
- }
-
-};
-
-
-// Shortcutting policy that doesn't do shortcuts
-struct NoShortcuts {
-
- static const size_t ExitMaskCount = 0;
- static const size_t Signature = 0x1000;
-
- template <class Scanner>
- struct ExtendedRowHeader {
- typename Scanner::CommonRowHeader Common;
-
- template <class OtherScanner>
- ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other)
- {
- PIRE_STATIC_ASSERT(sizeof(ExtendedRowHeader) == sizeof(ExtendedRowHeader<OtherScanner>));
- Common = other.Common;
- return *this;
- }
- };
-
- template <class Header>
- static void SetNoExit(Header&) {}
-
- template <class Header>
- static void SetNoShortcut(Header&) {}
-
- template <class Header>
- static void SetMask(Header&, size_t, char) {}
-
- template <class Header>
- static void FinishMasks(Header&, size_t) {}
-
- template <class Relocation>
+ const Word* Run(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t alignOffset, const Word* begin, const Word* end)
+ {
+ return MaskChecker<typename Scanner<Relocation, ExitMasks<MaskCount> >::ScannerRowHeader, 0, MaskCount - 1>::Run(scanner.Header(state), alignOffset, begin, end);
+ }
+
+};
+
+
+// Shortcutting policy that doesn't do shortcuts
+struct NoShortcuts {
+
+ static const size_t ExitMaskCount = 0;
+ static const size_t Signature = 0x1000;
+
+ template <class Scanner>
+ struct ExtendedRowHeader {
+ typename Scanner::CommonRowHeader Common;
+
+ template <class OtherScanner>
+ ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other)
+ {
+ PIRE_STATIC_ASSERT(sizeof(ExtendedRowHeader) == sizeof(ExtendedRowHeader<OtherScanner>));
+ Common = other.Common;
+ return *this;
+ }
+ };
+
+ template <class Header>
+ static void SetNoExit(Header&) {}
+
+ template <class Header>
+ static void SetNoShortcut(Header&) {}
+
+ template <class Header>
+ static void SetMask(Header&, size_t, char) {}
+
+ template <class Header>
+ static void FinishMasks(Header&, size_t) {}
+
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- bool NoExit(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State)
- {
- // Cannot exit prematurely
- return false;
- }
-
- template <class Relocation>
+ bool NoExit(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State)
+ {
+ // Cannot exit prematurely
+ return false;
+ }
+
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- bool NoShortcut(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State)
- {
- // There's no shortcut regardless of the state
- return true;
- }
-
- template <class Relocation>
+ bool NoShortcut(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State)
+ {
+ // There's no shortcut regardless of the state
+ return true;
+ }
+
+ template <class Relocation>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- const Word* Run(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State, size_t, const Word* begin, const Word*)
- {
- // Stop shortcutting right at the beginning
- return begin;
- }
-};
-
-#ifndef PIRE_DEBUG
-
-// The purpose of this template is to produce a number of ProcessChunk() calls
-// instead of writing for(...){ProcessChunk()} loop that GCC refuses to unroll.
-// Manually unrolled code proves to be faster
-template <class Scanner, unsigned Count>
-struct MultiChunk {
- // Process Word-sized chunk which consist of >=1 size_t-sized chunks
- template<class Pred>
+ const Word* Run(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State, size_t, const Word* begin, const Word*)
+ {
+ // Stop shortcutting right at the beginning
+ return begin;
+ }
+};
+
+#ifndef PIRE_DEBUG
+
+// The purpose of this template is to produce a number of ProcessChunk() calls
+// instead of writing for(...){ProcessChunk()} loop that GCC refuses to unroll.
+// Manually unrolled code proves to be faster
+template <class Scanner, unsigned Count>
+struct MultiChunk {
+ // Process a Word-sized chunk which consists of >=1 size_t-sized chunks
+ template<class Pred>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action Process(const Scanner& scanner, typename Scanner::State& state, const size_t* p, Pred pred)
- {
- if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Continue)
- return MultiChunk<Scanner, Count-1>::Process(scanner, state, ++p, pred);
- else
- return Stop;
- }
-};
-
-template <class Scanner>
-struct MultiChunk<Scanner, 0> {
- // Process Word-sized chunk which consist of >=1 size_t-sized chunks
- template<class Pred>
+ Action Process(const Scanner& scanner, typename Scanner::State& state, const size_t* p, Pred pred)
+ {
+ if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Continue)
+ return MultiChunk<Scanner, Count-1>::Process(scanner, state, ++p, pred);
+ else
+ return Stop;
+ }
+};
+
+template <class Scanner>
+struct MultiChunk<Scanner, 0> {
+ // Process a Word-sized chunk which consists of >=1 size_t-sized chunks
+ template<class Pred>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action Process(const Scanner&, typename Scanner::State, const size_t*, Pred)
- {
- return Continue;
- }
-};
-
-// Efficiently runs a scanner through size_t-aligned memory range
-template<class Relocation, class Shortcutting>
-struct AlignedRunner< Scanner<Relocation, Shortcutting> > {
-private:
- typedef Scanner<Relocation, Shortcutting> ScannerType;
-
- // Processes Word-sized chuck of memory (depending on the platform a Word might
- // consist of multiple size_t chuncks)
- template <class Pred>
+ Action Process(const Scanner&, typename Scanner::State, const size_t*, Pred)
+ {
+ return Continue;
+ }
+};
+
+// Efficiently runs a scanner through size_t-aligned memory range
+template<class Relocation, class Shortcutting>
+struct AlignedRunner< Scanner<Relocation, Shortcutting> > {
+private:
+ typedef Scanner<Relocation, Shortcutting> ScannerType;
+
+ // Processes a Word-sized chunk of memory (depending on the platform a Word might
+ // consist of multiple size_t chunks)
+ template <class Pred>
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action RunMultiChunk(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, Pred pred)
- {
- return MultiChunk<ScannerType, sizeof(Word)/sizeof(size_t)>::Process(scanner, st, begin, pred);
- }
-
- // Asserts if the scanner changes state while processing the byte range that is
- // supposed to be skipped by a shortcut
- static void ValidateSkip(const ScannerType& scanner, typename ScannerType::State st, const char* begin, const char* end)
- {
- typename ScannerType::State stateBefore = st;
- for (const char* pos = begin; pos != end; ++pos) {
- Step(scanner, st, (unsigned char)*pos);
+ Action RunMultiChunk(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, Pred pred)
+ {
+ return MultiChunk<ScannerType, sizeof(Word)/sizeof(size_t)>::Process(scanner, st, begin, pred);
+ }
+
+ // Asserts if the scanner changes state while processing the byte range that is
+ // supposed to be skipped by a shortcut
+ static void ValidateSkip(const ScannerType& scanner, typename ScannerType::State st, const char* begin, const char* end)
+ {
+ typename ScannerType::State stateBefore = st;
+ for (const char* pos = begin; pos != end; ++pos) {
+ Step(scanner, st, (unsigned char)*pos);
Y_ASSERT(st == stateBefore);
- }
- }
-
-public:
-
- template<class Pred>
- static inline PIRE_HOT_FUNCTION
- Action RunAligned(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, const size_t* end , Pred pred)
- {
+ }
+ }
+
+public:
+
+ template<class Pred>
+ static inline PIRE_HOT_FUNCTION
+ Action RunAligned(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, const size_t* end , Pred pred)
+ {
typename ScannerType::State state = st;
- const Word* head = AlignUp((const Word*) begin, sizeof(Word));
- const Word* tail = AlignDown((const Word*) end, sizeof(Word));
- for (; begin != (const size_t*) head && begin != end; ++begin)
- if (RunChunk(scanner, state, begin, 0, sizeof(void*), pred) == Stop) {
- st = state;
- return Stop;
- }
-
- if (begin == end) {
- st = state;
- return Continue;
- }
- if (Shortcutting::NoExit(scanner, state)) {
- st = state;
- return pred(scanner, state, ((const char*) end));
- }
-
- // Row size should be a multiple of MaxSizeWord size. Then alignOffset is the same for any state
+ const Word* head = AlignUp((const Word*) begin, sizeof(Word));
+ const Word* tail = AlignDown((const Word*) end, sizeof(Word));
+ for (; begin != (const size_t*) head && begin != end; ++begin)
+ if (RunChunk(scanner, state, begin, 0, sizeof(void*), pred) == Stop) {
+ st = state;
+ return Stop;
+ }
+
+ if (begin == end) {
+ st = state;
+ return Continue;
+ }
+ if (Shortcutting::NoExit(scanner, state)) {
+ st = state;
+ return pred(scanner, state, ((const char*) end));
+ }
+
+ // Row size should be a multiple of MaxSizeWord size. Then alignOffset is the same for any state
Y_ASSERT((scanner.RowSize()*sizeof(typename ScannerType::Transition)) % sizeof(MaxSizeWord) == 0);
- size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t);
-
- bool noShortcut = Shortcutting::NoShortcut(scanner, state);
-
- while (true) {
- // Do normal processing until a shortcut is possible
- while (noShortcut && head != tail) {
- if (RunMultiChunk(scanner, state, (const size_t*)head, pred) == Stop) {
- st = state;
- return Stop;
- }
- ++head;
- noShortcut = Shortcutting::NoShortcut(scanner, state);
- }
- if (head == tail)
- break;
-
- if (Shortcutting::NoExit(scanner, state)) {
- st = state;
- return pred(scanner, state, ((const char*) end));
- }
-
- // Do fast forwarding while it is possible
- const Word* skipEnd = Shortcutting::Run(scanner, state, alignOffset, head, tail);
- PIRE_IF_CHECKED(ValidateSkip(scanner, state, (const char*)head, (const char*)skipEnd));
- head = skipEnd;
- noShortcut = true;
- }
-
- for (size_t* p = (size_t*) tail; p != end; ++p) {
- if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Stop) {
- st = state;
- return Stop;
- }
- }
-
- st = state;
- return Continue;
- }
-};
-
-#endif
-
-template<class Scanner>
-class ScannerGlueTask: public ScannerGlueCommon<Scanner> {
-public:
- typedef ScannerGlueCommon<Scanner> Base;
- typedef typename Base::State State;
- using Base::Lhs;
- using Base::Rhs;
- using Base::Sc;
- using Base::Letters;
-
- typedef GluedStateLookupTable<256*1024, typename Scanner::State> InvStates;
-
- ScannerGlueTask(const Scanner& lhs, const Scanner& rhs)
- : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters))
- {
- }
+ size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t);
+
+ bool noShortcut = Shortcutting::NoShortcut(scanner, state);
+
+ while (true) {
+ // Do normal processing until a shortcut is possible
+ while (noShortcut && head != tail) {
+ if (RunMultiChunk(scanner, state, (const size_t*)head, pred) == Stop) {
+ st = state;
+ return Stop;
+ }
+ ++head;
+ noShortcut = Shortcutting::NoShortcut(scanner, state);
+ }
+ if (head == tail)
+ break;
+
+ if (Shortcutting::NoExit(scanner, state)) {
+ st = state;
+ return pred(scanner, state, ((const char*) end));
+ }
+
+ // Do fast forwarding while it is possible
+ const Word* skipEnd = Shortcutting::Run(scanner, state, alignOffset, head, tail);
+ PIRE_IF_CHECKED(ValidateSkip(scanner, state, (const char*)head, (const char*)skipEnd));
+ head = skipEnd;
+ noShortcut = true;
+ }
+
+ for (size_t* p = (size_t*) tail; p != end; ++p) {
+ if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Stop) {
+ st = state;
+ return Stop;
+ }
+ }
+
+ st = state;
+ return Continue;
+ }
+};
+
+#endif
+
+template<class Scanner>
+class ScannerGlueTask: public ScannerGlueCommon<Scanner> {
+public:
+ typedef ScannerGlueCommon<Scanner> Base;
+ typedef typename Base::State State;
+ using Base::Lhs;
+ using Base::Rhs;
+ using Base::Sc;
+ using Base::Letters;
+
+ typedef GluedStateLookupTable<256*1024, typename Scanner::State> InvStates;
+
+ ScannerGlueTask(const Scanner& lhs, const Scanner& rhs)
+ : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters))
+ {
+ }
void AcceptStates(const TVector<State>& states)
- {
- // Make up a new scanner and fill in the final table
+ {
+ // Make up a new scanner and fill in the final table
- size_t finalTableSize = 0;
+ size_t finalTableSize = 0;
for (auto&& i : states)
finalTableSize += RangeLen(Lhs().AcceptedRegexps(i.first)) + RangeLen(Rhs().AcceptedRegexps(i.second));
this->SetSc(THolder<Scanner>(new Scanner));
- Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount());
+ Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount());
auto finalWriter = Sc().m_final;
- for (size_t state = 0; state != states.size(); ++state) {
+ for (size_t state = 0; state != states.size(); ++state) {
Sc().m_finalIndex[state] = finalWriter - Sc().m_final;
finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter);
finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter);
*finalWriter++ = static_cast<size_t>(-1);
- Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0)
- | ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0));
- }
- }
-
- void Connect(size_t from, size_t to, Char letter) { Sc().SetJump(from, letter, to); }
+ Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0)
+ | ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0));
+ }
+ }
- const Scanner& Success()
- {
- Sc().BuildShortcuts();
- return Sc();
- }
+ void Connect(size_t from, size_t to, Char letter) { Sc().SetJump(from, letter, to); }
+
+ const Scanner& Success()
+ {
+ Sc().BuildShortcuts();
+ return Sc();
+ }
private:
- template<class Iter>
- size_t RangeLen(ypair<Iter, Iter> range) const
- {
- return std::distance(range.first, range.second);
- }
-
- template<class Iter, class OutIter>
- OutIter Shift(ypair<Iter, Iter> range, size_t shift, OutIter out) const
- {
- for (; range.first != range.second; ++range.first, ++out)
- *out = *range.first + shift;
- return out;
- }
-};
-
-}
-
-
-template<class Relocation, class Shortcutting>
-struct StDumper< Impl::Scanner<Relocation, Shortcutting> > {
-
- typedef Impl::Scanner<Relocation, Shortcutting> ScannerType;
-
- StDumper(const ScannerType& sc, typename ScannerType::State st): m_sc(&sc), m_st(st) {}
-
- void Dump(yostream& stream) const
- {
- stream << m_sc->StateIndex(m_st);
- if (m_sc->Final(m_st))
- stream << " [final]";
- if (m_sc->Dead(m_st))
- stream << " [dead]";
- }
-private:
- const ScannerType* m_sc;
- typename ScannerType::State m_st;
-};
-
-
-template<class Relocation, class Shortcutting>
-Impl::Scanner<Relocation, Shortcutting> Impl::Scanner<Relocation, Shortcutting>::Glue(const Impl::Scanner<Relocation, Shortcutting>& lhs, const Impl::Scanner<Relocation, Shortcutting>& rhs, size_t maxSize /* = 0 */)
-{
- if (lhs.Empty())
- return rhs;
- if (rhs.Empty())
- return lhs;
-
- static const size_t DefMaxSize = 80000;
- Impl::ScannerGlueTask< Impl::Scanner<Relocation, Shortcutting> > task(lhs, rhs);
- return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
-}
-
-
-/**
- * A compiled multiregexp.
- * Can only find out whether a string matches the regexps or not,
- * but takes O( str.length() ) time.
- *
- * In addition, multiple scanners can be agglutinated together,
- * producting a scanner which can be used for checking
- * strings against several regexps in a single pass.
- */
-typedef Impl::Scanner<Impl::Relocatable, Impl::ExitMasks<2> > Scanner;
-typedef Impl::Scanner<Impl::Relocatable, Impl::NoShortcuts> ScannerNoMask;
-
-/**
- * Same as above, but does not allow relocation or mmap()-ing.
- * On the other hand, runs almost twice as fast as the Scanner.
- */
-typedef Impl::Scanner<Impl::Nonrelocatable, Impl::ExitMasks<2> > NonrelocScanner;
-typedef Impl::Scanner<Impl::Nonrelocatable, Impl::NoShortcuts> NonrelocScannerNoMask;
-
-}
-
-namespace std {
+ template<class Iter>
+ size_t RangeLen(ypair<Iter, Iter> range) const
+ {
+ return std::distance(range.first, range.second);
+ }
+
+ template<class Iter, class OutIter>
+ OutIter Shift(ypair<Iter, Iter> range, size_t shift, OutIter out) const
+ {
+ for (; range.first != range.second; ++range.first, ++out)
+ *out = *range.first + shift;
+ return out;
+ }
+};
+
+}
+
+
+template<class Relocation, class Shortcutting>
+struct StDumper< Impl::Scanner<Relocation, Shortcutting> > {
+
+ typedef Impl::Scanner<Relocation, Shortcutting> ScannerType;
+
+ StDumper(const ScannerType& sc, typename ScannerType::State st): m_sc(&sc), m_st(st) {}
+
+ void Dump(yostream& stream) const
+ {
+ stream << m_sc->StateIndex(m_st);
+ if (m_sc->Final(m_st))
+ stream << " [final]";
+ if (m_sc->Dead(m_st))
+ stream << " [dead]";
+ }
+private:
+ const ScannerType* m_sc;
+ typename ScannerType::State m_st;
+};
+
+
+template<class Relocation, class Shortcutting>
+Impl::Scanner<Relocation, Shortcutting> Impl::Scanner<Relocation, Shortcutting>::Glue(const Impl::Scanner<Relocation, Shortcutting>& lhs, const Impl::Scanner<Relocation, Shortcutting>& rhs, size_t maxSize /* = 0 */)
+{
+ if (lhs.Empty())
+ return rhs;
+ if (rhs.Empty())
+ return lhs;
+
+ static const size_t DefMaxSize = 80000;
+ Impl::ScannerGlueTask< Impl::Scanner<Relocation, Shortcutting> > task(lhs, rhs);
+ return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
+}
+
+
+/**
+ * A compiled multiregexp.
+ * Can only find out whether a string matches the regexps or not,
+ * but takes O( str.length() ) time.
+ *
+ * In addition, multiple scanners can be agglutinated together,
+ * producing a scanner which can be used for checking
+ * strings against several regexps in a single pass.
+ */
+typedef Impl::Scanner<Impl::Relocatable, Impl::ExitMasks<2> > Scanner;
+typedef Impl::Scanner<Impl::Relocatable, Impl::NoShortcuts> ScannerNoMask;
+
+/**
+ * Same as above, but does not allow relocation or mmap()-ing.
+ * On the other hand, runs almost twice as fast as the Scanner.
+ */
+typedef Impl::Scanner<Impl::Nonrelocatable, Impl::ExitMasks<2> > NonrelocScanner;
+typedef Impl::Scanner<Impl::Nonrelocatable, Impl::NoShortcuts> NonrelocScannerNoMask;
+
+}
+
+namespace std {
inline void swap(Pire::Scanner& a, Pire::Scanner& b) {
- a.Swap(b);
- }
-
+ a.Swap(b);
+ }
+
inline void swap(Pire::NonrelocScanner& a, Pire::NonrelocScanner& b) {
- a.Swap(b);
- }
-}
-
-
-#endif
+ a.Swap(b);
+ }
+}
+
+
+#endif
diff --git a/contrib/libs/pire/pire/scanners/null.cpp b/contrib/libs/pire/pire/scanners/null.cpp
index f0e21ce4d3..3a7fee7220 100644
--- a/contrib/libs/pire/pire/scanners/null.cpp
+++ b/contrib/libs/pire/pire/scanners/null.cpp
@@ -1,6 +1,6 @@
#include <contrib/libs/pire/pire/fsm.h>
-#include "multi.h"
+#include "multi.h"
#include "half_final.h"
-#include "simple.h"
-#include "slow.h"
-#include "loaded.h"
+#include "simple.h"
+#include "slow.h"
+#include "loaded.h"
diff --git a/contrib/libs/pire/pire/scanners/pair.h b/contrib/libs/pire/pire/scanners/pair.h
index c12338a2a0..16fc14a59f 100644
--- a/contrib/libs/pire/pire/scanners/pair.h
+++ b/contrib/libs/pire/pire/scanners/pair.h
@@ -1,99 +1,99 @@
-/*
- * pair.h -- definition of the pair of scanners
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * pair.h -- definition of the pair of scanners
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-#ifndef PIRE_SCANNER_PAIR_INCLUDED
-#define PIRE_SCANNER_PAIR_INCLUDED
-
-namespace Pire {
-
- /**
- * A pair of scanner, providing the interface of a scanner itself.
- * If you need to run two scanners on the same string, using ScannerPair
- * is usually faster then running those scanners sequentially.
- */
- template<class Scanner1, class Scanner2>
- class ScannerPair {
- public:
- typedef ypair<typename Scanner1::State, typename Scanner2::State> State;
- typedef ypair<typename Scanner1::Action, typename Scanner2::Action> Action;
-
- ScannerPair()
- : m_scanner1()
- , m_scanner2()
- {
- }
- ScannerPair(const Scanner1& s1, const Scanner2& s2)
- : m_scanner1(&s1)
- , m_scanner2(&s2)
- {
- }
-
- void Initialize(State& state) const
- {
- m_scanner1->Initialize(state.first);
- m_scanner2->Initialize(state.second);
- }
-
- Action Next(State& state, Char ch) const
- {
- return ymake_pair(
- m_scanner1->Next(state.first, ch),
- m_scanner2->Next(state.second, ch)
- );
- }
-
- void TakeAction(State& s, Action a) const
- {
- m_scanner1->TakeAction(s.first, a.first);
- m_scanner2->TakeAction(s.second, a.second);
- }
-
- bool Final(const State& state) const
- {
- return m_scanner1->Final(state.first) || m_scanner2->Final(state.second);
- }
-
- bool Dead(const State& state) const
- {
- return m_scanner1->Dead(state.first) && m_scanner2->Dead(state.second);
- }
-
- ypair<size_t, size_t> StateIndex(const State& state) const
- {
- return ymake_pair(m_scanner1->StateIndex(state.first), m_scanner2->StateIndex(state.second));
- }
-
- Scanner1& First() { return *m_scanner1; }
- Scanner2& Second() { return *m_scanner2; }
-
- const Scanner1& First() const { return *m_scanner1; }
- const Scanner2& Second() const { return *m_scanner2; }
-
- private:
- const Scanner1* m_scanner1;
- const Scanner2* m_scanner2;
- };
-
-
-}
-
-#endif
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+#ifndef PIRE_SCANNER_PAIR_INCLUDED
+#define PIRE_SCANNER_PAIR_INCLUDED
+
+namespace Pire {
+
+ /**
+ * A pair of scanners, providing the interface of a scanner itself.
+ * If you need to run two scanners on the same string, using ScannerPair
+ * is usually faster than running those scanners sequentially.
+ */
+ template<class Scanner1, class Scanner2>
+ class ScannerPair {
+ public:
+ typedef ypair<typename Scanner1::State, typename Scanner2::State> State;
+ typedef ypair<typename Scanner1::Action, typename Scanner2::Action> Action;
+
+ ScannerPair()
+ : m_scanner1()
+ , m_scanner2()
+ {
+ }
+ ScannerPair(const Scanner1& s1, const Scanner2& s2)
+ : m_scanner1(&s1)
+ , m_scanner2(&s2)
+ {
+ }
+
+ void Initialize(State& state) const
+ {
+ m_scanner1->Initialize(state.first);
+ m_scanner2->Initialize(state.second);
+ }
+
+ Action Next(State& state, Char ch) const
+ {
+ return ymake_pair(
+ m_scanner1->Next(state.first, ch),
+ m_scanner2->Next(state.second, ch)
+ );
+ }
+
+ void TakeAction(State& s, Action a) const
+ {
+ m_scanner1->TakeAction(s.first, a.first);
+ m_scanner2->TakeAction(s.second, a.second);
+ }
+
+ bool Final(const State& state) const
+ {
+ return m_scanner1->Final(state.first) || m_scanner2->Final(state.second);
+ }
+
+ bool Dead(const State& state) const
+ {
+ return m_scanner1->Dead(state.first) && m_scanner2->Dead(state.second);
+ }
+
+ ypair<size_t, size_t> StateIndex(const State& state) const
+ {
+ return ymake_pair(m_scanner1->StateIndex(state.first), m_scanner2->StateIndex(state.second));
+ }
+
+ Scanner1& First() { return *m_scanner1; }
+ Scanner2& Second() { return *m_scanner2; }
+
+ const Scanner1& First() const { return *m_scanner1; }
+ const Scanner2& Second() const { return *m_scanner2; }
+
+ private:
+ const Scanner1* m_scanner1;
+ const Scanner2* m_scanner2;
+ };
+
+
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h
index ef959aeed1..3175e105da 100644
--- a/contrib/libs/pire/pire/scanners/simple.h
+++ b/contrib/libs/pire/pire/scanners/simple.h
@@ -1,190 +1,190 @@
-/*
- * simple.h -- the definition of the SimpleScanner
+/*
+ * simple.h -- the definition of the SimpleScanner
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_SCANNERS_SIMPLE_H
-#define PIRE_SCANNERS_SIMPLE_H
-
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_SCANNERS_SIMPLE_H
+#define PIRE_SCANNERS_SIMPLE_H
+
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/stub/saveload.h>
-#include "common.h"
-
-namespace Pire {
-
-/**
- * More faster version than the Scanner, but incapable of storing multiple
- * regexps and taking more memory for the same regexp.
- */
-class SimpleScanner {
-private:
- static const size_t STATE_ROW_SIZE = MaxChar + 1; // All characters + 1 element to store final state flag
-
-public:
- typedef size_t Transition;
- typedef ui16 Letter;
- typedef ui32 Action;
- typedef ui8 Tag;
-
- SimpleScanner() { Alias(Null()); }
+#include "common.h"
+
+namespace Pire {
+
+/**
+ * A faster version of the Scanner, but incapable of storing multiple
+ * regexps and taking more memory for the same regexp.
+ */
+class SimpleScanner {
+private:
+ static const size_t STATE_ROW_SIZE = MaxChar + 1; // All characters + 1 element to store final state flag
+
+public:
+ typedef size_t Transition;
+ typedef ui16 Letter;
+ typedef ui32 Action;
+ typedef ui8 Tag;
+
+ SimpleScanner() { Alias(Null()); }
explicit SimpleScanner(Fsm& fsm, size_t distance = 0);
-
- size_t Size() const { return m.statesCount; }
- bool Empty() const { return m_transitions == Null().m_transitions; }
-
- typedef size_t State;
-
- size_t RegexpsCount() const { return Empty() ? 0 : 1; }
- size_t LettersCount() const { return MaxChar; }
-
- /// Checks whether specified state is in any of the final sets
- bool Final(const State& state) const { return *(((const Transition*) state) - 1) != 0; }
-
- bool Dead(const State&) const { return false; }
-
+
+ size_t Size() const { return m.statesCount; }
+ bool Empty() const { return m_transitions == Null().m_transitions; }
+
+ typedef size_t State;
+
+ size_t RegexpsCount() const { return Empty() ? 0 : 1; }
+ size_t LettersCount() const { return MaxChar; }
+
+ /// Checks whether specified state is in any of the final sets
+ bool Final(const State& state) const { return *(((const Transition*) state) - 1) != 0; }
+
+ bool Dead(const State&) const { return false; }
+
ypair<const size_t*, const size_t*> AcceptedRegexps(const State& s) const {
return Final(s) ? Accept() : Deny();
}
- /// returns an initial state for this scanner
- void Initialize(State& state) const { state = m.initial; }
-
- /// Handles one characters
- Action Next(State& state, Char c) const
- {
- Transition shift = reinterpret_cast<const Transition*>(state)[c];
- state += shift;
- return 0;
- }
-
- bool TakeAction(State&, Action) const { return false; }
-
- SimpleScanner(const SimpleScanner& s): m(s.m)
- {
- if (!s.m_buffer) {
- // Empty or mmap()-ed scanner, just copy pointers
- m_buffer = 0;
- m_transitions = s.m_transitions;
- } else {
- // In-memory scanner, perform deep copy
+ /// returns an initial state for this scanner
+ void Initialize(State& state) const { state = m.initial; }
+
+ /// Handles one character
+ Action Next(State& state, Char c) const
+ {
+ Transition shift = reinterpret_cast<const Transition*>(state)[c];
+ state += shift;
+ return 0;
+ }
+
+ bool TakeAction(State&, Action) const { return false; }
+
+ SimpleScanner(const SimpleScanner& s): m(s.m)
+ {
+ if (!s.m_buffer) {
+ // Empty or mmap()-ed scanner, just copy pointers
+ m_buffer = 0;
+ m_transitions = s.m_transitions;
+ } else {
+ // In-memory scanner, perform deep copy
m_buffer = BufferType(new char[BufSize()]);
memcpy(m_buffer.Get(), s.m_buffer.Get(), BufSize());
Markup(m_buffer.Get());
-
- m.initial += (m_transitions - s.m_transitions) * sizeof(Transition);
- }
- }
-
- // Makes a shallow ("weak") copy of the given scanner.
- // The copied scanner does not maintain lifetime of the original's entrails.
- void Alias(const SimpleScanner& s)
- {
- m = s.m;
+
+ m.initial += (m_transitions - s.m_transitions) * sizeof(Transition);
+ }
+ }
+
+ // Makes a shallow ("weak") copy of the given scanner.
+ // The copied scanner does not maintain lifetime of the original's entrails.
+ void Alias(const SimpleScanner& s)
+ {
+ m = s.m;
m_buffer.Reset();
- m_transitions = s.m_transitions;
- }
-
- void Swap(SimpleScanner& s)
- {
- DoSwap(m_buffer, s.m_buffer);
- DoSwap(m.statesCount, s.m.statesCount);
- DoSwap(m.initial, s.m.initial);
- DoSwap(m_transitions, s.m_transitions);
- }
-
- SimpleScanner& operator = (const SimpleScanner& s) { SimpleScanner(s).Swap(*this); return *this; }
-
+ m_transitions = s.m_transitions;
+ }
+
+ void Swap(SimpleScanner& s)
+ {
+ DoSwap(m_buffer, s.m_buffer);
+ DoSwap(m.statesCount, s.m.statesCount);
+ DoSwap(m.initial, s.m.initial);
+ DoSwap(m_transitions, s.m_transitions);
+ }
+
+ SimpleScanner& operator = (const SimpleScanner& s) { SimpleScanner(s).Swap(*this); return *this; }
+
~SimpleScanner() = default;
-
- /*
- * Constructs the scanner from mmap()-ed memory range, returning a pointer
- * to unconsumed part of the buffer.
- */
- const void* Mmap(const void* ptr, size_t size)
- {
- Impl::CheckAlign(ptr);
- SimpleScanner s;
-
- const size_t* p = reinterpret_cast<const size_t*>(ptr);
+
+ /*
+ * Constructs the scanner from mmap()-ed memory range, returning a pointer
+ * to unconsumed part of the buffer.
+ */
+ const void* Mmap(const void* ptr, size_t size)
+ {
+ Impl::CheckAlign(ptr);
+ SimpleScanner s;
+
+ const size_t* p = reinterpret_cast<const size_t*>(ptr);
Impl::ValidateHeader(p, size, ScannerIOTypes::SimpleScanner, sizeof(m));
- if (size < sizeof(s.m))
- throw Error("EOF reached while mapping NPire::Scanner");
-
- memcpy(&s.m, p, sizeof(s.m));
- Impl::AdvancePtr(p, size, sizeof(s.m));
- Impl::AlignPtr(p, size);
-
- bool empty = *((const bool*) p);
- Impl::AdvancePtr(p, size, sizeof(empty));
- Impl::AlignPtr(p, size);
-
- if (empty)
- s.Alias(Null());
- else {
- if (size < s.BufSize())
- throw Error("EOF reached while mapping NPire::Scanner");
- s.Markup(const_cast<size_t*>(p));
- s.m.initial += reinterpret_cast<size_t>(s.m_transitions);
-
- Swap(s);
- Impl::AdvancePtr(p, size, BufSize());
- }
- return Impl::AlignPtr(p, size);
- }
-
- size_t StateIndex(State s) const
- {
- return (s - reinterpret_cast<size_t>(m_transitions)) / (STATE_ROW_SIZE * sizeof(Transition));
- }
-
- // Returns the size of the memory buffer used (or required) by scanner.
- size_t BufSize() const
- {
- return STATE_ROW_SIZE * m.statesCount * sizeof(Transition); // Transitions table
- }
-
- void Save(yostream*) const;
- void Load(yistream*);
-
-protected:
- struct Locals {
- size_t statesCount;
- size_t initial;
- } m;
-
+ if (size < sizeof(s.m))
+ throw Error("EOF reached while mapping NPire::Scanner");
+
+ memcpy(&s.m, p, sizeof(s.m));
+ Impl::AdvancePtr(p, size, sizeof(s.m));
+ Impl::AlignPtr(p, size);
+
+ bool empty = *((const bool*) p);
+ Impl::AdvancePtr(p, size, sizeof(empty));
+ Impl::AlignPtr(p, size);
+
+ if (empty)
+ s.Alias(Null());
+ else {
+ if (size < s.BufSize())
+ throw Error("EOF reached while mapping NPire::Scanner");
+ s.Markup(const_cast<size_t*>(p));
+ s.m.initial += reinterpret_cast<size_t>(s.m_transitions);
+
+ Swap(s);
+ Impl::AdvancePtr(p, size, BufSize());
+ }
+ return Impl::AlignPtr(p, size);
+ }
+
+ size_t StateIndex(State s) const
+ {
+ return (s - reinterpret_cast<size_t>(m_transitions)) / (STATE_ROW_SIZE * sizeof(Transition));
+ }
+
+ // Returns the size of the memory buffer used (or required) by scanner.
+ size_t BufSize() const
+ {
+ return STATE_ROW_SIZE * m.statesCount * sizeof(Transition); // Transitions table
+ }
+
+ void Save(yostream*) const;
+ void Load(yistream*);
+
+protected:
+ struct Locals {
+ size_t statesCount;
+ size_t initial;
+ } m;
+
using BufferType = TArrayHolder<char>;
BufferType m_buffer;
-
- Transition* m_transitions;
-
- inline static const SimpleScanner& Null()
- {
- static const SimpleScanner n = Fsm::MakeFalse().Compile<SimpleScanner>();
- return n;
- }
-
+
+ Transition* m_transitions;
+
+ inline static const SimpleScanner& Null()
+ {
+ static const SimpleScanner n = Fsm::MakeFalse().Compile<SimpleScanner>();
+ return n;
+ }
+
static ypair<const size_t*, const size_t*> Accept()
{
static size_t v[1] = { 0 };
@@ -197,65 +197,65 @@ protected:
return ymake_pair(v, v);
}
- /*
- * Initializes pointers depending on buffer start, letters and states count
- */
- void Markup(void* ptr)
- {
- m_transitions = reinterpret_cast<Transition*>(ptr);
- }
-
- void SetJump(size_t oldState, Char c, size_t newState)
- {
+ /*
+ * Initializes pointers depending on buffer start, letters and states count
+ */
+ void Markup(void* ptr)
+ {
+ m_transitions = reinterpret_cast<Transition*>(ptr);
+ }
+
+ void SetJump(size_t oldState, Char c, size_t newState)
+ {
Y_ASSERT(m_buffer);
Y_ASSERT(oldState < m.statesCount);
Y_ASSERT(newState < m.statesCount);
- m_transitions[oldState * STATE_ROW_SIZE + 1 + c]
- = (((newState - oldState) * STATE_ROW_SIZE) * sizeof(Transition));
- }
-
- unsigned long RemapAction(unsigned long action) { return action; }
-
- void SetInitial(size_t state)
- {
+ m_transitions[oldState * STATE_ROW_SIZE + 1 + c]
+ = (((newState - oldState) * STATE_ROW_SIZE) * sizeof(Transition));
+ }
+
+ unsigned long RemapAction(unsigned long action) { return action; }
+
+ void SetInitial(size_t state)
+ {
Y_ASSERT(m_buffer);
- m.initial = reinterpret_cast<size_t>(m_transitions + state * STATE_ROW_SIZE + 1);
- }
-
- void SetTag(size_t state, size_t tag)
- {
+ m.initial = reinterpret_cast<size_t>(m_transitions + state * STATE_ROW_SIZE + 1);
+ }
+
+ void SetTag(size_t state, size_t tag)
+ {
Y_ASSERT(m_buffer);
- m_transitions[state * STATE_ROW_SIZE] = tag;
- }
-
-};
+ m_transitions[state * STATE_ROW_SIZE] = tag;
+ }
+
+};
inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
-{
+{
if (distance) {
fsm = CreateApproxFsm(fsm, distance);
}
- fsm.Canonize();
+ fsm.Canonize();
- m.statesCount = fsm.Size();
+ m.statesCount = fsm.Size();
m_buffer = BufferType(new char[BufSize()]);
memset(m_buffer.Get(), 0, BufSize());
Markup(m_buffer.Get());
- m.initial = reinterpret_cast<size_t>(m_transitions + fsm.Initial() * STATE_ROW_SIZE + 1);
- for (size_t state = 0; state < fsm.Size(); ++state)
- SetTag(state, fsm.Tag(state) | (fsm.IsFinal(state) ? 1 : 0));
-
- for (size_t from = 0; from != fsm.Size(); ++from)
+ m.initial = reinterpret_cast<size_t>(m_transitions + fsm.Initial() * STATE_ROW_SIZE + 1);
+ for (size_t state = 0; state < fsm.Size(); ++state)
+ SetTag(state, fsm.Tag(state) | (fsm.IsFinal(state) ? 1 : 0));
+
+ for (size_t from = 0; from != fsm.Size(); ++from)
for (auto&& i : fsm.Letters()) {
const auto& tos = fsm.Destinations(from, i.first);
- if (tos.empty())
- continue;
+ if (tos.empty())
+ continue;
for (auto&& l : i.second.second)
for (auto&& to : tos)
SetJump(from, l, to);
- }
-}
-
-
-}
+ }
+}
+
-#endif
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 6adfcb8c1d..fa449bb1c5 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -1,29 +1,29 @@
-/*
- * slow.h -- definition of the SlowScanner
+/*
+ * slow.h -- definition of the SlowScanner
+ *
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
*
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_SCANNERS_SLOW_H
-#define PIRE_SCANNERS_SLOW_H
-
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_SCANNERS_SLOW_H
+#define PIRE_SCANNERS_SLOW_H
+
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/partition.h>
#include <contrib/libs/pire/pire/vbitset.h>
@@ -32,271 +32,271 @@
#include <contrib/libs/pire/pire/stub/saveload.h>
#include <contrib/libs/pire/pire/stub/stl.h>
-#include "common.h"
-
-#ifdef PIRE_DEBUG
-#include <iostream>
+#include "common.h"
+
+#ifdef PIRE_DEBUG
+#include <iostream>
#include <contrib/libs/pire/pire/stub/lexical_cast.h>
-#endif
-
-namespace Pire {
-
-/**
- * A 'slow' scanner.
- * Takes O( str.length() * this->m_states.size() ) time to scan string,
- * but does not require FSM to be deterministic.
- * Thus can be used to handle something sorta /x.{40}$/,
- * where deterministic FSM contains 2^40 states and hence cannot fit
- * in memory.
- */
-class SlowScanner {
-public:
- typedef size_t Transition;
- typedef ui16 Letter;
- typedef ui32 Action;
- typedef ui8 Tag;
-
+#endif
+
+namespace Pire {
+
+/**
+ * A 'slow' scanner.
+ * Takes O( str.length() * this->m_states.size() ) time to scan string,
+ * but does not require FSM to be deterministic.
+ * Thus it can be used to handle something like /x.{40}$/,
+ * where deterministic FSM contains 2^40 states and hence cannot fit
+ * in memory.
+ */
+class SlowScanner {
+public:
+ typedef size_t Transition;
+ typedef ui16 Letter;
+ typedef ui32 Action;
+ typedef ui8 Tag;
+
enum {
- FinalFlag = 1,
- DeadFlag = 0
- };
-
- struct State {
+ FinalFlag = 1,
+ DeadFlag = 0
+ };
+
+ struct State {
TVector<unsigned> states;
- BitSet flags;
-
- State() {}
- State(size_t size): flags(size) { states.reserve(size); }
- void Swap(State& s) { states.swap(s.states); flags.Swap(s.flags); }
-
-#ifdef PIRE_DEBUG
- friend yostream& operator << (yostream& stream, const State& state) { return stream << Join(state.states.begin(), state.states.end(), ", "); }
-#endif
- };
-
+ BitSet flags;
+
+ State() {}
+ State(size_t size): flags(size) { states.reserve(size); }
+ void Swap(State& s) { states.swap(s.states); flags.Swap(s.flags); }
+
+#ifdef PIRE_DEBUG
+ friend yostream& operator << (yostream& stream, const State& state) { return stream << Join(state.states.begin(), state.states.end(), ", "); }
+#endif
+ };
+
SlowScanner(bool needActions = false) {
Alias(Null());
need_actions = needActions;
}
-
+
size_t GetLettersCount() const {return m.lettersCount; };
size_t Size() const { return GetSize(); }
size_t GetSize() const { return m.statesCount; }
- bool Empty() const { return m_finals == Null().m_finals; }
-
- size_t Id() const {return (size_t) -1;}
- size_t RegexpsCount() const { return Empty() ? 0 : 1; }
-
- void Initialize(State& state) const
- {
- state.states.clear();
- state.states.reserve(m.statesCount);
- state.states.push_back(m.start);
- BitSet(m.statesCount).Swap(state.flags);
- }
-
+ bool Empty() const { return m_finals == Null().m_finals; }
+
+ size_t Id() const {return (size_t) -1;}
+ size_t RegexpsCount() const { return Empty() ? 0 : 1; }
+
+ void Initialize(State& state) const
+ {
+ state.states.clear();
+ state.states.reserve(m.statesCount);
+ state.states.push_back(m.start);
+ BitSet(m.statesCount).Swap(state.flags);
+ }
+
Char Translate(Char ch) const
- {
+ {
return m_letters[static_cast<size_t>(ch)];
}
Action NextTranslated(const State& current, State& next, Char l) const
{
- next.flags.Clear();
- next.states.clear();
+ next.flags.Clear();
+ next.states.clear();
for (auto&& state : current.states) {
- const unsigned* begin = 0;
- const unsigned* end = 0;
- if (!m_vecptr) {
+ const unsigned* begin = 0;
+ const unsigned* end = 0;
+ if (!m_vecptr) {
const size_t* pos = m_jumpPos + state * m.lettersCount + l;
- begin = m_jumps + pos[0];
- end = m_jumps + pos[1];
- } else {
+ begin = m_jumps + pos[0];
+ end = m_jumps + pos[1];
+ } else {
const auto& v = (*m_vecptr)[state * m.lettersCount + l];
- if (!v.empty()) {
- begin = &v[0];
- end = &v[0] + v.size();
- }
- }
-
- for (; begin != end; ++begin)
- if (!next.flags.Test(*begin)) {
- next.flags.Set(*begin);
- next.states.push_back(*begin);
- }
- }
-
- return 0;
- }
-
+ if (!v.empty()) {
+ begin = &v[0];
+ end = &v[0] + v.size();
+ }
+ }
+
+ for (; begin != end; ++begin)
+ if (!next.flags.Test(*begin)) {
+ next.flags.Set(*begin);
+ next.states.push_back(*begin);
+ }
+ }
+
+ return 0;
+ }
+
Action Next(const State& current, State& next, Char c) const
{
return NextTranslated(current, next, Translate(c));
}
- bool TakeAction(State&, Action) const { return false; }
-
+ bool TakeAction(State&, Action) const { return false; }
+
Action NextTranslated(State& s, Char l) const
- {
- State dest(m.statesCount);
+ {
+ State dest(m.statesCount);
Action a = NextTranslated(s, dest, l);
- s.Swap(dest);
- return a;
- }
-
+ s.Swap(dest);
+ return a;
+ }
+
Action Next(State& s, Char c) const
{
return NextTranslated(s, Translate(c));
}
- bool Final(const State& s) const
- {
+ bool Final(const State& s) const
+ {
for (auto&& state : s.states)
if (m_finals[state])
- return true;
- return false;
- }
-
- bool Dead(const State&) const
- {
- return false;
- }
-
- ypair<const size_t*, const size_t*> AcceptedRegexps(const State& s) const {
- return Final(s) ? Accept() : Deny();
- }
-
- bool CanStop(const State& s) const {
- return Final(s);
- }
-
- const void* Mmap(const void* ptr, size_t size)
- {
- Impl::CheckAlign(ptr);
- SlowScanner s;
- const size_t* p = reinterpret_cast<const size_t*>(ptr);
-
+ return true;
+ return false;
+ }
+
+ bool Dead(const State&) const
+ {
+ return false;
+ }
+
+ ypair<const size_t*, const size_t*> AcceptedRegexps(const State& s) const {
+ return Final(s) ? Accept() : Deny();
+ }
+
+ bool CanStop(const State& s) const {
+ return Final(s);
+ }
+
+ const void* Mmap(const void* ptr, size_t size)
+ {
+ Impl::CheckAlign(ptr);
+ SlowScanner s;
+ const size_t* p = reinterpret_cast<const size_t*>(ptr);
+
Impl::ValidateHeader(p, size, ScannerIOTypes::SlowScanner, sizeof(s.m));
- Locals* locals;
- Impl::MapPtr(locals, 1, p, size);
- memcpy(&s.m, locals, sizeof(s.m));
-
- bool empty = *((const bool*) p);
- Impl::AdvancePtr(p, size, sizeof(empty));
- Impl::AlignPtr(p, size);
-
- if (empty)
- s.Alias(Null());
- else {
- s.m_vecptr = 0;
- Impl::MapPtr(s.m_letters, MaxChar, p, size);
- Impl::MapPtr(s.m_finals, s.m.statesCount, p, size);
- Impl::MapPtr(s.m_jumpPos, s.m.statesCount * s.m.lettersCount + 1, p, size);
- Impl::MapPtr(s.m_jumps, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size);
+ Locals* locals;
+ Impl::MapPtr(locals, 1, p, size);
+ memcpy(&s.m, locals, sizeof(s.m));
+
+ bool empty = *((const bool*) p);
+ Impl::AdvancePtr(p, size, sizeof(empty));
+ Impl::AlignPtr(p, size);
+
+ if (empty)
+ s.Alias(Null());
+ else {
+ s.m_vecptr = 0;
+ Impl::MapPtr(s.m_letters, MaxChar, p, size);
+ Impl::MapPtr(s.m_finals, s.m.statesCount, p, size);
+ Impl::MapPtr(s.m_jumpPos, s.m.statesCount * s.m.lettersCount + 1, p, size);
+ Impl::MapPtr(s.m_jumps, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size);
if (need_actions)
Impl::MapPtr(s.m_actions, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size);
- Swap(s);
- }
- return (const void*) p;
- }
-
- void Swap(SlowScanner& s)
- {
- DoSwap(m_finals, s.m_finals);
- DoSwap(m_jumps, s.m_jumps);
+ Swap(s);
+ }
+ return (const void*) p;
+ }
+
+ void Swap(SlowScanner& s)
+ {
+ DoSwap(m_finals, s.m_finals);
+ DoSwap(m_jumps, s.m_jumps);
DoSwap(m_actions, s.m_actions);
- DoSwap(m_jumpPos, s.m_jumpPos);
- DoSwap(m.statesCount, s.m.statesCount);
- DoSwap(m.lettersCount, s.m.lettersCount);
- DoSwap(m.start, s.m.start);
- DoSwap(m_letters, s.m_letters);
- DoSwap(m_pool, s.m_pool);
- DoSwap(m_vec, s.m_vec);
-
- DoSwap(m_vecptr, s.m_vecptr);
+ DoSwap(m_jumpPos, s.m_jumpPos);
+ DoSwap(m.statesCount, s.m.statesCount);
+ DoSwap(m.lettersCount, s.m.lettersCount);
+ DoSwap(m.start, s.m.start);
+ DoSwap(m_letters, s.m_letters);
+ DoSwap(m_pool, s.m_pool);
+ DoSwap(m_vec, s.m_vec);
+
+ DoSwap(m_vecptr, s.m_vecptr);
DoSwap(need_actions, s.need_actions);
DoSwap(m_actionsvec, s.m_actionsvec);
- if (m_vecptr == &s.m_vec)
- m_vecptr = &m_vec;
- if (s.m_vecptr == &m_vec)
- s.m_vecptr = &s.m_vec;
- }
-
- SlowScanner(const SlowScanner& s)
- : m(s.m)
- , m_vec(s.m_vec)
+ if (m_vecptr == &s.m_vec)
+ m_vecptr = &m_vec;
+ if (s.m_vecptr == &m_vec)
+ s.m_vecptr = &s.m_vec;
+ }
+
+ SlowScanner(const SlowScanner& s)
+ : m(s.m)
+ , m_vec(s.m_vec)
, need_actions(s.need_actions)
, m_actionsvec(s.m_actionsvec)
- {
- if (s.m_vec.empty()) {
- // Empty or mmap()-ed scanner, just copy pointers
- m_finals = s.m_finals;
- m_jumps = s.m_jumps;
+ {
+ if (s.m_vec.empty()) {
+ // Empty or mmap()-ed scanner, just copy pointers
+ m_finals = s.m_finals;
+ m_jumps = s.m_jumps;
m_actions = s.m_actions;
- m_jumpPos = s.m_jumpPos;
- m_letters = s.m_letters;
- m_vecptr = 0;
- } else {
- // In-memory scanner, perform deep copy
- alloc(m_letters, MaxChar);
- memcpy(m_letters, s.m_letters, sizeof(*m_letters) * MaxChar);
- m_jumps = 0;
- m_jumpPos = 0;
+ m_jumpPos = s.m_jumpPos;
+ m_letters = s.m_letters;
+ m_vecptr = 0;
+ } else {
+ // In-memory scanner, perform deep copy
+ alloc(m_letters, MaxChar);
+ memcpy(m_letters, s.m_letters, sizeof(*m_letters) * MaxChar);
+ m_jumps = 0;
+ m_jumpPos = 0;
m_actions = 0;
- alloc(m_finals, m.statesCount);
- memcpy(m_finals, s.m_finals, sizeof(*m_finals) * m.statesCount);
- m_vecptr = &m_vec;
- }
- }
+ alloc(m_finals, m.statesCount);
+ memcpy(m_finals, s.m_finals, sizeof(*m_finals) * m.statesCount);
+ m_vecptr = &m_vec;
+ }
+ }
explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
: need_actions(needActions)
- {
+ {
if (distance) {
fsm = CreateApproxFsm(fsm, distance);
}
if (removeEpsilons)
fsm.RemoveEpsilons();
fsm.Sparse(!removeEpsilons);
-
- m.statesCount = fsm.Size();
- m.lettersCount = fsm.Letters().Size();
-
- m_vec.resize(m.statesCount * m.lettersCount);
+
+ m.statesCount = fsm.Size();
+ m.lettersCount = fsm.Letters().Size();
+
+ m_vec.resize(m.statesCount * m.lettersCount);
if (need_actions)
m_actionsvec.resize(m.statesCount * m.lettersCount);
- m_vecptr = &m_vec;
- alloc(m_letters, MaxChar);
- m_jumps = 0;
+ m_vecptr = &m_vec;
+ alloc(m_letters, MaxChar);
+ m_jumps = 0;
m_actions = 0;
- m_jumpPos = 0;
- alloc(m_finals, m.statesCount);
-
- // Build letter translation table
+ m_jumpPos = 0;
+ alloc(m_finals, m.statesCount);
+
+ // Build letter translation table
Fill(m_letters, m_letters + MaxChar, 0);
for (auto&& letter : fsm.Letters())
for (auto&& character : letter.second.second)
m_letters[character] = letter.second.first;
-
- m.start = fsm.Initial();
- BuildScanner(fsm, *this);
- }
-
-
- SlowScanner& operator = (const SlowScanner& s) { SlowScanner(s).Swap(*this); return *this; }
-
- ~SlowScanner()
- {
+
+ m.start = fsm.Initial();
+ BuildScanner(fsm, *this);
+ }
+
+
+ SlowScanner& operator = (const SlowScanner& s) { SlowScanner(s).Swap(*this); return *this; }
+
+ ~SlowScanner()
+ {
for (auto&& i : m_pool)
free(i);
- }
-
- void Save(yostream*) const;
- void Load(yistream*);
-
- const State& StateIndex(const State& s) const { return s; }
-
+ }
+
+ void Save(yostream*) const;
+ void Load(yistream*);
+
+ const State& StateIndex(const State& s) const { return s; }
+
protected:
bool IsMmaped() const
{
@@ -338,84 +338,84 @@ protected:
return m_finals[pos];
}
-private:
-
- struct Locals {
- size_t statesCount;
- size_t lettersCount;
- size_t start;
- } m;
-
- bool* m_finals;
- unsigned* m_jumps;
+private:
+
+ struct Locals {
+ size_t statesCount;
+ size_t lettersCount;
+ size_t start;
+ } m;
+
+ bool* m_finals;
+ unsigned* m_jumps;
Action* m_actions;
- size_t* m_jumpPos;
- size_t* m_letters;
-
+ size_t* m_jumpPos;
+ size_t* m_letters;
+
TVector<void*> m_pool;
TVector< TVector<unsigned> > m_vec, *m_vecptr;
-
+
bool need_actions;
TVector<TVector<Action>> m_actionsvec;
static const SlowScanner& Null();
-
- template<class T> void alloc(T*& p, size_t size)
- {
- p = static_cast<T*>(malloc(size * sizeof(T)));
- memset(p, 0, size * sizeof(T));
- m_pool.push_back(p);
- }
-
- void Alias(const SlowScanner& s)
+
+ template<class T> void alloc(T*& p, size_t size)
+ {
+ p = static_cast<T*>(malloc(size * sizeof(T)));
+ memset(p, 0, size * sizeof(T));
+ m_pool.push_back(p);
+ }
+
+ void Alias(const SlowScanner& s)
{
- memcpy(&m, &s.m, sizeof(m));
- m_vec.clear();
+ memcpy(&m, &s.m, sizeof(m));
+ m_vec.clear();
need_actions = s.need_actions;
m_actionsvec.clear();
- m_finals = s.m_finals;
- m_jumps = s.m_jumps;
+ m_finals = s.m_finals;
+ m_jumps = s.m_jumps;
m_actions = s.m_actions;
- m_jumpPos = s.m_jumpPos;
- m_letters = s.m_letters;
- m_vecptr = s.m_vecptr;
- m_pool.clear();
- }
+ m_jumpPos = s.m_jumpPos;
+ m_letters = s.m_letters;
+ m_vecptr = s.m_vecptr;
+ m_pool.clear();
+ }
void SetJump(size_t oldState, Char c, size_t newState, unsigned long action)
- {
+ {
Y_ASSERT(!m_vec.empty());
Y_ASSERT(oldState < m.statesCount);
Y_ASSERT(newState < m.statesCount);
-
- size_t idx = oldState * m.lettersCount + m_letters[c];
- m_vec[idx].push_back(newState);
+
+ size_t idx = oldState * m.lettersCount + m_letters[c];
+ m_vec[idx].push_back(newState);
if (need_actions)
m_actionsvec[idx].push_back(action);
- }
-
- unsigned long RemapAction(unsigned long action) { return action; }
-
- void SetInitial(size_t state) { m.start = state; }
- void SetTag(size_t state, ui8 tag) { m_finals[state] = (tag != 0); }
-
- void FinishBuild() {}
-
- static ypair<const size_t*, const size_t*> Accept()
- {
- static size_t v[1] = { 0 };
-
- return ymake_pair(v, v + 1);
- }
-
- static ypair<const size_t*, const size_t*> Deny()
- {
- static size_t v[1] = { 0 };
- return ymake_pair(v, v);
- }
-
- friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&);
-};
-
+ }
+
+ unsigned long RemapAction(unsigned long action) { return action; }
+
+ void SetInitial(size_t state) { m.start = state; }
+ void SetTag(size_t state, ui8 tag) { m_finals[state] = (tag != 0); }
+
+ void FinishBuild() {}
+
+ static ypair<const size_t*, const size_t*> Accept()
+ {
+ static size_t v[1] = { 0 };
+
+ return ymake_pair(v, v + 1);
+ }
+
+ static ypair<const size_t*, const size_t*> Deny()
+ {
+ static size_t v[1] = { 0 };
+ return ymake_pair(v, v);
+ }
+
+ friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&);
+};
+
template<>
inline SlowScanner Fsm::Compile(size_t distance) {
return SlowScanner(*this, false, true, distance);
@@ -428,27 +428,27 @@ inline const SlowScanner& SlowScanner::Null()
}
#ifndef PIRE_DEBUG
-/// A specialization of Run(), since its state is much heavier than other ones
-/// and we thus want to avoid copying states.
-template<>
+/// A specialization of Run(), since its state is much heavier than other ones
+/// and we thus want to avoid copying states.
+template<>
inline void Run<SlowScanner>(const SlowScanner& scanner, SlowScanner::State& state, TStringBuf str)
-{
- SlowScanner::State temp;
- scanner.Initialize(temp);
-
- SlowScanner::State* src = &state;
- SlowScanner::State* dest = &temp;
-
+{
+ SlowScanner::State temp;
+ scanner.Initialize(temp);
+
+ SlowScanner::State* src = &state;
+ SlowScanner::State* dest = &temp;
+
for (auto it = str.begin(); it != str.end(); ++it) {
scanner.Next(*src, *dest, static_cast<unsigned char>(*it));
- DoSwap(src, dest);
- }
- if (src != &state)
- state = *src;
-}
-#endif
-
-}
-
-
-#endif
+ DoSwap(src, dest);
+ }
+ if (src != &state)
+ state = *src;
+}
+#endif
+
+}
+
+
+#endif
diff --git a/contrib/libs/pire/pire/static_assert.h b/contrib/libs/pire/pire/static_assert.h
index 90dd0ff4f0..f56a899ae7 100644
--- a/contrib/libs/pire/pire/static_assert.h
+++ b/contrib/libs/pire/pire/static_assert.h
@@ -1,36 +1,36 @@
-/*
- * static_assert.h -- compile-time assertions
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * static_assert.h -- compile-time assertions
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-#ifndef PIRE_ASSERT_H_INCLUDED
-#define PIRE_ASSERT_H_INCLUDED
-
-namespace Pire { namespace Impl {
-
- // A static (compile-tile) assertion.
- // The idea was shamelessly borrowed from Boost.
- template<bool x> struct StaticAssertion;
- template<> struct StaticAssertion<true> {};
-#define PIRE_STATIC_ASSERT(x) \
- enum { PireStaticAssertion ## __LINE__ = sizeof(Pire::Impl::StaticAssertion<(bool) (x)>) }
-}}
-
-#endif
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+#ifndef PIRE_ASSERT_H_INCLUDED
+#define PIRE_ASSERT_H_INCLUDED
+
+namespace Pire { namespace Impl {
+
+ // A static (compile-time) assertion.
+ // The idea was shamelessly borrowed from Boost.
+ template<bool x> struct StaticAssertion;
+ template<> struct StaticAssertion<true> {};
+#define PIRE_STATIC_ASSERT(x) \
+ enum { PireStaticAssertion ## __LINE__ = sizeof(Pire::Impl::StaticAssertion<(bool) (x)>) }
+}}
+
+#endif
diff --git a/contrib/libs/pire/pire/stub/defaults.h b/contrib/libs/pire/pire/stub/defaults.h
index 561c23251b..50900a8731 100644
--- a/contrib/libs/pire/pire/stub/defaults.h
+++ b/contrib/libs/pire/pire/stub/defaults.h
@@ -1,3 +1,3 @@
-#include <util/system/defaults.h>
-#include <errno.h>
-
+#include <util/system/defaults.h>
+#include <errno.h>
+
diff --git a/contrib/libs/pire/pire/stub/hacks.h b/contrib/libs/pire/pire/stub/hacks.h
index 07319b0b37..6172a0ee2a 100644
--- a/contrib/libs/pire/pire/stub/hacks.h
+++ b/contrib/libs/pire/pire/stub/hacks.h
@@ -1,7 +1,7 @@
-#ifndef PIRE_STUB_USE_PIRE_H_INCLUDED
-#define PIRE_STUB_USE_PIRE_H_INCLUDED
-
-namespace Pire {}
-using namespace Pire;
-
-#endif
+#ifndef PIRE_STUB_USE_PIRE_H_INCLUDED
+#define PIRE_STUB_USE_PIRE_H_INCLUDED
+
+namespace Pire {}
+using namespace Pire;
+
+#endif
diff --git a/contrib/libs/pire/pire/stub/lexical_cast.h b/contrib/libs/pire/pire/stub/lexical_cast.h
index a060c4dddb..68d6dc92ae 100644
--- a/contrib/libs/pire/pire/stub/lexical_cast.h
+++ b/contrib/libs/pire/pire/stub/lexical_cast.h
@@ -1 +1 @@
-#include <util/string/cast.h>
+#include <util/string/cast.h>
diff --git a/contrib/libs/pire/pire/stub/memstreams.h b/contrib/libs/pire/pire/stub/memstreams.h
index 92c75ca6c9..5cfd9a7896 100644
--- a/contrib/libs/pire/pire/stub/memstreams.h
+++ b/contrib/libs/pire/pire/stub/memstreams.h
@@ -1,11 +1,11 @@
-#include <util/stream/mem.h>
-#include <util/stream/aligned.h>
-#include <util/stream/buffer.h>
-#include <util/generic/buffer.h>
-
-namespace Pire {
- typedef TBuffer Buffer;
- typedef TBuffer::TIterator BufferIterator;
- typedef TBufferOutput BufferOutput;
- typedef TAlignedOutput AlignedOutput;
+#include <util/stream/mem.h>
+#include <util/stream/aligned.h>
+#include <util/stream/buffer.h>
+#include <util/generic/buffer.h>
+
+namespace Pire {
+ typedef TBuffer Buffer;
+ typedef TBuffer::TIterator BufferIterator;
+ typedef TBufferOutput BufferOutput;
+ typedef TAlignedOutput AlignedOutput;
}
diff --git a/contrib/libs/pire/pire/stub/noncopyable.h b/contrib/libs/pire/pire/stub/noncopyable.h
index ab18546e51..1791f43638 100644
--- a/contrib/libs/pire/pire/stub/noncopyable.h
+++ b/contrib/libs/pire/pire/stub/noncopyable.h
@@ -1,5 +1,5 @@
-#pragma once
-#include <util/generic/noncopyable.h>
-namespace Pire {
- typedef TNonCopyable NonCopyable;
-}
+#pragma once
+#include <util/generic/noncopyable.h>
+namespace Pire {
+ typedef TNonCopyable NonCopyable;
+}
diff --git a/contrib/libs/pire/pire/stub/saveload.h b/contrib/libs/pire/pire/stub/saveload.h
index 97768ff463..6808c7a400 100644
--- a/contrib/libs/pire/pire/stub/saveload.h
+++ b/contrib/libs/pire/pire/stub/saveload.h
@@ -1,2 +1,2 @@
-#pragma once
-#include <util/ysaveload.h>
+#pragma once
+#include <util/ysaveload.h>
diff --git a/contrib/libs/pire/pire/stub/singleton.h b/contrib/libs/pire/pire/stub/singleton.h
index f24e924460..193817f100 100644
--- a/contrib/libs/pire/pire/stub/singleton.h
+++ b/contrib/libs/pire/pire/stub/singleton.h
@@ -1,8 +1,8 @@
-#pragma once
-#include <util/generic/singleton.h>
-namespace Pire {
- template<class T>
- const T& DefaultValue() {
- return Default<T>();
- }
-}
+#pragma once
+#include <util/generic/singleton.h>
+namespace Pire {
+ template<class T>
+ const T& DefaultValue() {
+ return Default<T>();
+ }
+}
diff --git a/contrib/libs/pire/pire/stub/stl.h b/contrib/libs/pire/pire/stub/stl.h
index 98ebd9f7c6..705981a7e6 100644
--- a/contrib/libs/pire/pire/stub/stl.h
+++ b/contrib/libs/pire/pire/stub/stl.h
@@ -1,66 +1,66 @@
-#ifndef PIRE_COMPAT_H_INCLUDED
-#define PIRE_COMPAT_H_INCLUDED
-
-#include <bitset>
-#include <algorithm>
-#include <iterator>
-#include <functional>
-#include <utility>
-#include <memory>
-
+#ifndef PIRE_COMPAT_H_INCLUDED
+#define PIRE_COMPAT_H_INCLUDED
+
+#include <bitset>
+#include <algorithm>
+#include <iterator>
+#include <functional>
+#include <utility>
+#include <memory>
+
#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/generic/deque.h>
-#include <util/generic/list.h>
-#include <util/generic/map.h>
-#include <util/generic/set.h>
-#include <util/generic/hash.h>
-#include <util/generic/hash_set.h>
-#include <util/generic/ptr.h>
-#include <util/generic/yexception.h>
-#include <util/generic/utility.h>
-#include <util/generic/algorithm.h>
-#include <util/stream/input.h>
-#include <util/stream/output.h>
+#include <util/generic/vector.h>
+#include <util/generic/deque.h>
+#include <util/generic/list.h>
+#include <util/generic/map.h>
+#include <util/generic/set.h>
+#include <util/generic/hash.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/ptr.h>
+#include <util/generic/yexception.h>
+#include <util/generic/utility.h>
+#include <util/generic/algorithm.h>
+#include <util/stream/input.h>
+#include <util/stream/output.h>
#include <util/string/reverse.h>
-#include <util/string/vector.h>
-
+#include <util/string/vector.h>
+
namespace Pire {
using ystring = TString;
template<size_t N> using ybitset = std::bitset<N>;
template<typename T1, typename T2> using ypair = std::pair<T1, T2>;
template<typename T> using yauto_ptr = std::auto_ptr<T>;
template<typename Arg1, typename Arg2, typename Result> using ybinary_function = std::binary_function<Arg1, Arg2, Result>;
-
+
template<typename T1, typename T2>
inline ypair<T1, T2> ymake_pair(T1 v1, T2 v2) {
return std::make_pair(v1, v2);
}
-
+
template<typename T>
inline T ymax(T v1, T v2) {
return std::max(v1, v2);
}
-
+
template<typename T>
inline T ymin(T v1, T v2) {
return std::min(v1, v2);
}
-
+
template<class Iter, class T>
void Fill(Iter begin, Iter end, T t) { std::fill(begin, end, t); }
-
+
class Error: public yexception {
public:
Error(const char* msg) { *this << msg; }
Error(const ystring& msg) { *this << msg; }
};
-
+
typedef IOutputStream yostream;
typedef IInputStream yistream;
template<class Iter>
ystring Join(Iter begin, Iter end, const ystring& separator) { return JoinStrings(begin, end, separator); }
-}
-
-#endif
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/stub/utf8.h b/contrib/libs/pire/pire/stub/utf8.h
index 51ea0479d4..189520d2cb 100644
--- a/contrib/libs/pire/pire/stub/utf8.h
+++ b/contrib/libs/pire/pire/stub/utf8.h
@@ -1,7 +1,7 @@
#pragma once
#include <library/cpp/charset/codepage.h>
-#include <util/charset/unidata.h>
+#include <util/charset/unidata.h>
inline wchar32 to_lower(wchar32 c) {
return ToLower(c);
diff --git a/contrib/libs/pire/pire/vbitset.h b/contrib/libs/pire/pire/vbitset.h
index 69cb5aeba3..904c27d1cb 100644
--- a/contrib/libs/pire/pire/vbitset.h
+++ b/contrib/libs/pire/pire/vbitset.h
@@ -1,120 +1,120 @@
-/*
- * vbitset.h -- a bitset of variable size.
- *
- * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
- * Alexander Gololobov <agololobov@gmail.com>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
+/*
+ * vbitset.h -- a bitset of variable size.
*
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#ifndef PIRE_VBITSET_H
-#define PIRE_VBITSET_H
-
-
-#include <string.h>
-
+ * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
+ * Alexander Gololobov <agololobov@gmail.com>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#ifndef PIRE_VBITSET_H
+#define PIRE_VBITSET_H
+
+
+#include <string.h>
+
#include <contrib/libs/pire/pire/stub/stl.h>
-namespace Pire {
-
-#ifdef _DEBUG
-#define VBITSET_CHECK_SIZE(x) CheckSize(x)
-#else
-#define VBITSET_CHECK_SIZE(x) x
-#endif
-
-/// A bitset with variable width
-class BitSet {
-public:
- typedef size_t value_type;
- typedef size_t* pointer;
- typedef size_t& reference;
- typedef const size_t& const_reference;
-
- class const_iterator;
-
- BitSet()
- : m_data(1, 1)
- {
- }
- BitSet(size_t size)
- : m_data(RoundUp(size + 1) + 1)
- , m_size(size)
- {
- m_data[RoundDown(size)] |= (1U << Remainder(size));
- }
-
- void Swap(BitSet& s)
- {
- m_data.swap(s.m_data);
- DoSwap(m_size, s.m_size);
- }
-
- /// Sets the specified bit to 1.
- void Set(size_t pos) {
- m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] |= (1U << Remainder(pos));
- }
-
- /// Resets the specified bit to 0.
- void Reset(size_t pos) {
- m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] &= ~(1U << Remainder(pos));
- }
-
- /// Checks whether the specified bit is set to 1.
- bool Test(size_t pos) const {
- return (m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] & (1U << Remainder(pos))) != 0;
- }
-
- size_t Size() const {
- return m_size;
- }
-
- void Resize(size_t newsize)
- {
- m_data.resize(RoundUp(newsize + 1));
- if (Remainder(newsize) && !m_data.empty())
- m_data[m_data.size() - 1] &= ((1U << Remainder(newsize)) - 1); // Clear tail
- m_data[RoundDown(newsize)] |= (1U << Remainder(newsize));
- }
-
- /// Resets all bits to 0.
- void Clear() { memset(&m_data[0], 0, m_data.size() * sizeof(ContainerType)); }
-
-private:
- typedef unsigned char ContainerType;
- static const size_t ItemSize = sizeof(ContainerType) * 8;
+namespace Pire {
+
+#ifdef _DEBUG
+#define VBITSET_CHECK_SIZE(x) CheckSize(x)
+#else
+#define VBITSET_CHECK_SIZE(x) x
+#endif
+
+/// A bitset with variable width
+class BitSet {
+public:
+ typedef size_t value_type;
+ typedef size_t* pointer;
+ typedef size_t& reference;
+ typedef const size_t& const_reference;
+
+ class const_iterator;
+
+ BitSet()
+ : m_data(1, 1)
+ {
+ }
+ BitSet(size_t size)
+ : m_data(RoundUp(size + 1) + 1)
+ , m_size(size)
+ {
+ m_data[RoundDown(size)] |= (1U << Remainder(size));
+ }
+
+ void Swap(BitSet& s)
+ {
+ m_data.swap(s.m_data);
+ DoSwap(m_size, s.m_size);
+ }
+
+ /// Sets the specified bit to 1.
+ void Set(size_t pos) {
+ m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] |= (1U << Remainder(pos));
+ }
+
+ /// Resets the specified bit to 0.
+ void Reset(size_t pos) {
+ m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] &= ~(1U << Remainder(pos));
+ }
+
+ /// Checks whether the specified bit is set to 1.
+ bool Test(size_t pos) const {
+ return (m_data[RoundDown(VBITSET_CHECK_SIZE(pos))] & (1U << Remainder(pos))) != 0;
+ }
+
+ size_t Size() const {
+ return m_size;
+ }
+
+ void Resize(size_t newsize)
+ {
+ m_data.resize(RoundUp(newsize + 1));
+ if (Remainder(newsize) && !m_data.empty())
+ m_data[m_data.size() - 1] &= ((1U << Remainder(newsize)) - 1); // Clear tail
+ m_data[RoundDown(newsize)] |= (1U << Remainder(newsize));
+ }
+
+ /// Resets all bits to 0.
+ void Clear() { memset(&m_data[0], 0, m_data.size() * sizeof(ContainerType)); }
+
+private:
+ typedef unsigned char ContainerType;
+ static const size_t ItemSize = sizeof(ContainerType) * 8;
TVector<ContainerType> m_data;
- size_t m_size;
-
- static size_t RoundUp(size_t x) { return x / ItemSize + ((x % ItemSize) ? 1 : 0); }
- static size_t RoundDown(size_t x) { return x / ItemSize; }
- static size_t Remainder(size_t x) { return x % ItemSize; }
-
-#ifdef _DEBUG
- size_t CheckSize(size_t size) const
- {
- if (size < m_size)
- return size;
- else
- throw Error("BitSet: subscript out of range");
- }
-#endif
-};
-
-}
-
-#endif
+ size_t m_size;
+
+ static size_t RoundUp(size_t x) { return x / ItemSize + ((x % ItemSize) ? 1 : 0); }
+ static size_t RoundDown(size_t x) { return x / ItemSize; }
+ static size_t Remainder(size_t x) { return x % ItemSize; }
+
+#ifdef _DEBUG
+ size_t CheckSize(size_t size) const
+ {
+ if (size < m_size)
+ return size;
+ else
+ throw Error("BitSet: subscript out of range");
+ }
+#endif
+};
+
+}
+
+#endif