aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfagraph
diff options
context:
space:
mode:
authorbnagaev <bnagaev@yandex-team.ru>2022-02-10 16:47:04 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:04 +0300
commitd6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
treed5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/nfagraph
parent1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
downloadydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph')
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng.cpp934
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng.h192
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp134
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h98
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp1248
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp1006
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_asserts.h88
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp496
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_builder.h184
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp698
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h96
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp458
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp628
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_depth.h146
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp142
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_dominators.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_dump.h328
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp1014
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h130
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp1076
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h94
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp636
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_execute.h134
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp236
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h88
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp1290
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_extparam.h86
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp276
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h92
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp1324
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_haig.h128
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp160
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_holder.h298
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp402
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h120
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp586
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_lbr.h114
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp904
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex.h238
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp962
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h138
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp1316
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h146
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp422
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h88
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp476
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h104
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp984
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h162
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h274
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp1062
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h148
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp402
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_netflow.h98
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp652
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp802
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prune.h150
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp1106
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_puff.h112
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp1742
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h108
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region.cpp764
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region.h412
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp516
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h98
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp4638
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_repeat.h314
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp170
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_reports.h112
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp328
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h114
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp592
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_revacc.h130
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp186
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_sep.h92
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp508
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h100
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som.cpp5940
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som.h144
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp390
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h94
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp660
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_util.h160
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_split.cpp392
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_split.h128
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp1228
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_squash.h124
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp368
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_stop.h120
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp870
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h134
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp558
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_utf8.h108
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_util.cpp996
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_util.h416
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp242
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_width.cpp432
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_width.h148
102 files changed, 25466 insertions, 25466 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.cpp b/contrib/libs/hyperscan/src/nfagraph/ng.cpp
index 8dccf9863d..6545d55ac0 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng.cpp
@@ -1,229 +1,229 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief NG and graph handling.
- */
+ */
#include "ng.h"
-#include "grey.h"
-#include "ng_anchored_acyclic.h"
-#include "ng_anchored_dots.h"
-#include "ng_asserts.h"
-#include "ng_calc_components.h"
-#include "ng_cyclic_redundancy.h"
-#include "ng_dump.h"
-#include "ng_edge_redundancy.h"
-#include "ng_equivalence.h"
-#include "ng_extparam.h"
-#include "ng_fixed_width.h"
+#include "grey.h"
+#include "ng_anchored_acyclic.h"
+#include "ng_anchored_dots.h"
+#include "ng_asserts.h"
+#include "ng_calc_components.h"
+#include "ng_cyclic_redundancy.h"
+#include "ng_dump.h"
+#include "ng_edge_redundancy.h"
+#include "ng_equivalence.h"
+#include "ng_extparam.h"
+#include "ng_fixed_width.h"
#include "ng_fuzzy.h"
-#include "ng_haig.h"
-#include "ng_literal_component.h"
-#include "ng_literal_decorated.h"
-#include "ng_misc_opt.h"
-#include "ng_puff.h"
-#include "ng_prefilter.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_region_redundancy.h"
-#include "ng_reports.h"
-#include "ng_sep.h"
-#include "ng_small_literal_set.h"
-#include "ng_som.h"
-#include "ng_vacuous.h"
+#include "ng_haig.h"
+#include "ng_literal_component.h"
+#include "ng_literal_decorated.h"
+#include "ng_misc_opt.h"
+#include "ng_puff.h"
+#include "ng_prefilter.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_region.h"
+#include "ng_region_redundancy.h"
+#include "ng_reports.h"
+#include "ng_sep.h"
+#include "ng_small_literal_set.h"
+#include "ng_som.h"
+#include "ng_vacuous.h"
#include "ng_violet.h"
-#include "ng_utf8.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "ue2common.h"
+#include "ng_utf8.h"
+#include "ng_util.h"
+#include "ng_width.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "nfa/goughcompile.h"
+#include "nfa/goughcompile.h"
#include "rose/rose_build.h"
-#include "smallwrite/smallwrite_build.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/ue2string.h"
-
-using namespace std;
-
-namespace ue2 {
-
+#include "smallwrite/smallwrite_build.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/ue2string.h"
+
+using namespace std;
+
+namespace ue2 {
+
NG::NG(const CompileContext &in_cc, size_t num_patterns,
unsigned in_somPrecision)
- : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
- minWidth(depth::infinity()),
- rm(in_cc.grey),
- ssm(in_somPrecision),
- cc(in_cc),
+ : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
+ minWidth(depth::infinity()),
+ rm(in_cc.grey),
+ ssm(in_somPrecision),
+ cc(in_cc),
smwr(makeSmallWriteBuilder(num_patterns, rm, cc)),
rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) {
-}
-
-NG::~NG() {
- // empty
-}
-
-/** \brief SOM handling code, called by \ref addComponent.
- *
- * \return true if the component was handled completely by something (e.g. a
- * Haig outfix), false if SOM could be established but implementation via an
- * engine will be required.
- *
- * \throw CompileError if SOM cannot be supported for the component.
- */
-static
+}
+
+NG::~NG() {
+ // empty
+}
+
+/** \brief SOM handling code, called by \ref addComponent.
+ *
+ * \return true if the component was handled completely by something (e.g. a
+ * Haig outfix), false if SOM could be established but implementation via an
+ * engine will be required.
+ *
+ * \throw CompileError if SOM cannot be supported for the component.
+ */
+static
bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
- const som_type som, const u32 comp_id) {
- DEBUG_PRINTF("doing som\n");
+ const som_type som, const u32 comp_id) {
+ DEBUG_PRINTF("doing som\n");
dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
assert(allMatchStatesHaveReports(g));
-
- // First, we try the "SOM chain" support in ng_som.cpp.
-
+
+ // First, we try the "SOM chain" support in ng_som.cpp.
+
sombe_rv rv = doSom(ng, g, expr, comp_id, som);
- if (rv == SOMBE_HANDLED_INTERNAL) {
- return false;
- } else if (rv == SOMBE_HANDLED_ALL) {
- return true;
- }
- assert(rv == SOMBE_FAIL);
-
- /* Next, Sombe style approaches */
+ if (rv == SOMBE_HANDLED_INTERNAL) {
+ return false;
+ } else if (rv == SOMBE_HANDLED_ALL) {
+ return true;
+ }
+ assert(rv == SOMBE_FAIL);
+
+ /* Next, Sombe style approaches */
rv = doSomWithHaig(ng, g, expr, comp_id, som);
- if (rv == SOMBE_HANDLED_INTERNAL) {
- return false;
- } else if (rv == SOMBE_HANDLED_ALL) {
- return true;
- }
- assert(rv == SOMBE_FAIL);
-
- // If the previous approach could not support this pattern, we try treating
- // it monolithically, as a Haig outfix.
-
- vector<vector<CharReach> > triggers; /* empty for outfix */
-
- assert(g.kind == NFA_OUTFIX);
+ if (rv == SOMBE_HANDLED_INTERNAL) {
+ return false;
+ } else if (rv == SOMBE_HANDLED_ALL) {
+ return true;
+ }
+ assert(rv == SOMBE_FAIL);
+
+ // If the previous approach could not support this pattern, we try treating
+ // it monolithically, as a Haig outfix.
+
+ vector<vector<CharReach> > triggers; /* empty for outfix */
+
+ assert(g.kind == NFA_OUTFIX);
dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
makeReportsSomPass(ng.rm, g);
- auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
- ng.cc.grey);
- if (haig) {
- DEBUG_PRINTF("built haig outfix\n");
- ng.rose->addOutfix(g, *haig);
- return true;
- }
-
- /* Our various strategies for supporting SOM for this pattern have failed.
- * Provide a generic pattern not supported/too large return value as it is
- * unclear what the meaning of a specific SOM error would be */
+ auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
+ ng.cc.grey);
+ if (haig) {
+ DEBUG_PRINTF("built haig outfix\n");
+ ng.rose->addOutfix(g, *haig);
+ return true;
+ }
+
+ /* Our various strategies for supporting SOM for this pattern have failed.
+ * Provide a generic pattern not supported/too large return value as it is
+ * unclear what the meaning of a specific SOM error would be */
throw CompileError(expr.index, "Pattern is too large.");
-
- assert(0); // unreachable
- return false;
-}
-
-void reduceGraph(NGHolder &g, som_type som, bool utf8,
- const CompileContext &cc) {
- if (!cc.grey.performGraphSimplification) {
- return;
- }
-
- // We run reduction passes until either the graph stops changing or we hit
- // a (small) limit.
-
- if (!som) {
- mergeCyclicDotStars(g);
- }
-
- const unsigned MAX_PASSES = 3;
- for (unsigned pass = 1; pass <= MAX_PASSES; pass++) {
- bool changed = false;
- DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES);
- changed |= removeEdgeRedundancy(g, som, cc);
- changed |= reduceGraphEquivalences(g, cc);
- changed |= removeRedundancy(g, som);
+
+ assert(0); // unreachable
+ return false;
+}
+
+void reduceGraph(NGHolder &g, som_type som, bool utf8,
+ const CompileContext &cc) {
+ if (!cc.grey.performGraphSimplification) {
+ return;
+ }
+
+ // We run reduction passes until either the graph stops changing or we hit
+ // a (small) limit.
+
+ if (!som) {
+ mergeCyclicDotStars(g);
+ }
+
+ const unsigned MAX_PASSES = 3;
+ for (unsigned pass = 1; pass <= MAX_PASSES; pass++) {
+ bool changed = false;
+ DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES);
+ changed |= removeEdgeRedundancy(g, som, cc);
+ changed |= reduceGraphEquivalences(g, cc);
+ changed |= removeRedundancy(g, som);
changed |= removeCyclicPathRedundancy(g);
- if (!changed) {
- DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
- break;
- }
- }
-
- if (utf8) {
- utf8DotRestoration(g, som);
- }
-
- /* Minor non-redundancy improvements */
- if (improveGraph(g, som)) {
- /* may be some more edges to remove */
- removeEdgeRedundancy(g, som, cc);
- }
-
- removeCyclicDominated(g, som);
-
- if (!som) {
- mergeCyclicDotStars(g);
- }
-
- if (!som) {
- removeSiblingsOfStartDotStar(g);
- }
-}
-
-static
+ if (!changed) {
+ DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
+ break;
+ }
+ }
+
+ if (utf8) {
+ utf8DotRestoration(g, som);
+ }
+
+ /* Minor non-redundancy improvements */
+ if (improveGraph(g, som)) {
+ /* may be some more edges to remove */
+ removeEdgeRedundancy(g, som, cc);
+ }
+
+ removeCyclicDominated(g, som);
+
+ if (!som) {
+ mergeCyclicDotStars(g);
+ }
+
+ if (!som) {
+ removeSiblingsOfStartDotStar(g);
+ }
+}
+
+static
bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) {
- const CompileContext &cc = ng.cc;
+ const CompileContext &cc = ng.cc;
assert(hasCorrectlyNumberedVertices(g));
-
- DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
+
+ DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
expr.index, comp_id, num_vertices(g), num_edges(g));
-
+
dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
-
+
assert(allMatchStatesHaveReports(g));
-
+
reduceExtendedParams(g, ng.rm, som);
reduceGraph(g, som, expr.utf8, cc);
-
+
dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
- // There may be redundant regions that we can remove
- if (cc.grey.performGraphSimplification) {
- removeRegionRedundancy(g, som);
- }
-
+ // There may be redundant regions that we can remove
+ if (cc.grey.performGraphSimplification) {
+ removeRegionRedundancy(g, som);
+ }
+
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
if (num_vertices(g) == N_SPECIALS) {
@@ -231,125 +231,125 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
return true;
}
- // "Short Exhaustible Passthrough" patterns always become outfixes.
- if (!som && isSEP(g, ng.rm, cc.grey)) {
- DEBUG_PRINTF("graph is SEP\n");
- if (ng.rose->addOutfix(g)) {
- return true;
- }
- }
-
- // Start Of Match handling.
- if (som) {
+ // "Short Exhaustible Passthrough" patterns always become outfixes.
+ if (!som && isSEP(g, ng.rm, cc.grey)) {
+ DEBUG_PRINTF("graph is SEP\n");
+ if (ng.rose->addOutfix(g)) {
+ return true;
+ }
+ }
+
+ // Start Of Match handling.
+ if (som) {
if (addComponentSom(ng, g, expr, som, comp_id)) {
- return true;
- }
- }
-
+ return true;
+ }
+ }
+
assert(allMatchStatesHaveReports(g));
- if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
- return true;
- }
-
- if (handleSmallLiteralSets(*ng.rose, g, cc)
- || handleFixedWidth(*ng.rose, g, cc.grey)) {
- return true;
- }
-
- if (handleDecoratedLiterals(*ng.rose, g, cc)) {
- return true;
- }
-
+ if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
+ return true;
+ }
+
+ if (handleSmallLiteralSets(*ng.rose, g, cc)
+ || handleFixedWidth(*ng.rose, g, cc.grey)) {
+ return true;
+ }
+
+ if (handleDecoratedLiterals(*ng.rose, g, cc)) {
+ return true;
+ }
+
if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
- return true;
- }
-
+ return true;
+ }
+
if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
- return true;
- }
-
- if (handleSmallLiteralSets(*ng.rose, g, cc)
- || handleFixedWidth(*ng.rose, g, cc.grey)) {
- return true;
- }
-
- if (handleDecoratedLiterals(*ng.rose, g, cc)) {
- return true;
- }
-
+ return true;
+ }
+
+ if (handleSmallLiteralSets(*ng.rose, g, cc)
+ || handleFixedWidth(*ng.rose, g, cc.grey)) {
+ return true;
+ }
+
+ if (handleDecoratedLiterals(*ng.rose, g, cc)) {
+ return true;
+ }
+
if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
- return true;
- }
-
- DEBUG_PRINTF("testing for outfix\n");
- assert(allMatchStatesHaveReports(g));
- if (ng.rose->addOutfix(g)) {
- return true;
- }
-
- return false;
-}
-
-// Returns true if all components have been added.
-static
+ return true;
+ }
+
+ DEBUG_PRINTF("testing for outfix\n");
+ assert(allMatchStatesHaveReports(g));
+ if (ng.rose->addOutfix(g)) {
+ return true;
+ }
+
+ return false;
+}
+
+// Returns true if all components have been added.
+static
bool processComponents(NG &ng, ExpressionInfo &expr,
- deque<unique_ptr<NGHolder>> &g_comp,
- const som_type som) {
- const u32 num_components = g_comp.size();
-
- u32 failed = 0;
- for (u32 i = 0; i < num_components; i++) {
- if (!g_comp[i]) {
- continue;
- }
+ deque<unique_ptr<NGHolder>> &g_comp,
+ const som_type som) {
+ const u32 num_components = g_comp.size();
+
+ u32 failed = 0;
+ for (u32 i = 0; i < num_components; i++) {
+ if (!g_comp[i]) {
+ continue;
+ }
if (addComponent(ng, *g_comp[i], expr, som, i)) {
- g_comp[i].reset();
- continue;
- }
-
- if (som) { /* bail immediately */
- return false;
- }
- failed++;
- }
-
- if (!failed) {
- DEBUG_PRINTF("all components claimed\n");
- return true;
- }
-
- DEBUG_PRINTF("%u components still remain\n", failed);
- return false;
-}
-
+ g_comp[i].reset();
+ continue;
+ }
+
+ if (som) { /* bail immediately */
+ return false;
+ }
+ failed++;
+ }
+
+ if (!failed) {
+ DEBUG_PRINTF("all components claimed\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("%u components still remain\n", failed);
+ return false;
+}
+
bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
assert(g_ptr);
NGHolder &g = *g_ptr;
- // remove reports that aren't on vertices connected to accept.
+ // remove reports that aren't on vertices connected to accept.
clearReports(g);
-
+
som_type som = expr.som;
if (som && isVacuous(g)) {
throw CompileError(expr.index, "Start of match is not "
- "currently supported for patterns which match an "
- "empty buffer.");
- }
-
+ "currently supported for patterns which match an "
+ "empty buffer.");
+ }
+
dumpDotWrapper(g, expr, "01_initial", cc.grey);
assert(allMatchStatesHaveReports(g));
-
- /* ensure utf8 starts at cp boundary */
+
+ /* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, g, expr);
-
+
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
}
-
+
bool hamming = expr.hamm_distance > 0;
u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
-
+
DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false");
// validate graph's suitability for fuzzing before resolving asserts
@@ -367,10 +367,10 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
- }
-
+ }
+
optimiseVirtualStarts(g); /* good for som */
-
+
propagateExtendedParams(g, expr, rm);
reduceExtendedParams(g, rm, som);
@@ -387,61 +387,61 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
})) {
// We have at least one report with a minimum length constraint, which
// we currently use SOM to satisfy.
- som = SOM_LEFT;
- ssm.somPrecision(8);
- }
-
- if (som) {
- rose->setSom();
- }
-
- // first, we can perform graph work that can be done on an individual
- // expression basis.
-
+ som = SOM_LEFT;
+ ssm.somPrecision(8);
+ }
+
+ if (som) {
+ rose->setSom();
+ }
+
+ // first, we can perform graph work that can be done on an individual
+ // expression basis.
+
if (expr.utf8) {
relaxForbiddenUtf8(g, expr);
- }
-
+ }
+
if (all_of_in(all_reports(g), [&](ReportID id) {
const auto &report = rm.getReport(id);
return report.ekey != INVALID_EKEY && !report.minLength &&
!report.minOffset;
})) {
- // In highlander mode: if we don't have constraints on our reports that
- // may prevent us accepting our first match (i.e. extended params) we
- // can prune the other out-edges of all vertices connected to accept.
+ // In highlander mode: if we don't have constraints on our reports that
+ // may prevent us accepting our first match (i.e. extended params) we
+ // can prune the other out-edges of all vertices connected to accept.
// TODO: shift the report checking down into pruneHighlanderAccepts()
// to allow us to handle the parts we can in mixed cases.
pruneHighlanderAccepts(g, rm);
- }
-
+ }
+
dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
-
- // If we're a vacuous pattern, we can handle this early.
+
+ // If we're a vacuous pattern, we can handle this early.
if (splitOffVacuous(boundary, rm, g, expr)) {
- DEBUG_PRINTF("split off vacuous\n");
- }
-
- // We might be done at this point: if we've run out of vertices, we can
- // stop processing.
+ DEBUG_PRINTF("split off vacuous\n");
+ }
+
+ // We might be done at this point: if we've run out of vertices, we can
+ // stop processing.
if (num_vertices(g) == N_SPECIALS) {
- DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
- return true;
- }
-
- // Now that vacuous edges have been removed, update the min width exclusive
- // of boundary reports.
+ DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
+ return true;
+ }
+
+ // Now that vacuous edges have been removed, update the min width exclusive
+ // of boundary reports.
minWidth = min(minWidth, findMinWidth(g));
-
- // Add the pattern to the small write builder.
+
+ // Add the pattern to the small write builder.
smwr->add(g, expr);
-
- if (!som) {
+
+ if (!som) {
removeSiblingsOfStartDotStar(g);
- }
-
+ }
+
dumpDotWrapper(g, expr, "03_early", cc.grey);
-
+
// Perform a reduction pass to merge sibling character classes together.
if (cc.grey.performGraphSimplification) {
removeRedundancy(g, som);
@@ -450,177 +450,177 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
dumpDotWrapper(g, expr, "04_reduced", cc.grey);
- // If we've got some literals that span the graph from start to accept, we
- // can split them off into Rose from here.
- if (!som) {
+ // If we've got some literals that span the graph from start to accept, we
+ // can split them off into Rose from here.
+ if (!som) {
if (splitOffLiterals(*this, g)) {
- DEBUG_PRINTF("some vertices claimed by literals\n");
- }
- }
-
- // We might be done at this point: if we've run out of vertices, we can
- // stop processing.
+ DEBUG_PRINTF("some vertices claimed by literals\n");
+ }
+ }
+
+ // We might be done at this point: if we've run out of vertices, we can
+ // stop processing.
if (num_vertices(g) == N_SPECIALS) {
- DEBUG_PRINTF("all vertices claimed before calc components\n");
- return true;
- }
-
+ DEBUG_PRINTF("all vertices claimed before calc components\n");
+ return true;
+ }
+
// Split the graph into a set of connected components and process those.
// Note: this invalidates g_ptr.
-
+
auto g_comp = calcComponents(std::move(g_ptr), cc.grey);
- assert(!g_comp.empty());
-
- if (!som) {
+ assert(!g_comp.empty());
+
+ if (!som) {
for (auto &gc : g_comp) {
assert(gc);
reformLeadingDots(*gc);
- }
-
+ }
+
recalcComponents(g_comp, cc.grey);
- }
-
+ }
+
if (processComponents(*this, expr, g_comp, som)) {
- return true;
- }
-
- // If we're in prefiltering mode, we can run the prefilter reductions and
- // have another shot at accepting the graph.
-
+ return true;
+ }
+
+ // If we're in prefiltering mode, we can run the prefilter reductions and
+ // have another shot at accepting the graph.
+
if (cc.grey.prefilterReductions && expr.prefilter) {
for (auto &gc : g_comp) {
if (!gc) {
- continue;
- }
+ continue;
+ }
prefilterReductions(*gc, cc);
- }
-
+ }
+
if (processComponents(*this, expr, g_comp, som)) {
- return true;
- }
- }
-
- // We must have components that could not be compiled.
- for (u32 i = 0; i < g_comp.size(); i++) {
- if (g_comp[i]) {
- DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
- i, num_vertices(*g_comp[i]));
+ return true;
+ }
+ }
+
+ // We must have components that could not be compiled.
+ for (u32 i = 0; i < g_comp.size(); i++) {
+ if (g_comp[i]) {
+ DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
+ i, num_vertices(*g_comp[i]));
throw CompileError(expr.index, "Pattern is too large.");
- }
- }
-
- assert(0); // should have thrown.
- return false;
-}
-
-/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
+ }
+ }
+
+ assert(0); // should have thrown.
+ return false;
+}
+
+/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
bool NG::addHolder(NGHolder &g) {
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
assert(allMatchStatesHaveReports(g));
assert(hasCorrectlyNumberedVertices(g));
-
- /* We don't update the global minWidth here as we care about the min width
- * of the whole pattern - not a just a prefix of it. */
-
- bool prefilter = false;
+
+ /* We don't update the global minWidth here as we care about the min width
+ * of the whole pattern - not a just a prefix of it. */
+
+ bool prefilter = false;
//dumpDotComp(comp, g, *this, 20, "prefix_init");
-
- som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
- themselves track som */
- bool utf8 = false; // handling done earlier
+
+ som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
+ themselves track som */
+ bool utf8 = false; // handling done earlier
reduceGraph(g, som, utf8, cc);
-
- // There may be redundant regions that we can remove
- if (cc.grey.performGraphSimplification) {
+
+ // There may be redundant regions that we can remove
+ if (cc.grey.performGraphSimplification) {
removeRegionRedundancy(g, som);
- }
-
- // "Short Exhaustible Passthrough" patterns always become outfixes.
+ }
+
+ // "Short Exhaustible Passthrough" patterns always become outfixes.
if (isSEP(g, rm, cc.grey)) {
- DEBUG_PRINTF("graph is SEP\n");
+ DEBUG_PRINTF("graph is SEP\n");
if (rose->addOutfix(g)) {
- return true;
- }
- }
-
+ return true;
+ }
+ }
+
if (splitOffAnchoredAcyclic(*rose, g, cc)) {
- return true;
- }
-
+ return true;
+ }
+
if (handleSmallLiteralSets(*rose, g, cc)
|| handleFixedWidth(*rose, g, cc.grey)) {
- return true;
- }
-
+ return true;
+ }
+
if (handleDecoratedLiterals(*rose, g, cc)) {
- return true;
- }
-
+ return true;
+ }
+
if (doViolet(*rose, g, prefilter, false, rm, cc)) {
- return true;
- }
+ return true;
+ }
if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
- return true;
- }
+ return true;
+ }
if (doViolet(*rose, g, prefilter, true, rm, cc)) {
- return true;
- }
-
- DEBUG_PRINTF("trying for outfix\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("trying for outfix\n");
if (rose->addOutfix(g)) {
- DEBUG_PRINTF("ok\n");
- return true;
- }
- DEBUG_PRINTF("trying for outfix - failed\n");
- DEBUG_PRINTF("nobody would take us\n");
- return false;
-}
-
-bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
+ DEBUG_PRINTF("ok\n");
+ return true;
+ }
+ DEBUG_PRINTF("trying for outfix - failed\n");
+ DEBUG_PRINTF("nobody would take us\n");
+ return false;
+}
+
+bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
u32 external_report, bool highlander, som_type som,
bool quiet) {
- assert(!literal.empty());
-
- if (!cc.grey.shortcutLiterals) {
- return false;
- }
-
- // We can't natively handle arbitrary literals with mixed case sensitivity
- // in Rose -- they require mechanisms like benefits masks, which have
- // length limits etc. Better to let those go through full graph processing.
- if (mixed_sensitivity(literal)) {
- DEBUG_PRINTF("mixed sensitivity\n");
- return false;
- }
-
- // Register external report and validate highlander constraints.
- rm.registerExtReport(external_report,
- external_report_info(highlander, expr_index));
-
- ReportID id;
- if (som) {
- assert(!highlander); // not allowed, checked earlier.
- Report r = makeSomRelativeCallback(external_report, 0, literal.length());
- id = rm.getInternalId(r);
- rose->setSom();
- } else {
- u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
- : INVALID_EKEY;
+ assert(!literal.empty());
+
+ if (!cc.grey.shortcutLiterals) {
+ return false;
+ }
+
+ // We can't natively handle arbitrary literals with mixed case sensitivity
+ // in Rose -- they require mechanisms like benefits masks, which have
+ // length limits etc. Better to let those go through full graph processing.
+ if (mixed_sensitivity(literal)) {
+ DEBUG_PRINTF("mixed sensitivity\n");
+ return false;
+ }
+
+ // Register external report and validate highlander constraints.
+ rm.registerExtReport(external_report,
+ external_report_info(highlander, expr_index));
+
+ ReportID id;
+ if (som) {
+ assert(!highlander); // not allowed, checked earlier.
+ Report r = makeSomRelativeCallback(external_report, 0, literal.length());
+ id = rm.getInternalId(r);
+ rose->setSom();
+ } else {
+ u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
+ : INVALID_EKEY;
Report r = makeECallback(external_report, 0, ekey, quiet);
- id = rm.getInternalId(r);
- }
-
- DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n",
- dumpString(literal).c_str(), id);
-
- rose->add(false, false, literal, {id});
-
- minWidth = min(minWidth, depth(literal.length()));
-
+ id = rm.getInternalId(r);
+ }
+
+ DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n",
+ dumpString(literal).c_str(), id);
+
+ rose->add(false, false, literal, {id});
+
+ minWidth = min(minWidth, depth(literal.length()));
+
/* inform small write handler about this literal */
smwr->add(literal, id);
-
- return true;
-}
-
-} // namespace ue2
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.h b/contrib/libs/hyperscan/src/nfagraph/ng.h
index a5a9077d4f..ed908e9a8d 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng.h
@@ -1,110 +1,110 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief NG declaration.
- */
-
-#ifndef NG_H
-#define NG_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
-#include "parser/position.h"
-#include "som/slot_manager.h"
-#include "som/som.h"
-#include "util/boundary_reports.h"
-#include "util/compile_context.h"
-#include "util/depth.h"
-#include "util/graph.h"
+ */
+
+#ifndef NG_H
+#define NG_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
+#include "parser/position.h"
+#include "som/slot_manager.h"
+#include "som/som.h"
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/depth.h"
+#include "util/graph.h"
#include "util/noncopyable.h"
-#include "util/report_manager.h"
-
-#include <deque>
-#include <map>
-#include <memory>
-#include <utility>
-#include <vector>
-
-namespace ue2 {
-
-struct CompileContext;
-struct ue2_literal;
-
+#include "util/report_manager.h"
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace ue2 {
+
+struct CompileContext;
+struct ue2_literal;
+
class ExpressionInfo;
-class RoseBuild;
-class SmallWriteBuild;
-
+class RoseBuild;
+class SmallWriteBuild;
+
class NG : noncopyable {
-public:
+public:
NG(const CompileContext &in_cc, size_t num_patterns,
unsigned in_somPrecision);
- ~NG();
-
- /** \brief Consumes a pattern, returns false or throws a CompileError
- * exception if the graph cannot be consumed. */
+ ~NG();
+
+ /** \brief Consumes a pattern, returns false or throws a CompileError
+ * exception if the graph cannot be consumed. */
bool addGraph(ExpressionInfo &expr, std::unique_ptr<NGHolder> g_ptr);
-
- /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
- * processing. */
- bool addHolder(NGHolder &h);
-
+
+ /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
+ * processing. */
+ bool addHolder(NGHolder &h);
+
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead
* of using \ref addGraph) */
- bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
+ bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
bool highlander, som_type som, bool quiet);
-
- /** \brief Maximum history in bytes available for use by SOM reverse NFAs,
- * a hack for pattern support (see UE-1903). This is always set to the max
- * "lookbehind" length. */
- const u32 maxSomRevHistoryAvailable;
-
- /** \brief The length of the shortest corpus which can match a pattern
- * contained in the NG (excluding the boundary reports used by vacuous
- * patterns, which give an effective minWidth of zero). */
- depth minWidth;
-
- ReportManager rm;
- SomSlotManager ssm;
- BoundaryReports boundary;
- const CompileContext cc;
-
+
+ /** \brief Maximum history in bytes available for use by SOM reverse NFAs,
+ * a hack for pattern support (see UE-1903). This is always set to the max
+ * "lookbehind" length. */
+ const u32 maxSomRevHistoryAvailable;
+
+ /** \brief The length of the shortest corpus which can match a pattern
+ * contained in the NG (excluding the boundary reports used by vacuous
+ * patterns, which give an effective minWidth of zero). */
+ depth minWidth;
+
+ ReportManager rm;
+ SomSlotManager ssm;
+ BoundaryReports boundary;
+ const CompileContext cc;
+
const std::unique_ptr<SmallWriteBuild> smwr; //!< SmallWrite builder.
- const std::unique_ptr<RoseBuild> rose; //!< Rose builder.
-};
-
-/** \brief Run graph reduction passes.
- *
- * Shared with the small write compiler.
- */
+ const std::unique_ptr<RoseBuild> rose; //!< Rose builder.
+};
+
+/** \brief Run graph reduction passes.
+ *
+ * Shared with the small write compiler.
+ */
void reduceGraph(NGHolder &g, som_type som, bool utf8,
const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp
index 22e3e49609..6547c7a8e1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp
@@ -1,67 +1,67 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Anchored acyclic graph -> DFA analysis.
- */
-#include "ng_anchored_acyclic.h"
-
-#include "ng_holder.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "rose/rose_build.h"
-#include "util/compile_context.h"
-
-namespace ue2 {
-
-bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
- const CompileContext &cc) {
- if (!cc.grey.allowAnchoredAcyclic) {
- return false;
- }
-
- if (!isAnchored(h)) {
- DEBUG_PRINTF("fail, not anchored\n");
- return false;
- }
-
- if (!isAcyclic(h)) {
- DEBUG_PRINTF("fail, not acyclic\n");
- return false;
- }
-
- if (rose.addAnchoredAcyclic(h)) {
- return true;
- } else {
- DEBUG_PRINTF("failed to add anchored nfa\n");
- return false;
- }
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Anchored acyclic graph -> DFA analysis.
+ */
+#include "ng_anchored_acyclic.h"
+
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "rose/rose_build.h"
+#include "util/compile_context.h"
+
+namespace ue2 {
+
+bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
+ const CompileContext &cc) {
+ if (!cc.grey.allowAnchoredAcyclic) {
+ return false;
+ }
+
+ if (!isAnchored(h)) {
+ DEBUG_PRINTF("fail, not anchored\n");
+ return false;
+ }
+
+ if (!isAcyclic(h)) {
+ DEBUG_PRINTF("fail, not acyclic\n");
+ return false;
+ }
+
+ if (rose.addAnchoredAcyclic(h)) {
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add anchored nfa\n");
+ return false;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h
index f9bc5d772e..fa4e6199b4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Anchored acyclic graph -> DFA analysis.
- */
-
-#ifndef NG_ANCHORED_ACYCLIC_H
-#define NG_ANCHORED_ACYCLIC_H
-
-namespace ue2 {
-
-class NGHolder;
-class RoseBuild;
-struct CompileContext;
-
-/** \brief Attempt to consume the entire pattern in graph \a h as an anchored
- * acyclic DFA. Returns true if successful. */
-bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NG_ANCHORED_ACYCLIC_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Anchored acyclic graph -> DFA analysis.
+ */
+
+#ifndef NG_ANCHORED_ACYCLIC_H
+#define NG_ANCHORED_ACYCLIC_H
+
+namespace ue2 {
+
+class NGHolder;
+class RoseBuild;
+struct CompileContext;
+
+/** \brief Attempt to consume the entire pattern in graph \a h as an anchored
+ * acyclic DFA. Returns true if successful. */
+bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NG_ANCHORED_ACYCLIC_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp
index 9a13376d19..9a0abb124c 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp
@@ -1,651 +1,651 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis pass to reform leading dots.
- *
- * We have found that many regexes found in the wild use an anchored dot-repeat
- * to represent an unanchored pattern, particularly if they have been used with
- * a regex engine that assumes that a pattern is anchored. This pass reforms
- * patterns that begin with sequences of dots into a more standard form.
- *
- * In addition, both anchored and unanchored patterns with dot repeats as
- * prefixes will have these prefixes reformed into a canonical form, which some
- * later analyses depend upon.
- */
-#include "ng_anchored_dots.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-#include <queue>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool findStarts(const NGHolder &g, set<NFAVertex> &anchored,
- set<NFAVertex> &unanchored) {
- // Populate unanchored map
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (is_special(v, g)) {
- continue;
- }
- unanchored.insert(v);
- }
-
- // Populate anchored map
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (is_special(v, g)) {
- continue;
- }
- anchored.insert(v);
- }
-
- if (unanchored == anchored) {
- anchored.clear();
- } else if (!unanchored.empty() && !anchored.empty()) {
- return false;
- }
-
- return !anchored.empty() || !unanchored.empty();
-}
-
-namespace {
-class DotInfo {
-public:
- DotInfo(NFAVertex v, bool se, u32 idx)
- : vertex(v), hasSelfLoop(se), index(idx) {}
-
- bool operator<(const DotInfo &other) const {
- if (hasSelfLoop != other.hasSelfLoop)
- return hasSelfLoop < other.hasSelfLoop;
- // tie break with vertex id: lowest ID wins
- return index > other.index;
- }
-
- NFAVertex vertex;
- bool hasSelfLoop;
- u32 index;
-};
-}
-
-// Returns nullptr if all vertices in the given set are not dots.
-// We can only pick one dot vertex, so we go for a dot-star if it exists,
-// otherwise the dot without a self-edge with the lowest ID.
-static
-NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts,
- set<NFAVertex> &otherV) {
- priority_queue<DotInfo> dotq;
- for (auto v : starts) {
- if (is_dot(v, g)) {
- u32 idx = g[v].index;
- dotq.push(DotInfo(v, hasSelfLoop(v, g), idx));
- }
- }
-
- if (dotq.empty()) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis pass to reform leading dots.
+ *
+ * We have found that many regexes found in the wild use an anchored dot-repeat
+ * to represent an unanchored pattern, particularly if they have been used with
+ * a regex engine that assumes that a pattern is anchored. This pass reforms
+ * patterns that begin with sequences of dots into a more standard form.
+ *
+ * In addition, both anchored and unanchored patterns with dot repeats as
+ * prefixes will have these prefixes reformed into a canonical form, which some
+ * later analyses depend upon.
+ */
+#include "ng_anchored_dots.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/graph_range.h"
+
+#include <algorithm>
+#include <queue>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool findStarts(const NGHolder &g, set<NFAVertex> &anchored,
+ set<NFAVertex> &unanchored) {
+ // Populate unanchored map
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ unanchored.insert(v);
+ }
+
+ // Populate anchored map
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ anchored.insert(v);
+ }
+
+ if (unanchored == anchored) {
+ anchored.clear();
+ } else if (!unanchored.empty() && !anchored.empty()) {
+ return false;
+ }
+
+ return !anchored.empty() || !unanchored.empty();
+}
+
+namespace {
+class DotInfo {
+public:
+ DotInfo(NFAVertex v, bool se, u32 idx)
+ : vertex(v), hasSelfLoop(se), index(idx) {}
+
+ bool operator<(const DotInfo &other) const {
+ if (hasSelfLoop != other.hasSelfLoop)
+ return hasSelfLoop < other.hasSelfLoop;
+ // tie break with vertex id: lowest ID wins
+ return index > other.index;
+ }
+
+ NFAVertex vertex;
+ bool hasSelfLoop;
+ u32 index;
+};
+}
+
+// Returns nullptr if all vertices in the given set are not dots.
+// We can only pick one dot vertex, so we go for a dot-star if it exists,
+// otherwise the dot without a self-edge with the lowest ID.
+static
+NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts,
+ set<NFAVertex> &otherV) {
+ priority_queue<DotInfo> dotq;
+ for (auto v : starts) {
+ if (is_dot(v, g)) {
+ u32 idx = g[v].index;
+ dotq.push(DotInfo(v, hasSelfLoop(v, g), idx));
+ }
+ }
+
+ if (dotq.empty()) {
return NGHolder::null_vertex();
- }
-
- const DotInfo &dot = dotq.top();
- otherV = starts;
- otherV.erase(dot.vertex);
- DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index,
- dot.hasSelfLoop ? "has self-edge" : "no self-edge");
- DEBUG_PRINTF("%zu other vertices\n", otherV.size());
- return dot.vertex;
-}
-
-// Returns true if the given vertex is only preceded by start. If start is
-// graph.startDs (i.e. unanchored), the given vertex can also be connected to
-// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored.
-static
-bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g,
- bool selfLoopIsAcceptable) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (selfLoopIsAcceptable && u == v) {
- continue;
- } else if (u == start) {
- continue;
- } else if (start == g.startDs && u == g.start) {
- continue;
- } else {
- return false;
- }
- }
- return true;
-}
-
-// Note: this will only remove the anchored first dot in the chain -- any other
-// removable nodes will be handled by the unanchored case below.
-static
-void reformAnchoredRepeatsComponent(NGHolder &g,
- set<NFAVertex> &compAnchoredStarts,
- set<NFAVertex> &compUnanchoredStarts,
- set<NFAVertex> &dead, depth *startBegin,
- depth *startEnd) {
- // anchored cases can not have any unanchored starts
- if (!compUnanchoredStarts.empty()) {
- DEBUG_PRINTF("we have unanchored starts, skipping\n");
- return;
- }
-
+ }
+
+ const DotInfo &dot = dotq.top();
+ otherV = starts;
+ otherV.erase(dot.vertex);
+ DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index,
+ dot.hasSelfLoop ? "has self-edge" : "no self-edge");
+ DEBUG_PRINTF("%zu other vertices\n", otherV.size());
+ return dot.vertex;
+}
+
+// Returns true if the given vertex is only preceded by start. If start is
+// graph.startDs (i.e. unanchored), the given vertex can also be connected to
+// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored.
+static
+bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g,
+ bool selfLoopIsAcceptable) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (selfLoopIsAcceptable && u == v) {
+ continue;
+ } else if (u == start) {
+ continue;
+ } else if (start == g.startDs && u == g.start) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Note: this will only remove the anchored first dot in the chain -- any other
+// removable nodes will be handled by the unanchored case below.
+static
+void reformAnchoredRepeatsComponent(NGHolder &g,
+ set<NFAVertex> &compAnchoredStarts,
+ set<NFAVertex> &compUnanchoredStarts,
+ set<NFAVertex> &dead, depth *startBegin,
+ depth *startEnd) {
+ // anchored cases can not have any unanchored starts
+ if (!compUnanchoredStarts.empty()) {
+ DEBUG_PRINTF("we have unanchored starts, skipping\n");
+ return;
+ }
+
NFAVertex dotV = NGHolder::null_vertex();
- set<NFAVertex> otherV;
- dotV = findReformable(g, compAnchoredStarts, otherV);
+ set<NFAVertex> otherV;
+ dotV = findReformable(g, compAnchoredStarts, otherV);
if (dotV == NGHolder::null_vertex()) {
- DEBUG_PRINTF("no candidate reformable dot found.\n");
- return;
- }
-
- NFAEdge loopEdge;
- bool selfLoop = false;
- bool bustOut = false;
-
- for (const auto &e : out_edges_range(dotV, g)) {
- NFAVertex t = target(e, g);
- if (t == dotV) {
- selfLoop = true;
- loopEdge = e;
- continue;
- }
-
- if (is_special(t, g)) {
- bustOut = true;
- break;
- }
-
- if (!otherV.empty() && otherV.find(t) == otherV.end()) {
- bustOut = true;
- break;
- }
- }
-
- if (bustOut) {
- DEBUG_PRINTF("busting out\n");
- return;
- }
-
- if (!isStartNode(dotV, g.start, g, true)) {
+ DEBUG_PRINTF("no candidate reformable dot found.\n");
+ return;
+ }
+
+ NFAEdge loopEdge;
+ bool selfLoop = false;
+ bool bustOut = false;
+
+ for (const auto &e : out_edges_range(dotV, g)) {
+ NFAVertex t = target(e, g);
+ if (t == dotV) {
+ selfLoop = true;
+ loopEdge = e;
+ continue;
+ }
+
+ if (is_special(t, g)) {
+ bustOut = true;
+ break;
+ }
+
+ if (!otherV.empty() && otherV.find(t) == otherV.end()) {
+ bustOut = true;
+ break;
+ }
+ }
+
+ if (bustOut) {
+ DEBUG_PRINTF("busting out\n");
+ return;
+ }
+
+ if (!isStartNode(dotV, g.start, g, true)) {
DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index);
- return;
- }
-
- /* get bounds */
- depth min;
+ return;
+ }
+
+ /* get bounds */
+ depth min;
depth max(1);
-
- if (selfLoop) {
- // A self-loop indicates that this is a '.+' or '.*'
- max = depth::infinity();
- }
-
- if (!otherV.empty()) {
- /* We require that the successors of the dot node are are the same
- * as the start vertex. TODO: remember why.
- */
- if (selfLoop) {
- if (otherV.size() != out_degree(dotV, g) - 1) {
- return;
- }
- } else {
- if (otherV.size() != out_degree(dotV, g)) {
- return;
- }
- }
-
+
+ if (selfLoop) {
+ // A self-loop indicates that this is a '.+' or '.*'
+ max = depth::infinity();
+ }
+
+ if (!otherV.empty()) {
+ /* We require that the successors of the dot node are are the same
+ * as the start vertex. TODO: remember why.
+ */
+ if (selfLoop) {
+ if (otherV.size() != out_degree(dotV, g) - 1) {
+ return;
+ }
+ } else {
+ if (otherV.size() != out_degree(dotV, g)) {
+ return;
+ }
+ }
+
min = depth(0);
- } else {
+ } else {
min = depth(1);
- }
-
- *startBegin = min;
- *startEnd = max;
-
- for (auto t : adjacent_vertices_range(dotV, g)) {
- if (t != dotV) {
- add_edge_if_not_present(g.startDs, t, g);
- add_edge_if_not_present(g.start, t, g);
- compUnanchoredStarts.insert(t);
- }
- }
-
- for (auto v : otherV) {
- remove_edge(g.start, v, g);
- }
-
+ }
+
+ *startBegin = min;
+ *startEnd = max;
+
+ for (auto t : adjacent_vertices_range(dotV, g)) {
+ if (t != dotV) {
+ add_edge_if_not_present(g.startDs, t, g);
+ add_edge_if_not_present(g.start, t, g);
+ compUnanchoredStarts.insert(t);
+ }
+ }
+
+ for (auto v : otherV) {
+ remove_edge(g.start, v, g);
+ }
+
DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index);
- clear_vertex(dotV, g);
- dead.insert(dotV);
- compAnchoredStarts.erase(dotV);
-}
-
-static
-void reformUnanchoredRepeatsComponent(NGHolder &g,
- set<NFAVertex> &compAnchoredStarts,
- set<NFAVertex> &compUnanchoredStarts,
- set<NFAVertex> &dead,
- depth *startBegin, depth *startEnd) {
- // unanchored cases can not have any anchored starts
- if (!compAnchoredStarts.empty()) {
- DEBUG_PRINTF("we have anchored starts, skipping\n");
- return;
- }
-
- while (true) {
+ clear_vertex(dotV, g);
+ dead.insert(dotV);
+ compAnchoredStarts.erase(dotV);
+}
+
+static
+void reformUnanchoredRepeatsComponent(NGHolder &g,
+ set<NFAVertex> &compAnchoredStarts,
+ set<NFAVertex> &compUnanchoredStarts,
+ set<NFAVertex> &dead,
+ depth *startBegin, depth *startEnd) {
+ // unanchored cases can not have any anchored starts
+ if (!compAnchoredStarts.empty()) {
+ DEBUG_PRINTF("we have anchored starts, skipping\n");
+ return;
+ }
+
+ while (true) {
NFAVertex dotV = NGHolder::null_vertex();
- set<NFAVertex> otherV;
- dotV = findReformable(g, compUnanchoredStarts, otherV);
+ set<NFAVertex> otherV;
+ dotV = findReformable(g, compUnanchoredStarts, otherV);
if (dotV == NGHolder::null_vertex()) {
- DEBUG_PRINTF("no candidate reformable dot found.\n");
- return;
- }
-
- NFAEdge loopEdge;
- bool selfLoop = false;
- bool bustOut = false;
-
- for (const auto &e : out_edges_range(dotV, g)) {
- NFAVertex t = target(e, g);
-
- if (t == dotV) {
- selfLoop = true;
- loopEdge = e;
- continue;
- }
-
- if (is_special(t, g)) {
- bustOut = true;
- break;
- }
-
- if (!otherV.empty() && otherV.find(t) == otherV.end()) {
- bustOut = true;
- break;
- }
- }
-
- if (bustOut) {
- DEBUG_PRINTF("busting out\n");
- if (!selfLoop) {
- return;
- }
-
- for (auto v : otherV) {
- if (!edge(dotV, v, g).second) {
- return;
- }
- }
-
- // A self-loop indicates that this is a '.+' or '.*'
+ DEBUG_PRINTF("no candidate reformable dot found.\n");
+ return;
+ }
+
+ NFAEdge loopEdge;
+ bool selfLoop = false;
+ bool bustOut = false;
+
+ for (const auto &e : out_edges_range(dotV, g)) {
+ NFAVertex t = target(e, g);
+
+ if (t == dotV) {
+ selfLoop = true;
+ loopEdge = e;
+ continue;
+ }
+
+ if (is_special(t, g)) {
+ bustOut = true;
+ break;
+ }
+
+ if (!otherV.empty() && otherV.find(t) == otherV.end()) {
+ bustOut = true;
+ break;
+ }
+ }
+
+ if (bustOut) {
+ DEBUG_PRINTF("busting out\n");
+ if (!selfLoop) {
+ return;
+ }
+
+ for (auto v : otherV) {
+ if (!edge(dotV, v, g).second) {
+ return;
+ }
+ }
+
+ // A self-loop indicates that this is a '.+' or '.*'
DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index);
- *startEnd = depth::infinity();
- remove_edge(dotV, dotV, g);
- return;
- }
-
- if (!isStartNode(dotV, g.startDs, g, true)) {
+ *startEnd = depth::infinity();
+ remove_edge(dotV, dotV, g);
+ return;
+ }
+
+ if (!isStartNode(dotV, g.startDs, g, true)) {
DEBUG_PRINTF("fleeing: vertex %zu has other preds\n",
g[dotV].index);
- return;
- }
-
- /* get bounds */
+ return;
+ }
+
+ /* get bounds */
depth min(1);
depth max(1);
-
- if (selfLoop) {
- // A self-loop indicates that this is a '.+' or '.*'
- DEBUG_PRINTF("self-loop detected\n");
- max = depth::infinity();
- }
-
- if (!otherV.empty()) {
- if (!selfLoop && otherV.size() != out_degree(dotV, g)) {
- return;
- }
-
- if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) {
- return;
- }
-
- if (min > depth(1)) {
- /* this is not a case we can handle */
- DEBUG_PRINTF("min greater than one, skipping\n");
- return;
- }
+
+ if (selfLoop) {
+ // A self-loop indicates that this is a '.+' or '.*'
+ DEBUG_PRINTF("self-loop detected\n");
+ max = depth::infinity();
+ }
+
+ if (!otherV.empty()) {
+ if (!selfLoop && otherV.size() != out_degree(dotV, g)) {
+ return;
+ }
+
+ if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) {
+ return;
+ }
+
+ if (min > depth(1)) {
+ /* this is not a case we can handle */
+ DEBUG_PRINTF("min greater than one, skipping\n");
+ return;
+ }
min = depth(0);
- }
-
- *startBegin += min;
- *startEnd += max;
-
- for (auto v : otherV) {
- remove_edge(g.start, v, g);
- remove_edge(g.startDs, v, g);
- }
-
- compUnanchoredStarts.clear();
- for (auto t : adjacent_vertices_range(dotV, g)) {
- if (t != dotV) {
+ }
+
+ *startBegin += min;
+ *startEnd += max;
+
+ for (auto v : otherV) {
+ remove_edge(g.start, v, g);
+ remove_edge(g.startDs, v, g);
+ }
+
+ compUnanchoredStarts.clear();
+ for (auto t : adjacent_vertices_range(dotV, g)) {
+ if (t != dotV) {
DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index);
- add_edge(g.startDs, t, g);
- add_edge(g.start, t, g);
- compUnanchoredStarts.insert(t);
- }
- }
-
+ add_edge(g.startDs, t, g);
+ add_edge(g.start, t, g);
+ compUnanchoredStarts.insert(t);
+ }
+ }
+
DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index);
- dead.insert(dotV);
- clear_vertex(dotV, g);
- compUnanchoredStarts.erase(dotV);
- }
-}
-
-// for t to be another optional dot, it must have only in-edges from v and from
-// starts
-static
-bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) {
- if (!is_dot(t, g)) {
- return false;
- }
-
- bool found_v = false, found_start = false;
-
- for (auto u : inv_adjacent_vertices_range(t, g)) {
- if (u == v) {
- found_v = true;
- } else if (u == g.start || u == g.startDs) {
- found_start = true;
- } else {
- return false;
- }
- }
-
- return found_v && found_start;
-}
-
-static
-bool gatherParticipants(const NGHolder &g,
- NFAVertex start, NFAVertex initialDot,
- set<NFAVertex> &dots, set<NFAVertex> &succ) {
- // Walk the graph downwards from the initial dot; each dot will have:
- // 1) a single optional dot successor, or
- // 2) N successors (our terminating case)
- dots.insert(initialDot);
- NFAVertex v = initialDot;
-
- while (out_degree(v, g) == 1) {
- NFAVertex t = *(adjacent_vertices(v, g).first);
- // for t to be another optional dot, it must have only in-edges from v
- // and from starts
- if (isOptionalDot(t, v, g)) {
- // another dot; bail if we've seen it once already
- if (dots.find(t) != dots.end()) {
+ dead.insert(dotV);
+ clear_vertex(dotV, g);
+ compUnanchoredStarts.erase(dotV);
+ }
+}
+
+// for t to be another optional dot, it must have only in-edges from v and from
+// starts
+static
+bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) {
+ if (!is_dot(t, g)) {
+ return false;
+ }
+
+ bool found_v = false, found_start = false;
+
+ for (auto u : inv_adjacent_vertices_range(t, g)) {
+ if (u == v) {
+ found_v = true;
+ } else if (u == g.start || u == g.startDs) {
+ found_start = true;
+ } else {
+ return false;
+ }
+ }
+
+ return found_v && found_start;
+}
+
+static
+bool gatherParticipants(const NGHolder &g,
+ NFAVertex start, NFAVertex initialDot,
+ set<NFAVertex> &dots, set<NFAVertex> &succ) {
+ // Walk the graph downwards from the initial dot; each dot will have:
+ // 1) a single optional dot successor, or
+ // 2) N successors (our terminating case)
+ dots.insert(initialDot);
+ NFAVertex v = initialDot;
+
+ while (out_degree(v, g) == 1) {
+ NFAVertex t = *(adjacent_vertices(v, g).first);
+ // for t to be another optional dot, it must have only in-edges from v
+ // and from starts
+ if (isOptionalDot(t, v, g)) {
+ // another dot; bail if we've seen it once already
+ if (dots.find(t) != dots.end()) {
DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index);
- return false;
- }
- dots.insert(t);
- v = t;
- continue;
- }
- // otherwise, we found a terminating dot state
- break;
- }
-
- // Our terminating states are the successors of v.
- // All of these MUST have an edge from start as well.
- for (auto w : adjacent_vertices_range(v, g)) {
- succ.insert(w);
- if (!edge(start, w, g).second) {
+ return false;
+ }
+ dots.insert(t);
+ v = t;
+ continue;
+ }
+ // otherwise, we found a terminating dot state
+ break;
+ }
+
+ // Our terminating states are the successors of v.
+ // All of these MUST have an edge from start as well.
+ for (auto w : adjacent_vertices_range(v, g)) {
+ succ.insert(w);
+ if (!edge(start, w, g).second) {
DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n",
- g[w].index);
- return false;
- }
- }
-
- /* All the non chained v connected to start must be in succ as well
- * TODO: remember why (and document). */
- for (auto u : adjacent_vertices_range(start, g)) {
- if (is_special(u, g)) {
- continue;
- }
- if (!contains(dots, u) && !contains(succ, u)) {
- return false;
- }
- }
-
- return !succ.empty();
-}
-
-static
-void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
- set<NFAVertex> &dead, UNUSED depth *startBegin,
- depth *startEnd) {
- // Handle optional dot repeat prefixes, e.g.
- // /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs
- // Note that this code assumes that fixed repeats ('^.{5,20}') have been
- // pruned already, down (in this case) to '^.{0,15}'.
-
- // The first of our optional dots must be connected to start. The jump edge
- // past it will be verified in gatherParticipants(). If start is
- // graph.start, it should not be connected to startDs.
+ g[w].index);
+ return false;
+ }
+ }
+
+ /* All the non chained v connected to start must be in succ as well
+ * TODO: remember why (and document). */
+ for (auto u : adjacent_vertices_range(start, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+ if (!contains(dots, u) && !contains(succ, u)) {
+ return false;
+ }
+ }
+
+ return !succ.empty();
+}
+
+static
+void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
+ set<NFAVertex> &dead, UNUSED depth *startBegin,
+ depth *startEnd) {
+ // Handle optional dot repeat prefixes, e.g.
+ // /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs
+ // Note that this code assumes that fixed repeats ('^.{5,20}') have been
+ // pruned already, down (in this case) to '^.{0,15}'.
+
+ // The first of our optional dots must be connected to start. The jump edge
+ // past it will be verified in gatherParticipants(). If start is
+ // graph.start, it should not be connected to startDs.
NFAVertex initialDot = NGHolder::null_vertex();
- for (auto v : adjacent_vertices_range(start, g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (is_dot(v, g) && isStartNode(v, start, g, false)) {
- if (initialDot) {
- return;
- }
- initialDot = v;
+ for (auto v : adjacent_vertices_range(start, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (is_dot(v, g) && isStartNode(v, start, g, false)) {
+ if (initialDot) {
+ return;
+ }
+ initialDot = v;
DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index);
- }
- }
-
- if (!initialDot) {
- return;
- }
-
- // Collect all the other optional dot vertices and the successor vertices
- // by walking down the graph from initialDot
- set<NFAVertex> dots, succ;
- if (!gatherParticipants(g, start, initialDot, dots, succ)) {
- DEBUG_PRINTF("gatherParticipants failed\n");
- return;
- }
-
- DEBUG_PRINTF("optional dot repeat with %zu participants, "
- "terminating in %zu non-dot nodes\n",
- dots.size(), succ.size());
-
- // Remove all the participants and set the start offset
- dead.insert(dots.begin(), dots.end());
-
- DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(),
- startEnd->str().c_str());
-
- if (start == g.start && startEnd->is_infinite()) {
+ }
+ }
+
+ if (!initialDot) {
+ return;
+ }
+
+ // Collect all the other optional dot vertices and the successor vertices
+ // by walking down the graph from initialDot
+ set<NFAVertex> dots, succ;
+ if (!gatherParticipants(g, start, initialDot, dots, succ)) {
+ DEBUG_PRINTF("gatherParticipants failed\n");
+ return;
+ }
+
+ DEBUG_PRINTF("optional dot repeat with %zu participants, "
+ "terminating in %zu non-dot nodes\n",
+ dots.size(), succ.size());
+
+ // Remove all the participants and set the start offset
+ dead.insert(dots.begin(), dots.end());
+
+ DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(),
+ startEnd->str().c_str());
+
+ if (start == g.start && startEnd->is_infinite()) {
*startEnd = depth(dots.size());
- } else if (startEnd->is_finite()) {
- *startEnd += dots.size();
- }
- assert(startEnd->is_reachable());
-
- // Connect our successor vertices to both start and startDs.
+ } else if (startEnd->is_finite()) {
+ *startEnd += dots.size();
+ }
+ assert(startEnd->is_reachable());
+
+ // Connect our successor vertices to both start and startDs.
for (auto v : succ) {
- add_edge_if_not_present(g.start, v, g);
- add_edge_if_not_present(g.startDs, v, g);
- }
-}
-
-static
-void deleteVertices(set<NFAVertex> &dead, NGHolder &g) {
- if (!dead.empty()) {
- DEBUG_PRINTF("pruning %zu vertices\n", dead.size());
- remove_vertices(dead, g);
- }
- dead.clear();
-}
-
-static
-void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
- DEBUG_PRINTF("component\n");
- set<NFAVertex> anchored, unanchored, dead;
- if (!findStarts(g, anchored, unanchored)) {
- DEBUG_PRINTF("no starts\n");
- return;
- }
-
- reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
- startEnd);
- deleteVertices(dead, g);
-
- reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
- startEnd);
- deleteVertices(dead, g);
-}
-
-static
-void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
- DEBUG_PRINTF("collapseVariableRepeats\n");
- set<NFAVertex> dead;
-
- collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd);
- deleteVertices(dead, g);
-
- collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd);
- deleteVertices(dead, g);
-}
-
-static
-void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs,
- depth min_repeat, depth max_repeat) {
- const bool unbounded = max_repeat.is_infinite();
- if (unbounded) {
- max_repeat = min_repeat;
- }
-
- assert(max_repeat.is_finite());
-
- NFAVertex u = lhs;
-
- if (!min_repeat && unbounded) {
- NFAVertex v = add_vertex(g);
- add_edge(u, v, g);
- g[v].char_reach.setall();
-
- for (auto w : rhs) {
- add_edge(lhs, w, g);
- }
- }
-
- for (u32 i = 0; i < min_repeat; i++) {
- NFAVertex v = add_vertex(g);
- add_edge(u, v, g);
- g[v].char_reach.setall();
- u = v;
- }
-
- NFAVertex split = u;
- /* lhs now split point for optional */
- for (u32 i = min_repeat; i < max_repeat; i++) {
- NFAVertex v = add_vertex(g);
- add_edge(u, v, g);
- if (u != split) {
- add_edge(split, v, g);
- }
- g[v].char_reach.setall();
- u = v;
- }
-
- if (unbounded) {
- add_edge(u, u, g);
- }
-
- for (auto w : rhs) {
- add_edge(u, w, g);
- if (split != u) {
- add_edge(split, w, g);
- }
- }
-}
-
-static
-void restoreLeadingDots(NGHolder &g, const depth &startBegin,
- const depth &startEnd) {
- if (startBegin == depth(0) && startEnd.is_infinite()) {
- return;
- }
- DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(),
- startEnd.str().c_str());
-
- for (UNUSED auto v : adjacent_vertices_range(g.start, g)) {
- assert(edge(g.startDs, v, g).second);
- }
- clear_out_edges(g.start, g);
- add_edge(g.start, g.startDs, g);
-
- const bool unbounded = startEnd.is_infinite();
-
- NFAVertex root = unbounded ? g.startDs : g.start;
-
- vector<NFAVertex> rhs;
- insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g));
- rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end());
- for (auto v : rhs) {
- remove_edge(g.startDs, v, g);
- }
-
- addDotsBetween(g, root, rhs, startBegin, startEnd);
+ add_edge_if_not_present(g.start, v, g);
+ add_edge_if_not_present(g.startDs, v, g);
+ }
+}
+
+static
+void deleteVertices(set<NFAVertex> &dead, NGHolder &g) {
+ if (!dead.empty()) {
+ DEBUG_PRINTF("pruning %zu vertices\n", dead.size());
+ remove_vertices(dead, g);
+ }
+ dead.clear();
+}
+
+static
+void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
+ DEBUG_PRINTF("component\n");
+ set<NFAVertex> anchored, unanchored, dead;
+ if (!findStarts(g, anchored, unanchored)) {
+ DEBUG_PRINTF("no starts\n");
+ return;
+ }
+
+ reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
+ startEnd);
+ deleteVertices(dead, g);
+
+ reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
+ startEnd);
+ deleteVertices(dead, g);
+}
+
+static
+void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
+ DEBUG_PRINTF("collapseVariableRepeats\n");
+ set<NFAVertex> dead;
+
+ collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd);
+ deleteVertices(dead, g);
+
+ collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd);
+ deleteVertices(dead, g);
+}
+
+static
+void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs,
+ depth min_repeat, depth max_repeat) {
+ const bool unbounded = max_repeat.is_infinite();
+ if (unbounded) {
+ max_repeat = min_repeat;
+ }
+
+ assert(max_repeat.is_finite());
+
+ NFAVertex u = lhs;
+
+ if (!min_repeat && unbounded) {
+ NFAVertex v = add_vertex(g);
+ add_edge(u, v, g);
+ g[v].char_reach.setall();
+
+ for (auto w : rhs) {
+ add_edge(lhs, w, g);
+ }
+ }
+
+ for (u32 i = 0; i < min_repeat; i++) {
+ NFAVertex v = add_vertex(g);
+ add_edge(u, v, g);
+ g[v].char_reach.setall();
+ u = v;
+ }
+
+ NFAVertex split = u;
+ /* lhs now split point for optional */
+ for (u32 i = min_repeat; i < max_repeat; i++) {
+ NFAVertex v = add_vertex(g);
+ add_edge(u, v, g);
+ if (u != split) {
+ add_edge(split, v, g);
+ }
+ g[v].char_reach.setall();
+ u = v;
+ }
+
+ if (unbounded) {
+ add_edge(u, u, g);
+ }
+
+ for (auto w : rhs) {
+ add_edge(u, w, g);
+ if (split != u) {
+ add_edge(split, w, g);
+ }
+ }
+}
+
+static
+void restoreLeadingDots(NGHolder &g, const depth &startBegin,
+ const depth &startEnd) {
+ if (startBegin == depth(0) && startEnd.is_infinite()) {
+ return;
+ }
+ DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(),
+ startEnd.str().c_str());
+
+ for (UNUSED auto v : adjacent_vertices_range(g.start, g)) {
+ assert(edge(g.startDs, v, g).second);
+ }
+ clear_out_edges(g.start, g);
+ add_edge(g.start, g.startDs, g);
+
+ const bool unbounded = startEnd.is_infinite();
+
+ NFAVertex root = unbounded ? g.startDs : g.start;
+
+ vector<NFAVertex> rhs;
+ insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g));
+ rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end());
+ for (auto v : rhs) {
+ remove_edge(g.startDs, v, g);
+ }
+
+ addDotsBetween(g, root, rhs, startBegin, startEnd);
renumber_vertices(g);
renumber_edges(g);
-}
-
-// Entry point.
-void reformLeadingDots(NGHolder &g) {
- depth startBegin(0);
- depth startEnd = depth::infinity();
-
- reformAnchoredRepeats(g, &startBegin, &startEnd);
- collapseVariableRepeats(g, &startBegin, &startEnd);
- restoreLeadingDots(g, startBegin, startEnd);
-}
-
-} // namespace ue2
+}
+
+// Entry point.
+void reformLeadingDots(NGHolder &g) {
+ depth startBegin(0);
+ depth startEnd = depth::infinity();
+
+ reformAnchoredRepeats(g, &startBegin, &startEnd);
+ collapseVariableRepeats(g, &startBegin, &startEnd);
+ restoreLeadingDots(g, startBegin, startEnd);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h
index 8454c31941..d5ffee2d79 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h
@@ -1,45 +1,45 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis pass to reform leading dots.
- */
-
-#ifndef NG_ANCHORED_BOUNDED_REPEATS_H
-#define NG_ANCHORED_BOUNDED_REPEATS_H
-
-namespace ue2 {
-
-class NGHolder;
-
-/* should not be used if SoM is required */
-void reformLeadingDots(NGHolder &g);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis pass to reform leading dots.
+ */
+
+#ifndef NG_ANCHORED_BOUNDED_REPEATS_H
+#define NG_ANCHORED_BOUNDED_REPEATS_H
+
+namespace ue2 {
+
+class NGHolder;
+
+/* should not be used if SoM is required */
+void reformLeadingDots(NGHolder &g);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
index 8812afadb7..24d4ecace1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
@@ -1,558 +1,558 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Resolve special assert vertices.
- *
- * The assert resolution algorithm proceeds by iterating over those edges with
- * assertion flags, considering source and target vertices of each edge. If a
- * vertex has a superset of the reachability demanded by the assertion on the
- * edge, it is split into alternatives providing the word and non-word paths
- * through that vertex.
- *
- * A great deal of the complexity in the resolveAsserts pass is devoted to
- * handling these assertions when the UCP flag is specified (meaning \\w and \\W
- * are implemented with Unicode properties, rather than their ASCII
- * interpretation) and the prefiltering flag is also used. Complete,
- * non-prefiltering UCP support is not available yet.
- */
-#include "ng_asserts.h"
-
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Resolve special assert vertices.
+ *
+ * The assert resolution algorithm proceeds by iterating over those edges with
+ * assertion flags, considering source and target vertices of each edge. If a
+ * vertex has a superset of the reachability demanded by the assertion on the
+ * edge, it is split into alternatives providing the word and non-word paths
+ * through that vertex.
+ *
+ * A great deal of the complexity in the resolveAsserts pass is devoted to
+ * handling these assertions when the UCP flag is specified (meaning \\w and \\W
+ * are implemented with Unicode properties, rather than their ASCII
+ * interpretation) and the prefiltering flag is also used. Complete,
+ * non-prefiltering UCP support is not available yet.
+ */
+#include "ng_asserts.h"
+
+#include "ng.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_util.h"
#include "compiler/compiler.h"
-#include "parser/position.h" // for POS flags
-#include "util/bitutils.h" // for findAndClearLSB_32
-#include "util/boundary_reports.h"
-#include "util/container.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-#include "util/unicode_def.h"
-
-#include <queue>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Hard limit on the maximum number of vertices we'll clone before we
- * throw up our hands and report 'Pattern too large.' */
-static const size_t MAX_CLONED_VERTICES = 2048;
-
-/** \brief The definition of \\w, since we use it everywhere in here. */
-static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
- CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
-
-/** \brief \\W is the inverse of \\w */
-static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
-
-/** \brief Prefiltering definition of \\w for UCP mode.
- *
- * Includes all high bytes as to capture all non-ASCII, however depending on
- * direction only continuers or starters are strictly required - as the input
- * is well-formed, this laxness will not cost us. */
-static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
- | CharReach(128, 255));
-
-/** \brief Prefiltering definition of \\W for UCP Mode.
- *
- * (non-word already includes high bytes) */
-static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
-
-/** \brief Find all the edges with assertion flags. */
-static
-vector<NFAEdge> getAsserts(const NGHolder &g) {
- vector<NFAEdge> out;
- for (const auto &e : edges_range(g)) {
- if (g[e].assert_flags) {
- out.push_back(e);
- }
- }
- return out;
-}
-
-static
-void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
+#include "parser/position.h" // for POS flags
+#include "util/bitutils.h" // for findAndClearLSB_32
+#include "util/boundary_reports.h"
+#include "util/container.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+#include "util/unicode_def.h"
+
+#include <queue>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Hard limit on the maximum number of vertices we'll clone before we
+ * throw up our hands and report 'Pattern too large.' */
+static const size_t MAX_CLONED_VERTICES = 2048;
+
+/** \brief The definition of \\w, since we use it everywhere in here. */
+static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
+ CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
+
+/** \brief \\W is the inverse of \\w */
+static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
+
+/** \brief Prefiltering definition of \\w for UCP mode.
+ *
+ * Includes all high bytes as to capture all non-ASCII, however depending on
+ * direction only continuers or starters are strictly required - as the input
+ * is well-formed, this laxness will not cost us. */
+static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
+ | CharReach(128, 255));
+
+/** \brief Prefiltering definition of \\W for UCP Mode.
+ *
+ * (non-word already includes high bytes) */
+static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
+
+/** \brief Find all the edges with assertion flags. */
+static
+vector<NFAEdge> getAsserts(const NGHolder &g) {
+ vector<NFAEdge> out;
+ for (const auto &e : edges_range(g)) {
+ if (g[e].assert_flags) {
+ out.push_back(e);
+ }
+ }
+ return out;
+}
+
+static
+void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
DEBUG_PRINTF("%zu needs splitting\n", g[v].index);
- to_split->emplace(g[v].index, v);
-}
-
-/** \brief Find vertices that need to be split due to an assertion edge.
- *
- * A vertex needs to be split if has an edge to/from it with an assert with a
- * restriction on the relevant end. */
-static
-void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
- map<u32, NFAVertex> *to_split,
- map<u32, NFAVertex> *to_split_ucp) {
- for (const auto &e : asserts) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
- u32 flags = g[e].assert_flags;
- assert(flags);
-
- const CharReach &u_cr = g[u].char_reach;
- const CharReach &v_cr = g[v].char_reach;
-
- bool ucp_assert = flags & UCP_ASSERT_FLAGS;
- bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
- /* In reality, an expression can only be entirely ucp or not ucp */
- assert(ucp_assert != normal_assert);
-
- if (normal_assert) {
- /* assume any flag results in us have to split if the vertex is not
- * a subset of word or completely disjoint from it. We could be more
- * nuanced if flags is a disjunction of multiple assertions. */
- if (!u_cr.isSubsetOf(CHARREACH_WORD)
- && !u_cr.isSubsetOf(CHARREACH_NONWORD)
- && u != g.start) { /* start is always considered a nonword */
- addToSplit(g, u, to_split);
- }
-
- if (!v_cr.isSubsetOf(CHARREACH_WORD)
- && !v_cr.isSubsetOf(CHARREACH_NONWORD)
- && v != g.accept /* accept require special handling, done on a
- * per edge basis in resolve asserts
- */
- && v != g.acceptEod) { /* eod is always considered a nonword */
- addToSplit(g, v, to_split);
- }
- }
-
- if (ucp_assert) {
- /* note: the ucp prefilter crs overlap - requires a bit more care */
- if (u == g.start) { /* start never needs to be split,
- * treat nonword */
- } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
- if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
- && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
- addToSplit(g, u, to_split_ucp);
- }
- } else {
- assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
- if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
- && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
- addToSplit(g, u, to_split_ucp);
- }
- }
-
- if (v == g.acceptEod /* eod is always considered a nonword */
- || v == g.accept) { /* accept require special handling, done on
- * a per edge basis in resolve asserts */
- } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
- if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
- && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
- addToSplit(g, v, to_split_ucp);
- }
- } else {
- assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
- if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
- && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
- addToSplit(g, v, to_split_ucp);
- }
- }
- }
- }
-}
-
-static
+ to_split->emplace(g[v].index, v);
+}
+
+/** \brief Find vertices that need to be split due to an assertion edge.
+ *
+ * A vertex needs to be split if has an edge to/from it with an assert with a
+ * restriction on the relevant end. */
+static
+void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
+ map<u32, NFAVertex> *to_split,
+ map<u32, NFAVertex> *to_split_ucp) {
+ for (const auto &e : asserts) {
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+ u32 flags = g[e].assert_flags;
+ assert(flags);
+
+ const CharReach &u_cr = g[u].char_reach;
+ const CharReach &v_cr = g[v].char_reach;
+
+ bool ucp_assert = flags & UCP_ASSERT_FLAGS;
+ bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
+ /* In reality, an expression can only be entirely ucp or not ucp */
+ assert(ucp_assert != normal_assert);
+
+ if (normal_assert) {
+ /* assume any flag results in us have to split if the vertex is not
+ * a subset of word or completely disjoint from it. We could be more
+ * nuanced if flags is a disjunction of multiple assertions. */
+ if (!u_cr.isSubsetOf(CHARREACH_WORD)
+ && !u_cr.isSubsetOf(CHARREACH_NONWORD)
+ && u != g.start) { /* start is always considered a nonword */
+ addToSplit(g, u, to_split);
+ }
+
+ if (!v_cr.isSubsetOf(CHARREACH_WORD)
+ && !v_cr.isSubsetOf(CHARREACH_NONWORD)
+ && v != g.accept /* accept require special handling, done on a
+ * per edge basis in resolve asserts
+ */
+ && v != g.acceptEod) { /* eod is always considered a nonword */
+ addToSplit(g, v, to_split);
+ }
+ }
+
+ if (ucp_assert) {
+ /* note: the ucp prefilter crs overlap - requires a bit more care */
+ if (u == g.start) { /* start never needs to be split,
+ * treat nonword */
+ } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
+ if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
+ && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
+ addToSplit(g, u, to_split_ucp);
+ }
+ } else {
+ assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
+ if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
+ && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
+ addToSplit(g, u, to_split_ucp);
+ }
+ }
+
+ if (v == g.acceptEod /* eod is always considered a nonword */
+ || v == g.accept) { /* accept require special handling, done on
+ * a per edge basis in resolve asserts */
+ } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
+ if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
+ && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
+ addToSplit(g, v, to_split_ucp);
+ }
+ } else {
+ assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
+ if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
+ && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
+ addToSplit(g, v, to_split_ucp);
+ }
+ }
+ }
+ }
+}
+
+static
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
- // Don't try and set the report ID of a special vertex.
- assert(!is_special(v, g));
-
- // If there's a report set already, we're replacing it.
- g[v].reports.clear();
-
+ // Don't try and set the report ID of a special vertex.
+ assert(!is_special(v, g));
+
+ // If there's a report set already, we're replacing it.
+ g[v].reports.clear();
+
Report ir = rm.getBasicInternalReport(expr, adj);
-
- g[v].reports.insert(rm.getInternalId(ir));
+
+ g[v].reports.insert(rm.getInternalId(ir));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
-}
-
-static
+}
+
+static
NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, const CharReach &cr_mask) {
- NFAVertex clone = clone_vertex(g, v);
- g[clone].char_reach &= cr_mask;
- clone_out_edges(g, v, clone);
- clone_in_edges(g, v, clone);
-
- if (v == g.startDs) {
+ NFAVertex clone = clone_vertex(g, v);
+ g[clone].char_reach &= cr_mask;
+ clone_out_edges(g, v, clone);
+ clone_in_edges(g, v, clone);
+
+ if (v == g.startDs) {
if (expr.utf8) {
- g[clone].char_reach &= ~UTF_START_CR;
- }
-
- DEBUG_PRINTF("marked as virt\n");
- g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
-
+ g[clone].char_reach &= ~UTF_START_CR;
+ }
+
+ DEBUG_PRINTF("marked as virt\n");
+ g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
+
setReportId(rm, g, expr, clone, 0);
- }
-
- return clone;
-}
-
-static
+ }
+
+ return clone;
+}
+
+static
void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, bool ucp) {
- assert(v != g.start);
- assert(v != g.accept);
- assert(v != g.acceptEod);
+ assert(v != g.start);
+ assert(v != g.accept);
+ assert(v != g.acceptEod);
DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp);
-
- CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
- CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
-
- auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
-
- // Split v into word/nonword vertices with only asserting out-edges.
+
+ CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
+ CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
+
+ auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
+
+ // Split v into word/nonword vertices with only asserting out-edges.
NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
- remove_out_edge_if(w_out, has_no_assert, g);
- remove_out_edge_if(nw_out, has_no_assert, g);
-
- // Split v into word/nonword vertices with only asserting in-edges.
+ remove_out_edge_if(w_out, has_no_assert, g);
+ remove_out_edge_if(nw_out, has_no_assert, g);
+
+ // Split v into word/nonword vertices with only asserting in-edges.
NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
- remove_in_edge_if(w_in, has_no_assert, g);
- remove_in_edge_if(nw_in, has_no_assert, g);
-
- // Prune edges with asserts from original v.
- auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
- remove_in_edge_if(v, has_assert, g);
- remove_out_edge_if(v, has_assert, g);
-}
-
-static
+ remove_in_edge_if(w_in, has_no_assert, g);
+ remove_in_edge_if(nw_in, has_no_assert, g);
+
+ // Prune edges with asserts from original v.
+ auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
+ remove_in_edge_if(v, has_assert, g);
+ remove_out_edge_if(v, has_assert, g);
+}
+
+static
void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
set<NFAEdge> *dead) {
- for (const auto &e : edges_range(g)) {
- u32 flags = g[e].assert_flags;
- if (!flags) {
- continue;
- }
-
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
-
- assert(u != g.startDs);
-
- const CharReach &u_cr = g[u].char_reach;
- const CharReach &v_cr = g[v].char_reach;
-
- bool impassable = true;
- bool ucp = flags & UCP_ASSERT_FLAGS;
+ for (const auto &e : edges_range(g)) {
+ u32 flags = g[e].assert_flags;
+ if (!flags) {
+ continue;
+ }
+
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+
+ assert(u != g.startDs);
+
+ const CharReach &u_cr = g[u].char_reach;
+ const CharReach &v_cr = g[v].char_reach;
+
+ bool impassable = true;
+ bool ucp = flags & UCP_ASSERT_FLAGS;
DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n",
g[u].index, g[v].index, flags, (int)ucp);
- while (flags && impassable) {
- u32 flag = 1U << findAndClearLSB_32(&flags);
- switch (flag) {
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD:
- if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_NONWORD:
- case POS_FLAG_ASSERT_WORD_TO_WORD:
- if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
- if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
- case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
- if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
- continue;
- }
- break;
- default:
- assert(0);
- }
-
- if (v == g.accept) {
- /* accept special will need to be treated specially later */
- impassable = false;
- continue;
- }
-
- switch (flag) {
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
- case POS_FLAG_ASSERT_WORD_TO_NONWORD:
- if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_WORD:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD:
- if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
- case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
- if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
- && v != g.acceptEod) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
- if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
- || v == g.acceptEod) {
- continue;
- }
- break;
- default:
- assert(0);
- }
- impassable = false;
- }
-
- if (impassable) {
- dead->insert(e);
- } else if (v == g.accept && !ucp) {
- bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
- UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
- assert(u_w != u_nw);
- bool v_w = false;
- bool v_nw = false;
-
- flags = g[e].assert_flags;
- if (u_w) {
- v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
- v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
- } else {
- v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
- v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
- }
- assert(v_w || v_nw);
- if (v_w && v_nw) {
- /* edge is effectively unconditional */
- g[e].assert_flags = 0;
- } else if (v_w) {
- /* need to add a word byte */
- NFAVertex vv = add_vertex(g);
+ while (flags && impassable) {
+ u32 flag = 1U << findAndClearLSB_32(&flags);
+ switch (flag) {
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD:
+ if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD:
+ case POS_FLAG_ASSERT_WORD_TO_WORD:
+ if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
+ if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
+ case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
+ if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
+ continue;
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ if (v == g.accept) {
+ /* accept special will need to be treated specially later */
+ impassable = false;
+ continue;
+ }
+
+ switch (flag) {
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD:
+ if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_WORD:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD:
+ if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
+ if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
+ && v != g.acceptEod) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
+ if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
+ || v == g.acceptEod) {
+ continue;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ impassable = false;
+ }
+
+ if (impassable) {
+ dead->insert(e);
+ } else if (v == g.accept && !ucp) {
+ bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
+ UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
+ assert(u_w != u_nw);
+ bool v_w = false;
+ bool v_nw = false;
+
+ flags = g[e].assert_flags;
+ if (u_w) {
+ v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
+ v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
+ } else {
+ v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
+ v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
+ }
+ assert(v_w || v_nw);
+ if (v_w && v_nw) {
+ /* edge is effectively unconditional */
+ g[e].assert_flags = 0;
+ } else if (v_w) {
+ /* need to add a word byte */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_WORD;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
- dead->insert(e);
- } else {
- /* need to add a non word byte or see eod */
- NFAVertex vv = add_vertex(g);
+ g[vv].char_reach = CHARREACH_WORD;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
+ dead->insert(e);
+ } else {
+ /* need to add a non word byte or see eod */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_NONWORD;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
+ g[vv].char_reach = CHARREACH_NONWORD;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
/* there may already be a different edge from start to eod if so
* we need to make it unconditional and alive
*/
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
- g[start_eod].assert_flags = 0;
- dead->erase(start_eod);
+ g[start_eod].assert_flags = 0;
+ dead->erase(start_eod);
} else {
add_edge(u, g.acceptEod, g[e], g);
- }
- dead->insert(e);
- }
- } else if (v == g.accept && ucp) {
- DEBUG_PRINTF("resolving ucp assert to accept\n");
- assert(u_cr.any());
- bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
- && u != g.start;
- bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
- || u == g.start;
- assert(u_w || u_nw);
-
- bool v_w = false;
- bool v_nw = false;
-
- flags = g[e].assert_flags;
- if (u_w) {
- v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
- v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
- }
- if (u_nw) {
- v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
- v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
- }
- assert(v_w || v_nw);
- if (v_w && v_nw) {
- /* edge is effectively unconditional */
- g[e].assert_flags = 0;
- } else if (v_w) {
- /* need to add a word byte */
- NFAVertex vv = add_vertex(g);
+ }
+ dead->insert(e);
+ }
+ } else if (v == g.accept && ucp) {
+ DEBUG_PRINTF("resolving ucp assert to accept\n");
+ assert(u_cr.any());
+ bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
+ && u != g.start;
+ bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
+ || u == g.start;
+ assert(u_w || u_nw);
+
+ bool v_w = false;
+ bool v_nw = false;
+
+ flags = g[e].assert_flags;
+ if (u_w) {
+ v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
+ v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
+ }
+ if (u_nw) {
+ v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
+ v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
+ }
+ assert(v_w || v_nw);
+ if (v_w && v_nw) {
+ /* edge is effectively unconditional */
+ g[e].assert_flags = 0;
+ } else if (v_w) {
+ /* need to add a word byte */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
- dead->insert(e);
- } else {
- /* need to add a non word byte or see eod */
- NFAVertex vv = add_vertex(g);
+ g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
+ dead->insert(e);
+ } else {
+ /* need to add a non word byte or see eod */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
+ g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
/* there may already be a different edge from start to eod if so
* we need to make it unconditional and alive
*/
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
- g[start_eod].assert_flags = 0;
- dead->erase(start_eod);
+ g[start_eod].assert_flags = 0;
+ dead->erase(start_eod);
} else {
add_edge(u, g.acceptEod, g[e], g);
- }
- dead->insert(e);
- }
- } else {
- /* we can remove the asserts as we have partitioned the vertices
- * into w/nw around the assert edges
- */
- g[e].assert_flags = 0;
- }
- }
-}
-
+ }
+ dead->insert(e);
+ }
+ } else {
+ /* we can remove the asserts as we have partitioned the vertices
+ * into w/nw around the assert edges
+ */
+ g[e].assert_flags = 0;
+ }
+ }
+}
+
void resolveAsserts(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
- vector<NFAEdge> asserts = getAsserts(g);
- if (asserts.empty()) {
- return;
- }
-
- map<u32, NFAVertex> to_split; /* by index, for determinism */
- map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
- findSplitters(g, asserts, &to_split, &to_split_ucp);
- if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
+ vector<NFAEdge> asserts = getAsserts(g);
+ if (asserts.empty()) {
+ return;
+ }
+
+ map<u32, NFAVertex> to_split; /* by index, for determinism */
+ map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
+ findSplitters(g, asserts, &to_split, &to_split_ucp);
+ if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
throw CompileError(expr.index, "Pattern is too large.");
- }
-
- for (const auto &m : to_split) {
- assert(!contains(to_split_ucp, m.first));
+ }
+
+ for (const auto &m : to_split) {
+ assert(!contains(to_split_ucp, m.first));
splitVertex(rm, g, expr, m.second, false);
- }
-
- for (const auto &m : to_split_ucp) {
+ }
+
+ for (const auto &m : to_split_ucp) {
splitVertex(rm, g, expr, m.second, true);
- }
-
- set<NFAEdge> dead;
+ }
+
+ set<NFAEdge> dead;
resolveEdges(rm, g, expr, &dead);
-
- remove_edges(dead, g);
+
+ remove_edges(dead, g);
renumber_vertices(g);
- pruneUseless(g);
- pruneEmptyVertices(g);
-
+ pruneUseless(g);
+ pruneEmptyVertices(g);
+
renumber_vertices(g);
renumber_edges(g);
- clearReports(g);
-}
-
+ clearReports(g);
+}
+
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
- /* In utf8 mode there is an implicit assertion that we start at codepoint
- * boundaries. Assert resolution handles the badness coming from asserts.
- * The only other source of trouble is startDs->accept connections.
- */
+ /* In utf8 mode there is an implicit assertion that we start at codepoint
+ * boundaries. Assert resolution handles the badness coming from asserts.
+ * The only other source of trouble is startDs->accept connections.
+ */
NFAEdge orig = edge(g.startDs, g.accept, g);
if (expr.utf8 && orig) {
DEBUG_PRINTF("rectifying %u\n", expr.report);
Report ir = rm.getBasicInternalReport(expr);
- ReportID rep = rm.getInternalId(ir);
-
- NFAVertex v_a = add_vertex(g);
- g[v_a].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_a].char_reach = UTF_ASCII_CR;
- add_edge(v_a, g.accept, g[orig], g);
-
- NFAVertex v_2 = add_vertex(g);
- g[v_2].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX);
-
- NFAVertex v_3 = add_vertex(g);
- g[v_3].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX);
-
- NFAVertex v_4 = add_vertex(g);
- g[v_4].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX);
-
- NFAVertex v_c = add_vertex(g);
- g[v_c].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_c].char_reach = UTF_CONT_CR;
- add_edge(v_c, g.accept, g[orig], g);
-
- add_edge(v_2, v_c, g);
-
- NFAVertex v_3c = add_vertex(g);
- g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_3c].char_reach = UTF_CONT_CR;
- add_edge(v_3c, v_c, g);
- add_edge(v_3, v_3c, g);
-
- NFAVertex v_4c = add_vertex(g);
- g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_4c].char_reach = UTF_CONT_CR;
- add_edge(v_4c, v_3c, g);
- add_edge(v_4, v_4c, g);
-
- g[v_a].reports.insert(rep);
- g[v_c].reports.insert(rep);
-
- add_edge(g.start, v_a, g);
- add_edge(g.startDs, v_a, g);
- add_edge(g.start, v_2, g);
- add_edge(g.startDs, v_2, g);
- add_edge(g.start, v_3, g);
- add_edge(g.startDs, v_3, g);
- add_edge(g.start, v_4, g);
- add_edge(g.startDs, v_4, g);
- remove_edge(orig, g);
+ ReportID rep = rm.getInternalId(ir);
+
+ NFAVertex v_a = add_vertex(g);
+ g[v_a].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_a].char_reach = UTF_ASCII_CR;
+ add_edge(v_a, g.accept, g[orig], g);
+
+ NFAVertex v_2 = add_vertex(g);
+ g[v_2].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX);
+
+ NFAVertex v_3 = add_vertex(g);
+ g[v_3].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX);
+
+ NFAVertex v_4 = add_vertex(g);
+ g[v_4].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX);
+
+ NFAVertex v_c = add_vertex(g);
+ g[v_c].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_c].char_reach = UTF_CONT_CR;
+ add_edge(v_c, g.accept, g[orig], g);
+
+ add_edge(v_2, v_c, g);
+
+ NFAVertex v_3c = add_vertex(g);
+ g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_3c].char_reach = UTF_CONT_CR;
+ add_edge(v_3c, v_c, g);
+ add_edge(v_3, v_3c, g);
+
+ NFAVertex v_4c = add_vertex(g);
+ g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_4c].char_reach = UTF_CONT_CR;
+ add_edge(v_4c, v_3c, g);
+ add_edge(v_4, v_4c, g);
+
+ g[v_a].reports.insert(rep);
+ g[v_c].reports.insert(rep);
+
+ add_edge(g.start, v_a, g);
+ add_edge(g.startDs, v_a, g);
+ add_edge(g.start, v_2, g);
+ add_edge(g.startDs, v_2, g);
+ add_edge(g.start, v_3, g);
+ add_edge(g.startDs, v_3, g);
+ add_edge(g.start, v_4, g);
+ add_edge(g.startDs, v_4, g);
+ remove_edge(orig, g);
renumber_edges(g);
clearReports(g);
- }
-}
-
-} // namespace ue2
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h
index 2534f57147..edbc3d5d80 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h
@@ -1,50 +1,50 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Resolve special assert vertices.
- */
-
-#ifndef NG_ASSERTS_H
-#define NG_ASSERTS_H
-
-namespace ue2 {
-
-struct BoundaryReports;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Resolve special assert vertices.
+ */
+
+#ifndef NG_ASSERTS_H
+#define NG_ASSERTS_H
+
+namespace ue2 {
+
+struct BoundaryReports;
class ExpressionInfo;
class NGHolder;
-class ReportManager;
-
+class ReportManager;
+
void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
-
+
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
-
-} // namespace ue2
-
-#endif // NG_ASSERTS_H
+
+} // namespace ue2
+
+#endif // NG_ASSERTS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp
index 60f667f491..33edad8bef 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp
@@ -1,278 +1,278 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief: NFA Graph Builder: used by Glushkov construction to construct an
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGHolder from a parsed expression.
- */
+ */
#include "ng_builder.h"
-#include "grey.h"
-#include "ng.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "compiler/compiler.h" // for ParsedExpression
-#include "util/compile_error.h"
-#include "util/make_unique.h"
-
-#include <cassert>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** Concrete implementation of NFABuilder interface. */
-class NFABuilderImpl : public NFABuilder {
-public:
- NFABuilderImpl(ReportManager &rm, const Grey &grey,
- const ParsedExpression &expr);
-
- ~NFABuilderImpl() override;
-
- Position makePositions(size_t nPositions) override;
- Position getStart() const override;
- Position getStartDotStar() const override;
- Position getAccept() const override;
- Position getAcceptEOD() const override;
-
- bool isSpecialState(Position p) const override;
-
- void setNodeReportID(Position position, int offsetAdjust) override;
- void addCharReach(Position position, const CharReach &cr) override;
- void setAssertFlag(Position position, u32 flag) override;
- u32 getAssertFlag(Position position) override;
-
- void addVertex(Position p) override;
-
- void addEdge(Position start, Position end) override;
-
- bool hasEdge(Position start, Position end) const override;
-
- u32 numVertices() const override { return vertIdx; }
-
- void cloneRegion(Position first, Position last,
- unsigned posOffset) override;
-
+#include "grey.h"
+#include "ng.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "compiler/compiler.h" // for ParsedExpression
+#include "util/compile_error.h"
+#include "util/make_unique.h"
+
+#include <cassert>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** Concrete implementation of NFABuilder interface. */
+class NFABuilderImpl : public NFABuilder {
+public:
+ NFABuilderImpl(ReportManager &rm, const Grey &grey,
+ const ParsedExpression &expr);
+
+ ~NFABuilderImpl() override;
+
+ Position makePositions(size_t nPositions) override;
+ Position getStart() const override;
+ Position getStartDotStar() const override;
+ Position getAccept() const override;
+ Position getAcceptEOD() const override;
+
+ bool isSpecialState(Position p) const override;
+
+ void setNodeReportID(Position position, int offsetAdjust) override;
+ void addCharReach(Position position, const CharReach &cr) override;
+ void setAssertFlag(Position position, u32 flag) override;
+ u32 getAssertFlag(Position position) override;
+
+ void addVertex(Position p) override;
+
+ void addEdge(Position start, Position end) override;
+
+ bool hasEdge(Position start, Position end) const override;
+
+ u32 numVertices() const override { return vertIdx; }
+
+ void cloneRegion(Position first, Position last,
+ unsigned posOffset) override;
+
BuiltExpression getGraph() override;
-
-private:
- /** fetch a vertex given its Position ID. */
- NFAVertex getVertex(Position pos) const;
-
- /** \brief Internal convenience function to add an edge (u, v). */
- pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v);
-
- /** \brief We use the ReportManager to hand out new internal reports. */
- ReportManager &rm;
-
- /** \brief Greybox: used for resource limits. */
- const Grey &grey;
-
+
+private:
+ /** fetch a vertex given its Position ID. */
+ NFAVertex getVertex(Position pos) const;
+
+ /** \brief Internal convenience function to add an edge (u, v). */
+ pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v);
+
+ /** \brief We use the ReportManager to hand out new internal reports. */
+ ReportManager &rm;
+
+ /** \brief Greybox: used for resource limits. */
+ const Grey &grey;
+
/** \brief Underlying graph. */
unique_ptr<NGHolder> graph;
-
+
/** \brief Underlying expression info. */
ExpressionInfo expr;
- /** \brief mapping from position to vertex. Use \ref getVertex for access.
- * */
- vector<NFAVertex> id2vertex;
-
- /** \brief Index of next vertex. */
- u32 vertIdx;
-}; // class NFABuilderImpl
-
-} // namespace
-
-NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
+ /** \brief mapping from position to vertex. Use \ref getVertex for access.
+ * */
+ vector<NFAVertex> id2vertex;
+
+ /** \brief Index of next vertex. */
+ u32 vertIdx;
+}; // class NFABuilderImpl
+
+} // namespace
+
+NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
const ParsedExpression &parsed)
: rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
expr(parsed.expr), vertIdx(N_SPECIALS) {
-
- // Reserve space for a reasonably-sized NFA
- id2vertex.reserve(64);
- id2vertex.resize(N_SPECIALS);
- id2vertex[NODE_START] = graph->start;
- id2vertex[NODE_START_DOTSTAR] = graph->startDs;
- id2vertex[NODE_ACCEPT] = graph->accept;
- id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod;
-}
-
-NFABuilderImpl::~NFABuilderImpl() {
- // empty
-}
-
-NFAVertex NFABuilderImpl::getVertex(Position pos) const {
- assert(id2vertex.size() >= pos);
- const NFAVertex v = id2vertex[pos];
+
+ // Reserve space for a reasonably-sized NFA
+ id2vertex.reserve(64);
+ id2vertex.resize(N_SPECIALS);
+ id2vertex[NODE_START] = graph->start;
+ id2vertex[NODE_START_DOTSTAR] = graph->startDs;
+ id2vertex[NODE_ACCEPT] = graph->accept;
+ id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod;
+}
+
+NFABuilderImpl::~NFABuilderImpl() {
+ // empty
+}
+
+NFAVertex NFABuilderImpl::getVertex(Position pos) const {
+ assert(id2vertex.size() >= pos);
+ const NFAVertex v = id2vertex[pos];
assert(v != NGHolder::null_vertex());
assert((*graph)[v].index == pos);
- return v;
-}
-
-void NFABuilderImpl::addVertex(Position pos) {
- // Enforce resource limit.
- if (pos > grey.limitGraphVertices) {
- throw CompileError("Pattern too large.");
- }
-
- NFAVertex v = add_vertex(*graph);
- if (id2vertex.size() <= pos) {
- id2vertex.resize(pos + 1);
- }
- id2vertex[pos] = v;
+ return v;
+}
+
+void NFABuilderImpl::addVertex(Position pos) {
+ // Enforce resource limit.
+ if (pos > grey.limitGraphVertices) {
+ throw CompileError("Pattern too large.");
+ }
+
+ NFAVertex v = add_vertex(*graph);
+ if (id2vertex.size() <= pos) {
+ id2vertex.resize(pos + 1);
+ }
+ id2vertex[pos] = v;
(*graph)[v].index = pos;
-}
-
+}
+
BuiltExpression NFABuilderImpl::getGraph() {
- DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
- num_vertices(*graph), num_edges(*graph));
-
- if (num_edges(*graph) > grey.limitGraphEdges) {
- throw CompileError("Pattern too large.");
- }
- if (num_vertices(*graph) > grey.limitGraphVertices) {
- throw CompileError("Pattern too large.");
- }
-
+ DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
+ num_vertices(*graph), num_edges(*graph));
+
+ if (num_edges(*graph) > grey.limitGraphEdges) {
+ throw CompileError("Pattern too large.");
+ }
+ if (num_vertices(*graph) > grey.limitGraphVertices) {
+ throw CompileError("Pattern too large.");
+ }
+
return { expr, move(graph) };
-}
-
-void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
+}
+
+void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
- DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
+ DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
pos, expr.report, offsetAdjust, ir.ekey);
-
- NFAVertex v = getVertex(pos);
- auto &reports = (*graph)[v].reports;
- reports.clear();
- reports.insert(rm.getInternalId(ir));
-}
-
-void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) {
- NFAVertex v = getVertex(pos);
+
+ NFAVertex v = getVertex(pos);
+ auto &reports = (*graph)[v].reports;
+ reports.clear();
+ reports.insert(rm.getInternalId(ir));
+}
+
+void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) {
+ NFAVertex v = getVertex(pos);
(*graph)[v].char_reach |= cr;
-}
-
-void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) {
- NFAVertex v = getVertex(pos);
+}
+
+void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) {
+ NFAVertex v = getVertex(pos);
(*graph)[v].assert_flags |= flag;
-}
-
-u32 NFABuilderImpl::getAssertFlag(Position pos) {
- NFAVertex v = getVertex(pos);
+}
+
+u32 NFABuilderImpl::getAssertFlag(Position pos) {
+ NFAVertex v = getVertex(pos);
return (*graph)[v].assert_flags;
-}
-
-pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) {
- // assert that the edge doesn't already exist
+}
+
+pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) {
+ // assert that the edge doesn't already exist
assert(edge(u, v, *graph).second == false);
-
+
return add_edge(u, v, *graph);
-}
-
-void NFABuilderImpl::addEdge(Position startPos, Position endPos) {
- DEBUG_PRINTF("%u -> %u\n", startPos, endPos);
- assert(startPos < vertIdx);
- assert(endPos < vertIdx);
-
- NFAVertex u = getVertex(startPos);
- NFAVertex v = getVertex(endPos);
-
- if ((u == graph->start || u == graph->startDs) && v == graph->startDs) {
- /* standard special -> special edges already exist */
+}
+
+void NFABuilderImpl::addEdge(Position startPos, Position endPos) {
+ DEBUG_PRINTF("%u -> %u\n", startPos, endPos);
+ assert(startPos < vertIdx);
+ assert(endPos < vertIdx);
+
+ NFAVertex u = getVertex(startPos);
+ NFAVertex v = getVertex(endPos);
+
+ if ((u == graph->start || u == graph->startDs) && v == graph->startDs) {
+ /* standard special -> special edges already exist */
assert(edge(u, v, *graph).second == true);
- return;
- }
-
+ return;
+ }
+
assert(edge(u, v, *graph).second == false);
- addEdge(u, v);
-}
-
-bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const {
+ addEdge(u, v);
+}
+
+bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const {
return edge(getVertex(startPos), getVertex(endPos), *graph).second;
-}
-
-Position NFABuilderImpl::getStart() const {
- return NODE_START;
-}
-
-Position NFABuilderImpl::getStartDotStar() const {
- return NODE_START_DOTSTAR;
-}
-
-Position NFABuilderImpl::getAccept() const {
- return NODE_ACCEPT;
-}
-
-Position NFABuilderImpl::getAcceptEOD() const {
- return NODE_ACCEPT_EOD;
-}
-
-bool NFABuilderImpl::isSpecialState(Position p) const {
- return (p == NODE_START || p == NODE_START_DOTSTAR ||
- p == NODE_ACCEPT || p == NODE_ACCEPT_EOD);
-}
-
-Position NFABuilderImpl::makePositions(size_t nPositions) {
- Position base = vertIdx;
- for (size_t i = 0; i < nPositions; i++) {
- addVertex(vertIdx++);
- }
- DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base);
- return base;
-}
-
-void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) {
+}
+
+Position NFABuilderImpl::getStart() const {
+ return NODE_START;
+}
+
+Position NFABuilderImpl::getStartDotStar() const {
+ return NODE_START_DOTSTAR;
+}
+
+Position NFABuilderImpl::getAccept() const {
+ return NODE_ACCEPT;
+}
+
+Position NFABuilderImpl::getAcceptEOD() const {
+ return NODE_ACCEPT_EOD;
+}
+
+bool NFABuilderImpl::isSpecialState(Position p) const {
+ return (p == NODE_START || p == NODE_START_DOTSTAR ||
+ p == NODE_ACCEPT || p == NODE_ACCEPT_EOD);
+}
+
+Position NFABuilderImpl::makePositions(size_t nPositions) {
+ Position base = vertIdx;
+ for (size_t i = 0; i < nPositions; i++) {
+ addVertex(vertIdx++);
+ }
+ DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base);
+ return base;
+}
+
+void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) {
NGHolder &g = *graph;
- assert(posOffset > 0);
-
- // walk the nodes between first and last and copy their vertex properties
- DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last,
- posOffset);
- for (Position i = first; i <= last; ++i) {
- NFAVertex orig = getVertex(i);
- Position destIdx = i + posOffset;
- assert(destIdx < vertIdx);
- NFAVertex dest = getVertex(destIdx);
- g[dest] = g[orig]; // all properties
- g[dest].index = destIdx;
- }
-}
-
-unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc,
- const ParsedExpression &expr) {
- return ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr);
-}
-
-NFABuilder::~NFABuilder() { }
-
-} // namespace ue2
+ assert(posOffset > 0);
+
+ // walk the nodes between first and last and copy their vertex properties
+ DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last,
+ posOffset);
+ for (Position i = first; i <= last; ++i) {
+ NFAVertex orig = getVertex(i);
+ Position destIdx = i + posOffset;
+ assert(destIdx < vertIdx);
+ NFAVertex dest = getVertex(destIdx);
+ g[dest] = g[orig]; // all properties
+ g[dest].index = destIdx;
+ }
+}
+
+unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc,
+ const ParsedExpression &expr) {
+ return ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr);
+}
+
+NFABuilder::~NFABuilder() { }
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h
index 9f71b62235..7158620e70 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief: NFA Graph Builder: used by Glushkov construction to construct an
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGHolder from a parsed expression.
- */
-
-#ifndef NG_BUILDER_H
-#define NG_BUILDER_H
-
-#include "ue2common.h"
-
-#include "parser/position.h"
+ */
+
+#ifndef NG_BUILDER_H
+#define NG_BUILDER_H
+
+#include "ue2common.h"
+
+#include "parser/position.h"
#include "util/noncopyable.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class CharReach;
-class ReportManager;
+
+#include <memory>
+
+namespace ue2 {
+
+class CharReach;
+class ReportManager;
struct BuiltExpression;
-struct CompileContext;
-
-class ParsedExpression;
-
-/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct
- * one. Used by GlushkovBuildState. */
+struct CompileContext;
+
+class ParsedExpression;
+
+/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct
+ * one. Used by GlushkovBuildState. */
class NFABuilder : noncopyable {
-public:
- virtual ~NFABuilder();
-
- virtual Position makePositions(size_t nPositions) = 0;
- virtual Position getStart() const = 0;
- virtual Position getStartDotStar() const = 0;
- virtual Position getAccept() const = 0;
- virtual Position getAcceptEOD() const = 0;
-
- virtual bool isSpecialState(Position p) const = 0;
-
- virtual void setNodeReportID(Position position, int offsetAdjust) = 0;
- virtual void addCharReach(Position position, const CharReach &cr) = 0;
-
- /* or-in vertex assertions */
- virtual void setAssertFlag(Position position, u32 flag) = 0;
- virtual u32 getAssertFlag(Position position) = 0;
-
- virtual void addVertex(Position p) = 0;
-
- virtual void addEdge(Position start, Position end) = 0;
-
- virtual bool hasEdge(Position start, Position end) const = 0;
-
- virtual u32 numVertices() const = 0;
-
- virtual void cloneRegion(Position first, Position last,
- unsigned posOffset) = 0;
-
- /**
+public:
+ virtual ~NFABuilder();
+
+ virtual Position makePositions(size_t nPositions) = 0;
+ virtual Position getStart() const = 0;
+ virtual Position getStartDotStar() const = 0;
+ virtual Position getAccept() const = 0;
+ virtual Position getAcceptEOD() const = 0;
+
+ virtual bool isSpecialState(Position p) const = 0;
+
+ virtual void setNodeReportID(Position position, int offsetAdjust) = 0;
+ virtual void addCharReach(Position position, const CharReach &cr) = 0;
+
+ /* or-in vertex assertions */
+ virtual void setAssertFlag(Position position, u32 flag) = 0;
+ virtual u32 getAssertFlag(Position position) = 0;
+
+ virtual void addVertex(Position p) = 0;
+
+ virtual void addEdge(Position start, Position end) = 0;
+
+ virtual bool hasEdge(Position start, Position end) const = 0;
+
+ virtual u32 numVertices() const = 0;
+
+ virtual void cloneRegion(Position first, Position last,
+ unsigned posOffset) = 0;
+
+ /**
* \brief Returns the built NGHolder graph and ExpressionInfo.
- * Note that this builder cannot be used after this call.
- */
+ * Note that this builder cannot be used after this call.
+ */
virtual BuiltExpression getGraph() = 0;
-};
-
-/** Construct a usable NFABuilder. */
-std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm,
- const CompileContext &cc,
- const ParsedExpression &expr);
-
-} // namespace ue2
-
-#endif
+};
+
+/** Construct a usable NFABuilder. */
+std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm,
+ const CompileContext &cc,
+ const ParsedExpression &expr);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp
index 3e9454eeed..3474ca9875 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp
@@ -1,232 +1,232 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Splits an NFA graph into its connected components.
- *
- * This pass takes a NGHolder and splits its graph into a set of connected
- * components, returning them as individual NGHolder graphs. For example, the
- * graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split
- * into four NGHolders, representing these four components:
- *
- * - /foo.*bar/
- * - /[a-z]{7,13}/
- * - /hatstand/
- * - /teakettle$/
- *
- * The pass operates by creating an undirected graph from the input graph, and
- * then using the BGL's connected_components algorithm to do the work, cloning
- * the identified components into their own graphs. A "shell" of vertices
- * is identified and removed first from the head and tail of the graph, in
- * order to handle cases where there is a common head/tail region.
- *
- * Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/,
- * are not split, as later optimisations will handle these cases efficiently.
- */
-#include "ng_calc_components.h"
-
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Splits an NFA graph into its connected components.
+ *
+ * This pass takes a NGHolder and splits its graph into a set of connected
+ * components, returning them as individual NGHolder graphs. For example, the
+ * graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split
+ * into four NGHolders, representing these four components:
+ *
+ * - /foo.*bar/
+ * - /[a-z]{7,13}/
+ * - /hatstand/
+ * - /teakettle$/
+ *
+ * The pass operates by creating an undirected graph from the input graph, and
+ * then using the BGL's connected_components algorithm to do the work, cloning
+ * the identified components into their own graphs. A "shell" of vertices
+ * is identified and removed first from the head and tail of the graph, in
+ * order to handle cases where there is a common head/tail region.
+ *
+ * Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/,
+ * are not split, as later optimisations will handle these cases efficiently.
+ */
+#include "ng_calc_components.h"
+
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
#include "grey.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
#include "util/graph_undirected.h"
-#include "util/make_unique.h"
-
-#include <map>
-#include <vector>
-
-#include <boost/graph/connected_components.hpp>
+#include "util/make_unique.h"
+
+#include <map>
+#include <vector>
+
+#include <boost/graph/connected_components.hpp>
#include <boost/graph/filtered_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3;
-static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3;
-
-/**
- * \brief Returns true if the whole graph is just an alternation of character
- * classes.
- */
-bool isAlternationOfClasses(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- // Vertex must have in edges from starts only.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_any_start(u, g)) {
- return false;
- }
- }
- // Vertex must have out edges to accepts only.
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_any_accept(w, g)) {
- return false;
- }
- }
- }
-
- DEBUG_PRINTF("alternation of single states, treating as one comp\n");
- return true;
-}
-
-/**
- * \brief Compute initial max distance to v from start (i.e. ignoring its own
- * self-loop).
- */
-static
-depth max_dist_from_start(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- NFAVertex v) {
- depth max_depth(0);
- for (const auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue;
- }
- const auto &d = depths.at(g[u].index);
- if (d.fromStart.max.is_reachable()) {
- max_depth = max(max_depth, d.fromStart.max);
- }
- if (d.fromStartDotStar.max.is_reachable()) {
- max_depth = max(max_depth, d.fromStartDotStar.max);
- }
- }
- return max_depth + 1;
-}
-
-/**
- * \brief Compute initial max depth from v from accept (i.e. ignoring its own
- * self-loop).
- */
-static
-depth max_dist_to_accept(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- NFAVertex v) {
- depth max_depth(0);
- for (const auto w : adjacent_vertices_range(v, g)) {
- if (w == v) {
- continue;
- }
- const auto &d = depths.at(g[w].index);
- if (d.toAccept.max.is_reachable()) {
- max_depth = max(max_depth, d.toAccept.max);
- }
- if (d.toAcceptEod.max.is_reachable()) {
- max_depth = max(max_depth, d.toAcceptEod.max);
- }
- }
- return max_depth + 1;
-}
-
-static
-flat_set<NFAVertex> findHeadShell(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- const depth &max_dist) {
- flat_set<NFAVertex> shell;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (max_dist_from_start(g, depths, v) <= max_dist) {
- shell.insert(v);
- }
- }
-
- for (UNUSED auto v : shell) {
+
+using namespace std;
+
+namespace ue2 {
+
+static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3;
+static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3;
+
+/**
+ * \brief Returns true if the whole graph is just an alternation of character
+ * classes.
+ */
+bool isAlternationOfClasses(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ // Vertex must have in edges from starts only.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_any_start(u, g)) {
+ return false;
+ }
+ }
+ // Vertex must have out edges to accepts only.
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_any_accept(w, g)) {
+ return false;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("alternation of single states, treating as one comp\n");
+ return true;
+}
+
+/**
+ * \brief Compute initial max distance to v from start (i.e. ignoring its own
+ * self-loop).
+ */
+static
+depth max_dist_from_start(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ NFAVertex v) {
+ depth max_depth(0);
+ for (const auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue;
+ }
+ const auto &d = depths.at(g[u].index);
+ if (d.fromStart.max.is_reachable()) {
+ max_depth = max(max_depth, d.fromStart.max);
+ }
+ if (d.fromStartDotStar.max.is_reachable()) {
+ max_depth = max(max_depth, d.fromStartDotStar.max);
+ }
+ }
+ return max_depth + 1;
+}
+
+/**
+ * \brief Compute initial max depth from v from accept (i.e. ignoring its own
+ * self-loop).
+ */
+static
+depth max_dist_to_accept(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ NFAVertex v) {
+ depth max_depth(0);
+ for (const auto w : adjacent_vertices_range(v, g)) {
+ if (w == v) {
+ continue;
+ }
+ const auto &d = depths.at(g[w].index);
+ if (d.toAccept.max.is_reachable()) {
+ max_depth = max(max_depth, d.toAccept.max);
+ }
+ if (d.toAcceptEod.max.is_reachable()) {
+ max_depth = max(max_depth, d.toAcceptEod.max);
+ }
+ }
+ return max_depth + 1;
+}
+
+static
+flat_set<NFAVertex> findHeadShell(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ const depth &max_dist) {
+ flat_set<NFAVertex> shell;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (max_dist_from_start(g, depths, v) <= max_dist) {
+ shell.insert(v);
+ }
+ }
+
+ for (UNUSED auto v : shell) {
DEBUG_PRINTF("shell: %zu\n", g[v].index);
- }
-
- return shell;
-}
-
-static
-flat_set<NFAVertex> findTailShell(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- const depth &max_dist) {
- flat_set<NFAVertex> shell;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (max_dist_to_accept(g, depths, v) <= max_dist) {
- shell.insert(v);
- }
- }
-
- for (UNUSED auto v : shell) {
+ }
+
+ return shell;
+}
+
+static
+flat_set<NFAVertex> findTailShell(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ const depth &max_dist) {
+ flat_set<NFAVertex> shell;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (max_dist_to_accept(g, depths, v) <= max_dist) {
+ shell.insert(v);
+ }
+ }
+
+ for (UNUSED auto v : shell) {
DEBUG_PRINTF("shell: %zu\n", g[v].index);
- }
-
- return shell;
-}
-
-static
-vector<NFAEdge> findShellEdges(const NGHolder &g,
- const flat_set<NFAVertex> &head_shell,
- const flat_set<NFAVertex> &tail_shell) {
- vector<NFAEdge> shell_edges;
-
- for (const auto &e : edges_range(g)) {
- auto u = source(e, g);
- auto v = target(e, g);
-
- if (v == g.startDs && is_any_start(u, g)) {
- continue;
- }
- if (u == g.accept && v == g.acceptEod) {
- continue;
- }
-
- if ((is_special(u, g) || contains(head_shell, u)) &&
- (is_special(v, g) || contains(tail_shell, v))) {
+ }
+
+ return shell;
+}
+
+static
+vector<NFAEdge> findShellEdges(const NGHolder &g,
+ const flat_set<NFAVertex> &head_shell,
+ const flat_set<NFAVertex> &tail_shell) {
+ vector<NFAEdge> shell_edges;
+
+ for (const auto &e : edges_range(g)) {
+ auto u = source(e, g);
+ auto v = target(e, g);
+
+ if (v == g.startDs && is_any_start(u, g)) {
+ continue;
+ }
+ if (u == g.accept && v == g.acceptEod) {
+ continue;
+ }
+
+ if ((is_special(u, g) || contains(head_shell, u)) &&
+ (is_special(v, g) || contains(tail_shell, v))) {
DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index,
g[v].index);
- shell_edges.push_back(e);
- }
- }
-
- return shell_edges;
-}
-
+ shell_edges.push_back(e);
+ }
+ }
+
+ return shell_edges;
+}
+
template<typename GetAdjRange>
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell,
GetAdjRange adj_range_func) {
if (shell.empty()) {
DEBUG_PRINTF("no shell\n");
return false;
- }
+ }
NFAVertex exit_vertex = NGHolder::null_vertex();
for (auto u : shell) {
@@ -246,62 +246,62 @@ bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell,
}
return true;
-}
-
+}
+
/**
* True if all edges out of vertices in the head shell lead to at most a single
* outside vertex, or the inverse for the tail shell.
*/
-static
+static
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell,
const flat_set<NFAVertex> &tail_shell) {
if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) {
DEBUG_PRINTF("head shell has only one path through it\n");
return true;
- }
+ }
if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) {
DEBUG_PRINTF("tail shell has only one path into it\n");
return true;
}
return false;
-}
-
-/**
- * Common code called by calc- and recalc- below. Splits the given holder into
- * one or more connected components, adding them to the comps deque.
- */
-static
+}
+
+/**
+ * Common code called by calc- and recalc- below. Splits the given holder into
+ * one or more connected components, adding them to the comps deque.
+ */
+static
void splitIntoComponents(unique_ptr<NGHolder> g,
deque<unique_ptr<NGHolder>> &comps,
- const depth &max_head_depth,
- const depth &max_tail_depth, bool *shell_comp) {
+ const depth &max_head_depth,
+ const depth &max_tail_depth, bool *shell_comp) {
DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g));
-
- assert(shell_comp);
- *shell_comp = false;
-
- // Compute "shell" head and tail subgraphs.
+
+ assert(shell_comp);
+ *shell_comp = false;
+
+ // Compute "shell" head and tail subgraphs.
auto depths = calcBidiDepths(*g);
auto head_shell = findHeadShell(*g, depths, max_head_depth);
auto tail_shell = findTailShell(*g, depths, max_tail_depth);
- for (auto v : head_shell) {
- tail_shell.erase(v);
- }
-
+ for (auto v : head_shell) {
+ tail_shell.erase(v);
+ }
+
if (head_shell.size() + tail_shell.size() + N_SPECIALS >=
num_vertices(*g)) {
- DEBUG_PRINTF("all in shell component\n");
+ DEBUG_PRINTF("all in shell component\n");
comps.push_back(std::move(g));
- *shell_comp = true;
- return;
- }
-
+ *shell_comp = true;
+ return;
+ }
+
// Find edges connecting the head and tail shells directly.
vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell);
-
- DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
- head_shell.size(), tail_shell.size(), shell_edges.size());
-
+
+ DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
+ head_shell.size(), tail_shell.size(), shell_edges.size());
+
// If there are no shell edges and only one path out of the head shell or
// into the tail shell, we aren't going to find more than one component.
if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) {
@@ -309,152 +309,152 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
comps.push_back(std::move(g));
return;
}
-
+
auto ug = make_undirected_graph(*g);
-
+
// Filter specials and shell vertices from undirected graph.
unordered_set<NFAVertex> bad_vertices(
{g->start, g->startDs, g->accept, g->acceptEod});
bad_vertices.insert(head_shell.begin(), head_shell.end());
bad_vertices.insert(tail_shell.begin(), tail_shell.end());
-
+
auto filtered_ug = boost::make_filtered_graph(
ug, boost::keep_all(), make_bad_vertex_filter(&bad_vertices));
-
+
// Actually run the connected components algorithm.
map<NFAVertex, u32> split_components;
- const u32 num = connected_components(
+ const u32 num = connected_components(
filtered_ug, boost::make_assoc_property_map(split_components));
-
- assert(num > 0);
- if (num == 1 && shell_edges.empty()) {
- DEBUG_PRINTF("single component\n");
+
+ assert(num > 0);
+ if (num == 1 && shell_edges.empty()) {
+ DEBUG_PRINTF("single component\n");
comps.push_back(std::move(g));
- return;
- }
-
- DEBUG_PRINTF("broke graph into %u components\n", num);
-
- vector<deque<NFAVertex>> verts(num);
-
- // Collect vertex lists per component.
- for (const auto &m : split_components) {
+ return;
+ }
+
+ DEBUG_PRINTF("broke graph into %u components\n", num);
+
+ vector<deque<NFAVertex>> verts(num);
+
+ // Collect vertex lists per component.
+ for (const auto &m : split_components) {
NFAVertex v = m.first;
- u32 c = m.second;
- verts[c].push_back(v);
+ u32 c = m.second;
+ verts[c].push_back(v);
DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c);
- }
-
+ }
+
unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
- for (auto &vv : verts) {
- // Shells are in every component.
- vv.insert(vv.end(), begin(head_shell), end(head_shell));
- vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
-
+ for (auto &vv : verts) {
+ // Shells are in every component.
+ vv.insert(vv.end(), begin(head_shell), end(head_shell));
+ vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
+
/* Sort for determinism. Still required as NFAUndirectedVertex have
* no deterministic ordering (split_components map). */
sort(begin(vv), end(vv));
-
- auto gc = ue2::make_unique<NGHolder>();
- v_map.clear();
+
+ auto gc = ue2::make_unique<NGHolder>();
+ v_map.clear();
fillHolder(gc.get(), *g, vv, &v_map);
-
- // Remove shell edges, which will get their own component.
- for (const auto &e : shell_edges) {
+
+ // Remove shell edges, which will get their own component.
+ for (const auto &e : shell_edges) {
auto cu = v_map.at(source(e, *g));
auto cv = v_map.at(target(e, *g));
- assert(edge(cu, cv, *gc).second);
- remove_edge(cu, cv, *gc);
- }
-
- pruneUseless(*gc);
- DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(),
- num_vertices(*gc));
- comps.push_back(move(gc));
- }
-
- // Another component to handle the direct shell-to-shell edges.
- if (!shell_edges.empty()) {
- deque<NFAVertex> vv;
- vv.insert(vv.end(), begin(head_shell), end(head_shell));
- vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
-
- auto gc = ue2::make_unique<NGHolder>();
- v_map.clear();
+ assert(edge(cu, cv, *gc).second);
+ remove_edge(cu, cv, *gc);
+ }
+
+ pruneUseless(*gc);
+ DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(),
+ num_vertices(*gc));
+ comps.push_back(move(gc));
+ }
+
+ // Another component to handle the direct shell-to-shell edges.
+ if (!shell_edges.empty()) {
+ deque<NFAVertex> vv;
+ vv.insert(vv.end(), begin(head_shell), end(head_shell));
+ vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
+
+ auto gc = ue2::make_unique<NGHolder>();
+ v_map.clear();
fillHolder(gc.get(), *g, vv, &v_map);
-
- pruneUseless(*gc);
- DEBUG_PRINTF("shell edge component %zu has %zu vertices\n",
- comps.size(), num_vertices(*gc));
- comps.push_back(move(gc));
- *shell_comp = true;
- }
-
+
+ pruneUseless(*gc);
+ DEBUG_PRINTF("shell edge component %zu has %zu vertices\n",
+ comps.size(), num_vertices(*gc));
+ comps.push_back(move(gc));
+ *shell_comp = true;
+ }
+
// Ensure that only vertices with accept edges have reports.
for (auto &gc : comps) {
assert(gc);
clearReports(*gc);
}
- // We should never produce empty component graphs.
- assert(all_of(begin(comps), end(comps),
- [](const unique_ptr<NGHolder> &g_comp) {
- return num_vertices(*g_comp) > N_SPECIALS;
- }));
-}
-
+ // We should never produce empty component graphs.
+ assert(all_of(begin(comps), end(comps),
+ [](const unique_ptr<NGHolder> &g_comp) {
+ return num_vertices(*g_comp) > N_SPECIALS;
+ }));
+}
+
deque<unique_ptr<NGHolder>> calcComponents(unique_ptr<NGHolder> g,
const Grey &grey) {
- deque<unique_ptr<NGHolder>> comps;
-
- // For trivial cases, we needn't bother running the full
- // connected_components algorithm.
+ deque<unique_ptr<NGHolder>> comps;
+
+ // For trivial cases, we needn't bother running the full
+ // connected_components algorithm.
if (!grey.calcComponents || isAlternationOfClasses(*g)) {
comps.push_back(std::move(g));
- return comps;
- }
-
- bool shell_comp = false;
+ return comps;
+ }
+
+ bool shell_comp = false;
splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH),
depth(MAX_TAIL_SHELL_DEPTH), &shell_comp);
-
- if (shell_comp) {
- DEBUG_PRINTF("re-running on shell comp\n");
- assert(!comps.empty());
+
+ if (shell_comp) {
+ DEBUG_PRINTF("re-running on shell comp\n");
+ assert(!comps.empty());
auto sc = std::move(comps.back());
- comps.pop_back();
+ comps.pop_back();
splitIntoComponents(std::move(sc), comps, depth(0), depth(0),
&shell_comp);
- }
-
- DEBUG_PRINTF("finished; split into %zu components\n", comps.size());
- return comps;
-}
-
+ }
+
+ DEBUG_PRINTF("finished; split into %zu components\n", comps.size());
+ return comps;
+}
+
void recalcComponents(deque<unique_ptr<NGHolder>> &comps, const Grey &grey) {
if (!grey.calcComponents) {
return;
}
- deque<unique_ptr<NGHolder>> out;
-
- for (auto &gc : comps) {
- if (!gc) {
- continue; // graph has been consumed already.
- }
-
- if (isAlternationOfClasses(*gc)) {
+ deque<unique_ptr<NGHolder>> out;
+
+ for (auto &gc : comps) {
+ if (!gc) {
+ continue; // graph has been consumed already.
+ }
+
+ if (isAlternationOfClasses(*gc)) {
out.push_back(std::move(gc));
- continue;
- }
-
+ continue;
+ }
+
auto gc_comps = calcComponents(std::move(gc), grey);
out.insert(end(out), std::make_move_iterator(begin(gc_comps)),
std::make_move_iterator(end(gc_comps)));
- }
-
- // Replace comps with our recalculated list.
- comps.swap(out);
-}
-
-} // namespace ue2
+ }
+
+ // Replace comps with our recalculated list.
+ comps.swap(out);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h
index 1bcdc5f81e..3c9cc08c24 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Splits an NFA graph into its connected components.
- */
-
-#ifndef NG_CALC_COMPONENTS_H
-#define NG_CALC_COMPONENTS_H
-
-#include <deque>
-#include <memory>
-
-namespace ue2 {
-
-class NGHolder;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Splits an NFA graph into its connected components.
+ */
+
+#ifndef NG_CALC_COMPONENTS_H
+#define NG_CALC_COMPONENTS_H
+
+#include <deque>
+#include <memory>
+
+namespace ue2 {
+
+class NGHolder;
struct Grey;
-
-bool isAlternationOfClasses(const NGHolder &g);
-
+
+bool isAlternationOfClasses(const NGHolder &g);
+
std::deque<std::unique_ptr<NGHolder>>
calcComponents(std::unique_ptr<NGHolder> g, const Grey &grey);
-
+
void recalcComponents(std::deque<std::unique_ptr<NGHolder>> &comps,
const Grey &grey);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp
index 0b24bf07a8..8d84acfd9e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp
@@ -1,213 +1,213 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
- * leading to a cyclic repeat.
- *
- * This is a graph reduction pass intended to remove vertices that are
- * redundant because they lead solely to a cyclic vertex with a superset of
- * their character reachability. For example, in this pattern:
- *
- * /(abc|def|abcghi).*0123/s
- *
- * The vertices for 'ghi' can be removed due to the presence of the dot-star
- * repeat.
- *
- * Algorithm:
- *
- * for each cyclic vertex V:
- * for each proper predecessor U of V:
- * let S be the set of successors of U that are successors of V
- * (including V itself)
- * for each successor W of U not in S:
- * perform a DFS forward from W, stopping exploration when a vertex
- * in S is encountered;
- * if a vertex with reach not in reach(V) or an accept is encountered:
- * fail and continue to the next W.
- * else:
- * remove (U, W)
- *
- * NOTE: the following code is templated not just for fun, but so that we can
- * run this analysis both forward and in reverse over the graph.
- */
-#include "ng_cyclic_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
+ * leading to a cyclic repeat.
+ *
+ * This is a graph reduction pass intended to remove vertices that are
+ * redundant because they lead solely to a cyclic vertex with a superset of
+ * their character reachability. For example, in this pattern:
+ *
+ * /(abc|def|abcghi).*0123/s
+ *
+ * The vertices for 'ghi' can be removed due to the presence of the dot-star
+ * repeat.
+ *
+ * Algorithm:
+ *
+ * for each cyclic vertex V:
+ * for each proper predecessor U of V:
+ * let S be the set of successors of U that are successors of V
+ * (including V itself)
+ * for each successor W of U not in S:
+ * perform a DFS forward from W, stopping exploration when a vertex
+ * in S is encountered;
+ * if a vertex with reach not in reach(V) or an accept is encountered:
+ * fail and continue to the next W.
+ * else:
+ * remove (U, W)
+ *
+ * NOTE: the following code is templated not just for fun, but so that we can
+ * run this analysis both forward and in reverse over the graph.
+ */
+#include "ng_cyclic_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
+
#include <algorithm>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::reverse_graph;
-
-namespace ue2 {
-
-namespace {
-
-// Terminator function for depth first traversal, tells us not to explore
-// beyond vertices in set S.
-template<class Vertex, class Graph>
-class VertexInSet {
- public:
- explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {}
- bool operator()(const Vertex &v, const Graph&) const {
- return contains(verts, v);
- }
-
- private:
- const flat_set<Vertex> &verts;
-};
-
-struct SearchFailed {};
-
-// Visitor for depth first traversal, throws an error if we encounter a vertex
-// with bad reach or a report.
-class SearchVisitor : public boost::default_dfs_visitor {
- public:
- explicit SearchVisitor(const CharReach &r) : cr(r) {}
-
- template<class Vertex, class Graph>
- void discover_vertex(const Vertex &v, const Graph &g) const {
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::reverse_graph;
+
+namespace ue2 {
+
+namespace {
+
+// Terminator function for depth first traversal, tells us not to explore
+// beyond vertices in set S.
+template<class Vertex, class Graph>
+class VertexInSet {
+ public:
+ explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {}
+ bool operator()(const Vertex &v, const Graph&) const {
+ return contains(verts, v);
+ }
+
+ private:
+ const flat_set<Vertex> &verts;
+};
+
+struct SearchFailed {};
+
+// Visitor for depth first traversal, throws an error if we encounter a vertex
+// with bad reach or a report.
+class SearchVisitor : public boost::default_dfs_visitor {
+ public:
+ explicit SearchVisitor(const CharReach &r) : cr(r) {}
+
+ template<class Vertex, class Graph>
+ void discover_vertex(const Vertex &v, const Graph &g) const {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- if (is_special(v, g)) {
- DEBUG_PRINTF("start or accept\n");
- throw SearchFailed();
- }
-
- if (g[v].assert_flags) {
- DEBUG_PRINTF("assert flags\n");
- throw SearchFailed();
- }
-
- const CharReach &vcr = g[v].char_reach;
- if (vcr != (vcr & cr)) {
- DEBUG_PRINTF("bad reach\n");
- throw SearchFailed();
- }
- }
-
- private:
- const CharReach &cr;
-};
-
-} // namespace
-
+ if (is_special(v, g)) {
+ DEBUG_PRINTF("start or accept\n");
+ throw SearchFailed();
+ }
+
+ if (g[v].assert_flags) {
+ DEBUG_PRINTF("assert flags\n");
+ throw SearchFailed();
+ }
+
+ const CharReach &vcr = g[v].char_reach;
+ if (vcr != (vcr & cr)) {
+ DEBUG_PRINTF("bad reach\n");
+ throw SearchFailed();
+ }
+ }
+
+ private:
+ const CharReach &cr;
+};
+
+} // namespace
+
template<class Graph, class ColorMap>
-static
-bool searchForward(const Graph &g, const CharReach &reach,
+static
+bool searchForward(const Graph &g, const CharReach &reach,
ColorMap &colours,
- const flat_set<typename Graph::vertex_descriptor> &s,
- typename Graph::vertex_descriptor w) {
+ const flat_set<typename Graph::vertex_descriptor> &s,
+ typename Graph::vertex_descriptor w) {
colours.fill(small_color::white);
- try {
+ try {
depth_first_visit(g, w, SearchVisitor(reach), colours,
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
} catch (SearchFailed &) {
- return false;
- }
-
- return true;
-}
-
-static
+ return false;
+ }
+
+ return true;
+}
+
+static
NFAEdge to_raw(const NFAEdge &e, const NGHolder &) {
- return e;
-}
-
-static
+ return e;
+}
+
+static
NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e,
const reverse_graph<NGHolder, NGHolder &> &g) {
return get(boost::edge_underlying, g, e);
-}
-
-/* returns true if we did stuff */
-template<class Graph>
-static
-bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
- NGHolder &raw) {
- bool did_stuff = false;
-
- const CharReach &reach = g[v].char_reach;
-
- typedef typename Graph::vertex_descriptor vertex_descriptor;
-
+}
+
+/* returns true if we did stuff */
+template<class Graph>
+static
+bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
+ NGHolder &raw) {
+ bool did_stuff = false;
+
+ const CharReach &reach = g[v].char_reach;
+
+ typedef typename Graph::vertex_descriptor vertex_descriptor;
+
// Colour map used for depth_first_visit().
auto colours = make_small_color_map(g);
- // precalc successors of v.
- flat_set<vertex_descriptor> succ_v;
- insert(&succ_v, adjacent_vertices(v, g));
-
- flat_set<vertex_descriptor> s;
-
- for (const auto &e : in_edges_range(v, g)) {
- vertex_descriptor u = source(e, g);
- if (u == v) {
- continue;
- }
- if (is_any_accept(u, g)) {
- continue;
- }
-
+ // precalc successors of v.
+ flat_set<vertex_descriptor> succ_v;
+ insert(&succ_v, adjacent_vertices(v, g));
+
+ flat_set<vertex_descriptor> s;
+
+ for (const auto &e : in_edges_range(v, g)) {
+ vertex_descriptor u = source(e, g);
+ if (u == v) {
+ continue;
+ }
+ if (is_any_accept(u, g)) {
+ continue;
+ }
+
DEBUG_PRINTF("- checking u %zu\n", g[u].index);
-
- // let s be intersection(succ(u), succ(v))
- s.clear();
- for (auto b : adjacent_vertices_range(u, g)) {
- if (contains(succ_v, b)) {
- s.insert(b);
- }
- }
-
- for (const auto &e_u : make_vector_from(out_edges(u, g))) {
- vertex_descriptor w = target(e_u, g);
- if (is_special(w, g) || contains(s, w)) {
- continue;
- }
-
- const CharReach &w_reach = g[w].char_reach;
- if (!w_reach.isSubsetOf(reach)) {
- continue;
- }
-
+
+ // let s be intersection(succ(u), succ(v))
+ s.clear();
+ for (auto b : adjacent_vertices_range(u, g)) {
+ if (contains(succ_v, b)) {
+ s.insert(b);
+ }
+ }
+
+ for (const auto &e_u : make_vector_from(out_edges(u, g))) {
+ vertex_descriptor w = target(e_u, g);
+ if (is_special(w, g) || contains(s, w)) {
+ continue;
+ }
+
+ const CharReach &w_reach = g[w].char_reach;
+ if (!w_reach.isSubsetOf(reach)) {
+ continue;
+ }
+
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
-
+
if (!searchForward(g, reach, colours, succ_v, w)) {
continue;
- }
+ }
DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index);
/* we are currently iterating over the in-edges of v, so it
@@ -215,50 +215,50 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
assert(w != v); /* as v is in s */
remove_edge(to_raw(e_u, g), raw);
did_stuff = true;
- }
- }
-
- return did_stuff;
-}
-
-template<class Graph>
-static
-bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
- bool did_stuff = false;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || !edge(v, v, g).second) {
- continue;
- }
-
+ }
+ }
+
+ return did_stuff;
+}
+
+template<class Graph>
+static
+bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
+ bool did_stuff = false;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || !edge(v, v, g).second) {
+ continue;
+ }
+
DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index);
- did_stuff |= removeCyclicPathRedundancy(g, v, raw);
- }
-
- return did_stuff;
-}
-
-bool removeCyclicPathRedundancy(NGHolder &g) {
+ did_stuff |= removeCyclicPathRedundancy(g, v, raw);
+ }
+
+ return did_stuff;
+}
+
+bool removeCyclicPathRedundancy(NGHolder &g) {
assert(hasCorrectlyNumberedVertices(g));
- // Forward pass.
+ // Forward pass.
bool f_changed = cyclicPathRedundancyPass(g, g);
- if (f_changed) {
- DEBUG_PRINTF("edges removed by forward pass\n");
- pruneUseless(g);
- }
-
- // Reverse pass.
- DEBUG_PRINTF("REVERSE PASS\n");
+ if (f_changed) {
+ DEBUG_PRINTF("edges removed by forward pass\n");
+ pruneUseless(g);
+ }
+
+ // Reverse pass.
+ DEBUG_PRINTF("REVERSE PASS\n");
typedef reverse_graph<NGHolder, NGHolder &> RevGraph;
RevGraph revg(g);
- bool r_changed = cyclicPathRedundancyPass(revg, g);
- if (r_changed) {
- DEBUG_PRINTF("edges removed by reverse pass\n");
- pruneUseless(g);
- }
-
- return f_changed || r_changed;
-}
-
-} // namespace ue2
+ bool r_changed = cyclicPathRedundancyPass(revg, g);
+ if (r_changed) {
+ DEBUG_PRINTF("edges removed by reverse pass\n");
+ pruneUseless(g);
+ }
+
+ return f_changed || r_changed;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h
index 3ce07c6688..9a83c49361 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h
@@ -1,45 +1,45 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
- * leading to a cyclic repeat.
- */
-
-#ifndef NG_CYCLIC_REDUNDANCY_H
-#define NG_CYCLIC_REDUNDANCY_H
-
-namespace ue2 {
-
-class NGHolder;
-
-bool removeCyclicPathRedundancy(NGHolder &g);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
+ * leading to a cyclic repeat.
+ */
+
+#ifndef NG_CYCLIC_REDUNDANCY_H
+#define NG_CYCLIC_REDUNDANCY_H
+
+namespace ue2 {
+
+class NGHolder;
+
+bool removeCyclicPathRedundancy(NGHolder &g);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp
index 6c90326ce4..e952ff445e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp
@@ -1,398 +1,398 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief NFA graph vertex depth calculations.
- */
-#include "ng_depth.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
+ * \brief NFA graph vertex depth calculations.
+ */
+#include "ng_depth.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
-#include <deque>
-#include <vector>
-
+
+#include <deque>
+#include <vector>
+
#include <boost/graph/breadth_first_search.hpp>
-#include <boost/graph/dag_shortest_paths.hpp>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/dag_shortest_paths.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/filtered_graph.hpp>
#include <boost/graph/property_maps/constant_property_map.hpp>
-#include <boost/graph/reverse_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
+#include <boost/graph/reverse_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
#include <boost/range/adaptor/reversed.hpp>
-
-using namespace std;
-using boost::filtered_graph;
+
+using namespace std;
+using boost::filtered_graph;
using boost::make_filtered_graph;
-using boost::make_constant_property;
-using boost::reverse_graph;
+using boost::make_constant_property;
+using boost::reverse_graph;
using boost::adaptors::reverse;
-
-namespace ue2 {
-
-namespace {
-
-/** Distance value used to indicate that the vertex can't be reached. */
-static constexpr int DIST_UNREACHABLE = INT_MAX;
-
-/**
- * Distance value used to indicate that the distance to a vertex is infinite
- * (for example, it's the max distance and there's a cycle in the path) or so
- * large that we should consider it effectively infinite.
- */
-static constexpr int DIST_INFINITY = INT_MAX - 1;
-
-//
-// Filters
-//
-
-template <class GraphT>
-struct NodeFilter {
- typedef typename GraphT::edge_descriptor EdgeT;
- NodeFilter() {} // BGL filters must be default-constructible.
- NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
- : bad(bad_in), g(g_in) { }
- bool operator()(const EdgeT &e) const {
- assert(g && bad);
-
- u32 src_idx = (*g)[source(e, *g)].index;
- u32 tar_idx = (*g)[target(e, *g)].index;
-
- if (tar_idx == NODE_START_DOTSTAR) {
- return false;
- }
-
- return !(*bad)[src_idx] && !(*bad)[tar_idx];
- }
-
-private:
- const vector<bool> *bad = nullptr;
- const GraphT *g = nullptr;
-};
-
-template <class GraphT>
-struct StartFilter {
- typedef typename GraphT::edge_descriptor EdgeT;
- StartFilter() {} // BGL filters must be default-constructible.
- explicit StartFilter(const GraphT *g_in) : g(g_in) { }
- bool operator()(const EdgeT &e) const {
- assert(g);
-
- u32 src_idx = (*g)[source(e, *g)].index;
- u32 tar_idx = (*g)[target(e, *g)].index;
-
- // Remove our stylised edges from anchored start to startDs.
- if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) {
- return false;
- }
- // Also remove the equivalent in the reversed direction.
- if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) {
- return false;
- }
- return true;
- }
-
-private:
- const GraphT *g = nullptr;
-};
-
-} // namespace
-
+
+namespace ue2 {
+
+namespace {
+
+/** Distance value used to indicate that the vertex can't be reached. */
+static constexpr int DIST_UNREACHABLE = INT_MAX;
+
+/**
+ * Distance value used to indicate that the distance to a vertex is infinite
+ * (for example, it's the max distance and there's a cycle in the path) or so
+ * large that we should consider it effectively infinite.
+ */
+static constexpr int DIST_INFINITY = INT_MAX - 1;
+
+//
+// Filters
+//
+
+template <class GraphT>
+struct NodeFilter {
+ typedef typename GraphT::edge_descriptor EdgeT;
+ NodeFilter() {} // BGL filters must be default-constructible.
+ NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
+ : bad(bad_in), g(g_in) { }
+ bool operator()(const EdgeT &e) const {
+ assert(g && bad);
+
+ u32 src_idx = (*g)[source(e, *g)].index;
+ u32 tar_idx = (*g)[target(e, *g)].index;
+
+ if (tar_idx == NODE_START_DOTSTAR) {
+ return false;
+ }
+
+ return !(*bad)[src_idx] && !(*bad)[tar_idx];
+ }
+
+private:
+ const vector<bool> *bad = nullptr;
+ const GraphT *g = nullptr;
+};
+
+template <class GraphT>
+struct StartFilter {
+ typedef typename GraphT::edge_descriptor EdgeT;
+ StartFilter() {} // BGL filters must be default-constructible.
+ explicit StartFilter(const GraphT *g_in) : g(g_in) { }
+ bool operator()(const EdgeT &e) const {
+ assert(g);
+
+ u32 src_idx = (*g)[source(e, *g)].index;
+ u32 tar_idx = (*g)[target(e, *g)].index;
+
+ // Remove our stylised edges from anchored start to startDs.
+ if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) {
+ return false;
+ }
+ // Also remove the equivalent in the reversed direction.
+ if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) {
+ return false;
+ }
+ return true;
+ }
+
+private:
+ const GraphT *g = nullptr;
+};
+
+} // namespace
+
template<class Graph>
-static
+static
vector<bool> findLoopReachable(const Graph &g,
const typename Graph::vertex_descriptor src) {
vector<bool> deadNodes(num_vertices(g));
-
+
using Edge = typename Graph::edge_descriptor;
using Vertex = typename Graph::vertex_descriptor;
using EdgeSet = set<Edge>;
- EdgeSet deadEdges;
- BackEdges<EdgeSet> be(deadEdges);
-
+ EdgeSet deadEdges;
+ BackEdges<EdgeSet> be(deadEdges);
+
auto colors = make_small_color_map(g);
-
+
depth_first_search(g, be, colors, src);
auto af = make_bad_edge_filter(&deadEdges);
auto acyclic_g = make_filtered_graph(g, af);
-
+
vector<Vertex> topoOrder; /* actually reverse topological order */
- topoOrder.reserve(deadNodes.size());
+ topoOrder.reserve(deadNodes.size());
topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors));
-
- for (const auto &e : deadEdges) {
+
+ for (const auto &e : deadEdges) {
size_t srcIdx = g[source(e, g)].index;
- if (srcIdx != NODE_START_DOTSTAR) {
- deadNodes[srcIdx] = true;
- }
- }
-
+ if (srcIdx != NODE_START_DOTSTAR) {
+ deadNodes[srcIdx] = true;
+ }
+ }
+
for (auto v : reverse(topoOrder)) {
- for (const auto &e : in_edges_range(v, g)) {
- if (deadNodes[g[source(e, g)].index]) {
- deadNodes[g[v].index] = true;
- break;
- }
- }
- }
+ for (const auto &e : in_edges_range(v, g)) {
+ if (deadNodes[g[source(e, g)].index]) {
+ deadNodes[g[v].index] = true;
+ break;
+ }
+ }
+ }
return deadNodes;
-}
-
-template <class GraphT>
-static
+}
+
+template <class GraphT>
+static
void calcDepthFromSource(const GraphT &g,
- typename GraphT::vertex_descriptor srcVertex,
+ typename GraphT::vertex_descriptor srcVertex,
const vector<bool> &deadNodes, vector<int> &dMin,
vector<int> &dMax) {
- typedef typename GraphT::edge_descriptor EdgeT;
-
+ typedef typename GraphT::edge_descriptor EdgeT;
+
const size_t numVerts = num_vertices(g);
-
- NodeFilter<GraphT> nf(&deadNodes, &g);
- StartFilter<GraphT> sf(&g);
-
- /* minimum distance needs to run on a graph with .*start unreachable
- * from start */
- typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph;
- const StartFilteredGraph mindist_g(g, sf);
-
- /* maximum distance needs to run on a graph without cycles & nodes
- * reachable from cycles */
- typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph;
- const NodeFilteredGraph maxdist_g(g, nf);
-
- // Record distance of each vertex from source using one of the following
- // algorithms.
-
- /* note: filtered graphs have same num_{vertices,edges} as base */
-
- dMin.assign(numVerts, DIST_UNREACHABLE);
- dMax.assign(numVerts, DIST_UNREACHABLE);
- dMin[mindist_g[srcVertex].index] = 0;
-
- using boost::make_iterator_property_map;
-
+
+ NodeFilter<GraphT> nf(&deadNodes, &g);
+ StartFilter<GraphT> sf(&g);
+
+ /* minimum distance needs to run on a graph with .*start unreachable
+ * from start */
+ typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph;
+ const StartFilteredGraph mindist_g(g, sf);
+
+ /* maximum distance needs to run on a graph without cycles & nodes
+ * reachable from cycles */
+ typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph;
+ const NodeFilteredGraph maxdist_g(g, nf);
+
+ // Record distance of each vertex from source using one of the following
+ // algorithms.
+
+ /* note: filtered graphs have same num_{vertices,edges} as base */
+
+ dMin.assign(numVerts, DIST_UNREACHABLE);
+ dMax.assign(numVerts, DIST_UNREACHABLE);
+ dMin[mindist_g[srcVertex].index] = 0;
+
+ using boost::make_iterator_property_map;
+
auto min_index_map = get(vertex_index, mindist_g);
-
- breadth_first_search(mindist_g, srcVertex,
- visitor(make_bfs_visitor(record_distances(
+
+ breadth_first_search(mindist_g, srcVertex,
+ visitor(make_bfs_visitor(record_distances(
make_iterator_property_map(dMin.begin(),
min_index_map),
boost::on_tree_edge())))
.color_map(make_small_color_map(mindist_g)));
-
+
auto max_index_map = get(vertex_index, maxdist_g);
-
- dag_shortest_paths(maxdist_g, srcVertex,
+
+ dag_shortest_paths(maxdist_g, srcVertex,
distance_map(make_iterator_property_map(dMax.begin(),
max_index_map))
.weight_map(make_constant_property<EdgeT>(-1))
.color_map(make_small_color_map(maxdist_g)));
-
- for (size_t i = 0; i < numVerts; i++) {
- if (dMin[i] > DIST_UNREACHABLE) {
- dMin[i] = DIST_UNREACHABLE;
- }
- DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]);
- if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) {
- dMax[i] = -DIST_INFINITY; /* max depths currently negative */
- DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
- } else if (dMax[i] >= DIST_UNREACHABLE
- || dMax[i] < -DIST_UNREACHABLE) {
- dMax[i] = -DIST_UNREACHABLE;
- DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
- }
- }
-}
-
-/**
- * \brief Convert the integer distance we use in our shortest path calculations
- * to a \ref depth value.
- */
-static
-depth depthFromDistance(int val) {
- assert(val >= 0);
- if (val >= DIST_UNREACHABLE) {
- return depth::unreachable();
- } else if (val == DIST_INFINITY) {
- return depth::infinity();
- }
- return depth((u32)val);
-}
-
-static
-DepthMinMax getDepths(u32 idx, const vector<int> &dMin,
- const vector<int> &dMax) {
- DepthMinMax d(depthFromDistance(dMin[idx]),
- depthFromDistance(-1 * dMax[idx]));
- DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str());
- assert(d.min <= d.max);
- return d;
-}
-
-template<class Graph, class Output>
-static
+
+ for (size_t i = 0; i < numVerts; i++) {
+ if (dMin[i] > DIST_UNREACHABLE) {
+ dMin[i] = DIST_UNREACHABLE;
+ }
+ DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]);
+ if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) {
+ dMax[i] = -DIST_INFINITY; /* max depths currently negative */
+ DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
+ } else if (dMax[i] >= DIST_UNREACHABLE
+ || dMax[i] < -DIST_UNREACHABLE) {
+ dMax[i] = -DIST_UNREACHABLE;
+ DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
+ }
+ }
+}
+
+/**
+ * \brief Convert the integer distance we use in our shortest path calculations
+ * to a \ref depth value.
+ */
+static
+depth depthFromDistance(int val) {
+ assert(val >= 0);
+ if (val >= DIST_UNREACHABLE) {
+ return depth::unreachable();
+ } else if (val == DIST_INFINITY) {
+ return depth::infinity();
+ }
+ return depth((u32)val);
+}
+
+static
+DepthMinMax getDepths(u32 idx, const vector<int> &dMin,
+ const vector<int> &dMax) {
+ DepthMinMax d(depthFromDistance(dMin[idx]),
+ depthFromDistance(-1 * dMax[idx]));
+ DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str());
+ assert(d.min <= d.max);
+ return d;
+}
+
+template<class Graph, class Output>
+static
void calcAndStoreDepth(const Graph &g,
- const typename Graph::vertex_descriptor src,
- const vector<bool> &deadNodes,
- vector<int> &dMin /* util */,
- vector<int> &dMax /* util */,
- vector<Output> &depths,
- DepthMinMax Output::*store) {
+ const typename Graph::vertex_descriptor src,
+ const vector<bool> &deadNodes,
+ vector<int> &dMin /* util */,
+ vector<int> &dMax /* util */,
+ vector<Output> &depths,
+ DepthMinMax Output::*store) {
calcDepthFromSource(g, src, deadNodes, dMin, dMax);
-
- for (auto v : vertices_range(g)) {
- u32 idx = g[v].index;
- assert(idx < depths.size());
- Output &d = depths.at(idx);
- d.*store = getDepths(idx, dMin, dMax);
- }
-}
-
+
+ for (auto v : vertices_range(g)) {
+ u32 idx = g[v].index;
+ assert(idx < depths.size());
+ Output &d = depths.at(idx);
+ d.*store = getDepths(idx, dMin, dMax);
+ }
+}
+
vector<NFAVertexDepth> calcDepths(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
vector<NFAVertexDepth> depths(numVertices);
- vector<int> dMin;
- vector<int> dMax;
-
- /*
- * create a filtered graph for max depth calculations: all nodes/edges
- * reachable from a loop need to be removed
- */
+ vector<int> dMin;
+ vector<int> dMax;
+
+ /*
+ * create a filtered graph for max depth calculations: all nodes/edges
+ * reachable from a loop need to be removed
+ */
auto deadNodes = findLoopReachable(g, g.start);
-
- DEBUG_PRINTF("doing start\n");
+
+ DEBUG_PRINTF("doing start\n");
calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths,
&NFAVertexDepth::fromStart);
- DEBUG_PRINTF("doing startds\n");
+ DEBUG_PRINTF("doing startds\n");
calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths,
&NFAVertexDepth::fromStartDotStar);
return depths;
-}
-
+}
+
vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
vector<NFAVertexRevDepth> depths(numVertices);
- vector<int> dMin;
- vector<int> dMax;
-
- /* reverse the graph before walking it */
+ vector<int> dMin;
+ vector<int> dMax;
+
+ /* reverse the graph before walking it */
typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph;
const RevNFAGraph rg(g);
-
+
assert(num_vertices(g) == num_vertices(rg));
- /*
- * create a filtered graph for max depth calculations: all nodes/edges
- * reachable from a loop need to be removed
- */
+ /*
+ * create a filtered graph for max depth calculations: all nodes/edges
+ * reachable from a loop need to be removed
+ */
auto deadNodes = findLoopReachable(rg, g.acceptEod);
-
- DEBUG_PRINTF("doing accept\n");
- calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
+
+ DEBUG_PRINTF("doing accept\n");
+ calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
rg, g.accept, deadNodes, dMin, dMax, depths,
- &NFAVertexRevDepth::toAccept);
- DEBUG_PRINTF("doing accepteod\n");
- deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
- calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
+ &NFAVertexRevDepth::toAccept);
+ DEBUG_PRINTF("doing accepteod\n");
+ deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
+ calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
rg, g.acceptEod, deadNodes, dMin, dMax, depths,
- &NFAVertexRevDepth::toAcceptEod);
+ &NFAVertexRevDepth::toAcceptEod);
return depths;
-}
-
+}
+
vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
vector<NFAVertexBidiDepth> depths(numVertices);
- vector<int> dMin;
- vector<int> dMax;
-
- /*
- * create a filtered graph for max depth calculations: all nodes/edges
- * reachable from a loop need to be removed
- */
+ vector<int> dMin;
+ vector<int> dMax;
+
+ /*
+ * create a filtered graph for max depth calculations: all nodes/edges
+ * reachable from a loop need to be removed
+ */
auto deadNodes = findLoopReachable(g, g.start);
-
- DEBUG_PRINTF("doing start\n");
+
+ DEBUG_PRINTF("doing start\n");
calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>(
g, g.start, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::fromStart);
- DEBUG_PRINTF("doing startds\n");
+ &NFAVertexBidiDepth::fromStart);
+ DEBUG_PRINTF("doing startds\n");
calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>(
g, g.startDs, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::fromStartDotStar);
-
- /* Now go backwards */
+ &NFAVertexBidiDepth::fromStartDotStar);
+
+ /* Now go backwards */
typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph;
const RevNFAGraph rg(g);
deadNodes = findLoopReachable(rg, g.acceptEod);
-
- DEBUG_PRINTF("doing accept\n");
- calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
+
+ DEBUG_PRINTF("doing accept\n");
+ calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
rg, g.accept, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::toAccept);
- DEBUG_PRINTF("doing accepteod\n");
- deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
- calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
+ &NFAVertexBidiDepth::toAccept);
+ DEBUG_PRINTF("doing accepteod\n");
+ deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
+ calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
rg, g.acceptEod, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::toAcceptEod);
+ &NFAVertexBidiDepth::toAcceptEod);
return depths;
-}
-
+}
+
vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
auto deadNodes = findLoopReachable(g, g.start);
-
- vector<int> dMin, dMax;
+
+ vector<int> dMin, dMax;
calcDepthFromSource(g, src, deadNodes, dMin, dMax);
-
+
vector<DepthMinMax> depths(numVertices);
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
auto idx = g[v].index;
- depths.at(idx) = getDepths(idx, dMin, dMax);
- }
+ depths.at(idx) = getDepths(idx, dMin, dMax);
+ }
return depths;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h
index 36cca87e84..418e5e4412 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief NFA graph vertex depth calculations.
- */
-
+ * \brief NFA graph vertex depth calculations.
+ */
+
#ifndef NG_DEPTH_H
#define NG_DEPTH_H
-
+
#include "ue2common.h"
-#include "nfagraph/ng_holder.h"
-#include "util/depth.h"
-
-#include <vector>
-
-namespace ue2 {
-
-/**
- * \brief Encapsulates min/max depths relative to the start and startDs
- * vertices.
- */
-struct NFAVertexDepth {
- DepthMinMax fromStart;
- DepthMinMax fromStartDotStar;
-};
-
-/**
- * \brief Encapsulates min/max depths relative to the accept and acceptEod
- * vertices.
- */
-struct NFAVertexRevDepth {
- DepthMinMax toAccept;
- DepthMinMax toAcceptEod;
-};
-
-/**
- * \brief Encapsulates min/max depths relative to all of our special vertices.
- */
+#include "nfagraph/ng_holder.h"
+#include "util/depth.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/**
+ * \brief Encapsulates min/max depths relative to the start and startDs
+ * vertices.
+ */
+struct NFAVertexDepth {
+ DepthMinMax fromStart;
+ DepthMinMax fromStartDotStar;
+};
+
+/**
+ * \brief Encapsulates min/max depths relative to the accept and acceptEod
+ * vertices.
+ */
+struct NFAVertexRevDepth {
+ DepthMinMax toAccept;
+ DepthMinMax toAcceptEod;
+};
+
+/**
+ * \brief Encapsulates min/max depths relative to all of our special vertices.
+ */
struct NFAVertexBidiDepth {
DepthMinMax fromStart;
DepthMinMax fromStartDotStar;
DepthMinMax toAccept;
DepthMinMax toAcceptEod;
-};
-
-/**
+};
+
+/**
* \brief Calculate depths from start and startDs. Returns them in a vector,
* indexed by vertex index.
- */
+ */
std::vector<NFAVertexDepth> calcDepths(const NGHolder &g);
-
-/**
+
+/**
* \brief Calculate depths to accept and acceptEod. Returns them in a vector,
* indexed by vertex index.
- */
+ */
std::vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g);
-
-/**
+
+/**
* \brief Calculate depths to/from all special vertices. Returns them in a
* vector, indexed by vertex index.
- */
+ */
std::vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g);
-
+
/**
* \brief Calculate the (min, max) depths from the given \p src to every vertex
* in the graph and return them in a vector, indexed by \p vertex_index.
*/
std::vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // NG_DEPTH_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp
index d6a064d12f..2589881009 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Calculate dominator and post-dominator trees.
- *
- * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
- */
-#include "ng_dominators.h"
-
-#include "ue2common.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-
-#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Calculate dominator and post-dominator trees.
+ *
+ * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
+ */
+#include "ng_dominators.h"
+
+#include "ue2common.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+
+#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
#include <boost-patched/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::make_assoc_property_map;
-using boost::make_iterator_property_map;
-
-namespace ue2 {
-
-template <class Graph>
+
+using namespace std;
+using boost::make_assoc_property_map;
+using boost::make_iterator_property_map;
+
+namespace ue2 {
+
+template <class Graph>
unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g,
typename Graph::vertex_descriptor source) {
using Vertex = typename Graph::vertex_descriptor;
- const size_t num_verts = num_vertices(g);
- auto index_map = get(&NFAGraphVertexProps::index, g);
-
- vector<size_t> dfnum(num_verts, 0);
+ const size_t num_verts = num_vertices(g);
+ auto index_map = get(&NFAGraphVertexProps::index, g);
+
+ vector<size_t> dfnum(num_verts, 0);
vector<Vertex> parents(num_verts, Graph::null_vertex());
-
- auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map);
- auto parent_map = make_iterator_property_map(parents.begin(), index_map);
+
+ auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map);
+ auto parent_map = make_iterator_property_map(parents.begin(), index_map);
vector<Vertex> vertices_by_dfnum(num_verts, Graph::null_vertex());
-
- // Output map.
+
+ // Output map.
vector<Vertex> doms(num_verts, Graph::null_vertex());
auto dom_map = make_iterator_property_map(doms.begin(), index_map);
-
- boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map,
- parent_map, vertices_by_dfnum,
- dom_map);
-
+
+ boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map,
+ parent_map, vertices_by_dfnum,
+ dom_map);
+
/* Translate back to an NFAVertex map */
unordered_map<NFAVertex, NFAVertex> doms2;
doms2.reserve(num_verts);
@@ -78,17 +78,17 @@ unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g,
}
}
return doms2;
-}
-
+}
+
unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
return calcDominators(g, g.start);
-}
-
+}
+
unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
return calcDominators(boost::reverse_graph<NGHolder, const NGHolder &>(g),
- g.acceptEod);
-}
-
-} // namespace ue2
+ g.acceptEod);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h
index f505b7e471..eefc7e93df 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h
@@ -1,50 +1,50 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Calculate dominator and post-dominator trees.
- *
- * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
- */
-
-#ifndef NG_DOMINATORS_H
-#define NG_DOMINATORS_H
-
-#include "ng_holder.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Calculate dominator and post-dominator trees.
+ *
+ * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
+ */
+
+#ifndef NG_DOMINATORS_H
+#define NG_DOMINATORS_H
+
+#include "ng_holder.h"
+
#include <unordered_map>
-namespace ue2 {
-
+namespace ue2 {
+
std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);
-
+
std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
-
-} // namespace ue2
-
-#endif // NG_DOMINATORS_H
+
+} // namespace ue2
+
+#endif // NG_DOMINATORS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h
index 3e12d1d22e..6b22ac2e21 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h
@@ -1,175 +1,175 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Dump code for NFA graphs.
- */
-
-#ifndef NG_DUMP_H
-#define NG_DUMP_H
-
-#include "grey.h"
-#include "ng_holder.h" // for graph types
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Dump code for NFA graphs.
+ */
+
+#ifndef NG_DUMP_H
+#define NG_DUMP_H
+
+#include "grey.h"
+#include "ng_holder.h" // for graph types
+#include "ue2common.h"
+
#include <unordered_map>
-#ifdef DUMP_SUPPORT
-#include <fstream>
-#endif
-
-struct RoseEngine;
-
-namespace ue2 {
-
-class NGHolder;
-class NG;
+#ifdef DUMP_SUPPORT
+#include <fstream>
+#endif
+
+struct RoseEngine;
+
+namespace ue2 {
+
+class NGHolder;
+class NG;
class ExpressionInfo;
-class ReportManager;
-
-// Implementations for stubs below -- all have the suffix "Impl".
-
-#ifdef DUMP_SUPPORT
-
-template <typename GraphT>
-void dumpGraphImpl(const char *name, const GraphT &g);
-
-template <typename GraphT>
-void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
-
+class ReportManager;
+
+// Implementations for stubs below -- all have the suffix "Impl".
+
+#ifdef DUMP_SUPPORT
+
+template <typename GraphT>
+void dumpGraphImpl(const char *name, const GraphT &g);
+
+template <typename GraphT>
+void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
+
void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
const char *name, const Grey &grey);
-
-void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
- const Grey &grey);
-
-void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr,
- u32 comp, u32 plan, const Grey &grey);
-
-void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
- const char *stageName, const Grey &grey);
-
-// Variant that takes a region map as well.
-void dumpHolderImpl(const NGHolder &h,
+
+void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
+ const Grey &grey);
+
+void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr,
+ u32 comp, u32 plan, const Grey &grey);
+
+void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
+ const char *stageName, const Grey &grey);
+
+// Variant that takes a region map as well.
+void dumpHolderImpl(const NGHolder &h,
const std::unordered_map<NFAVertex, u32> &region_map,
- unsigned int stageNumber, const char *stageName,
- const Grey &grey);
-
-template <typename GraphT>
-static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
- dumpGraphImpl(name, g);
-}
-
-#endif // DUMP_SUPPORT
-
-// Stubs which call through to dump code if compiled in.
-
-UNUSED static inline
+ unsigned int stageNumber, const char *stageName,
+ const Grey &grey);
+
+template <typename GraphT>
+static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
+ dumpGraphImpl(name, g);
+}
+
+#endif // DUMP_SUPPORT
+
+// Stubs which call through to dump code if compiled in.
+
+UNUSED static inline
void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
UNUSED const char *name, UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
+#ifdef DUMP_SUPPORT
dumpDotWrapperImpl(g, expr, name, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name,
- UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpComponentImpl(h, name, expr, comp, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name,
- UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan,
- UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpSomSubComponentImpl(h, name, expr, comp, plan, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
- UNUSED const char *name, UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpHolderImpl(h, stageNumber, name, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpHolder(UNUSED const NGHolder &h,
+#endif
+}
+
+UNUSED static inline
+void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name,
+ UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpComponentImpl(h, name, expr, comp, grey);
+#endif
+}
+
+UNUSED static inline
+void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name,
+ UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan,
+ UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpSomSubComponentImpl(h, name, expr, comp, plan, grey);
+#endif
+}
+
+UNUSED static inline
+void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
+ UNUSED const char *name, UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpHolderImpl(h, stageNumber, name, grey);
+#endif
+}
+
+UNUSED static inline
+void dumpHolder(UNUSED const NGHolder &h,
UNUSED const std::unordered_map<NFAVertex, u32> &region_map,
- UNUSED unsigned int stageNumber, UNUSED const char *name,
- UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpHolderImpl(h, region_map, stageNumber, name, grey);
-#endif
-}
-
-#ifdef DUMP_SUPPORT
-void dumpReportManager(const ReportManager &rm, const Grey &grey);
-void dumpSmallWrite(const RoseEngine *rose, const Grey &grey);
-#else
-static UNUSED
-void dumpReportManager(const ReportManager &, const Grey &) {
-}
-static UNUSED
-void dumpSmallWrite(const RoseEngine *, const Grey &) {
-}
-#endif
-
-#ifdef DUMP_SUPPORT
-// replace boost's graphviz writer
-template <typename GraphT, typename WriterT, typename VertexID>
-static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w,
- const VertexID &vertex_id) {
- const std::string delimiter(" -> ");
- out << "digraph G {" << std::endl;
-
- typename boost::graph_traits<GraphT>::vertex_iterator i, end;
- for(boost::tie(i,end) = vertices(g); i != end; ++i) {
- out << get(vertex_id, *i);
- w(out, *i); // print vertex attributes
- out << ";" << std::endl;
- }
- typename boost::graph_traits<GraphT>::edge_iterator ei, edge_end;
- for(boost::tie(ei, edge_end) = edges(g); ei != edge_end; ++ei) {
- out << (get(vertex_id, source(*ei, g))) << delimiter
- << (get(vertex_id, target(*ei, g))) << " ";
- w(out, *ei); // print edge attributes
- out << ";" << std::endl;
- }
- out << "}" << std::endl;
-}
-
-#endif // DUMP_SUPPORT
-
-} // namespace ue2
-
-#endif // NG_DUMP_H
+ UNUSED unsigned int stageNumber, UNUSED const char *name,
+ UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpHolderImpl(h, region_map, stageNumber, name, grey);
+#endif
+}
+
+#ifdef DUMP_SUPPORT
+void dumpReportManager(const ReportManager &rm, const Grey &grey);
+void dumpSmallWrite(const RoseEngine *rose, const Grey &grey);
+#else
+static UNUSED
+void dumpReportManager(const ReportManager &, const Grey &) {
+}
+static UNUSED
+void dumpSmallWrite(const RoseEngine *, const Grey &) {
+}
+#endif
+
+#ifdef DUMP_SUPPORT
+// replace boost's graphviz writer
+template <typename GraphT, typename WriterT, typename VertexID>
+static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w,
+ const VertexID &vertex_id) {
+ const std::string delimiter(" -> ");
+ out << "digraph G {" << std::endl;
+
+ typename boost::graph_traits<GraphT>::vertex_iterator i, end;
+ for(boost::tie(i,end) = vertices(g); i != end; ++i) {
+ out << get(vertex_id, *i);
+ w(out, *i); // print vertex attributes
+ out << ";" << std::endl;
+ }
+ typename boost::graph_traits<GraphT>::edge_iterator ei, edge_end;
+ for(boost::tie(ei, edge_end) = edges(g); ei != edge_end; ++ei) {
+ out << (get(vertex_id, source(*ei, g))) << delimiter
+ << (get(vertex_id, target(*ei, g))) << " ";
+ w(out, *ei); // print edge attributes
+ out << ";" << std::endl;
+ }
+ out << "}" << std::endl;
+}
+
+#endif // DUMP_SUPPORT
+
+} // namespace ue2
+
+#endif // NG_DUMP_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp
index b8354bd42a..ed2de70598 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp
@@ -1,186 +1,186 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Edge redundancy graph reductions.
- */
-#include "ng_edge_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "parser/position.h"
-#include "util/compile_context.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Edge redundancy graph reductions.
+ */
+#include "ng_edge_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "parser/position.h"
+#include "util/compile_context.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-/* reverse edge redundancy removal is possible but is not implemented as it
- * regressed rose pattern support in the regression suite: 19026 - 19027
- * (foo.{1,5}b?ar)
- *
- * If rose becomes smarter we can reimplement.
- */
-
-static never_inline
-bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
- const set<NFAVertex> &happy) {
- /* need to check if for each vertex in sad if it has an edge to a happy
- * vertex */
- for (auto u : sad) {
- bool ok = false;
- for (auto v : adjacent_vertices_range(u, g)) {
- if (contains(happy, v)) {
- ok = true;
- break;
- }
- }
-
- if (!ok) {
- return false;
- }
- }
-
- return true;
-}
-
-static never_inline
-bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
- const set<NFAVertex> &happy) {
- /* need to check if for each vertex in sad if it has an edge to a happy
- * vertex */
- for (auto v : sad) {
- bool ok = false;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(happy, u)) {
- ok = true;
- break;
- }
- }
-
- if (!ok) {
- return false;
- }
- }
-
- return true;
-}
-
-/** \brief Redundant self-loop removal.
- *
- * A self loop on a vertex v can be removed if:
- *
- * For every vertex u in pred(v) either:
- * 1: u has a self loop and cr(v) subset of cr(u)
- * OR
- * 2: u has an edge to vertex satisfying criterion 1
- *
- * Note: we remove all dead loops at the end of the pass and do not check the
- * live status of the loops we are depending on during the analysis.
- *
- * We don't end up in situations where we remove a group of loops which depend
- * on each other as:
- *
- * - there must be at least one vertex not in the group which is a pred of some
- * member of the group (as we don't remove loops on specials)
- *
- * For each pred vertex of the group:
- * - the vertex must be 'sad' as it is not part of the group
- * - therefore it must have edges to each member of the group (to happy, trans)
- * - therefore the group is enabled simultaneously
- * - due to internal group edges, all members will still be active after the
- * next character.
- *
- * Actually, the vertex redundancy code will merge the entire group into one
- * cyclic state.
- */
-static
-bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
- unsigned dead_count = 0;
-
- set<NFAVertex> happy;
- set<NFAVertex> sad;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || !hasSelfLoop(v, g)) {
- continue;
- }
-
- const CharReach &cr_v = g[v].char_reach;
-
- happy.clear();
- sad.clear();
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue;
- }
-
- if (!hasSelfLoop(u, g)) {
- sad.insert(u);
- continue;
- }
-
- if (ignore_starts) {
- if (u == g.startDs || is_virtual_start(u, g)) {
- sad.insert(u);
- continue;
- }
- }
-
- const CharReach &cr_u = g[u].char_reach;
-
- if ((cr_u & cr_v) != cr_v) {
- sad.insert(u);
- continue;
- }
-
- happy.insert(u);
- }
-
- if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
- dead_count++;
- remove_edge(v, v, g);
- }
- }
-
- DEBUG_PRINTF("found %u removable edges.\n", dead_count);
- return dead_count;
-}
-
+#include "util/graph_range.h"
+
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+/* reverse edge redundancy removal is possible but is not implemented as it
+ * regressed rose pattern support in the regression suite: 19026 - 19027
+ * (foo.{1,5}b?ar)
+ *
+ * If rose becomes smarter we can reimplement.
+ */
+
+static never_inline
+bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
+ const set<NFAVertex> &happy) {
+ /* need to check if for each vertex in sad if it has an edge to a happy
+ * vertex */
+ for (auto u : sad) {
+ bool ok = false;
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (contains(happy, v)) {
+ ok = true;
+ break;
+ }
+ }
+
+ if (!ok) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static never_inline
+bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
+ const set<NFAVertex> &happy) {
+ /* need to check if for each vertex in sad if it has an edge to a happy
+ * vertex */
+ for (auto v : sad) {
+ bool ok = false;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(happy, u)) {
+ ok = true;
+ break;
+ }
+ }
+
+ if (!ok) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** \brief Redundant self-loop removal.
+ *
+ * A self loop on a vertex v can be removed if:
+ *
+ * For every vertex u in pred(v) either:
+ * 1: u has a self loop and cr(v) subset of cr(u)
+ * OR
+ * 2: u has an edge to vertex satisfying criterion 1
+ *
+ * Note: we remove all dead loops at the end of the pass and do not check the
+ * live status of the loops we are depending on during the analysis.
+ *
+ * We don't end up in situations where we remove a group of loops which depend
+ * on each other as:
+ *
+ * - there must be at least one vertex not in the group which is a pred of some
+ * member of the group (as we don't remove loops on specials)
+ *
+ * For each pred vertex of the group:
+ * - the vertex must be 'sad' as it is not part of the group
+ * - therefore it must have edges to each member of the group (to happy, trans)
+ * - therefore the group is enabled simultaneously
+ * - due to internal group edges, all members will still be active after the
+ * next character.
+ *
+ * Actually, the vertex redundancy code will merge the entire group into one
+ * cyclic state.
+ */
+static
+bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
+ unsigned dead_count = 0;
+
+ set<NFAVertex> happy;
+ set<NFAVertex> sad;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || !hasSelfLoop(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr_v = g[v].char_reach;
+
+ happy.clear();
+ sad.clear();
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue;
+ }
+
+ if (!hasSelfLoop(u, g)) {
+ sad.insert(u);
+ continue;
+ }
+
+ if (ignore_starts) {
+ if (u == g.startDs || is_virtual_start(u, g)) {
+ sad.insert(u);
+ continue;
+ }
+ }
+
+ const CharReach &cr_u = g[u].char_reach;
+
+ if ((cr_u & cr_v) != cr_v) {
+ sad.insert(u);
+ continue;
+ }
+
+ happy.insert(u);
+ }
+
+ if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
+ dead_count++;
+ remove_edge(v, v, g);
+ }
+ }
+
+ DEBUG_PRINTF("found %u removable edges.\n", dead_count);
+ return dead_count;
+}
+
static
bool checkReportsRev(const NGHolder &g, NFAVertex v,
const set<NFAVertex> &happy) {
@@ -203,336 +203,336 @@ bool checkReportsRev(const NGHolder &g, NFAVertex v,
return is_subset_of(g[v].reports, happy_reports);
}
-/** \brief Redundant self-loop removal (reverse version).
- *
- * A self loop on a vertex v can be removed if:
- *
- * For every vertex u in succ(v) either:
- * 1: u has a self loop and cr(v) is a subset of cr(u).
- * OR
- * 2: u is not an accept and u has an edge from a vertex satisfying
- * criterion 1.
- * OR
- * 3: u is in an accept and u has an edge from a vertex v' satisfying
- * criterion 1 and report(v) == report(v').
- */
-static
-bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
- unsigned dead_count = 0;
-
- set<NFAVertex> happy;
- set<NFAVertex> sad;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || !hasSelfLoop(v, g)) {
- continue;
- }
-
- const CharReach &cr_v = g[v].char_reach;
-
- happy.clear();
- sad.clear();
-
- for (auto u : adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue;
- }
-
- if (!hasSelfLoop(u, g)) {
- sad.insert(u);
- continue;
- }
-
- assert(!is_special(u, g));
-
- const CharReach &cr_u = g[u].char_reach;
-
- if (!cr_v.isSubsetOf(cr_u)) {
- sad.insert(u);
- continue;
- }
-
- happy.insert(u);
- }
-
+/** \brief Redundant self-loop removal (reverse version).
+ *
+ * A self loop on a vertex v can be removed if:
+ *
+ * For every vertex u in succ(v) either:
+ * 1: u has a self loop and cr(v) is a subset of cr(u).
+ * OR
+ * 2: u is not an accept and u has an edge from a vertex satisfying
+ * criterion 1.
+ * OR
+ * 3: u is in an accept and u has an edge from a vertex v' satisfying
+ * criterion 1 and report(v) == report(v').
+ */
+static
+bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
+ unsigned dead_count = 0;
+
+ set<NFAVertex> happy;
+ set<NFAVertex> sad;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || !hasSelfLoop(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr_v = g[v].char_reach;
+
+ happy.clear();
+ sad.clear();
+
+ for (auto u : adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue;
+ }
+
+ if (!hasSelfLoop(u, g)) {
+ sad.insert(u);
+ continue;
+ }
+
+ assert(!is_special(u, g));
+
+ const CharReach &cr_u = g[u].char_reach;
+
+ if (!cr_v.isSubsetOf(cr_u)) {
+ sad.insert(u);
+ continue;
+ }
+
+ happy.insert(u);
+ }
+
if (!happy.empty() && checkVerticesRev(g, sad, happy)
&& checkReportsRev(g, v, happy)) {
- dead_count++;
- remove_edge(v, v, g);
- }
- }
-
- DEBUG_PRINTF("found %u removable edges.\n", dead_count);
- return dead_count;
-}
-
-static
-bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
- const flat_set<NFAVertex> &other_parents, NFAVertex other,
- map<NFAVertex, bool> &done) {
- map<NFAVertex, bool>::const_iterator dit = done.find(v);
- if (dit != done.end()) {
- return dit->second;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v && contains(other_parents, other)) {
- continue;
- }
-
- if (!contains(other_parents, u)) {
- done[v] = false;
- return false;
- }
- }
-
- done[v] = true;
- return true;
-}
-
-static
-bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
- const flat_set<NFAVertex> &fixed_parents,
- const NFAEdge &candidate,
- map<NFAVertex, bool> &done) {
- NFAVertex w = source(candidate, g);
- NFAVertex v = target(candidate, g);
- const CharReach &cr_w = g[w].char_reach;
- const CharReach &cr_u = g[fixed_src].char_reach;
-
- /* There is no reason why self loops cannot be considered by this
- * transformation but the removal is already handled by many other
- * transformations. */
- if (w == v) {
- return false;
- }
-
- if (is_special(w, g)) {
- return false;
- }
-
- if (!cr_w.isSubsetOf(cr_u)) {
- return false;
- }
-
- /* check that each parent of w is also a parent of u */
- if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
- return false;
- }
-
+ dead_count++;
+ remove_edge(v, v, g);
+ }
+ }
+
+ DEBUG_PRINTF("found %u removable edges.\n", dead_count);
+ return dead_count;
+}
+
+static
+bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
+ const flat_set<NFAVertex> &other_parents, NFAVertex other,
+ map<NFAVertex, bool> &done) {
+ map<NFAVertex, bool>::const_iterator dit = done.find(v);
+ if (dit != done.end()) {
+ return dit->second;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v && contains(other_parents, other)) {
+ continue;
+ }
+
+ if (!contains(other_parents, u)) {
+ done[v] = false;
+ return false;
+ }
+ }
+
+ done[v] = true;
+ return true;
+}
+
+static
+bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
+ const flat_set<NFAVertex> &fixed_parents,
+ const NFAEdge &candidate,
+ map<NFAVertex, bool> &done) {
+ NFAVertex w = source(candidate, g);
+ NFAVertex v = target(candidate, g);
+ const CharReach &cr_w = g[w].char_reach;
+ const CharReach &cr_u = g[fixed_src].char_reach;
+
+ /* There is no reason why self loops cannot be considered by this
+ * transformation but the removal is already handled by many other
+ * transformations. */
+ if (w == v) {
+ return false;
+ }
+
+ if (is_special(w, g)) {
+ return false;
+ }
+
+ if (!cr_w.isSubsetOf(cr_u)) {
+ return false;
+ }
+
+ /* check that each parent of w is also a parent of u */
+ if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
+ return false;
+ }
+
DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n",
g[w].index, g[v].index, g[fixed_src].index, g[v].index);
- return true;
-}
-
-static never_inline
-void checkLargeOutU(const NGHolder &g, NFAVertex u,
- const flat_set<NFAVertex> &parents_u,
- flat_set<NFAVertex> &possible_w,
- map<NFAVertex, bool> &done,
- set<NFAEdge> *dead) {
- /* only vertices with at least one parent in common with u need to be
- * considered, and we also only consider potential siblings with subset
- * reach. */
- possible_w.clear();
- const CharReach &cr_u = g[u].char_reach;
- for (auto p : parents_u) {
- for (auto v : adjacent_vertices_range(p, g)) {
- const CharReach &cr_w = g[v].char_reach;
- if (cr_w.isSubsetOf(cr_u)) {
- possible_w.insert(v);
- }
- }
- }
-
- // If there's only one, it's us, and we have no work to do.
- if (possible_w.size() <= 1) {
- assert(possible_w.empty() || *possible_w.begin() == u);
- return;
- }
-
- for (const auto &e : out_edges_range(u, g)) {
- const NFAVertex v = target(e, g);
-
- if (is_special(v, g)) {
- continue;
- }
-
- if (contains(*dead, e)) {
- continue;
- }
-
- /* Now need check to find any edges which can be removed due to the
- * existence of edge e */
- for (const auto &e2 : in_edges_range(v, g)) {
- if (e == e2 || contains(*dead, e2)) {
- continue;
- }
-
- const NFAVertex w = source(e2, g);
- if (!contains(possible_w, w)) {
- continue;
- }
-
- if (checkFwdCandidate(g, u, parents_u, e2, done)) {
- dead->insert(e2);
- }
- }
- }
-}
-
-static never_inline
-void checkSmallOutU(const NGHolder &g, NFAVertex u,
- const flat_set<NFAVertex> &parents_u,
- map<NFAVertex, bool> &done,
- set<NFAEdge> *dead) {
- for (const auto &e : out_edges_range(u, g)) {
- const NFAVertex v = target(e, g);
-
- if (is_special(v, g)) {
- continue;
- }
-
- if (contains(*dead, e)) {
- continue;
- }
-
- /* Now need check to find any edges which can be removed due to the
- * existence of edge e */
- for (const auto &e2 : in_edges_range(v, g)) {
- if (e == e2 || contains(*dead, e2)) {
- continue;
- }
-
- if (checkFwdCandidate(g, u, parents_u, e2, done)) {
- dead->insert(e2);
- }
- }
- }
-}
-
-/** \brief Forward edge redundancy pass.
- *
- * An edge e from w to v is redundant if there exists an edge e' such that:
- * e' is from u to v
- * and: reach(w) is a subset of reach(u)
- * and: proper_pred(w) is a subset of pred(u)
- * and: self_loop(w) implies self_loop(u) or edge from (w to u)
- *
- * Note: edges to accepts also require report ID checks.
- */
-static
-bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
- set<NFAEdge> dead;
- map<NFAVertex, bool> done;
- flat_set<NFAVertex> parents_u;
- flat_set<NFAVertex> possible_w;
-
- for (auto u : vertices_range(g)) {
- if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
- continue;
- }
-
- parents_u.clear();
- pred(g, u, &parents_u);
-
- done.clear();
+ return true;
+}
+
+static never_inline
+void checkLargeOutU(const NGHolder &g, NFAVertex u,
+ const flat_set<NFAVertex> &parents_u,
+ flat_set<NFAVertex> &possible_w,
+ map<NFAVertex, bool> &done,
+ set<NFAEdge> *dead) {
+ /* only vertices with at least one parent in common with u need to be
+ * considered, and we also only consider potential siblings with subset
+ * reach. */
+ possible_w.clear();
+ const CharReach &cr_u = g[u].char_reach;
+ for (auto p : parents_u) {
+ for (auto v : adjacent_vertices_range(p, g)) {
+ const CharReach &cr_w = g[v].char_reach;
+ if (cr_w.isSubsetOf(cr_u)) {
+ possible_w.insert(v);
+ }
+ }
+ }
+
+ // If there's only one, it's us, and we have no work to do.
+ if (possible_w.size() <= 1) {
+ assert(possible_w.empty() || *possible_w.begin() == u);
+ return;
+ }
+
+ for (const auto &e : out_edges_range(u, g)) {
+ const NFAVertex v = target(e, g);
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (contains(*dead, e)) {
+ continue;
+ }
+
+ /* Now need check to find any edges which can be removed due to the
+ * existence of edge e */
+ for (const auto &e2 : in_edges_range(v, g)) {
+ if (e == e2 || contains(*dead, e2)) {
+ continue;
+ }
+
+ const NFAVertex w = source(e2, g);
+ if (!contains(possible_w, w)) {
+ continue;
+ }
+
+ if (checkFwdCandidate(g, u, parents_u, e2, done)) {
+ dead->insert(e2);
+ }
+ }
+ }
+}
+
+static never_inline
+void checkSmallOutU(const NGHolder &g, NFAVertex u,
+ const flat_set<NFAVertex> &parents_u,
+ map<NFAVertex, bool> &done,
+ set<NFAEdge> *dead) {
+ for (const auto &e : out_edges_range(u, g)) {
+ const NFAVertex v = target(e, g);
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (contains(*dead, e)) {
+ continue;
+ }
+
+ /* Now need check to find any edges which can be removed due to the
+ * existence of edge e */
+ for (const auto &e2 : in_edges_range(v, g)) {
+ if (e == e2 || contains(*dead, e2)) {
+ continue;
+ }
+
+ if (checkFwdCandidate(g, u, parents_u, e2, done)) {
+ dead->insert(e2);
+ }
+ }
+ }
+}
+
+/** \brief Forward edge redundancy pass.
+ *
+ * An edge e from w to v is redundant if there exists an edge e' such that:
+ * e' is from u to v
+ * and: reach(w) is a subset of reach(u)
+ * and: proper_pred(w) is a subset of pred(u)
+ * and: self_loop(w) implies self_loop(u) or edge from (w to u)
+ *
+ * Note: edges to accepts also require report ID checks.
+ */
+static
+bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
+ set<NFAEdge> dead;
+ map<NFAVertex, bool> done;
+ flat_set<NFAVertex> parents_u;
+ flat_set<NFAVertex> possible_w;
+
+ for (auto u : vertices_range(g)) {
+ if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
+ continue;
+ }
+
+ parents_u.clear();
+ pred(g, u, &parents_u);
+
+ done.clear();
if (out_degree(u, g) > 1) {
- checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
- } else {
- checkSmallOutU(g, u, parents_u, done, &dead);
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
- return true;
-}
-
-/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
- * don't consider startDs or virtual starts as cyclic vertices. */
-bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
- if (!cc.grey.removeEdgeRedundancy) {
- return false;
- }
-
- bool changed = false;
- changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
- changed |= removeEdgeRedundancyNearCyclesRev(g);
- changed |= removeEdgeRedundancyFwd(g, som);
- return changed;
-}
-
-/** \brief Removes optional stuff from the front of floating patterns, since it's
- * redundant with startDs.
- *
- * For each successor of startDs, remove any in-edges that aren't from either
- * start or startDs. This allows us to prune redundant vertices at the start of
- * a pattern:
- *
- * /(hat)?stand --> /stand/
- *
- */
-bool removeSiblingsOfStartDotStar(NGHolder &g) {
- vector<NFAEdge> dead;
-
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
+ } else {
+ checkSmallOutU(g, u, parents_u, done, &dead);
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+ return true;
+}
+
+/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
+ * don't consider startDs or virtual starts as cyclic vertices. */
+bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
+ if (!cc.grey.removeEdgeRedundancy) {
+ return false;
+ }
+
+ bool changed = false;
+ changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
+ changed |= removeEdgeRedundancyNearCyclesRev(g);
+ changed |= removeEdgeRedundancyFwd(g, som);
+ return changed;
+}
+
+/** \brief Removes optional stuff from the front of floating patterns, since it's
+ * redundant with startDs.
+ *
+ * For each successor of startDs, remove any in-edges that aren't from either
+ * start or startDs. This allows us to prune redundant vertices at the start of
+ * a pattern:
+ *
+ * /(hat)?stand --> /stand/
+ *
+ */
+bool removeSiblingsOfStartDotStar(NGHolder &g) {
+ vector<NFAEdge> dead;
+
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
DEBUG_PRINTF("checking %zu\n", g[v].index);
- if (is_special(v, g)) {
- continue;
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
- if (is_special(u, g)) {
- continue;
- }
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ NFAVertex u = source(e, g);
+ if (is_special(u, g)) {
+ continue;
+ }
DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index);
- dead.push_back(e);
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
- return true;
-}
-
-/** Removes all edges into virtual starts other than those from start/startDs,
- * providing there is an edge from startDs. This operation is an optimisation
- * for SOM mode. (see UE-1544) */
-bool optimiseVirtualStarts(NGHolder &g) {
- vector<NFAEdge> dead;
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- u32 flags = g[v].assert_flags;
- if (!(flags & POS_FLAG_VIRTUAL_START)) {
- continue;
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- if (!is_any_start(source(e, g), g)) {
- dead.push_back(e);
- }
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
- return true;
-}
-
-} // namespace ue2
+ dead.push_back(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+ return true;
+}
+
+/** Removes all edges into virtual starts other than those from start/startDs,
+ * providing there is an edge from startDs. This operation is an optimisation
+ * for SOM mode. (see UE-1544) */
+bool optimiseVirtualStarts(NGHolder &g) {
+ vector<NFAEdge> dead;
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ u32 flags = g[v].assert_flags;
+ if (!(flags & POS_FLAG_VIRTUAL_START)) {
+ continue;
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ if (!is_any_start(source(e, g), g)) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h
index 08cf31f26c..f589ff727e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h
@@ -1,65 +1,65 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Edge redundancy graph reductions.
- */
-#ifndef NG_EDGE_REDUNDANCY_H
-#define NG_EDGE_REDUNDANCY_H
-
-#include "som/som.h"
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-/** \brief Entry point: Runs all the edge redundancy passes. */
-bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc);
-
-/** \brief Removes optional stuff from the front of floating patterns, since
- * it's redundant with startDs.
- *
- * For each successor of startDs, remove any in-edges that aren't from either
- * start or startDs. This allows us to prune redundant vertices at the start of
- * a pattern:
- *
- * /(hat)?stand --> /stand/
- *
- */
-bool removeSiblingsOfStartDotStar(NGHolder &g);
-
-/** \brief Removes all edges into virtual starts other than those from
- * start/startDs, providing there is an edge from startDs.
- *
- * This operation is an optimisation for SOM mode. (see UE-1544) */
-bool optimiseVirtualStarts(NGHolder &g);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Edge redundancy graph reductions.
+ */
+#ifndef NG_EDGE_REDUNDANCY_H
+#define NG_EDGE_REDUNDANCY_H
+
+#include "som/som.h"
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+/** \brief Entry point: Runs all the edge redundancy passes. */
+bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc);
+
+/** \brief Removes optional stuff from the front of floating patterns, since
+ * it's redundant with startDs.
+ *
+ * For each successor of startDs, remove any in-edges that aren't from either
+ * start or startDs. This allows us to prune redundant vertices at the start of
+ * a pattern:
+ *
+ * /(hat)?stand --> /stand/
+ *
+ */
+bool removeSiblingsOfStartDotStar(NGHolder &g);
+
+/** \brief Removes all edges into virtual starts other than those from
+ * start/startDs, providing there is an edge from startDs.
+ *
+ * This operation is an optimisation for SOM mode. (see UE-1544) */
+bool optimiseVirtualStarts(NGHolder &g);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp
index fba8ce7b74..90d6fd8b75 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp
@@ -1,317 +1,317 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Equivalence class graph reduction pass.
- */
-
-#include "ng_equivalence.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "util/compile_context.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Equivalence class graph reduction pass.
+ */
+
+#include "ng_equivalence.h"
+
+#include "grey.h"
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "util/compile_context.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/unordered.h"
-
-#include <algorithm>
+
+#include <algorithm>
#include <memory>
-#include <set>
-#include <stack>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-enum EquivalenceType {
+#include <set>
+#include <stack>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+enum EquivalenceType {
LEFT_EQUIVALENCE,
- RIGHT_EQUIVALENCE,
-};
-
-namespace {
-class VertexInfo;
-
-// custom comparison functor for unordered_set and flat_set
-struct VertexInfoPtrCmp {
- // for flat_set
- bool operator()(const VertexInfo *a, const VertexInfo *b) const;
-};
-
+ RIGHT_EQUIVALENCE,
+};
+
+namespace {
+class VertexInfo;
+
+// custom comparison functor for unordered_set and flat_set
+struct VertexInfoPtrCmp {
+ // for flat_set
+ bool operator()(const VertexInfo *a, const VertexInfo *b) const;
+};
+
using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>;
-/** Precalculated (and maintained) information about a vertex. */
-class VertexInfo {
-public:
- VertexInfo(NFAVertex v_in, const NGHolder &g)
+/** Precalculated (and maintained) information about a vertex. */
+class VertexInfo {
+public:
+ VertexInfo(NFAVertex v_in, const NGHolder &g)
: v(v_in), vert_index(g[v].index), cr(g[v].char_reach),
- equivalence_class(~0), vertex_flags(g[v].assert_flags) {}
-
+ equivalence_class(~0), vertex_flags(g[v].assert_flags) {}
+
VertexInfoSet pred; //!< predecessors of this vertex
VertexInfoSet succ; //!< successors of this vertex
- NFAVertex v;
+ NFAVertex v;
size_t vert_index;
- CharReach cr;
- CharReach pred_cr;
- CharReach succ_cr;
+ CharReach cr;
+ CharReach pred_cr;
+ CharReach succ_cr;
flat_set<u32> edge_tops; /**< tops on edge from start */
- unsigned equivalence_class;
- unsigned vertex_flags;
-};
-
-// compare two vertex info pointers on their vertex index
-bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
- const VertexInfo *b) const {
- return a->vert_index < b->vert_index;
-}
-
-// to avoid traversing infomap each time we need to check the class during
-// partitioning, we will cache the information pertaining to a particular class
-class ClassInfo {
-public:
- struct ClassDepth {
- ClassDepth() {}
- ClassDepth(const NFAVertexDepth &d)
- : d1(d.fromStart), d2(d.fromStartDotStar) {}
- ClassDepth(const NFAVertexRevDepth &rd)
- : d1(rd.toAccept), d2(rd.toAcceptEod) {}
- DepthMinMax d1;
- DepthMinMax d2;
- };
+ unsigned equivalence_class;
+ unsigned vertex_flags;
+};
+
+// compare two vertex info pointers on their vertex index
+bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
+ const VertexInfo *b) const {
+ return a->vert_index < b->vert_index;
+}
+
+// to avoid traversing infomap each time we need to check the class during
+// partitioning, we will cache the information pertaining to a particular class
+class ClassInfo {
+public:
+ struct ClassDepth {
+ ClassDepth() {}
+ ClassDepth(const NFAVertexDepth &d)
+ : d1(d.fromStart), d2(d.fromStartDotStar) {}
+ ClassDepth(const NFAVertexRevDepth &rd)
+ : d1(rd.toAccept), d2(rd.toAcceptEod) {}
+ DepthMinMax d1;
+ DepthMinMax d2;
+ };
ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in,
- EquivalenceType eq)
+ EquivalenceType eq)
: /* reports only matter for right-equiv */
rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()),
vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr),
adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr),
/* treat non-special vertices the same */
node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {}
-
+
bool operator==(const ClassInfo &b) const {
return node_type == b.node_type && depth.d1 == b.depth.d1 &&
depth.d2 == b.depth.d2 && cr == b.cr &&
adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops &&
vertex_flags == b.vertex_flags && rs == b.rs;
}
-
+
size_t hash() const {
return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1,
depth.d2);
- }
-
-private:
- flat_set<ReportID> rs; /* for right equiv only */
- unsigned vertex_flags;
+ }
+
+private:
+ flat_set<ReportID> rs; /* for right equiv only */
+ unsigned vertex_flags;
flat_set<u32> edge_tops;
- CharReach cr;
- CharReach adjacent_cr;
- unsigned node_type;
- ClassDepth depth;
-};
-
-// work queue class. this contraption has two goals:
-// 1. uniqueness of elements
-// 2. FILO operation
-class WorkQueue {
-public:
- explicit WorkQueue(unsigned c) {
- q.reserve(c);
- }
- // unique push
- void push(unsigned id) {
- if (ids.insert(id).second) {
- q.push_back(id);
- }
- }
-
- // pop
- unsigned pop() {
- unsigned id = q.back();
- ids.erase(id);
- q.pop_back();
- return id;
- }
-
- void append(WorkQueue &other) {
- for (const auto &e : other) {
- push(e);
- }
- }
-
- void clear() {
- ids.clear();
- q.clear();
- }
-
- bool empty() const {
- return ids.empty();
- }
-
- vector<unsigned>::const_iterator begin() const {
- return q.begin();
- }
-
- vector<unsigned>::const_iterator end() const {
- return q.end();
- }
-
- size_t capacity() const {
- return q.capacity();
- }
-private:
+ CharReach cr;
+ CharReach adjacent_cr;
+ unsigned node_type;
+ ClassDepth depth;
+};
+
+// work queue class. this contraption has two goals:
+// 1. uniqueness of elements
+// 2. FILO operation
+class WorkQueue {
+public:
+ explicit WorkQueue(unsigned c) {
+ q.reserve(c);
+ }
+ // unique push
+ void push(unsigned id) {
+ if (ids.insert(id).second) {
+ q.push_back(id);
+ }
+ }
+
+ // pop
+ unsigned pop() {
+ unsigned id = q.back();
+ ids.erase(id);
+ q.pop_back();
+ return id;
+ }
+
+ void append(WorkQueue &other) {
+ for (const auto &e : other) {
+ push(e);
+ }
+ }
+
+ void clear() {
+ ids.clear();
+ q.clear();
+ }
+
+ bool empty() const {
+ return ids.empty();
+ }
+
+ vector<unsigned>::const_iterator begin() const {
+ return q.begin();
+ }
+
+ vector<unsigned>::const_iterator end() const {
+ return q.end();
+ }
+
+ size_t capacity() const {
+ return q.capacity();
+ }
+private:
unordered_set<unsigned> ids; //!< stores id's, for uniqueness
- vector<unsigned> q; //!< vector of id's that we use as FILO.
-};
-
-}
-
-static
-bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
- unsigned nonSpecialVertices = 0;
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_special(w, g) && w != v) {
- nonSpecialVertices++;
- }
- }
- return nonSpecialVertices == 1;
-}
-
-static
-bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
- unsigned nonSpecialVertices = 0;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_special(u, g) && u != v) {
- nonSpecialVertices++;
- }
- }
- return nonSpecialVertices == 1;
-}
-
-/** Cheaply check whether this graph can't be reduced at all, because it is
- * just a chain of vertices with no other edges. */
-static
-bool isIrreducible(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- // skip specials
- if (is_special(v, g)) {
- continue;
- }
-
- // we want meaningful in_degree to be 1. we also want to make sure we
- // don't count self-loop + 1 incoming edge as not irreducible
- if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) {
- return false;
- }
- // we want meaningful out_degree to be 1. we also want to make sure we
- // don't count self-loop + 1 outgoing edge as not irreducible
- if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) {
- return false;
- }
- }
-
- return true;
-}
-
-#ifndef NDEBUG
-static
-bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
- for (const auto &e : in_edges_range(v, g)) {
- if (g[e].assert_flags != 0) {
- return true;
- }
- }
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].assert_flags != 0) {
- return true;
- }
- }
- return false;
-}
-#endif
-
-// populate VertexInfo table
-static
+ vector<unsigned> q; //!< vector of id's that we use as FILO.
+};
+
+}
+
+static
+bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
+ unsigned nonSpecialVertices = 0;
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_special(w, g) && w != v) {
+ nonSpecialVertices++;
+ }
+ }
+ return nonSpecialVertices == 1;
+}
+
+static
+bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
+ unsigned nonSpecialVertices = 0;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_special(u, g) && u != v) {
+ nonSpecialVertices++;
+ }
+ }
+ return nonSpecialVertices == 1;
+}
+
+/** Cheaply check whether this graph can't be reduced at all, because it is
+ * just a chain of vertices with no other edges. */
+static
+bool isIrreducible(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ // skip specials
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ // we want meaningful in_degree to be 1. we also want to make sure we
+ // don't count self-loop + 1 incoming edge as not irreducible
+ if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) {
+ return false;
+ }
+ // we want meaningful out_degree to be 1. we also want to make sure we
+ // don't count self-loop + 1 outgoing edge as not irreducible
+ if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifndef NDEBUG
+static
+bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
+ for (const auto &e : in_edges_range(v, g)) {
+ if (g[e].assert_flags != 0) {
+ return true;
+ }
+ }
+ for (const auto &e : out_edges_range(v, g)) {
+ if (g[e].assert_flags != 0) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+// populate VertexInfo table
+static
vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) {
const size_t num_verts = num_vertices(g);
vector<unique_ptr<VertexInfo>> infos;
infos.reserve(num_verts * 2);
- vector<VertexInfo *> vertex_map; // indexed by vertex_index property
+ vector<VertexInfo *> vertex_map; // indexed by vertex_index property
vertex_map.resize(num_verts);
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
infos.push_back(std::make_unique<VertexInfo>(v, g));
vertex_map[g[v].index] = infos.back().get();
}
-
+
// now, go through each vertex and populate its predecessor and successor
// lists
for (auto &vi : infos) {
assert(vi);
NFAVertex v = vi->v;
-
- // find predecessors
+
+ // find predecessors
for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
+ NFAVertex u = source(e, g);
VertexInfo *u_vi = vertex_map[g[u].index];
-
+
vi->pred_cr |= u_vi->cr;
vi->pred.insert(u_vi);
-
- // also set up edge tops
- if (is_triggered(g) && u == g.start) {
+
+ // also set up edge tops
+ if (is_triggered(g) && u == g.start) {
vi->edge_tops = g[e].tops;
- }
- }
-
- // find successors
+ }
+ }
+
+ // find successors
for (auto w : adjacent_vertices_range(v, g)) {
VertexInfo *w_vi = vertex_map[g[w].index];
vi->succ_cr |= w_vi->cr;
vi->succ.insert(w_vi);
- }
+ }
assert(!hasEdgeAsserts(vi->v, g));
- }
+ }
return infos;
-}
-
-// store equivalence class in VertexInfo for each vertex
-static
+}
+
+// store equivalence class in VertexInfo for each vertex
+static
vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
WorkQueue &work_queue, const NGHolder &g,
EquivalenceType eq) {
const size_t num_verts = infos.size();
-
+
vector<VertexInfoSet> classes;
ue2_unordered_map<ClassInfo, unsigned> classinfomap;
@@ -320,323 +320,323 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
classes.reserve(num_verts);
classinfomap.reserve(num_verts);
- // get distances from start (or accept) for all vertices
- // only one of them is used at a time, never both
- vector<NFAVertexDepth> depths;
- vector<NFAVertexRevDepth> rdepths;
-
- if (eq == LEFT_EQUIVALENCE) {
+ // get distances from start (or accept) for all vertices
+ // only one of them is used at a time, never both
+ vector<NFAVertexDepth> depths;
+ vector<NFAVertexRevDepth> rdepths;
+
+ if (eq == LEFT_EQUIVALENCE) {
depths = calcDepths(g);
- } else {
+ } else {
rdepths = calcRevDepths(g);
- }
-
- // partition the graph based on CharReach
+ }
+
+ // partition the graph based on CharReach
for (auto &vi : infos) {
assert(vi);
- ClassInfo::ClassDepth depth;
-
- if (eq == LEFT_EQUIVALENCE) {
+ ClassInfo::ClassDepth depth;
+
+ if (eq == LEFT_EQUIVALENCE) {
depth = depths[vi->vert_index];
- } else {
+ } else {
depth = rdepths[vi->vert_index];
- }
+ }
ClassInfo ci(g, *vi, depth, eq);
-
- auto ii = classinfomap.find(ci);
- if (ii == classinfomap.end()) {
+
+ auto ii = classinfomap.find(ci);
+ if (ii == classinfomap.end()) {
// vertex is in a new equivalence class by itself.
unsigned eq_class = classes.size();
vi->equivalence_class = eq_class;
classes.push_back({vi.get()});
classinfomap.emplace(move(ci), eq_class);
- } else {
+ } else {
// vertex is added to an existing class.
- unsigned eq_class = ii->second;
+ unsigned eq_class = ii->second;
vi->equivalence_class = eq_class;
classes.at(eq_class).insert(vi.get());
-
- // we now know that this particular class has more than one
- // vertex, so we add it to the work queue
- work_queue.push(eq_class);
- }
- }
+
+ // we now know that this particular class has more than one
+ // vertex, so we add it to the work queue
+ work_queue.push(eq_class);
+ }
+ }
DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size());
return classes;
-}
-
-// generalized equivalence processing (left and right)
-// basically, goes through every vertex in a class and checks if all successor or
-// predecessor classes match in all vertices. if classes mismatch, a vertex is
-// split into a separate class, along with all vertices having the same set of
-// successor/predecessor classes. the opposite side (successors for left
-// equivalence, predecessors for right equivalence) classes get revalidated in
-// case of a split.
-static
+}
+
+// generalized equivalence processing (left and right)
+// basically, goes through every vertex in a class and checks if all successor or
+// predecessor classes match in all vertices. if classes mismatch, a vertex is
+// split into a separate class, along with all vertices having the same set of
+// successor/predecessor classes. the opposite side (successors for left
+// equivalence, predecessors for right equivalence) classes get revalidated in
+// case of a split.
+static
void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue,
- EquivalenceType eq_type) {
- // now, go through the work queue until it's empty
- map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
- flat_set<unsigned> cur_classes;
- // local work queue, to store classes we want to revalidate in case of split
- WorkQueue reval_queue(work_queue.capacity());
-
- while (!work_queue.empty()) {
- // dequeue our class from the work queue
- unsigned cur_class = work_queue.pop();
-
- // get all vertices in current equivalence class
+ EquivalenceType eq_type) {
+ // now, go through the work queue until it's empty
+ map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
+ flat_set<unsigned> cur_classes;
+ // local work queue, to store classes we want to revalidate in case of split
+ WorkQueue reval_queue(work_queue.capacity());
+
+ while (!work_queue.empty()) {
+ // dequeue our class from the work queue
+ unsigned cur_class = work_queue.pop();
+
+ // get all vertices in current equivalence class
VertexInfoSet &cur_class_vertices = classes.at(cur_class);
-
- if (cur_class_vertices.size() < 2) {
- continue;
- }
-
- // clear data from previous iterations
- tentative_classmap.clear();
-
- DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
- cur_class, cur_class_vertices.size());
-
- // go through vertices in this class
- for (VertexInfo *vi : cur_class_vertices) {
- cur_classes.clear();
-
- // get vertex lists for equivalence vertices and vertices for
- // revalidation in case of split
- const auto &eq_vertices =
- (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
- const auto &reval_vertices =
- (eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
-
- // go through equivalence and note the classes
- for (const VertexInfo *tmp : eq_vertices) {
- cur_classes.insert(tmp->equivalence_class);
- }
-
- // note all the classes that need to be reevaluated
- for (const VertexInfo *tmp : reval_vertices) {
- reval_queue.push(tmp->equivalence_class);
- }
-
- VertexInfoSet &tentative_classes = tentative_classmap[cur_classes];
- tentative_classes.insert(vi);
- }
-
- // if we found more than one class, split and revalidate everything
- if (tentative_classmap.size() > 1) {
- auto tmi = tentative_classmap.begin();
-
- // start from the second class
- for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
- const VertexInfoSet &vertices_to_split = tmi->second;
+
+ if (cur_class_vertices.size() < 2) {
+ continue;
+ }
+
+ // clear data from previous iterations
+ tentative_classmap.clear();
+
+ DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
+ cur_class, cur_class_vertices.size());
+
+ // go through vertices in this class
+ for (VertexInfo *vi : cur_class_vertices) {
+ cur_classes.clear();
+
+ // get vertex lists for equivalence vertices and vertices for
+ // revalidation in case of split
+ const auto &eq_vertices =
+ (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
+ const auto &reval_vertices =
+ (eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
+
+ // go through equivalence and note the classes
+ for (const VertexInfo *tmp : eq_vertices) {
+ cur_classes.insert(tmp->equivalence_class);
+ }
+
+ // note all the classes that need to be reevaluated
+ for (const VertexInfo *tmp : reval_vertices) {
+ reval_queue.push(tmp->equivalence_class);
+ }
+
+ VertexInfoSet &tentative_classes = tentative_classmap[cur_classes];
+ tentative_classes.insert(vi);
+ }
+
+ // if we found more than one class, split and revalidate everything
+ if (tentative_classmap.size() > 1) {
+ auto tmi = tentative_classmap.begin();
+
+ // start from the second class
+ for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
+ const VertexInfoSet &vertices_to_split = tmi->second;
unsigned new_class = classes.size();
VertexInfoSet new_class_vertices;
-
- for (VertexInfo *vi : vertices_to_split) {
- vi->equivalence_class = new_class;
+
+ for (VertexInfo *vi : vertices_to_split) {
+ vi->equivalence_class = new_class;
// note: we cannot use the cur_class_vertices ref, as it is
// invalidated by modifications to the classes vector.
classes[cur_class].erase(vi);
- new_class_vertices.insert(vi);
- }
+ new_class_vertices.insert(vi);
+ }
classes.push_back(move(new_class_vertices));
if (contains(tmi->first, cur_class)) {
- reval_queue.push(new_class);
- }
- }
- work_queue.append(reval_queue);
- }
- reval_queue.clear();
- }
-}
-
-static
-bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
- const NGHolder &g) {
- /* We require separate eod and normal accept vertices for a class if we have
- * both normal accepts and eod accepts AND the reports are different for eod
- * and non-eod reports. */
-
- flat_set<ReportID> non_eod;
- flat_set<ReportID> eod;
-
- for (const VertexInfo *vi : vert_infos) {
- NFAVertex v = vi->v;
-
- if (edge(v, g.accept, g).second) {
- insert(&non_eod, g[v].reports);
- }
-
- if (edge(v, g.acceptEod, g).second) {
- insert(&eod, g[v].reports);
- }
- }
-
- if (non_eod.empty() || eod.empty()) {
- return false;
- }
-
- return non_eod != eod;
-
-}
-
-static
+ reval_queue.push(new_class);
+ }
+ }
+ work_queue.append(reval_queue);
+ }
+ reval_queue.clear();
+ }
+}
+
+static
+bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
+ const NGHolder &g) {
+ /* We require separate eod and normal accept vertices for a class if we have
+ * both normal accepts and eod accepts AND the reports are different for eod
+ * and non-eod reports. */
+
+ flat_set<ReportID> non_eod;
+ flat_set<ReportID> eod;
+
+ for (const VertexInfo *vi : vert_infos) {
+ NFAVertex v = vi->v;
+
+ if (edge(v, g.accept, g).second) {
+ insert(&non_eod, g[v].reports);
+ }
+
+ if (edge(v, g.acceptEod, g).second) {
+ insert(&eod, g[v].reports);
+ }
+ }
+
+ if (non_eod.empty() || eod.empty()) {
+ return false;
+ }
+
+ return non_eod != eod;
+
+}
+
+static
void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
unsigned eq_class, VertexInfoSet &cur_class_vertices,
set<NFAVertex> *toRemove) {
- DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
- "single vertex.\n", cur_class_vertices.size(), eq_class);
-
- // replace equivalence class with a single vertex:
- // 1. create new vertex with matching properties
- // 2. wire all predecessors to new vertex
- // 2a. update info for new vertex with new predecessors
- // 2b. update each predecessor's successor list
- // 3. wire all successors to new vertex
- // 3a. update info for new vertex with new successors
- // 3b. update each successor's predecessor list
- // 4. remove old vertex
-
- // any differences between vertex properties were resolved during
- // initial partitioning, so we assume that every vertex in equivalence
- // class has the same CharReach et al.
- // so, we find the first vertex in our class and get all its properties
-
- /* For left equivalence, if the members have different reporting behaviour
- * we sometimes require two vertices to be created (one connected to accept
- * and one to accepteod) */
-
- NFAVertex old_v = (*cur_class_vertices.begin())->v;
- NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same
- * props */
- g[new_v].reports.clear(); /* populated as we pull in succs */
-
- // store this vertex in our global vertex list
+ DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
+ "single vertex.\n", cur_class_vertices.size(), eq_class);
+
+ // replace equivalence class with a single vertex:
+ // 1. create new vertex with matching properties
+ // 2. wire all predecessors to new vertex
+ // 2a. update info for new vertex with new predecessors
+ // 2b. update each predecessor's successor list
+ // 3. wire all successors to new vertex
+ // 3a. update info for new vertex with new successors
+ // 3b. update each successor's predecessor list
+ // 4. remove old vertex
+
+ // any differences between vertex properties were resolved during
+ // initial partitioning, so we assume that every vertex in equivalence
+ // class has the same CharReach et al.
+ // so, we find the first vertex in our class and get all its properties
+
+ /* For left equivalence, if the members have different reporting behaviour
+ * we sometimes require two vertices to be created (one connected to accept
+ * and one to accepteod) */
+
+ NFAVertex old_v = (*cur_class_vertices.begin())->v;
+ NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same
+ * props */
+ g[new_v].reports.clear(); /* populated as we pull in succs */
+
+ // store this vertex in our global vertex list
infos.push_back(std::make_unique<VertexInfo>(new_v, g));
VertexInfo *new_vertex_info = infos.back().get();
-
- NFAVertex new_v_eod = NGHolder::null_vertex();
- VertexInfo *new_vertex_info_eod = nullptr;
-
- if (require_separate_eod_vertex(cur_class_vertices, g)) {
- new_v_eod = clone_vertex(g, old_v);
- g[new_v_eod].reports.clear();
+
+ NFAVertex new_v_eod = NGHolder::null_vertex();
+ VertexInfo *new_vertex_info_eod = nullptr;
+
+ if (require_separate_eod_vertex(cur_class_vertices, g)) {
+ new_v_eod = clone_vertex(g, old_v);
+ g[new_v_eod].reports.clear();
infos.push_back(std::make_unique<VertexInfo>(new_v_eod, g));
new_vertex_info_eod = infos.back().get();
- }
-
+ }
+
const auto &edgetops = (*cur_class_vertices.begin())->edge_tops;
- for (VertexInfo *old_vertex_info : cur_class_vertices) {
- assert(old_vertex_info->equivalence_class == eq_class);
-
- // mark this vertex for removal
- toRemove->insert(old_vertex_info->v);
-
- // for each predecessor, add edge to new vertex and update info
- for (VertexInfo *pred_info : old_vertex_info->pred) {
- // update info for new vertex
- new_vertex_info->pred.insert(pred_info);
- if (new_vertex_info_eod) {
- new_vertex_info_eod->pred.insert(pred_info);
- }
-
- // update info for predecessor
- pred_info->succ.erase(old_vertex_info);
-
- // if edge doesn't exist, create it
+ for (VertexInfo *old_vertex_info : cur_class_vertices) {
+ assert(old_vertex_info->equivalence_class == eq_class);
+
+ // mark this vertex for removal
+ toRemove->insert(old_vertex_info->v);
+
+ // for each predecessor, add edge to new vertex and update info
+ for (VertexInfo *pred_info : old_vertex_info->pred) {
+ // update info for new vertex
+ new_vertex_info->pred.insert(pred_info);
+ if (new_vertex_info_eod) {
+ new_vertex_info_eod->pred.insert(pred_info);
+ }
+
+ // update info for predecessor
+ pred_info->succ.erase(old_vertex_info);
+
+ // if edge doesn't exist, create it
NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g);
-
+
// put edge tops, if applicable
if (!edgetops.empty()) {
assert(g[e].tops.empty() || g[e].tops == edgetops);
g[e].tops = edgetops;
- }
-
- pred_info->succ.insert(new_vertex_info);
-
- if (new_v_eod) {
- NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
+ }
+
+ pred_info->succ.insert(new_vertex_info);
+
+ if (new_v_eod) {
+ NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
g);
-
+
// put edge tops, if applicable
if (!edgetops.empty()) {
assert(g[e].tops.empty() || g[e].tops == edgetops);
g[ee].tops = edgetops;
- }
-
- pred_info->succ.insert(new_vertex_info_eod);
- }
- }
-
- // for each successor, add edge from new vertex and update info
- for (VertexInfo *succ_info : old_vertex_info->succ) {
- NFAVertex succ_v = succ_info->v;
-
- // update info for successor
- succ_info->pred.erase(old_vertex_info);
-
- if (new_v_eod && succ_v == g.acceptEod) {
- // update info for new vertex
- new_vertex_info_eod->succ.insert(succ_info);
- insert(&g[new_v_eod].reports,
- g[old_vertex_info->v].reports);
-
- add_edge_if_not_present(new_v_eod, succ_v, g);
- succ_info->pred.insert(new_vertex_info_eod);
- } else {
- // update info for new vertex
- new_vertex_info->succ.insert(succ_info);
-
- // if edge doesn't exist, create it
- add_edge_if_not_present(new_v, succ_v, g);
- succ_info->pred.insert(new_vertex_info);
-
- if (is_any_accept(succ_v, g)) {
- insert(&g[new_v].reports,
- g[old_vertex_info->v].reports);
- }
- }
- }
- }
-
- // update classmap
- new_vertex_info->equivalence_class = eq_class;
- cur_class_vertices.insert(new_vertex_info);
-}
-
-// walk through vertices of an equivalence class and replace them with a single
-// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
-// report behaviour with a single vertex).
-static
+ }
+
+ pred_info->succ.insert(new_vertex_info_eod);
+ }
+ }
+
+ // for each successor, add edge from new vertex and update info
+ for (VertexInfo *succ_info : old_vertex_info->succ) {
+ NFAVertex succ_v = succ_info->v;
+
+ // update info for successor
+ succ_info->pred.erase(old_vertex_info);
+
+ if (new_v_eod && succ_v == g.acceptEod) {
+ // update info for new vertex
+ new_vertex_info_eod->succ.insert(succ_info);
+ insert(&g[new_v_eod].reports,
+ g[old_vertex_info->v].reports);
+
+ add_edge_if_not_present(new_v_eod, succ_v, g);
+ succ_info->pred.insert(new_vertex_info_eod);
+ } else {
+ // update info for new vertex
+ new_vertex_info->succ.insert(succ_info);
+
+ // if edge doesn't exist, create it
+ add_edge_if_not_present(new_v, succ_v, g);
+ succ_info->pred.insert(new_vertex_info);
+
+ if (is_any_accept(succ_v, g)) {
+ insert(&g[new_v].reports,
+ g[old_vertex_info->v].reports);
+ }
+ }
+ }
+ }
+
+ // update classmap
+ new_vertex_info->equivalence_class = eq_class;
+ cur_class_vertices.insert(new_vertex_info);
+}
+
+// walk through vertices of an equivalence class and replace them with a single
+// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
+// report behaviour with a single vertex).
+static
bool mergeEquivalentClasses(vector<VertexInfoSet> &classes,
vector<unique_ptr<VertexInfo>> &infos,
- NGHolder &g) {
- bool merged = false;
- set<NFAVertex> toRemove;
-
- // go through all classes and merge classes with more than one vertex
+ NGHolder &g) {
+ bool merged = false;
+ set<NFAVertex> toRemove;
+
+ // go through all classes and merge classes with more than one vertex
for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) {
- // get all vertices in current equivalence class
+ // get all vertices in current equivalence class
VertexInfoSet &cur_class_vertices = classes[eq_class];
-
- // we don't care for single-vertex classes
- if (cur_class_vertices.size() > 1) {
- merged = true;
- mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
- }
- }
-
- // remove all dead vertices
- DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
- remove_vertices(toRemove, g);
-
- return merged;
-}
-
+
+ // we don't care for single-vertex classes
+ if (cur_class_vertices.size() > 1) {
+ merged = true;
+ mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
+ }
+ }
+
+ // remove all dead vertices
+ DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
+ remove_vertices(toRemove, g);
+
+ return merged;
+}
+
static
bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
// create a list of equivalence classes to check
@@ -657,26 +657,26 @@ bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
return mergeEquivalentClasses(classes, infos, g);
}
-bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
- if (!cc.grey.equivalenceEnable) {
- DEBUG_PRINTF("equivalence processing disabled in grey box\n");
- return false;
- }
+bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
+ if (!cc.grey.equivalenceEnable) {
+ DEBUG_PRINTF("equivalence processing disabled in grey box\n");
+ return false;
+ }
renumber_vertices(g);
-
- // Cheap check: if all the non-special vertices have in-degree one and
- // out-degree one, there's no redundancy in this here graph and we can
- // vamoose.
- if (isIrreducible(g)) {
- DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
- return false;
- }
-
- // take note if we have merged any vertices
- bool merge = false;
+
+ // Cheap check: if all the non-special vertices have in-degree one and
+ // out-degree one, there's no redundancy in this here graph and we can
+ // vamoose.
+ if (isIrreducible(g)) {
+ DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
+ return false;
+ }
+
+ // take note if we have merged any vertices
+ bool merge = false;
merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE);
merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE);
- return merge;
-}
-
-} // namespace ue2
+ return merge;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h
index ef8f92e7e3..d716841e94 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h
@@ -1,47 +1,47 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Equivalence class graph reduction pass.
- */
-
-#ifndef NG_EQUIVALENCE_H_
-#define NG_EQUIVALENCE_H_
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-/** Attempt to make the NFA graph \p g smaller by performing a number of local
- * transformations. */
-bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc);
-
-} // namespace ue2
-
-#endif /* NG_EQUIVALENCE_H_ */
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Equivalence class graph reduction pass.
+ */
+
+#ifndef NG_EQUIVALENCE_H_
+#define NG_EQUIVALENCE_H_
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+/** Attempt to make the NFA graph \p g smaller by performing a number of local
+ * transformations. */
+bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc);
+
+} // namespace ue2
+
+#endif /* NG_EQUIVALENCE_H_ */
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp
index 9d90489471..9ef0f01ce7 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp
@@ -1,328 +1,328 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Execute an NFA over a given input, returning the set of states that
- * are active afterwards.
- *
- * Note: although our external interfaces for execute_graph() use std::set, we
- * use a dynamic bitset containing the vertex indices internally for
- * performance.
- */
-#include "ng_execute.h"
-
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
-#include "util/ue2string.h"
-
-#include <sstream>
-#include <string>
-
-#include <boost/dynamic_bitset.hpp>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-struct StateInfo {
- StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {}
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Execute an NFA over a given input, returning the set of states that
+ * are active afterwards.
+ *
+ * Note: although our external interfaces for execute_graph() use std::set, we
+ * use a dynamic bitset containing the vertex indices internally for
+ * performance.
+ */
+#include "ng_execute.h"
+
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/ue2string.h"
+
+#include <sstream>
+#include <string>
+
+#include <boost/dynamic_bitset.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::dynamic_bitset;
+
+namespace ue2 {
+
+struct StateInfo {
+ StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {}
StateInfo() : vertex(NGHolder::null_vertex()) {}
- NFAVertex vertex;
- CharReach reach;
-};
-
-#ifdef DEBUG
-static
-std::string dumpStates(const dynamic_bitset<> &s) {
- std::ostringstream oss;
- for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) {
- oss << i << " ";
- }
- return oss.str();
-}
-#endif
-
-static
-void step(const NGHolder &g, const vector<StateInfo> &info,
- const dynamic_bitset<> &in, dynamic_bitset<> *out) {
- out->reset();
- for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
- NFAVertex u = info[i].vertex;
- for (auto v : adjacent_vertices_range(u, g)) {
- out->set(g[v].index);
- }
- }
-}
-
-static
-void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states,
- const CharReach &cr) {
- for (size_t i = states->find_first(); i != states->npos;
- i = states->find_next(i)) {
- if ((info[i].reach & cr).none()) {
- states->reset(i);
- }
- }
-}
-
-template<typename inputT>
-static
-void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
- const inputT &input, dynamic_bitset<> *states,
- bool kill_sds) {
- dynamic_bitset<> &curr = *states;
- dynamic_bitset<> next(curr.size());
- DEBUG_PRINTF("%zu states in\n", states->count());
-
- for (const auto &e : input) {
- DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
- step(g, info, curr, &next);
- if (kill_sds) {
- next.reset(NODE_START_DOTSTAR);
- }
- filter_by_reach(info, &next, e);
- next.swap(curr);
-
- if (curr.empty()) {
- DEBUG_PRINTF("went dead\n");
- break;
- }
- }
-
- DEBUG_PRINTF("%zu states out\n", states->size());
-}
-
-static
-dynamic_bitset<> makeStateBitset(const NGHolder &g,
- const flat_set<NFAVertex> &in) {
- dynamic_bitset<> work_states(num_vertices(g));
- for (const auto &v : in) {
- u32 idx = g[v].index;
- work_states.set(idx);
- }
- return work_states;
-}
-
-static
-flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
- const vector<StateInfo> &info) {
- flat_set<NFAVertex> out;
- for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
- out.insert(info[i].vertex);
- }
- return out;
-}
-
-static
-vector<StateInfo> makeInfoTable(const NGHolder &g) {
- vector<StateInfo> info(num_vertices(g));
- for (auto v : vertices_range(g)) {
- u32 idx = g[v].index;
- const CharReach &cr = g[v].char_reach;
- assert(idx < info.size());
- info[idx] = StateInfo(v, cr);
- }
- return info;
-}
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
- const flat_set<NFAVertex> &initial_states,
- bool kill_sds) {
- assert(hasCorrectlyNumberedVertices(g));
-
- auto info = makeInfoTable(g);
- auto work_states = makeStateBitset(g, initial_states);
-
- execute_graph_i(g, info, input, &work_states, kill_sds);
-
- return getVertices(work_states, info);
-}
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g,
- const vector<CharReach> &input,
- const flat_set<NFAVertex> &initial_states) {
- assert(hasCorrectlyNumberedVertices(g));
-
- auto info = makeInfoTable(g);
- auto work_states = makeStateBitset(g, initial_states);
-
- execute_graph_i(g, info, input, &work_states, false);
-
- return getVertices(work_states, info);
-}
-
-namespace {
-class eg_visitor : public boost::default_dfs_visitor {
-public:
- eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in,
- const NGHolder &input_g_in,
- map<NFAVertex, dynamic_bitset<> > &states_in)
- : vertex_count(num_vertices(running_g_in)), running_g(running_g_in),
- info(info_in), input_g(input_g_in), states(states_in),
- succs(vertex_count) {}
-
+ NFAVertex vertex;
+ CharReach reach;
+};
+
+#ifdef DEBUG
+/** Debug-only helper: renders the index of every set bit in \p s, each index
+ * followed by a single space. */
+static
+std::string dumpStates(const dynamic_bitset<> &s) {
+    std::ostringstream rendered;
+    size_t bit = s.find_first();
+    while (bit != s.npos) {
+        rendered << bit << " ";
+        bit = s.find_next(bit);
+    }
+    return rendered.str();
+}
+#endif
+
+/** Computes the one-step successor set. \p out is cleared first; then, for
+ * every state index set in \p in, the index of each vertex adjacent to that
+ * state's vertex in \p g is set in \p out. Character reach is not considered
+ * here. */
+static
+void step(const NGHolder &g, const vector<StateInfo> &info,
+          const dynamic_bitset<> &in, dynamic_bitset<> *out) {
+    out->reset();
+    size_t state = in.find_first();
+    while (state != in.npos) {
+        NFAVertex from = info[state].vertex;
+        for (auto to : adjacent_vertices_range(from, g)) {
+            out->set(g[to].index);
+        }
+        state = in.find_next(state);
+    }
+}
+
+/** Clears every bit in \p states whose StateInfo reach has no overlap with
+ * \p cr; bits whose reach intersects \p cr are left set. */
+static
+void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states,
+                     const CharReach &cr) {
+    dynamic_bitset<> &live = *states;
+    size_t idx = live.find_first();
+    while (idx != live.npos) {
+        if ((info[idx].reach & cr).none()) {
+            live.reset(idx);
+        }
+        /* find_next() only looks past idx, so clearing idx above is safe */
+        idx = live.find_next(idx);
+    }
+}
+
+/** Runs \p g over \p input one element at a time, updating \p states in
+ * place.
+ *
+ * For each input element the successors of the current state set are
+ * computed with step(), optionally stripped of the NODE_START_DOTSTAR bit
+ * (when \p kill_sds is set), and then filtered by the element's reach.
+ * Processing stops early once no state remains active.
+ *
+ * \param g        graph being executed.
+ * \param info     index -> (vertex, reach) table for \p g.
+ * \param input    sequence of elements accepted by filter_by_reach().
+ * \param states   in: initial active states; out: states active after the
+ *                 consumed input.
+ * \param kill_sds if true, bit NODE_START_DOTSTAR is cleared after every
+ *                 step.
+ */
+template<typename inputT>
+static
+void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
+                     const inputT &input, dynamic_bitset<> *states,
+                     bool kill_sds) {
+    dynamic_bitset<> &curr = *states;
+    dynamic_bitset<> next(curr.size());
+    DEBUG_PRINTF("%zu states in\n", states->count());
+
+    for (const auto &e : input) {
+        DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
+        step(g, info, curr, &next);
+        if (kill_sds) {
+            next.reset(NODE_START_DOTSTAR);
+        }
+        filter_by_reach(info, &next, e);
+        next.swap(curr);
+
+        /* dynamic_bitset::empty() only reports size() == 0, so it never
+         * detected a dead state set; none() tests that no bit is set. */
+        if (curr.none()) {
+            DEBUG_PRINTF("went dead\n");
+            break;
+        }
+    }
+
+    /* report the number of active states (count), not the bitset width */
+    DEBUG_PRINTF("%zu states out\n", states->count());
+}
+
+/** Builds a bitset with one bit per vertex of \p g and sets the bit at the
+ * index of every vertex listed in \p in. */
+static
+dynamic_bitset<> makeStateBitset(const NGHolder &g,
+                                 const flat_set<NFAVertex> &in) {
+    dynamic_bitset<> bits(num_vertices(g));
+    for (const auto &vertex : in) {
+        const u32 pos = g[vertex].index;
+        bits.set(pos);
+    }
+    return bits;
+}
+
+/** Converts a state bitset back into a vertex set by looking up the vertex
+ * stored in \p info for every set bit of \p in. */
+static
+flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
+                                const vector<StateInfo> &info) {
+    flat_set<NFAVertex> active;
+    size_t bit = in.find_first();
+    while (bit != in.npos) {
+        active.insert(info[bit].vertex);
+        bit = in.find_next(bit);
+    }
+    return active;
+}
+
+/** Builds the index -> StateInfo lookup table for \p g: the entry at each
+ * vertex's index holds that vertex and its char_reach. The table has one
+ * slot per vertex of \p g. */
+static
+vector<StateInfo> makeInfoTable(const NGHolder &g) {
+    vector<StateInfo> table(num_vertices(g));
+    for (auto v : vertices_range(g)) {
+        const u32 slot = g[v].index;
+        assert(slot < table.size());
+        table[slot] = StateInfo(v, g[v].char_reach);
+    }
+    return table;
+}
+
+/** Executes \p g over the literal \p input starting from \p initial_states
+ * and returns the vertices still active afterwards. \p kill_sds is forwarded
+ * to execute_graph_i(). */
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
+                                  const flat_set<NFAVertex> &initial_states,
+                                  bool kill_sds) {
+    assert(hasCorrectlyNumberedVertices(g));
+
+    const vector<StateInfo> info = makeInfoTable(g);
+    dynamic_bitset<> active = makeStateBitset(g, initial_states);
+    execute_graph_i(g, info, input, &active, kill_sds);
+    return getVertices(active, info);
+}
+
+/** Executes \p g over a sequence of character classes starting from
+ * \p initial_states and returns the vertices still active afterwards. The
+ * NODE_START_DOTSTAR bit is never stripped (kill_sds is passed as false). */
+flat_set<NFAVertex> execute_graph(const NGHolder &g,
+                                  const vector<CharReach> &input,
+                                  const flat_set<NFAVertex> &initial_states) {
+    assert(hasCorrectlyNumberedVertices(g));
+
+    const vector<StateInfo> info = makeInfoTable(g);
+    dynamic_bitset<> active = makeStateBitset(g, initial_states);
+    execute_graph_i(g, info, input, &active, false);
+    return getVertices(active, info);
+}
+
+namespace {
+class eg_visitor : public boost::default_dfs_visitor {
+public:
+ eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in,
+ const NGHolder &input_g_in,
+ map<NFAVertex, dynamic_bitset<> > &states_in)
+ : vertex_count(num_vertices(running_g_in)), running_g(running_g_in),
+ info(info_in), input_g(input_g_in), states(states_in),
+ succs(vertex_count) {}
+
void finish_vertex(NFAVertex input_v,
const boost::reverse_graph<NGHolder, const NGHolder &> &) {
- if (input_v == input_g.accept) {
- return;
- }
- assert(input_v != input_g.acceptEod);
-
+ if (input_v == input_g.accept) {
+ return;
+ }
+ assert(input_v != input_g.acceptEod);
+
DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index);
-
- /* finish vertex is called on vertex --> implies that all its parents
- * (in the forward graph) are also finished. Our parents will have
- * pushed all of their successors for us into our stateset. */
- states[input_v].resize(vertex_count);
- dynamic_bitset<> our_states = states[input_v];
- states[input_v].reset();
-
- filter_by_reach(info, &our_states,
- input_g[input_v].char_reach);
-
- if (input_v != input_g.startDs &&
- edge(input_v, input_v, input_g).second) {
- bool changed;
- do {
- DEBUG_PRINTF("actually not finished -> have self loop\n");
- succs.reset();
- step(running_g, info, our_states, &succs);
- filter_by_reach(info, &succs,
- input_g[input_v].char_reach);
- dynamic_bitset<> our_states2 = our_states | succs;
- changed = our_states2 != our_states;
- our_states.swap(our_states2);
- } while (changed);
- }
-
- DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str());
-
- succs.reset();
- step(running_g, info, our_states, &succs);
-
- /* we need to push into all our (forward) children their successors
- * from us. */
- for (auto v : adjacent_vertices_range(input_v, input_g)) {
+
+ /* finish vertex is called on vertex --> implies that all its parents
+ * (in the forward graph) are also finished. Our parents will have
+ * pushed all of their successors for us into our stateset. */
+ states[input_v].resize(vertex_count);
+ dynamic_bitset<> our_states = states[input_v];
+ states[input_v].reset();
+
+ filter_by_reach(info, &our_states,
+ input_g[input_v].char_reach);
+
+ if (input_v != input_g.startDs &&
+ edge(input_v, input_v, input_g).second) {
+ bool changed;
+ do {
+ DEBUG_PRINTF("actually not finished -> have self loop\n");
+ succs.reset();
+ step(running_g, info, our_states, &succs);
+ filter_by_reach(info, &succs,
+ input_g[input_v].char_reach);
+ dynamic_bitset<> our_states2 = our_states | succs;
+ changed = our_states2 != our_states;
+ our_states.swap(our_states2);
+ } while (changed);
+ }
+
+ DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str());
+
+ succs.reset();
+ step(running_g, info, our_states, &succs);
+
+ /* we need to push into all our (forward) children their successors
+ * from us. */
+ for (auto v : adjacent_vertices_range(input_v, input_g)) {
DEBUG_PRINTF("pushing our states to pstate %zu\n",
- input_g[v].index);
- if (v == input_g.startDs) {
- /* no need for intra start edges */
- continue;
- }
-
- states[v].resize(vertex_count); // May not yet exist
-
- if (v != input_g.accept) {
- states[v] |= succs;
- } else {
- /* accept is a magical pseudo state which does not consume
- * characters and we are using to collect the output states. We
- * must fill it with our states rather than our succs. */
- DEBUG_PRINTF("prev outputted rstates: %s\n",
- dumpStates(states[v]).c_str());
- DEBUG_PRINTF("outputted rstates: %s\n",
- dumpStates(our_states).c_str());
-
- states[v] |= our_states;
-
- DEBUG_PRINTF("new outputted rstates: %s\n",
- dumpStates(states[v]).c_str());
- }
- }
-
- /* note: the states at this vertex are no longer required */
- }
-
-private:
- const size_t vertex_count;
- const NGHolder &running_g;
- const vector<StateInfo> &info;
- const NGHolder &input_g;
- map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of
- states in running_g */
- dynamic_bitset<> succs; // temp use internally
-};
-} // namespace
-
-flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
- const NGHolder &input_dag,
- const flat_set<NFAVertex> &input_start_states,
- const flat_set<NFAVertex> &initial_states) {
- DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
- num_vertices(running_g), num_vertices(input_dag));
- assert(hasCorrectlyNumberedVertices(running_g));
- assert(in_degree(input_dag.acceptEod, input_dag) == 1);
-
- map<NFAVertex, boost::default_color_type> colours;
- /* could just a topo order, but really it is time to pull a slightly bigger
- * gun: DFS */
+ input_g[v].index);
+ if (v == input_g.startDs) {
+ /* no need for intra start edges */
+ continue;
+ }
+
+ states[v].resize(vertex_count); // May not yet exist
+
+ if (v != input_g.accept) {
+ states[v] |= succs;
+ } else {
+ /* accept is a magical pseudo state which does not consume
+ * characters and we are using to collect the output states. We
+ * must fill it with our states rather than our succs. */
+ DEBUG_PRINTF("prev outputted rstates: %s\n",
+ dumpStates(states[v]).c_str());
+ DEBUG_PRINTF("outputted rstates: %s\n",
+ dumpStates(our_states).c_str());
+
+ states[v] |= our_states;
+
+ DEBUG_PRINTF("new outputted rstates: %s\n",
+ dumpStates(states[v]).c_str());
+ }
+ }
+
+ /* note: the states at this vertex are no longer required */
+ }
+
+private:
+ const size_t vertex_count;
+ const NGHolder &running_g;
+ const vector<StateInfo> &info;
+ const NGHolder &input_g;
+ map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of
+ states in running_g */
+ dynamic_bitset<> succs; // temp use internally
+};
+} // namespace
+
+flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
+ const NGHolder &input_dag,
+ const flat_set<NFAVertex> &input_start_states,
+ const flat_set<NFAVertex> &initial_states) {
+ DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
+ num_vertices(running_g), num_vertices(input_dag));
+ assert(hasCorrectlyNumberedVertices(running_g));
+ assert(in_degree(input_dag.acceptEod, input_dag) == 1);
+
+ map<NFAVertex, boost::default_color_type> colours;
+ /* could just a topo order, but really it is time to pull a slightly bigger
+ * gun: DFS */
boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag);
- map<NFAVertex, dynamic_bitset<> > dfs_states;
-
- auto info = makeInfoTable(running_g);
- auto input_fs = makeStateBitset(running_g, initial_states);
-
- for (auto v : input_start_states) {
- dfs_states[v] = input_fs;
- }
-
- depth_first_visit(revg, input_dag.accept,
- eg_visitor(running_g, info, input_dag, dfs_states),
- make_assoc_property_map(colours));
-
- auto states = getVertices(dfs_states[input_dag.accept], info);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" output rstates:");
- for (const auto &v : states) {
+ map<NFAVertex, dynamic_bitset<> > dfs_states;
+
+ auto info = makeInfoTable(running_g);
+ auto input_fs = makeStateBitset(running_g, initial_states);
+
+ for (auto v : input_start_states) {
+ dfs_states[v] = input_fs;
+ }
+
+ depth_first_visit(revg, input_dag.accept,
+ eg_visitor(running_g, info, input_dag, dfs_states),
+ make_assoc_property_map(colours));
+
+ auto states = getVertices(dfs_states[input_dag.accept], info);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" output rstates:");
+ for (const auto &v : states) {
printf(" %zu", running_g[v].index);
- }
- printf("\n");
-#endif
-
- return states;
-}
-
-flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
- const NGHolder &input_dag,
- const flat_set<NFAVertex> &initial_states) {
- auto input_start_states = {input_dag.start, input_dag.startDs};
- return execute_graph(running_g, input_dag, input_start_states,
- initial_states);
-}
-
+ }
+ printf("\n");
+#endif
+
+ return states;
+}
+
+/** Convenience overload: executes \p running_g against \p input_dag using
+ * the input graph's start and startDs vertices as its source states. */
+flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
+                                  const NGHolder &input_dag,
+                                  const flat_set<NFAVertex> &initial_states) {
+    const flat_set<NFAVertex> default_starts{input_dag.start,
+                                             input_dag.startDs};
+    return execute_graph(running_g, input_dag, default_starts,
+                         initial_states);
+}
+
+
static
bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
const dynamic_bitset<> &s,
@@ -368,4 +368,4 @@ bool can_die_early(const NGHolder &g, u32 age_limit) {
age_limit);
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h
index 32f5520d33..17625b2aa3 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h
@@ -1,72 +1,72 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Execute an NFA over a given input, returning the set of states that
- * are active afterwards.
- */
-
-#ifndef NG_EXECUTE_H
-#define NG_EXECUTE_H
-
-#include "ng_holder.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Execute an NFA over a given input, returning the set of states that
+ * are active afterwards.
+ */
+
+#ifndef NG_EXECUTE_H
+#define NG_EXECUTE_H
+
+#include "ng_holder.h"
#include "util/flat_containers.h"
-
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-struct ue2_literal;
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
- const flat_set<NFAVertex> &initial,
- bool kill_sds = false);
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g,
- const std::vector<CharReach> &input,
- const flat_set<NFAVertex> &initial);
-
-/** on exit, states contains any state which may still be enabled after
- * receiving an input which corresponds to some path through the input_dag from
- * start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
- */
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
- const flat_set<NFAVertex> &initial);
-
-/* as above, but able to specify the source states for the input graph */
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
- const flat_set<NFAVertex> &input_start_states,
- const flat_set<NFAVertex> &initial);
-
+
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+struct ue2_literal;
+
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
+ const flat_set<NFAVertex> &initial,
+ bool kill_sds = false);
+
+flat_set<NFAVertex> execute_graph(const NGHolder &g,
+ const std::vector<CharReach> &input,
+ const flat_set<NFAVertex> &initial);
+
+/** on exit, states contains any state which may still be enabled after
+ * receiving an input which corresponds to some path through the input_dag from
+ * start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
+ */
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
+ const flat_set<NFAVertex> &initial);
+
+/* as above, but able to specify the source states for the input graph */
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
+ const flat_set<NFAVertex> &input_start_states,
+ const flat_set<NFAVertex> &initial);
+
/* returns true if it is possible for the nfa to die within age_limit bytes */
bool can_die_early(const NGHolder &g, u32 age_limit);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp
index f8abbd04a2..378c22bf82 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp
@@ -1,102 +1,102 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Code for discovering properties of an NFA graph used by
* hs_expression_info().
- */
-#include "ng_expr_info.h"
-
-#include "hs_internal.h"
-#include "ng.h"
-#include "ng_asserts.h"
-#include "ng_depth.h"
-#include "ng_edge_redundancy.h"
+ */
+#include "ng_expr_info.h"
+
+#include "hs_internal.h"
+#include "ng.h"
+#include "ng_asserts.h"
+#include "ng_depth.h"
+#include "ng_edge_redundancy.h"
#include "ng_extparam.h"
#include "ng_fuzzy.h"
-#include "ng_holder.h"
+#include "ng_holder.h"
#include "ng_prune.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
#include "compiler/expression_info.h"
-#include "parser/position.h" // for POS flags
-#include "util/boundary_reports.h"
-#include "util/compile_context.h"
-#include "util/depth.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-
-#include <limits.h>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-/* get rid of leading \b and multiline ^ vertices */
-static
+#include "parser/position.h" // for POS flags
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/depth.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+
+#include <limits.h>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+/* get rid of leading \b and multiline ^ vertices */
+static
void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
- vector<NFAVertex> victims;
-
+ vector<NFAVertex> victims;
+
for (auto v : adjacent_vertices_range(root, g)) {
if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
- DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
- victims.push_back(v);
- }
- }
-
- for (auto u : victims) {
+ DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
+ victims.push_back(v);
+ }
+ }
+
+ for (auto u : victims) {
for (auto v : adjacent_vertices_range(u, g)) {
add_edge_if_not_present(root, v, g);
- }
- }
-
+ }
+ }
+
remove_vertices(victims, g);
-}
-
-static
+}
+
+static
void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
- const vector<DepthMinMax> &depths, DepthMinMax &info) {
+ const vector<DepthMinMax> &depths, DepthMinMax &info) {
if (is_any_accept(v, g)) {
- return;
- }
+ return;
+ }
if (is_any_start(v, g)) {
info.min = depth(0);
- info.max = max(info.max, depth(0));
- return;
- }
-
+ info.max = max(info.max, depth(0));
+ return;
+ }
+
u32 idx = g[v].index;
- assert(idx < depths.size());
- const DepthMinMax &d = depths.at(idx);
-
+ assert(idx < depths.size());
+ const DepthMinMax &d = depths.at(idx);
+
for (ReportID report_id : g[v].reports) {
const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK);
@@ -126,24 +126,24 @@ void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
rd.str().c_str());
info = unionDepthMinMax(info, rd);
- }
-}
-
-static
+ }
+}
+
+static
bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
for (const auto &report_id : all_reports(g)) {
- if (rm.getReport(report_id).offsetAdjust) {
- return true;
- }
- }
- return false;
-}
-
+ if (rm.getReport(report_id).offsetAdjust) {
+ return true;
+ }
+ }
+ return false;
+}
+
void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
NGHolder &g, ExpressionInfo &expr,
hs_expr_info *info) {
- assert(info);
-
+ assert(info);
+
// remove reports that aren't on vertices connected to accept.
clearReports(g);
@@ -154,16 +154,16 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
* match those in NG::addGraph().
*/
- /* ensure utf8 starts at cp boundary */
+ /* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, g, expr);
-
+
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
}
-
+
bool hamming = expr.hamm_distance > 0;
u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
-
+
// validate graph's suitability for fuzzing
validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey);
@@ -189,30 +189,30 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
auto depths = calcDepthsFrom(g, g.start);
- DepthMinMax d;
-
+ DepthMinMax d;
+
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
checkVertex(rm, g, u, depths, d);
- }
-
+ }
+
for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
checkVertex(rm, g, u, depths, d);
- }
-
- if (d.max.is_finite()) {
- info->max_width = d.max;
- } else {
- info->max_width = UINT_MAX;
- }
- if (d.min.is_finite()) {
- info->min_width = d.min;
- } else {
- info->min_width = UINT_MAX;
- }
-
+ }
+
+ if (d.max.is_finite()) {
+ info->max_width = d.max;
+ } else {
+ info->max_width = UINT_MAX;
+ }
+ if (d.min.is_finite()) {
+ info->min_width = d.min;
+ } else {
+ info->min_width = UINT_MAX;
+ }
+
info->unordered_matches = hasOffsetAdjust(rm, g);
info->matches_at_eod = can_match_at_eod(g);
info->matches_only_at_eod = can_only_match_at_eod(g);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h
index f9bd680939..9500338f55 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Code for discovering properties of an expression used by
- * hs_expression_info.
- */
-
-#ifndef NG_EXPR_INFO_H
-#define NG_EXPR_INFO_H
-
-struct hs_expr_info;
-
-namespace ue2 {
-
+ * hs_expression_info.
+ */
+
+#ifndef NG_EXPR_INFO_H
+#define NG_EXPR_INFO_H
+
+struct hs_expr_info;
+
+namespace ue2 {
+
class ExpressionInfo;
class NGHolder;
-class ReportManager;
+class ReportManager;
struct CompileContext;
-
+
void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
NGHolder &g, ExpressionInfo &expr, hs_expr_info *info);
-
-} // namespace ue2
-
-#endif // NG_EXPR_INFO_H
+
+} // namespace ue2
+
+#endif // NG_EXPR_INFO_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp
index 6eb23113f3..cee47ffe70 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp
@@ -1,74 +1,74 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Propagate extended parameters to vertex reports and reduce graph if
- * possible.
- *
- * This code handles the propagation of the extension parameters specified by
+ * \brief Propagate extended parameters to vertex reports and reduce graph if
+ * possible.
+ *
+ * This code handles the propagation of the extension parameters specified by
* the user with the \ref hs_expr_ext structure into the reports on the graph's
- * vertices.
- *
- * There are also some analyses that prune edges that cannot contribute to a
- * match given these constraints, or transform the graph in order to make a
- * constraint implicit.
- */
+ * vertices.
+ *
+ * There are also some analyses that prune edges that cannot contribute to a
+ * match given these constraints, or transform the graph in order to make a
+ * constraint implicit.
+ */
#include "ng_extparam.h"
-#include "ng.h"
-#include "ng_depth.h"
-#include "ng_dump.h"
-#include "ng_prune.h"
-#include "ng_reports.h"
-#include "ng_som_util.h"
-#include "ng_width.h"
-#include "ng_util.h"
-#include "ue2common.h"
+#include "ng.h"
+#include "ng_depth.h"
+#include "ng_dump.h"
+#include "ng_prune.h"
+#include "ng_reports.h"
+#include "ng_som_util.h"
+#include "ng_width.h"
+#include "ng_util.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "parser/position.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include <sstream>
-#include <string>
-
-using namespace std;
-
-namespace ue2 {
-
-static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
-static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
-
+#include "parser/position.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+namespace ue2 {
+
+static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
+static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
+
/** True if all the given reports have the same extparam bounds. */
template<typename Container>
bool hasSameBounds(const Container &reports, const ReportManager &rm) {
@@ -91,82 +91,82 @@ bool hasSameBounds(const Container &reports, const ReportManager &rm) {
* \brief Find the (min, max) offset adjustment for the reports on a given
* vertex.
*/
-static
-pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
- const NGHolder &g, NFAVertex v) {
- s32 minAdj = 0, maxAdj = 0;
- const auto &reports = g[v].reports;
- for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
- const Report &ir = rm.getReport(*ri);
- if (ri == reports.begin()) {
- minAdj = ir.offsetAdjust;
- maxAdj = ir.offsetAdjust;
- } else {
- minAdj = min(minAdj, ir.offsetAdjust);
- maxAdj = max(maxAdj, ir.offsetAdjust);
- }
- }
-
- return make_pair(minAdj, maxAdj);
-}
-
-/** \brief Find the (min, max) length of any match for the given holder. */
-static
-DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
- DepthMinMax match_depths;
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- pair<s32, s32> adj;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- u32 idx = g[v].index;
- DepthMinMax d = depths[idx]; // copy
- adj = getMinMaxOffsetAdjust(rm, g, v);
- DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
- d.str().c_str(), adj.first, adj.second);
- d.min += adj.first;
- d.max += adj.second;
- match_depths = unionDepthMinMax(match_depths, d);
- }
-
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
- u32 idx = g[v].index;
- DepthMinMax d = depths[idx]; // copy
- adj = getMinMaxOffsetAdjust(rm, g, v);
- DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
- d.str().c_str(), adj.first, adj.second);
- d.min += adj.first;
- d.max += adj.second;
- match_depths = unionDepthMinMax(match_depths, d);
- }
-
- DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
-
- assert(match_depths.min.is_reachable());
- assert(match_depths.max.is_reachable());
- return match_depths;
-}
-
+static
+pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
+ const NGHolder &g, NFAVertex v) {
+ s32 minAdj = 0, maxAdj = 0;
+ const auto &reports = g[v].reports;
+ for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
+ const Report &ir = rm.getReport(*ri);
+ if (ri == reports.begin()) {
+ minAdj = ir.offsetAdjust;
+ maxAdj = ir.offsetAdjust;
+ } else {
+ minAdj = min(minAdj, ir.offsetAdjust);
+ maxAdj = max(maxAdj, ir.offsetAdjust);
+ }
+ }
+
+ return make_pair(minAdj, maxAdj);
+}
+
+/** \brief Find the (min, max) length of any match for the given holder. */
+static
+DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
+ DepthMinMax match_depths;
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ pair<s32, s32> adj;
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ u32 idx = g[v].index;
+ DepthMinMax d = depths[idx]; // copy
+ adj = getMinMaxOffsetAdjust(rm, g, v);
+ DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
+ d.str().c_str(), adj.first, adj.second);
+ d.min += adj.first;
+ d.max += adj.second;
+ match_depths = unionDepthMinMax(match_depths, d);
+ }
+
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ u32 idx = g[v].index;
+ DepthMinMax d = depths[idx]; // copy
+ adj = getMinMaxOffsetAdjust(rm, g, v);
+ DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
+ d.str().c_str(), adj.first, adj.second);
+ d.min += adj.first;
+ d.max += adj.second;
+ match_depths = unionDepthMinMax(match_depths, d);
+ }
+
+ DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
+
+ assert(match_depths.min.is_reachable());
+ assert(match_depths.max.is_reachable());
+ return match_depths;
+}
+
template<typename Function>
void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen,
Function func) {
- for (auto v : inv_adjacent_vertices_range(accept, g)) {
- if (v == g.accept) {
+ for (auto v : inv_adjacent_vertices_range(accept, g)) {
+ if (v == g.accept) {
// Don't operate on accept: the accept->acceptEod edge is stylised.
- assert(accept == g.acceptEod);
+ assert(accept == g.acceptEod);
assert(g[v].reports.empty());
- continue;
- }
-
+ continue;
+ }
+
if (!seen.insert(v).second) {
continue; // We have already processed v.
- }
-
- auto &reports = g[v].reports;
+ }
+
+ auto &reports = g[v].reports;
if (reports.empty()) {
continue;
}
@@ -177,7 +177,7 @@ void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen,
reports = std::move(new_reports);
}
}
-
+
/**
* Generic function for replacing all the reports in the graph.
*
@@ -190,7 +190,7 @@ void replaceReports(NGHolder &g, Function func) {
replaceReports(g, g.accept, seen, func);
replaceReports(g, g.acceptEod, seen, func);
}
-
+
/** \brief Replace the graph's reports with new reports that specify bounds. */
static
void updateReportBounds(ReportManager &rm, NGHolder &g,
@@ -199,9 +199,9 @@ void updateReportBounds(ReportManager &rm, NGHolder &g,
replaceReports(g, [&](NFAVertex, ReportID id) {
Report report = rm.getReport(id); // make a copy
assert(!report.hasBounds());
-
+
// Note that we need to cope with offset adjustment here.
-
+
report.minOffset = expr.min_offset - report.offsetAdjust;
if (expr.max_offset == MAX_OFFSET) {
report.maxOffset = MAX_OFFSET;
@@ -209,30 +209,30 @@ void updateReportBounds(ReportManager &rm, NGHolder &g,
report.maxOffset = expr.max_offset - report.offsetAdjust;
}
assert(report.maxOffset >= report.minOffset);
-
+
report.minLength = expr.min_length;
if (expr.min_length && !expr.som) {
report.quashSom = true;
- }
-
+ }
+
DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, "
"min_length=%llu\n", id, report.minOffset,
report.maxOffset, report.minLength);
return rm.getInternalId(report);
});
-}
-
-static
-bool hasVirtualStarts(const NGHolder &g) {
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
- return true;
- }
- }
- return false;
-}
-
+}
+
+static
+bool hasVirtualStarts(const NGHolder &g) {
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
+ return true;
+ }
+ }
+ return false;
+}
+
/** Set the min_length param for all reports to zero. */
static
void clearMinLengthParam(NGHolder &g, ReportManager &rm) {
@@ -272,11 +272,11 @@ void clearOffsetParams(NGHolder &g, ReportManager &rm) {
* can use that knowledge to anchor it which will limit its lifespan. Note that
* we can't use this transformation if there's a min_length, as it's currently
* handled using "sly SOM".
- *
- * Note that it is possible to handle graphs that have a combination of
- * anchored and unanchored paths, but it's too tricky for the moment.
- */
-static
+ *
+ * Note that it is possible to handle graphs that have a combination of
+ * anchored and unanchored paths, but it's too tricky for the moment.
+ */
+static
bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
if (!isFloating(g)) {
return false;
@@ -303,99 +303,99 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
const depth minWidth = findMinWidth(g);
const depth maxWidth = findMaxWidth(g);
- assert(minWidth <= maxWidth);
- assert(maxWidth.is_reachable());
-
+ assert(minWidth <= maxWidth);
+ assert(maxWidth.is_reachable());
+
const auto &first_report = rm.getReport(*reports.begin());
const auto min_offset = first_report.minOffset;
const auto max_offset = first_report.maxOffset;
assert(max_offset < MAX_OFFSET);
- DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
+ DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
minWidth.str().c_str(), maxWidth.str().c_str(),
min_offset, max_offset);
-
+
if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
- return false;
- }
-
+ return false;
+ }
+
if (max_offset < minWidth) {
- assert(0);
- return false;
- }
-
- // If the pattern has virtual starts, we probably don't want to touch it.
- if (hasVirtualStarts(g)) {
- DEBUG_PRINTF("virtual starts, bailing\n");
- return false;
- }
-
- // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
- // would just need to be a little careful with reports.
- if (isVacuous(g)) {
- DEBUG_PRINTF("vacuous, bailing\n");
- return false;
- }
-
- u32 min_bound, max_bound;
- if (maxWidth.is_infinite()) {
- min_bound = 0;
+ assert(0);
+ return false;
+ }
+
+ // If the pattern has virtual starts, we probably don't want to touch it.
+ if (hasVirtualStarts(g)) {
+ DEBUG_PRINTF("virtual starts, bailing\n");
+ return false;
+ }
+
+ // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
+ // would just need to be a little careful with reports.
+ if (isVacuous(g)) {
+ DEBUG_PRINTF("vacuous, bailing\n");
+ return false;
+ }
+
+ u32 min_bound, max_bound;
+ if (maxWidth.is_infinite()) {
+ min_bound = 0;
max_bound = max_offset - minWidth;
- } else {
+ } else {
min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0;
max_bound = max_offset - minWidth;
- }
-
- DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
-
- vector<NFAVertex> initials;
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (v == g.startDs) {
- continue;
- }
- initials.push_back(v);
- }
- if (initials.empty()) {
- DEBUG_PRINTF("no initial vertices\n");
- return false;
- }
-
- // Wire up 'min_offset' mandatory dots from anchored start.
- NFAVertex u = g.start;
- for (u32 i = 0; i < min_bound; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach.setall();
- add_edge(u, v, g);
- u = v;
- }
-
- NFAVertex head = u;
-
- // Wire up optional dots for (max_offset - min_offset).
- for (u32 i = 0; i < max_bound - min_bound; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach.setall();
- if (head != u) {
- add_edge(head, v, g);
- }
- add_edge(u, v, g);
- u = v;
- }
-
- // Remove edges from starts and wire both head and u to our initials.
- for (auto v : initials) {
- remove_edge(g.startDs, v, g);
- remove_edge(g.start, v, g);
-
- if (head != u) {
- add_edge(head, v, g);
- }
- add_edge(u, v, g);
- }
-
+ }
+
+ DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
+
+ vector<NFAVertex> initials;
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ initials.push_back(v);
+ }
+ if (initials.empty()) {
+ DEBUG_PRINTF("no initial vertices\n");
+ return false;
+ }
+
+ // Wire up 'min_offset' mandatory dots from anchored start.
+ NFAVertex u = g.start;
+ for (u32 i = 0; i < min_bound; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach.setall();
+ add_edge(u, v, g);
+ u = v;
+ }
+
+ NFAVertex head = u;
+
+ // Wire up optional dots for (max_offset - min_offset).
+ for (u32 i = 0; i < max_bound - min_bound; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach.setall();
+ if (head != u) {
+ add_edge(head, v, g);
+ }
+ add_edge(u, v, g);
+ u = v;
+ }
+
+ // Remove edges from starts and wire both head and u to our initials.
+ for (auto v : initials) {
+ remove_edge(g.startDs, v, g);
+ remove_edge(g.start, v, g);
+
+ if (head != u) {
+ add_edge(head, v, g);
+ }
+ add_edge(u, v, g);
+ }
+
renumber_vertices(g);
renumber_edges(g);
-
+
if (minWidth == maxWidth) {
// For a fixed width pattern, we can retire the offsets as
// they are implicit in the graph now.
@@ -403,68 +403,68 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
}
clearReports(g);
- return true;
-}
-
-static
-NFAVertex findSingleCyclic(const NGHolder &g) {
+ return true;
+}
+
+static
+NFAVertex findSingleCyclic(const NGHolder &g) {
NFAVertex v = NGHolder::null_vertex();
- for (const auto &e : edges_range(g)) {
- if (source(e, g) == target(e, g)) {
- if (source(e, g) == g.startDs) {
- continue;
- }
+ for (const auto &e : edges_range(g)) {
+ if (source(e, g) == target(e, g)) {
+ if (source(e, g) == g.startDs) {
+ continue;
+ }
if (v != NGHolder::null_vertex()) {
- // More than one cyclic vertex.
+ // More than one cyclic vertex.
return NGHolder::null_vertex();
- }
- v = source(e, g);
- }
- }
-
+ }
+ v = source(e, g);
+ }
+ }
+
if (v != NGHolder::null_vertex()) {
DEBUG_PRINTF("cyclic is %zu\n", g[v].index);
- assert(!is_special(v, g));
- }
- return v;
-}
-
-static
+ assert(!is_special(v, g));
+ }
+ return v;
+}
+
+static
bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
- int *adjust) {
- const auto &reports = all_reports(g);
- if (reports.empty()) {
- assert(0);
- return false;
- }
-
- int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
- for (auto report : reports) {
- const Report &ir = rm.getReport(report);
- if (ir.offsetAdjust != offsetAdjust) {
- DEBUG_PRINTF("different adjusts!\n");
- return false;
- }
- }
-
- *adjust = offsetAdjust;
- return true;
-}
-
+ int *adjust) {
+ const auto &reports = all_reports(g);
+ if (reports.empty()) {
+ assert(0);
+ return false;
+ }
+
+ int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
+ for (auto report : reports) {
+ const Report &ir = rm.getReport(report);
+ if (ir.offsetAdjust != offsetAdjust) {
+ DEBUG_PRINTF("different adjusts!\n");
+ return false;
+ }
+ }
+
+ *adjust = offsetAdjust;
+ return true;
+}
+
/**
* If the pattern has a min_length and is of "ratchet" form with one unbounded
- * repeat, that repeat can become a bounded repeat.
- *
- * /foo.*bar/{min_length=100} --> /foo.{94,}bar/
- */
-static
+ * repeat, that repeat can become a bounded repeat.
+ *
+ * /foo.*bar/{min_length=100} --> /foo.{94,}bar/
+ */
+static
bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
const auto &reports = all_reports(g);
-
+
if (reports.empty()) {
- return false;
- }
-
+ return false;
+ }
+
if (!hasSameBounds(reports, rm)) {
DEBUG_PRINTF("mixed report bounds\n");
return false;
@@ -475,249 +475,249 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
return false;
}
- // If the pattern has virtual starts, we probably don't want to touch it.
- if (hasVirtualStarts(g)) {
- DEBUG_PRINTF("virtual starts, bailing\n");
- return false;
- }
-
- // The graph must contain a single cyclic vertex (other than startDs), and
- // that vertex can have one pred and one successor.
- NFAVertex cyclic = findSingleCyclic(g);
+ // If the pattern has virtual starts, we probably don't want to touch it.
+ if (hasVirtualStarts(g)) {
+ DEBUG_PRINTF("virtual starts, bailing\n");
+ return false;
+ }
+
+ // The graph must contain a single cyclic vertex (other than startDs), and
+ // that vertex can have one pred and one successor.
+ NFAVertex cyclic = findSingleCyclic(g);
if (cyclic == NGHolder::null_vertex()) {
- return false;
- }
-
+ return false;
+ }
+
NGHolder::adjacency_iterator ai, ae;
- tie(ai, ae) = adjacent_vertices(g.start, g);
- if (*ai == g.startDs) {
- ++ai;
- }
- NFAVertex v = *ai;
- if (++ai != ae) {
- DEBUG_PRINTF("more than one initial vertex\n");
- return false;
- }
-
- u32 width = 0;
-
- // Walk from the start vertex to the cyclic state and ensure we have a
- // chain of vertices.
- while (v != cyclic) {
+ tie(ai, ae) = adjacent_vertices(g.start, g);
+ if (*ai == g.startDs) {
+ ++ai;
+ }
+ NFAVertex v = *ai;
+ if (++ai != ae) {
+ DEBUG_PRINTF("more than one initial vertex\n");
+ return false;
+ }
+
+ u32 width = 0;
+
+ // Walk from the start vertex to the cyclic state and ensure we have a
+ // chain of vertices.
+ while (v != cyclic) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- width++;
+ width++;
auto succ = succs(v, g);
- if (contains(succ, cyclic)) {
- if (succ.size() == 1) {
- v = cyclic;
- } else if (succ.size() == 2) {
- // Cyclic and jump edge.
- succ.erase(cyclic);
- NFAVertex v2 = *succ.begin();
- if (!edge(cyclic, v2, g).second) {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- v = cyclic;
- } else {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- } else {
- if (succ.size() != 1) {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- v = *succ.begin();
- }
- }
-
- // Check the cyclic state is A-OK.
- v = getSoleDestVertex(g, cyclic);
+ if (contains(succ, cyclic)) {
+ if (succ.size() == 1) {
+ v = cyclic;
+ } else if (succ.size() == 2) {
+ // Cyclic and jump edge.
+ succ.erase(cyclic);
+ NFAVertex v2 = *succ.begin();
+ if (!edge(cyclic, v2, g).second) {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ v = cyclic;
+ } else {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ } else {
+ if (succ.size() != 1) {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ v = *succ.begin();
+ }
+ }
+
+ // Check the cyclic state is A-OK.
+ v = getSoleDestVertex(g, cyclic);
if (v == NGHolder::null_vertex()) {
- DEBUG_PRINTF("cyclic has more than one successor\n");
- return false;
- }
-
- // Walk from the cyclic state to an accept and ensure we have a chain of
- // vertices.
- while (!is_any_accept(v, g)) {
+ DEBUG_PRINTF("cyclic has more than one successor\n");
+ return false;
+ }
+
+ // Walk from the cyclic state to an accept and ensure we have a chain of
+ // vertices.
+ while (!is_any_accept(v, g)) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- width++;
+ width++;
auto succ = succs(v, g);
- if (succ.size() != 1) {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- v = *succ.begin();
- }
-
- int offsetAdjust = 0;
- if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
- return false;
- }
- DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
- width += offsetAdjust;
-
+ if (succ.size() != 1) {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ v = *succ.begin();
+ }
+
+ int offsetAdjust = 0;
+ if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
+ return false;
+ }
+ DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
+ width += offsetAdjust;
+
DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
- g[cyclic].index);
-
+ g[cyclic].index);
+
if (width >= min_length) {
- DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
+ DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
min_length, width);
clearMinLengthParam(g, rm);
- return true;
- }
-
- vector<NFAVertex> preds;
- vector<NFAEdge> dead;
- for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
+ return true;
+ }
+
+ vector<NFAVertex> preds;
+ vector<NFAEdge> dead;
+ for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
DEBUG_PRINTF("pred %zu\n", g[u].index);
- if (u == cyclic) {
- continue;
- }
- preds.push_back(u);
-
- // We want to delete the out-edges of each predecessor, but need to
- // make sure we don't delete the startDs self loop.
- for (const auto &e : out_edges_range(u, g)) {
- if (target(e, g) != g.startDs) {
- dead.push_back(e);
- }
- }
- }
-
- remove_edges(dead, g);
-
- assert(!preds.empty());
-
- const CharReach &cr = g[cyclic].char_reach;
-
+ if (u == cyclic) {
+ continue;
+ }
+ preds.push_back(u);
+
+ // We want to delete the out-edges of each predecessor, but need to
+ // make sure we don't delete the startDs self loop.
+ for (const auto &e : out_edges_range(u, g)) {
+ if (target(e, g) != g.startDs) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ remove_edges(dead, g);
+
+ assert(!preds.empty());
+
+ const CharReach &cr = g[cyclic].char_reach;
+
for (u32 i = 0; i < min_length - width - 1; ++i) {
- v = add_vertex(g);
- g[v].char_reach = cr;
-
- for (auto u : preds) {
- add_edge(u, v, g);
- }
- preds.clear();
- preds.push_back(v);
- }
- assert(!preds.empty());
- for (auto u : preds) {
- add_edge(u, cyclic, g);
- }
-
+ v = add_vertex(g);
+ g[v].char_reach = cr;
+
+ for (auto u : preds) {
+ add_edge(u, v, g);
+ }
+ preds.clear();
+ preds.push_back(v);
+ }
+ assert(!preds.empty());
+ for (auto u : preds) {
+ add_edge(u, cyclic, g);
+ }
+
renumber_vertices(g);
renumber_edges(g);
clearMinLengthParam(g, rm);
- clearReports(g);
- return true;
-}
-
-static
+ clearReports(g);
+ return true;
+}
+
+static
bool hasExtParams(const ExpressionInfo &expr) {
if (expr.min_length != 0) {
- return true;
- }
+ return true;
+ }
if (expr.min_offset != 0) {
- return true;
- }
+ return true;
+ }
if (expr.max_offset != MAX_OFFSET) {
- return true;
- }
- return false;
-}
-
-static
-const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
- if (d.toAccept.max.is_unreachable()) {
- return d.toAcceptEod.max;
- } else if (d.toAcceptEod.max.is_unreachable()) {
- return d.toAccept.max;
- }
- return max(d.toAccept.max, d.toAcceptEod.max);
-}
-
-static
-const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
- return min(d.fromStartDotStar.min, d.fromStart.min);
-}
-
-static
-const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
- return min(d.toAccept.min, d.toAcceptEod.min);
-}
-
-static
+ return true;
+ }
+ return false;
+}
+
+static
+const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
+ if (d.toAccept.max.is_unreachable()) {
+ return d.toAcceptEod.max;
+ } else if (d.toAcceptEod.max.is_unreachable()) {
+ return d.toAccept.max;
+ }
+ return max(d.toAccept.max, d.toAcceptEod.max);
+}
+
+static
+const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
+ return min(d.fromStartDotStar.min, d.fromStart.min);
+}
+
+static
+const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
+ return min(d.toAccept.min, d.toAcceptEod.min);
+}
+
+static
bool isEdgePrunable(const NGHolder &g, const Report &report,
- const vector<NFAVertexBidiDepth> &depths,
- const NFAEdge &e) {
- const NFAVertex u = source(e, g);
- const NFAVertex v = target(e, g);
-
+ const vector<NFAVertexBidiDepth> &depths,
+ const NFAEdge &e) {
+ const NFAVertex u = source(e, g);
+ const NFAVertex v = target(e, g);
+
DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index);
-
- // Leave our special-to-special edges alone.
- if (is_special(u, g) && is_special(v, g)) {
- DEBUG_PRINTF("ignoring special-to-special\n");
- return false;
- }
-
- // We must be careful around start: we don't want to remove (start, v) if
- // (startDs, v) exists as well, since later code will assume the presence
- // of both edges, but other cases are OK.
- if (u == g.start && edge(g.startDs, v, g).second) {
- DEBUG_PRINTF("ignoring unanchored start edge\n");
- return false;
- }
-
- u32 u_idx = g[u].index;
- u32 v_idx = g[v].index;
- assert(u_idx < depths.size() && v_idx < depths.size());
-
- const NFAVertexBidiDepth &du = depths.at(u_idx);
- const NFAVertexBidiDepth &dv = depths.at(v_idx);
-
+
+ // Leave our special-to-special edges alone.
+ if (is_special(u, g) && is_special(v, g)) {
+ DEBUG_PRINTF("ignoring special-to-special\n");
+ return false;
+ }
+
+ // We must be careful around start: we don't want to remove (start, v) if
+ // (startDs, v) exists as well, since later code will assume the presence
+ // of both edges, but other cases are OK.
+ if (u == g.start && edge(g.startDs, v, g).second) {
+ DEBUG_PRINTF("ignoring unanchored start edge\n");
+ return false;
+ }
+
+ u32 u_idx = g[u].index;
+ u32 v_idx = g[v].index;
+ assert(u_idx < depths.size() && v_idx < depths.size());
+
+ const NFAVertexBidiDepth &du = depths.at(u_idx);
+ const NFAVertexBidiDepth &dv = depths.at(v_idx);
+
if (report.minOffset) {
depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv);
if (max_offset.is_finite() && max_offset < report.minOffset) {
- DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
- return true;
- }
- }
-
+ DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
+ return true;
+ }
+ }
+
if (report.maxOffset != MAX_OFFSET) {
- depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
- assert(min_offset.is_finite());
-
+ depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
+ assert(min_offset.is_finite());
+
if (min_offset > report.maxOffset) {
- DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
- return true;
- }
- }
-
+ DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
+ return true;
+ }
+ }
+
if (report.minLength && is_any_accept(v, g)) {
- // Simple take on min_length. If we're an edge to accept and our max
- // dist from start is too small, we can be pruned.
+ // Simple take on min_length. If we're an edge to accept and our max
+ // dist from start is too small, we can be pruned.
const depth &width = maxDistFromInit(du);
if (width.is_finite() && width < report.minLength) {
- DEBUG_PRINTF("max width %s from start too small for min_length\n",
- width.str().c_str());
- return true;
- }
- }
-
- return false;
-}
-
-static
+ DEBUG_PRINTF("max width %s from start too small for min_length\n",
+ width.str().c_str());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) {
const auto &reports = all_reports(g);
if (reports.empty()) {
return;
}
-
+
if (!hasSameBounds(reports, rm)) {
DEBUG_PRINTF("report bounds vary\n");
return;
@@ -727,32 +727,32 @@ void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) {
auto depths = calcBidiDepths(g);
- vector<NFAEdge> dead;
-
- for (const auto &e : edges_range(g)) {
+ vector<NFAEdge> dead;
+
+ for (const auto &e : edges_range(g)) {
if (isEdgePrunable(g, report, depths, e)) {
- DEBUG_PRINTF("pruning\n");
- dead.push_back(e);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUseless(g);
+ DEBUG_PRINTF("pruning\n");
+ dead.push_back(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUseless(g);
clearReports(g);
-}
-
+}
+
/**
* Remove vacuous edges in graphs where the min_offset or min_length
* constraints dictate that they can never produce a match.
*/
-static
+static
void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) {
- vector<NFAEdge> dead;
-
+ vector<NFAEdge> dead;
+
auto has_min_offset = [&](NFAVertex v) {
assert(!g[v].reports.empty()); // must be reporter
return all_of_in(g[v].reports, [&](ReportID id) {
@@ -767,157 +767,157 @@ void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) {
});
};
- for (const auto &e : edges_range(g)) {
- const NFAVertex u = source(e, g);
- const NFAVertex v = target(e, g);
-
+ for (const auto &e : edges_range(g)) {
+ const NFAVertex u = source(e, g);
+ const NFAVertex v = target(e, g);
+
// Special case: Crudely remove vacuous edges from start in graphs with
// a min_offset.
if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) {
- DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
- dead.push_back(e);
- continue;
- }
-
- // If a min_length is set, vacuous edges can be removed.
+ DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
+ dead.push_back(e);
+ continue;
+ }
+
+ // If a min_length is set, vacuous edges can be removed.
if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) {
- DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
- dead.push_back(e);
- continue;
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
+ DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
+ dead.push_back(e);
+ continue;
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
+ remove_edges(dead, g);
+ pruneUseless(g);
clearReports(g);
-}
-
-static
+}
+
+static
void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths,
- const ReportManager &rm, NFAVertex accept) {
- vector<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(accept, g)) {
- NFAVertex v = source(e, g);
- if (v == g.accept) {
- assert(accept == g.acceptEod); // stylised edge
- continue;
- }
-
+ const ReportManager &rm, NFAVertex accept) {
+ vector<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(accept, g)) {
+ NFAVertex v = source(e, g);
+ if (v == g.accept) {
+ assert(accept == g.acceptEod); // stylised edge
+ continue;
+ }
+
if (!hasSameBounds(g[v].reports, rm)) {
continue;
}
const auto &report = rm.getReport(*g[v].reports.begin());
- u32 idx = g[v].index;
- DepthMinMax d = depths[idx]; // copy
- pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
- DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
- d.str().c_str(), adj.first, adj.second);
- d.min += adj.first;
- d.max += adj.second;
-
+ u32 idx = g[v].index;
+ DepthMinMax d = depths[idx]; // copy
+ pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
+ DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
+ d.str().c_str(), adj.first, adj.second);
+ d.min += adj.first;
+ d.max += adj.second;
+
if (d.max.is_finite() && d.max < report.minLength) {
- DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
+ DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
d.max.str().c_str(), report.minLength);
- dead.push_back(e);
- continue;
- }
-
+ dead.push_back(e);
+ continue;
+ }
+
if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) {
- DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
+ DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
d.min.str().c_str(), report.maxOffset);
- dead.push_back(e);
- continue;
- }
- }
-
- remove_edges(dead, g);
-}
-
+ dead.push_back(e);
+ continue;
+ }
+ }
+
+ remove_edges(dead, g);
+}
+
/**
* Remove edges to accepts that can never produce a match long enough to
* satisfy our min_length and max_offset constraints.
*/
-static
+static
void pruneUnmatchable(NGHolder &g, const ReportManager &rm) {
if (!any_of_in(all_reports(g), [&](ReportID id) {
return rm.getReport(id).minLength > 0;
})) {
- return;
- }
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- pruneUnmatchable(g, depths, rm, g.accept);
- pruneUnmatchable(g, depths, rm, g.acceptEod);
-
- pruneUseless(g);
+ return;
+ }
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ pruneUnmatchable(g, depths, rm, g.accept);
+ pruneUnmatchable(g, depths, rm, g.acceptEod);
+
+ pruneUseless(g);
clearReports(g);
-}
-
-static
-bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
+}
+
+static
+bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
return any_of_in(all_reports(g), [&rm](ReportID id) {
return rm.getReport(id).offsetAdjust != 0;
});
-}
-
+}
+
void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr,
ReportManager &rm) {
if (!hasExtParams(expr)) {
- return;
- }
-
- depth minWidth = findMinWidth(g);
- depth maxWidth = findMaxWidth(g);
- bool is_anchored = !has_proper_successor(g.startDs, g)
- && out_degree(g.start, g);
-
- DepthMinMax match_depths = findMatchLengths(rm, g);
- DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
-
+ return;
+ }
+
+ depth minWidth = findMinWidth(g);
+ depth maxWidth = findMaxWidth(g);
+ bool is_anchored = !has_proper_successor(g.startDs, g)
+ && out_degree(g.start, g);
+
+ DepthMinMax match_depths = findMatchLengths(rm, g);
+ DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
+
if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
- ostringstream oss;
- oss << "Expression is anchored and cannot satisfy min_offset="
+ ostringstream oss;
+ oss << "Expression is anchored and cannot satisfy min_offset="
<< expr.min_offset << " as it can only produce matches of length "
- << maxWidth << " bytes at most.";
+ << maxWidth << " bytes at most.";
throw CompileError(expr.index, oss.str());
- }
-
+ }
+
if (minWidth > expr.max_offset) {
- ostringstream oss;
+ ostringstream oss;
oss << "Expression has max_offset=" << expr.max_offset
<< " but requires " << minWidth << " bytes to match.";
throw CompileError(expr.index, oss.str());
- }
-
+ }
+
if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
- ostringstream oss;
+ ostringstream oss;
oss << "Expression has min_length=" << expr.min_length << " but can "
- "only produce matches of length " << match_depths.max <<
- " bytes at most.";
+ "only produce matches of length " << match_depths.max <<
+ " bytes at most.";
throw CompileError(expr.index, oss.str());
- }
-
+ }
+
if (expr.min_length && expr.min_length <= match_depths.min) {
- DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
+ DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
expr.min_length);
expr.min_length = 0;
- }
-
+ }
+
if (!hasExtParams(expr)) {
- return;
- }
-
+ return;
+ }
+
updateReportBounds(rm, g, expr);
}
-
+
/**
* If the pattern is completely anchored and has a min_length set, this can
* be converted to a min_offset.
@@ -926,8 +926,8 @@ static
void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) {
if (has_proper_successor(g.startDs, g)) {
return; // not wholly anchored
- }
-
+ }
+
replaceReports(g, [&rm](NFAVertex, ReportID id) {
const auto &report = rm.getReport(id);
if (report.minLength) {
@@ -984,52 +984,52 @@ void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) {
[&](ReportID id) { return rm.getReport(id).hasBounds(); })) {
DEBUG_PRINTF("no extparam bounds\n");
return;
- }
-
+ }
+
DEBUG_PRINTF("graph has extparam bounds\n");
-
+
pruneVacuousEdges(g, rm);
if (can_never_match(g)) {
return;
}
-
+
pruneUnmatchable(g, rm);
if (can_never_match(g)) {
return;
- }
-
+ }
+
if (!hasOffsetAdjustments(rm, g)) {
pruneExtUnreachable(g, rm);
if (can_never_match(g)) {
return;
}
- }
-
+ }
+
replaceMinLengthWithOffset(g, rm);
if (can_never_match(g)) {
- return;
- }
-
- // If the pattern has a min_length and is of "ratchet" form with one
- // unbounded repeat, that repeat can become a bounded repeat.
- // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
+ return;
+ }
+
+ // If the pattern has a min_length and is of "ratchet" form with one
+ // unbounded repeat, that repeat can become a bounded repeat.
+ // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
transformMinLengthToRepeat(g, rm);
if (can_never_match(g)) {
return;
- }
-
- // If the pattern is unanchored, has a max_offset and has not asked for
- // SOM, we can use that knowledge to anchor it which will limit its
- // lifespan. Note that we can't use this transformation if there's a
- // min_length, as it's currently handled using "sly SOM".
+ }
+
+ // If the pattern is unanchored, has a max_offset and has not asked for
+ // SOM, we can use that knowledge to anchor it which will limit its
+ // lifespan. Note that we can't use this transformation if there's a
+ // min_length, as it's currently handled using "sly SOM".
if (som == SOM_NONE) {
anchorPatternWithBoundedRepeat(g, rm);
if (can_never_match(g)) {
return;
- }
- }
-
+ }
+ }
+
removeUnneededOffsetBounds(g, rm);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h
index ae818075c0..43543b1255 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Propagate extended parameters to vertex reports and reduce graph if
- * possible.
- */
-
-#ifndef NG_EXTPARAM_H
-#define NG_EXTPARAM_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Propagate extended parameters to vertex reports and reduce graph if
+ * possible.
+ */
+
+#ifndef NG_EXTPARAM_H
+#define NG_EXTPARAM_H
+
#include "som/som.h"
-namespace ue2 {
-
+namespace ue2 {
+
class ExpressionInfo;
class NGHolder;
-class ReportManager;
-
+class ReportManager;
+
/**
* \brief Propagate extended parameter information to vertex reports. Will
* throw CompileError if this expression's extended parameters are not
@@ -52,13 +52,13 @@ class ReportManager;
*/
void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr,
ReportManager &rm);
-
+
/**
* \brief Perform graph reductions (if possible) to do with extended parameter
* constraints on reports.
*/
void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp
index 8fb264d8a9..01fb0090c6 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp
@@ -1,142 +1,142 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose mask construction from NGHolder.
- */
-#include "ng_fixed_width.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "rose/rose_build.h"
-#include "util/container.h"
-#include "ue2common.h"
-
-#include <algorithm>
-#include <iterator>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose mask construction from NGHolder.
+ */
+#include "ng_fixed_width.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "rose/rose_build.h"
+#include "util/container.h"
+#include "ue2common.h"
+
+#include <algorithm>
+#include <iterator>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
flat_set<ReportID> *reports) {
- DEBUG_PRINTF("looking for a mask pattern\n");
- set<NFAVertex> s_succ;
- insert(&s_succ, adjacent_vertices(g.start, g));
-
- set<NFAVertex> sds_succ;
- insert(&sds_succ, adjacent_vertices(g.startDs, g));
-
- *anchored = sds_succ.size() == 1; /* sds itself */
- bool floating = is_subset_of(s_succ, sds_succ);
-
- DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(),
- *anchored ? " anchored" : "", floating ? " floating" : "");
- if (!*anchored && !floating) {
- DEBUG_PRINTF("semi-anchored\n");
- return false;
- }
-
- set<NFAVertex> &succs = *anchored ? s_succ : sds_succ;
- succs.erase(g.startDs);
- if (succs.size() != 1) {
- DEBUG_PRINTF("branchy root\n");
- return false;
- }
-
- NFAVertex u = *anchored ? g.start : g.startDs;
- NFAVertex v = *succs.begin();
-
- while (true) {
+ DEBUG_PRINTF("looking for a mask pattern\n");
+ set<NFAVertex> s_succ;
+ insert(&s_succ, adjacent_vertices(g.start, g));
+
+ set<NFAVertex> sds_succ;
+ insert(&sds_succ, adjacent_vertices(g.startDs, g));
+
+ *anchored = sds_succ.size() == 1; /* sds itself */
+ bool floating = is_subset_of(s_succ, sds_succ);
+
+ DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(),
+ *anchored ? " anchored" : "", floating ? " floating" : "");
+ if (!*anchored && !floating) {
+ DEBUG_PRINTF("semi-anchored\n");
+ return false;
+ }
+
+ set<NFAVertex> &succs = *anchored ? s_succ : sds_succ;
+ succs.erase(g.startDs);
+ if (succs.size() != 1) {
+ DEBUG_PRINTF("branchy root\n");
+ return false;
+ }
+
+ NFAVertex u = *anchored ? g.start : g.startDs;
+ NFAVertex v = *succs.begin();
+
+ while (true) {
DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
-
- assert(v != g.acceptEod);
-
- // If we've reached an accept, we MAY have found a valid Rose pattern
- if (v == g.accept) {
- DEBUG_PRINTF("accept\n");
- insert(reports, g[u].reports);
- return true;
- }
-
- mask->push_back(g[v].char_reach);
-
- if (out_degree(v, g) != 1) {
- DEBUG_PRINTF("out_degree != 1\n");
- return false; /* not a chain */
- }
-
- u = v;
- v = *adjacent_vertices(v, g).first;
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
- * to case accept (large cycles),
- * ensures term */
- return false;
- }
- }
-}
-
-bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
- if (!grey.roseMasks) {
- return false;
- }
-
- if (in_degree(g.acceptEod,g) != 1) {
- DEBUG_PRINTF("EOD anchoring not supported\n");
- return false;
- }
-
+
+ assert(v != g.acceptEod);
+
+ // If we've reached an accept, we MAY have found a valid Rose pattern
+ if (v == g.accept) {
+ DEBUG_PRINTF("accept\n");
+ insert(reports, g[u].reports);
+ return true;
+ }
+
+ mask->push_back(g[v].char_reach);
+
+ if (out_degree(v, g) != 1) {
+ DEBUG_PRINTF("out_degree != 1\n");
+ return false; /* not a chain */
+ }
+
+ u = v;
+ v = *adjacent_vertices(v, g).first;
+
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
+ * to case accept (large cycles),
+ * ensures term */
+ return false;
+ }
+ }
+}
+
+bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
+ if (!grey.roseMasks) {
+ return false;
+ }
+
+ if (in_degree(g.acceptEod,g) != 1) {
+ DEBUG_PRINTF("EOD anchoring not supported\n");
+ return false;
+ }
+
flat_set<ReportID> reports;
- bool anchored = false;
- vector<CharReach> mask;
-
- if (!findMask(g, &mask, &anchored, &reports)) {
- return false;
- }
-
- DEBUG_PRINTF("%smasky masky\n", anchored ? "anchored " : "");
-
- assert(!mask.empty());
- assert(!reports.empty());
-
- if (rose.add(anchored, mask, reports)) {
- DEBUG_PRINTF("added as rose mask\n");
- return true;
- } else {
- DEBUG_PRINTF("failed to add masky\n");
- return false;
- }
-}
-
-} // namespace ue2
+ bool anchored = false;
+ vector<CharReach> mask;
+
+ if (!findMask(g, &mask, &anchored, &reports)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("%smasky masky\n", anchored ? "anchored " : "");
+
+ assert(!mask.empty());
+ assert(!reports.empty());
+
+ if (rose.add(anchored, mask, reports)) {
+ DEBUG_PRINTF("added as rose mask\n");
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add masky\n");
+ return false;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h
index d8286742cd..7a2d0fff3b 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose mask construction from NGHolder.
- */
-
-#ifndef NG_FIXED_WIDTH_H
-#define NG_FIXED_WIDTH_H
-
-namespace ue2 {
-
-class RoseBuild;
-class NGHolder;
-struct Grey;
-
-bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey);
-
-} // namespace ue2
-
-#endif // NG_FIXED_WIDTH_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose mask construction from NGHolder.
+ */
+
+#ifndef NG_FIXED_WIDTH_H
+#define NG_FIXED_WIDTH_H
+
+namespace ue2 {
+
+class RoseBuild;
+class NGHolder;
+struct Grey;
+
+bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey);
+
+} // namespace ue2
+
+#endif // NG_FIXED_WIDTH_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp
index 8054544772..f6594616c4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp
@@ -1,124 +1,124 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for Haig SOM DFA.
- */
-#include "ng_haig.h"
-
-#include "grey.h"
-#include "nfa/goughcompile.h"
-#include "ng_holder.h"
-#include "ng_mcclellan_internal.h"
-#include "ng_som_util.h"
-#include "ng_squash.h"
-#include "util/bitfield.h"
-#include "util/container.h"
-#include "util/determinise.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for Haig SOM DFA.
+ */
+#include "ng_haig.h"
+
+#include "grey.h"
+#include "nfa/goughcompile.h"
+#include "ng_holder.h"
+#include "ng_mcclellan_internal.h"
+#include "ng_som_util.h"
+#include "ng_squash.h"
+#include "util/bitfield.h"
+#include "util/container.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
#include "util/graph.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/hash_dynamic_bitset.h"
-#include "util/make_unique.h"
+#include "util/make_unique.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include <vector>
-#include <boost/dynamic_bitset.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-#define NFA_STATE_LIMIT 256
-
-#define HAIG_MAX_NFA_STATE 600
-#define HAIG_MAX_LIVE_SOM_SLOTS 32
-
-namespace {
-struct haig_too_wide {
-};
-
-template<typename stateset>
-static
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+#include <vector>
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+using boost::dynamic_bitset;
+
+namespace ue2 {
+
+#define NFA_STATE_LIMIT 256
+
+#define HAIG_MAX_NFA_STATE 600
+#define HAIG_MAX_LIVE_SOM_SLOTS 32
+
+namespace {
+struct haig_too_wide {
+};
+
+template<typename stateset>
+static
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
- stateset *init, stateset *initDS,
- vector<NFAVertex> *v_by_index) {
+ stateset *init, stateset *initDS,
+ vector<NFAVertex> *v_by_index) {
DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str());
- for (auto v : vertices_range(g)) {
+ for (auto v : vertices_range(g)) {
if (contains(unused, v)) {
- continue;
- }
+ continue;
+ }
u32 v_index = g[v].index;
- if (is_any_start(v, g)) {
- init->set(v_index);
- if (hasSelfLoop(v, g) || is_triggered(g)) {
- DEBUG_PRINTF("setting %u\n", v_index);
- initDS->set(v_index);
- }
- }
- assert(v_index < init->size());
- }
-
- v_by_index->clear();
+ if (is_any_start(v, g)) {
+ init->set(v_index);
+ if (hasSelfLoop(v, g) || is_triggered(g)) {
+ DEBUG_PRINTF("setting %u\n", v_index);
+ initDS->set(v_index);
+ }
+ }
+ assert(v_index < init->size());
+ }
+
+ v_by_index->clear();
v_by_index->resize(num_vertices(g), NGHolder::null_vertex());
-
- for (auto v : vertices_range(g)) {
- u32 v_index = g[v].index;
+
+ for (auto v : vertices_range(g)) {
+ u32 v_index = g[v].index;
assert((*v_by_index)[v_index] == NGHolder::null_vertex());
- (*v_by_index)[v_index] = v;
- }
-}
-
-template<typename StateSet>
-void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- accept->set(g[v].index);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
- acceptEod->set(g[v].index);
- }
-}
-
+ (*v_by_index)[v_index] = v;
+ }
+}
+
+template<typename StateSet>
+void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ accept->set(g[v].index);
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ acceptEod->set(g[v].index);
+ }
+}
+
template<typename Automaton_Traits>
-class Automaton_Base {
+class Automaton_Base {
public:
using StateSet = typename Automaton_Traits::StateSet;
using StateMap = typename Automaton_Traits::StateMap;
-protected:
+protected:
Automaton_Base(const NGHolder &graph_in, som_type som,
const vector<vector<CharReach>> &triggers,
bool unordered_som)
@@ -131,50 +131,50 @@ protected:
acceptEod(Automaton_Traits::init_states(numStates)),
toppable(Automaton_Traits::init_states(numStates)),
dead(Automaton_Traits::init_states(numStates)) {
- calculateAlphabet(graph, alpha, unalpha, &alphasize);
- assert(alphasize <= ALPHABET_SIZE);
-
+ calculateAlphabet(graph, alpha, unalpha, &alphasize);
+ assert(alphasize <= ALPHABET_SIZE);
+
populateInit(graph, unused, &init, &initDS, &v_by_index);
- populateAccepts(graph, &accept, &acceptEod);
-
- start_anchored = DEAD_STATE + 1;
- if (initDS == init) {
- start_floating = start_anchored;
- } else if (initDS.any()) {
- start_floating = start_anchored + 1;
- } else {
- start_floating = DEAD_STATE;
- }
-
+ populateAccepts(graph, &accept, &acceptEod);
+
+ start_anchored = DEAD_STATE + 1;
+ if (initDS == init) {
+ start_floating = start_anchored;
+ } else if (initDS.any()) {
+ start_floating = start_anchored + 1;
+ } else {
+ start_floating = DEAD_STATE;
+ }
+
cr_by_index = populateCR(graph, v_by_index, alpha);
- if (!unordered_som) {
- for (const auto &sq : findSquashers(graph, som)) {
- NFAVertex v = sq.first;
- u32 vert_id = graph[v].index;
- squash.set(vert_id);
- squash_mask[vert_id] = shrinkStateSet(sq.second);
- }
- }
-
- if (is_triggered(graph)) {
+ if (!unordered_som) {
+ for (const auto &sq : findSquashers(graph, som)) {
+ NFAVertex v = sq.first;
+ u32 vert_id = graph[v].index;
+ squash.set(vert_id);
+ squash_mask[vert_id] = shrinkStateSet(sq.second);
+ }
+ }
+
+ if (is_triggered(graph)) {
dynamic_bitset<> temp(numStates);
markToppableStarts(graph, unused, false, triggers, &temp);
toppable = Automaton_Traits::copy_states(temp, numStates);
- }
- }
-
-private:
- // Convert an NFAStateSet (as used by the squash code) into a StateSet.
- StateSet shrinkStateSet(const NFAStateSet &in) const {
+ }
+ }
+
+private:
+ // Convert an NFAStateSet (as used by the squash code) into a StateSet.
+ StateSet shrinkStateSet(const NFAStateSet &in) const {
StateSet out = Automaton_Traits::init_states(numStates);
- for (size_t i = in.find_first(); i != in.npos && i < out.size();
- i = in.find_next(i)) {
- out.set(i);
- }
- return out;
- }
-
+ for (size_t i = in.find_first(); i != in.npos && i < out.size();
+ i = in.find_next(i)) {
+ out.set(i);
+ }
+ return out;
+ }
+
void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
StateSet acc = in & (eod ? acceptEod : accept);
for (size_t i = acc.find_first(); i != StateSet::npos;
@@ -186,27 +186,27 @@ private:
}
}
-public:
- void transition(const StateSet &in, StateSet *next) {
- transition_graph(*this, v_by_index, in, next);
- }
-
- const vector<StateSet> initial() {
+public:
+ void transition(const StateSet &in, StateSet *next) {
+ transition_graph(*this, v_by_index, in, next);
+ }
+
+ const vector<StateSet> initial() {
vector<StateSet> rv = {init};
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(initDS);
- }
- return rv;
- }
-
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, false, rv);
- }
-
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, true, rv);
- }
-
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(initDS);
+ }
+ return rv;
+ }
+
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, false, rv);
+ }
+
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, true, rv);
+ }
+
static bool canPrune(const flat_set<ReportID> &) { return false; }
const NGHolder &graph;
@@ -223,40 +223,40 @@ public:
u16 start_anchored;
u16 start_floating;
- vector<NFAVertex> v_by_index;
- vector<CharReach> cr_by_index; /* pre alpha'ed */
- StateSet init;
- StateSet initDS;
- StateSet squash; /* states which allow us to mask out other states */
- StateSet accept;
- StateSet acceptEod;
- StateSet toppable; /* states which are allowed to be on when a top arrives,
- * triggered dfas only */
- map<u32, StateSet> squash_mask;
- StateSet dead;
-};
-
+ vector<NFAVertex> v_by_index;
+ vector<CharReach> cr_by_index; /* pre alpha'ed */
+ StateSet init;
+ StateSet initDS;
+ StateSet squash; /* states which allow us to mask out other states */
+ StateSet accept;
+ StateSet acceptEod;
+ StateSet toppable; /* states which are allowed to be on when a top arrives,
+ * triggered dfas only */
+ map<u32, StateSet> squash_mask;
+ StateSet dead;
+};
+
struct Big_Traits {
using StateSet = dynamic_bitset<>;
using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
-
+
static StateSet init_states(u32 num) {
return StateSet(num);
}
-
+
static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) {
assert(in.size() == num);
return in;
}
};
-
+
class Automaton_Big : public Automaton_Base<Big_Traits> {
public:
Automaton_Big(const NGHolder &graph_in, som_type som,
const vector<vector<CharReach>> &triggers, bool unordered_som)
: Automaton_Base(graph_in, som, triggers, unordered_som) {}
};
-
+
struct Graph_Traits {
using StateSet = bitfield<NFA_STATE_LIMIT>;
using StateMap = unordered_map<StateSet, dstate_id_t>;
@@ -264,520 +264,520 @@ struct Graph_Traits {
static StateSet init_states(UNUSED u32 num) {
assert(num <= NFA_STATE_LIMIT);
return StateSet();
- }
-
+ }
+
static StateSet copy_states(const dynamic_bitset<> &in, u32 num) {
StateSet out = init_states(num);
- for (size_t i = in.find_first(); i != in.npos && i < out.size();
- i = in.find_next(i)) {
- out.set(i);
- }
- return out;
- }
+ for (size_t i = in.find_first(); i != in.npos && i < out.size();
+ i = in.find_next(i)) {
+ out.set(i);
+ }
+ return out;
+ }
};
-
+
class Automaton_Graph : public Automaton_Base<Graph_Traits> {
-public:
+public:
Automaton_Graph(const NGHolder &graph_in, som_type som,
const vector<vector<CharReach>> &triggers,
bool unordered_som)
: Automaton_Base(graph_in, som, triggers, unordered_som) {}
-};
-
-class Automaton_Haig_Merge {
-public:
+};
+
+class Automaton_Haig_Merge {
+public:
using StateSet = vector<u16>;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
-
- explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
- : nfas(in.begin(), in.end()), dead(in.size()) {
- calculateAlphabet();
- populateAsFs();
- }
-
- void populateAsFs(void) {
- bool fs_same = true;
- bool fs_dead = true;
-
- as.resize(nfas.size());
- fs.resize(nfas.size());
- for (u32 i = 0; i < nfas.size(); i++) {
- as[i] = nfas[i]->start_anchored;
- fs[i] = nfas[i]->start_floating;
-
- if (fs[i]) {
- fs_dead = false;
- }
-
- if (as[i] != fs[i]) {
- fs_same = false;
- }
- }
-
- start_anchored = DEAD_STATE + 1;
- if (fs_same) {
- start_floating = start_anchored;
- } else if (fs_dead) {
- start_floating = DEAD_STATE;
- } else {
- start_floating = start_anchored + 1;
- }
- }
-
- void calculateAlphabet(void) {
- DEBUG_PRINTF("calculating alphabet\n");
- vector<CharReach> esets(1, CharReach::dot());
-
- for (const auto &haig : nfas) {
- DEBUG_PRINTF("...next dfa alphabet\n");
- assert(haig);
- const auto &alpha_remap = haig->alpha_remap;
-
- for (size_t i = 0; i < esets.size(); i++) {
- assert(esets[i].any());
- if (esets[i].count() == 1) {
- DEBUG_PRINTF("skipping singleton eq set\n");
- continue;
- }
-
- CharReach t;
- u8 leader_s = alpha_remap[esets[i].find_first()];
-
- DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
-
- for (size_t s = esets[i].find_first();
- s != CharReach::npos; s = esets[i].find_next(s)) {
- if (alpha_remap[s] != leader_s) {
- t.set(s);
- }
- }
-
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
-
- alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
- }
-
- void transition(const StateSet &in, StateSet *next) {
- u16 t[ALPHABET_SIZE];
-
- for (u32 i = 0; i < alphasize; i++) {
- next[i].resize(nfas.size());
- }
-
- for (u32 j = 0; j < nfas.size(); j++) {
- getFullTransitionFromState(*nfas[j], in[j], t);
- for (u32 i = 0; i < alphasize; i++) {
- next[i][j]= t[unalpha[i]];
- }
- }
- }
-
- const vector<StateSet> initial() {
- vector<StateSet> rv(1, as);
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(fs);
- }
- return rv;
- }
-
-private:
- void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
- flat_set<ReportID> &r) {
- for (u32 i = 0; i < nfas.size(); i++) {
- const auto &rs = nfas[i]->states[in[i]].*r_set;
- insert(&r, rs);
- }
- }
-
-public:
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, &dstate::reports, rv);
- }
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, &dstate::reports_eod, rv);
- }
-
- static bool canPrune(const flat_set<ReportID> &) { return false; }
-
-private:
- vector<const raw_som_dfa *> nfas;
- vector<dstate_id_t> as;
- vector<dstate_id_t> fs;
-public:
- array<u16, ALPHABET_SIZE> alpha;
- array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
- StateSet dead;
-
- u16 start_anchored;
- u16 start_floating;
-};
-}
-
-enum bslm_mode {
- ONLY_EXISTING,
- INCLUDE_INVALID
-};
-
-static
-bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
- return is_virtual_start(v, g) || is_any_start(v, g);
-}
-
-static
+
+ explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
+ : nfas(in.begin(), in.end()), dead(in.size()) {
+ calculateAlphabet();
+ populateAsFs();
+ }
+
+ void populateAsFs(void) {
+ bool fs_same = true;
+ bool fs_dead = true;
+
+ as.resize(nfas.size());
+ fs.resize(nfas.size());
+ for (u32 i = 0; i < nfas.size(); i++) {
+ as[i] = nfas[i]->start_anchored;
+ fs[i] = nfas[i]->start_floating;
+
+ if (fs[i]) {
+ fs_dead = false;
+ }
+
+ if (as[i] != fs[i]) {
+ fs_same = false;
+ }
+ }
+
+ start_anchored = DEAD_STATE + 1;
+ if (fs_same) {
+ start_floating = start_anchored;
+ } else if (fs_dead) {
+ start_floating = DEAD_STATE;
+ } else {
+ start_floating = start_anchored + 1;
+ }
+ }
+
+ void calculateAlphabet(void) {
+ DEBUG_PRINTF("calculating alphabet\n");
+ vector<CharReach> esets(1, CharReach::dot());
+
+ for (const auto &haig : nfas) {
+ DEBUG_PRINTF("...next dfa alphabet\n");
+ assert(haig);
+ const auto &alpha_remap = haig->alpha_remap;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ assert(esets[i].any());
+ if (esets[i].count() == 1) {
+ DEBUG_PRINTF("skipping singleton eq set\n");
+ continue;
+ }
+
+ CharReach t;
+ u8 leader_s = alpha_remap[esets[i].find_first()];
+
+ DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
+
+ for (size_t s = esets[i].find_first();
+ s != CharReach::npos; s = esets[i].find_next(s)) {
+ if (alpha_remap[s] != leader_s) {
+ t.set(s);
+ }
+ }
+
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+
+ alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+ }
+
+ void transition(const StateSet &in, StateSet *next) {
+ u16 t[ALPHABET_SIZE];
+
+ for (u32 i = 0; i < alphasize; i++) {
+ next[i].resize(nfas.size());
+ }
+
+ for (u32 j = 0; j < nfas.size(); j++) {
+ getFullTransitionFromState(*nfas[j], in[j], t);
+ for (u32 i = 0; i < alphasize; i++) {
+ next[i][j]= t[unalpha[i]];
+ }
+ }
+ }
+
+ const vector<StateSet> initial() {
+ vector<StateSet> rv(1, as);
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(fs);
+ }
+ return rv;
+ }
+
+private:
+ void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
+ flat_set<ReportID> &r) {
+ for (u32 i = 0; i < nfas.size(); i++) {
+ const auto &rs = nfas[i]->states[in[i]].*r_set;
+ insert(&r, rs);
+ }
+ }
+
+public:
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, &dstate::reports, rv);
+ }
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, &dstate::reports_eod, rv);
+ }
+
+ static bool canPrune(const flat_set<ReportID> &) { return false; }
+
+private:
+ vector<const raw_som_dfa *> nfas;
+ vector<dstate_id_t> as;
+ vector<dstate_id_t> fs;
+public:
+ array<u16, ALPHABET_SIZE> alpha;
+ array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+ StateSet dead;
+
+ u16 start_anchored;
+ u16 start_floating;
+};
+}
+
+enum bslm_mode {
+ ONLY_EXISTING,
+ INCLUDE_INVALID
+};
+
+static
+bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
+ return is_virtual_start(v, g) || is_any_start(v, g);
+}
+
+static
s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused,
- NFAVertex v) {
- if (is_triggered(g) && v == g.start) {
+ NFAVertex v) {
+ if (is_triggered(g) && v == g.start) {
assert(!contains(unused, v));
- } else if (is_any_start_inc_virtual(v, g)) {
- return CREATE_NEW_SOM;
- }
-
- return g[v].index;
-}
-
-template<typename stateset>
-static
-void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
- const vector<NFAVertex> &state_mapping,
- som_tran_info &preds) {
- for (size_t i = nfa_states.find_first(); i != stateset::npos;
- i = nfa_states.find_next(i)) {
- NFAVertex v = state_mapping[i];
- s32 slot_id = g[v].index;
-
+ } else if (is_any_start_inc_virtual(v, g)) {
+ return CREATE_NEW_SOM;
+ }
+
+ return g[v].index;
+}
+
+template<typename stateset>
+static
+void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
+ const vector<NFAVertex> &state_mapping,
+ som_tran_info &preds) {
+ for (size_t i = nfa_states.find_first(); i != stateset::npos;
+ i = nfa_states.find_next(i)) {
+ NFAVertex v = state_mapping[i];
+ s32 slot_id = g[v].index;
+
DEBUG_PRINTF("d vertex %zu\n", g[v].index);
- vector<u32> &out_map = preds[slot_id];
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- out_map.push_back(g[u].index);
- }
-
- sort(out_map.begin(), out_map.end());
- assert(!out_map.empty() || v == g.start);
- }
-}
-
-template<typename stateset>
-static
+ vector<u32> &out_map = preds[slot_id];
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ out_map.push_back(g[u].index);
+ }
+
+ sort(out_map.begin(), out_map.end());
+ assert(!out_map.empty() || v == g.start);
+ }
+}
+
+template<typename stateset>
+static
void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused,
- NFAVertex accept_v, const stateset &source_nfa_states,
- const vector<NFAVertex> &state_mapping,
- set<som_report> &out) {
- for (size_t i = source_nfa_states.find_first(); i != stateset::npos;
- i = source_nfa_states.find_next(i)) {
- NFAVertex v = state_mapping[i];
- if (!edge(v, accept_v, g).second) {
- continue;
- }
- for (ReportID report_id : g[v].reports) {
+ NFAVertex accept_v, const stateset &source_nfa_states,
+ const vector<NFAVertex> &state_mapping,
+ set<som_report> &out) {
+ for (size_t i = source_nfa_states.find_first(); i != stateset::npos;
+ i = source_nfa_states.find_next(i)) {
+ NFAVertex v = state_mapping[i];
+ if (!edge(v, accept_v, g).second) {
+ continue;
+ }
+ for (ReportID report_id : g[v].reports) {
out.insert(som_report(report_id, getSlotID(g, unused, v)));
- }
- }
-}
-
-static
-void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
- if (is_triggered(g)) {
- return;
- }
-
- DEBUG_PRINTF("seeing who creates new som values\n");
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- for (auto v : vertices_range(g)) {
- if (is_any_start_inc_virtual(v, g)) {
+ }
+ }
+}
+
+static
+void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
+ if (is_triggered(g)) {
+ return;
+ }
+
+ DEBUG_PRINTF("seeing who creates new som values\n");
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ for (auto v : vertices_range(g)) {
+ if (is_any_start_inc_virtual(v, g)) {
DEBUG_PRINTF("%zu creates new som value\n", g[v].index);
- out->emplace(g[v].index, 0U);
- continue;
- }
-
- if (is_any_accept(v, g)) {
- continue;
- }
-
- const DepthMinMax &d = depths[g[v].index];
- if (d.min == d.max && d.min.is_finite()) {
+ out->emplace(g[v].index, 0U);
+ continue;
+ }
+
+ if (is_any_accept(v, g)) {
+ continue;
+ }
+
+ const DepthMinMax &d = depths[g[v].index];
+ if (d.min == d.max && d.min.is_finite()) {
DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min);
- out->emplace(g[v].index, d.min);
- }
- }
-}
-
-template<class Auto>
-static
+ out->emplace(g[v].index, d.min);
+ }
+ }
+}
+
+template<class Auto>
+static
bool doHaig(const NGHolder &g, som_type som,
const vector<vector<CharReach>> &triggers, bool unordered_som,
raw_som_dfa *rdfa) {
- u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
- a fight */
+ u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
+ a fight */
using StateSet = typename Auto::StateSet;
- vector<StateSet> nfa_state_map;
+ vector<StateSet> nfa_state_map;
Auto n(g, som, triggers, unordered_som);
- try {
+ try {
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
- DEBUG_PRINTF("state limit exceeded\n");
- return false;
- }
- } catch (haig_too_wide &) {
- DEBUG_PRINTF("too many live som states\n");
- return false;
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
-
- rdfa->state_som.reserve(rdfa->states.size());
- for (u32 i = 0; i < rdfa->states.size(); i++) {
- rdfa->state_som.push_back(dstate_som());
- const StateSet &source_states = nfa_state_map[i];
- if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) {
- DEBUG_PRINTF("too many live states\n");
- return false;
- }
-
- DEBUG_PRINTF("generating som info for %u\n", i);
-
- haig_do_preds(g, source_states, n.v_by_index,
- rdfa->state_som.back().preds);
-
+ DEBUG_PRINTF("state limit exceeded\n");
+ return false;
+ }
+ } catch (haig_too_wide &) {
+ DEBUG_PRINTF("too many live som states\n");
+ return false;
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+
+ rdfa->state_som.reserve(rdfa->states.size());
+ for (u32 i = 0; i < rdfa->states.size(); i++) {
+ rdfa->state_som.push_back(dstate_som());
+ const StateSet &source_states = nfa_state_map[i];
+ if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) {
+ DEBUG_PRINTF("too many live states\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("generating som info for %u\n", i);
+
+ haig_do_preds(g, source_states, n.v_by_index,
+ rdfa->state_som.back().preds);
+
haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index,
- rdfa->state_som.back().reports);
+ rdfa->state_som.back().reports);
haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index,
- rdfa->state_som.back().reports_eod);
- }
-
- haig_note_starts(g, &rdfa->new_som_nfa_states);
-
- return true;
-}
-
+ rdfa->state_som.back().reports_eod);
+ }
+
+ haig_note_starts(g, &rdfa->new_som_nfa_states);
+
+ return true;
+}
+
unique_ptr<raw_som_dfa>
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
const vector<vector<CharReach>> &triggers, const Grey &grey,
bool unordered_som) {
- assert(is_triggered(g) != triggers.empty());
- assert(!unordered_som || is_triggered(g));
-
- if (!grey.allowGough) {
- /* must be at least one engine capable of handling raw som dfas */
- return nullptr;
- }
-
- DEBUG_PRINTF("attempting to build haig \n");
- assert(allMatchStatesHaveReports(g));
- assert(hasCorrectlyNumberedVertices(g));
-
- u32 numStates = num_vertices(g);
- if (numStates > HAIG_MAX_NFA_STATE) {
- DEBUG_PRINTF("giving up... looks too big\n");
- return nullptr;
- }
-
- auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START,
- somPrecision);
-
- DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
- bool rv;
- if (numStates <= NFA_STATE_LIMIT) {
- /* fast path */
+ assert(is_triggered(g) != triggers.empty());
+ assert(!unordered_som || is_triggered(g));
+
+ if (!grey.allowGough) {
+ /* must be at least one engine capable of handling raw som dfas */
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("attempting to build haig \n");
+ assert(allMatchStatesHaveReports(g));
+ assert(hasCorrectlyNumberedVertices(g));
+
+ u32 numStates = num_vertices(g);
+ if (numStates > HAIG_MAX_NFA_STATE) {
+ DEBUG_PRINTF("giving up... looks too big\n");
+ return nullptr;
+ }
+
+ auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START,
+ somPrecision);
+
+ DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
+ bool rv;
+ if (numStates <= NFA_STATE_LIMIT) {
+ /* fast path */
rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som,
- rdfa.get());
- } else {
- /* not the fast path */
+ rdfa.get());
+ } else {
+ /* not the fast path */
rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get());
- }
-
- if (!rv) {
- return nullptr;
- }
-
- DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
- rdfa->start_anchored, rdfa->start_floating);
-
- assert(rdfa->kind == g.kind);
- return rdfa;
-}
-
-static
-void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas,
- const vector<u32> &per_dfa_adj,
- const vector<dstate_id_t> &source_nfa_states,
- som_tran_info &som_tran) {
- for (u32 d = 0; d < dfas.size(); ++d) {
- u32 adj = per_dfa_adj[d];
-
- const som_tran_info &som_tran_d
- = dfas[d]->state_som[source_nfa_states[d]].preds;
- for (som_tran_info::const_iterator it = som_tran_d.begin();
- it != som_tran_d.end(); ++it) {
- assert(it->first != CREATE_NEW_SOM);
- u32 dest_slot = it->first < N_SPECIALS ? it->first
- : it->first + adj;
- vector<u32> &out = som_tran[dest_slot];
-
- if (!out.empty()) {
- /* stylised specials already done; it does not matter who builds
- the preds */
- assert(dest_slot < N_SPECIALS);
- continue;
- }
- for (vector<u32>::const_iterator jt = it->second.begin();
- jt != it->second.end(); ++jt) {
- if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) {
- out.push_back(*jt);
- } else {
- out.push_back(*jt + adj);
- }
- }
- }
- }
-}
-
-static
-void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas,
- const vector<u32> &per_dfa_adj,
- map<u32, u32> *out) {
- for (u32 d = 0; d < dfas.size(); ++d) {
- u32 adj = per_dfa_adj[d];
- const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states;
- for (map<u32, u32>::const_iterator it = new_soms.begin();
- it != new_soms.end(); ++it) {
- if (it->first < N_SPECIALS) {
- assert(!it->second);
- out->emplace(it->first, 0U);
- } else {
- assert(d + 1 >= per_dfa_adj.size()
- || it->first + adj < per_dfa_adj[d + 1]);
- out->emplace(it->first + adj, it->second);
- }
- }
- }
-}
-
-static never_inline
-void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas,
- const vector<u32> &per_dfa_adj,
- const vector<dstate_id_t> &source_nfa_states,
- bool eod, set<som_report> &out) {
- for (u32 d = 0; d < dfas.size(); ++d) {
- u32 adj = per_dfa_adj[d];
-
- const set<som_report> &reps = eod
- ? dfas[d]->state_som[source_nfa_states[d]].reports_eod
- : dfas[d]->state_som[source_nfa_states[d]].reports;
- for (set<som_report>::const_iterator it = reps.begin();
- it != reps.end(); ++it) {
- u32 slot = it->slot;
- if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) {
- slot += adj;
- }
- out.insert(som_report(it->report, slot));
- }
- }
-}
-
-static
-u32 total_slots_used(const raw_som_dfa &rdfa) {
- u32 rv = 0;
- for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin();
- it != rdfa.state_som.end(); ++it) {
- for (som_tran_info::const_iterator jt = it->preds.begin();
- jt != it->preds.end(); ++jt) {
- assert(jt->first != CREATE_NEW_SOM);
- ENSURE_AT_LEAST(&rv, jt->first + 1);
- }
- }
- const map<u32, u32> &new_soms = rdfa.new_som_nfa_states;
- for (map<u32, u32>::const_iterator it = new_soms.begin();
- it != new_soms.end(); ++it) {
- ENSURE_AT_LEAST(&rv, it->first + 1);
- }
- return rv;
-}
-
-unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas,
- u32 limit) {
- assert(!dfas.empty());
-
- Automaton_Haig_Merge n(dfas);
-
- DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
-
- bool unordered_som = false;
- for (const auto &haig : dfas) {
- assert(haig);
- assert(haig->kind == dfas.front()->kind);
- unordered_som |= haig->unordered_som_triggers;
- if (haig->states.size() > limit) {
- DEBUG_PRINTF("too many states!\n");
- return nullptr;
- }
- }
-
+ }
+
+ if (!rv) {
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
+ rdfa->start_anchored, rdfa->start_floating);
+
+ assert(rdfa->kind == g.kind);
+ return rdfa;
+}
+
+static
+void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas,
+ const vector<u32> &per_dfa_adj,
+ const vector<dstate_id_t> &source_nfa_states,
+ som_tran_info &som_tran) {
+ for (u32 d = 0; d < dfas.size(); ++d) {
+ u32 adj = per_dfa_adj[d];
+
+ const som_tran_info &som_tran_d
+ = dfas[d]->state_som[source_nfa_states[d]].preds;
+ for (som_tran_info::const_iterator it = som_tran_d.begin();
+ it != som_tran_d.end(); ++it) {
+ assert(it->first != CREATE_NEW_SOM);
+ u32 dest_slot = it->first < N_SPECIALS ? it->first
+ : it->first + adj;
+ vector<u32> &out = som_tran[dest_slot];
+
+ if (!out.empty()) {
+ /* stylised specials already done; it does not matter who builds
+ the preds */
+ assert(dest_slot < N_SPECIALS);
+ continue;
+ }
+ for (vector<u32>::const_iterator jt = it->second.begin();
+ jt != it->second.end(); ++jt) {
+ if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) {
+ out.push_back(*jt);
+ } else {
+ out.push_back(*jt + adj);
+ }
+ }
+ }
+ }
+}
+
+static
+void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas,
+ const vector<u32> &per_dfa_adj,
+ map<u32, u32> *out) {
+ for (u32 d = 0; d < dfas.size(); ++d) {
+ u32 adj = per_dfa_adj[d];
+ const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states;
+ for (map<u32, u32>::const_iterator it = new_soms.begin();
+ it != new_soms.end(); ++it) {
+ if (it->first < N_SPECIALS) {
+ assert(!it->second);
+ out->emplace(it->first, 0U);
+ } else {
+ assert(d + 1 >= per_dfa_adj.size()
+ || it->first + adj < per_dfa_adj[d + 1]);
+ out->emplace(it->first + adj, it->second);
+ }
+ }
+ }
+}
+
+static never_inline
+void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas,
+ const vector<u32> &per_dfa_adj,
+ const vector<dstate_id_t> &source_nfa_states,
+ bool eod, set<som_report> &out) {
+ for (u32 d = 0; d < dfas.size(); ++d) {
+ u32 adj = per_dfa_adj[d];
+
+ const set<som_report> &reps = eod
+ ? dfas[d]->state_som[source_nfa_states[d]].reports_eod
+ : dfas[d]->state_som[source_nfa_states[d]].reports;
+ for (set<som_report>::const_iterator it = reps.begin();
+ it != reps.end(); ++it) {
+ u32 slot = it->slot;
+ if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) {
+ slot += adj;
+ }
+ out.insert(som_report(it->report, slot));
+ }
+ }
+}
+
+static
+u32 total_slots_used(const raw_som_dfa &rdfa) {
+ u32 rv = 0;
+ for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin();
+ it != rdfa.state_som.end(); ++it) {
+ for (som_tran_info::const_iterator jt = it->preds.begin();
+ jt != it->preds.end(); ++jt) {
+ assert(jt->first != CREATE_NEW_SOM);
+ ENSURE_AT_LEAST(&rv, jt->first + 1);
+ }
+ }
+ const map<u32, u32> &new_soms = rdfa.new_som_nfa_states;
+ for (map<u32, u32>::const_iterator it = new_soms.begin();
+ it != new_soms.end(); ++it) {
+ ENSURE_AT_LEAST(&rv, it->first + 1);
+ }
+ return rv;
+}
+
+unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas,
+ u32 limit) {
+ assert(!dfas.empty());
+
+ Automaton_Haig_Merge n(dfas);
+
+ DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
+
+ bool unordered_som = false;
+ for (const auto &haig : dfas) {
+ assert(haig);
+ assert(haig->kind == dfas.front()->kind);
+ unordered_som |= haig->unordered_som_triggers;
+ if (haig->states.size() > limit) {
+ DEBUG_PRINTF("too many states!\n");
+ return nullptr;
+ }
+ }
+
using StateSet = Automaton_Haig_Merge::StateSet;
- vector<StateSet> nfa_state_map;
- auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
- NODE_START,
- dfas[0]->stream_som_loc_width);
-
+ vector<StateSet> nfa_state_map;
+ auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
+ NODE_START,
+ dfas[0]->stream_som_loc_width);
+
if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
-
- vector<u32> per_dfa_adj;
- u32 curr_adj = 0;
- for (const auto &haig : dfas) {
- per_dfa_adj.push_back(curr_adj);
- curr_adj += total_slots_used(*haig);
- if (curr_adj < per_dfa_adj.back()) {
- /* overflowed our som slot count */
- return nullptr;
- }
- }
-
- rdfa->state_som.reserve(rdfa->states.size());
- for (u32 i = 0; i < rdfa->states.size(); i++) {
- rdfa->state_som.push_back(dstate_som());
- const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i];
- DEBUG_PRINTF("finishing state %u\n", i);
-
- haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states,
- rdfa->state_som.back().preds);
-
- if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) {
- DEBUG_PRINTF("som slot limit exceeded (%zu)\n",
- rdfa->state_som.back().preds.size());
- return nullptr;
- }
-
- haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
- false /* not eod */,
- rdfa->state_som.back().reports);
- haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
- true /* eod */,
- rdfa->state_som.back().reports_eod);
- }
-
- haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
-
- DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
- rdfa->start_anchored, rdfa->start_floating);
-
- return rdfa;
-}
-
-} // namespace ue2
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+
+ vector<u32> per_dfa_adj;
+ u32 curr_adj = 0;
+ for (const auto &haig : dfas) {
+ per_dfa_adj.push_back(curr_adj);
+ curr_adj += total_slots_used(*haig);
+ if (curr_adj < per_dfa_adj.back()) {
+ /* overflowed our som slot count */
+ return nullptr;
+ }
+ }
+
+ rdfa->state_som.reserve(rdfa->states.size());
+ for (u32 i = 0; i < rdfa->states.size(); i++) {
+ rdfa->state_som.push_back(dstate_som());
+ const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i];
+ DEBUG_PRINTF("finishing state %u\n", i);
+
+ haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states,
+ rdfa->state_som.back().preds);
+
+ if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) {
+ DEBUG_PRINTF("som slot limit exceeded (%zu)\n",
+ rdfa->state_som.back().preds.size());
+ return nullptr;
+ }
+
+ haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
+ false /* not eod */,
+ rdfa->state_som.back().reports);
+ haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
+ true /* eod */,
+ rdfa->state_som.back().reports_eod);
+ }
+
+ haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
+
+ DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
+ rdfa->start_anchored, rdfa->start_floating);
+
+ return rdfa;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h
index baff2f5866..136c2a7ddf 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h
@@ -1,68 +1,68 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for Haig SOM DFA.
- */
-
-#ifndef NG_HAIG_H
-#define NG_HAIG_H
-
-#include "ue2common.h"
-#include "som/som.h"
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-struct Grey;
-struct raw_som_dfa;
-
-#define HAIG_FINAL_DFA_STATE_LIMIT 16383
-#define HAIG_HARD_DFA_STATE_LIMIT 8192
-
-/* unordered_som_triggers being true indicates that a live haig may be subjected
- * to later tops arriving with earlier soms (without the haig going dead in
- * between)
- */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for Haig SOM DFA.
+ */
+
+#ifndef NG_HAIG_H
+#define NG_HAIG_H
+
+#include "ue2common.h"
+#include "som/som.h"
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+struct Grey;
+struct raw_som_dfa;
+
+#define HAIG_FINAL_DFA_STATE_LIMIT 16383
+#define HAIG_HARD_DFA_STATE_LIMIT 8192
+
+/* unordered_som_triggers being true indicates that a live haig may be subjected
+ * to later tops arriving with earlier soms (without the haig going dead in
+ * between)
+ */
+
std::unique_ptr<raw_som_dfa>
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
const std::vector<std::vector<CharReach>> &triggers,
const Grey &grey, bool unordered_som_triggers = false);
-
-std::unique_ptr<raw_som_dfa>
-attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas,
- u32 limit = HAIG_HARD_DFA_STATE_LIMIT);
-
-} // namespace ue2
-
-#endif
+
+std::unique_ptr<raw_som_dfa>
+attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas,
+ u32 limit = HAIG_HARD_DFA_STATE_LIMIT);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp
index a2fbb28863..cbe2aadc25 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp
@@ -1,98 +1,98 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ng_holder.h"
-
-#include "ng_util.h"
-#include "ue2common.h"
-
-using namespace std;
-
-namespace ue2 {
-
-// internal use only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ng_holder.h"
+
+#include "ng_util.h"
+#include "ue2common.h"
+
+using namespace std;
+
+namespace ue2 {
+
+// internal use only
static NFAVertex addSpecialVertex(NGHolder &g, SpecialNodes id) {
NFAVertex v(add_vertex(g));
- g[v].index = id;
- return v;
-}
-
-NGHolder::NGHolder(nfa_kind k)
+ g[v].index = id;
+ return v;
+}
+
+NGHolder::NGHolder(nfa_kind k)
: kind (k),
- // add initial special nodes
+ // add initial special nodes
start(addSpecialVertex(*this, NODE_START)),
startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)),
accept(addSpecialVertex(*this, NODE_ACCEPT)),
acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) {
-
- // wire up some fake edges for the stylized bits of the NFA
- add_edge(start, startDs, *this);
- add_edge(startDs, startDs, *this);
- add_edge(accept, acceptEod, *this);
-
+
+ // wire up some fake edges for the stylized bits of the NFA
+ add_edge(start, startDs, *this);
+ add_edge(startDs, startDs, *this);
+ add_edge(accept, acceptEod, *this);
+
(*this)[start].char_reach.setall();
(*this)[startDs].char_reach.setall();
-}
-
-NGHolder::~NGHolder(void) {
- DEBUG_PRINTF("destroying holder @ %p\n", this);
-}
-
-void clear_graph(NGHolder &h) {
+}
+
+NGHolder::~NGHolder(void) {
+ DEBUG_PRINTF("destroying holder @ %p\n", this);
+}
+
+void clear_graph(NGHolder &h) {
NGHolder::vertex_iterator vi, ve;
- for (tie(vi, ve) = vertices(h); vi != ve;) {
- NFAVertex v = *vi;
- ++vi;
-
- clear_vertex(v, h);
- if (!is_special(v, h)) {
- remove_vertex(v, h);
- }
- }
-
- assert(num_vertices(h) == N_SPECIALS);
+ for (tie(vi, ve) = vertices(h); vi != ve;) {
+ NFAVertex v = *vi;
+ ++vi;
+
+ clear_vertex(v, h);
+ if (!is_special(v, h)) {
+ remove_vertex(v, h);
+ }
+ }
+
+ assert(num_vertices(h) == N_SPECIALS);
renumber_vertices(h); /* ensure that we reset our next allocated index */
renumber_edges(h);
-
- // Recreate special stylised edges.
- add_edge(h.start, h.startDs, h);
- add_edge(h.startDs, h.startDs, h);
- add_edge(h.accept, h.acceptEod, h);
-}
-
-NFAVertex NGHolder::getSpecialVertex(u32 id) const {
- switch (id) {
+
+ // Recreate special stylised edges.
+ add_edge(h.start, h.startDs, h);
+ add_edge(h.startDs, h.startDs, h);
+ add_edge(h.accept, h.acceptEod, h);
+}
+
+NFAVertex NGHolder::getSpecialVertex(u32 id) const {
+ switch (id) {
case NODE_START: return start;
case NODE_START_DOTSTAR: return startDs;
case NODE_ACCEPT: return accept;
case NODE_ACCEPT_EOD: return acceptEod;
default: return null_vertex();
- }
-}
-
-}
+ }
+}
+
+}
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h
index 36cf62447b..8edc534835 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/** \file
* \brief Definition of the NGHolder type used for to represent general nfa
* graphs as well as all associated types (vertex and edge properties, etc).
@@ -34,17 +34,17 @@
* accepts.
*/
-#ifndef NG_HOLDER_H
-#define NG_HOLDER_H
-
-#include "ue2common.h"
-#include "nfa/nfa_kind.h"
+#ifndef NG_HOLDER_H
+#define NG_HOLDER_H
+
+#include "ue2common.h"
+#include "nfa/nfa_kind.h"
#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
-
-namespace ue2 {
-
+
+namespace ue2 {
+
/** \brief Properties associated with each vertex in an NFAGraph. */
struct NFAGraphVertexProps {
/** \brief Set of characters on which this vertex is reachable. */
@@ -95,139 +95,139 @@ enum SpecialNodes {
N_SPECIALS
};
-/** \brief Encapsulates an NFAGraph, stores special vertices and other
- * metadata.
- *
- * When constructed, the graph will have the following stylised "special"
- * edges:
- *
- * - (start, startDs)
- * - (startDs, startDs) (self-loop)
- * - (accept, acceptEod)
- */
+/** \brief Encapsulates an NFAGraph, stores special vertices and other
+ * metadata.
+ *
+ * When constructed, the graph will have the following stylised "special"
+ * edges:
+ *
+ * - (start, startDs)
+ * - (startDs, startDs) (self-loop)
+ * - (accept, acceptEod)
+ */
class NGHolder : public ue2_graph<NGHolder, NFAGraphVertexProps,
NFAGraphEdgeProps> {
-public:
- explicit NGHolder(nfa_kind kind);
+public:
+ explicit NGHolder(nfa_kind kind);
NGHolder(void) : NGHolder(NFA_OUTFIX) {};
- virtual ~NGHolder(void);
-
+ virtual ~NGHolder(void);
+
nfa_kind kind; /* Role that this plays in Rose */
-
+
static const size_t N_SPECIAL_VERTICES = N_SPECIALS;
public:
const vertex_descriptor start; //!< Anchored start vertex.
const vertex_descriptor startDs; //!< Unanchored start-dotstar vertex.
const vertex_descriptor accept; //!< Accept vertex.
const vertex_descriptor acceptEod; //!< Accept at EOD vertex.
-
+
vertex_descriptor getSpecialVertex(u32 id) const;
};
-
+
typedef NGHolder::vertex_descriptor NFAVertex;
typedef NGHolder::edge_descriptor NFAEdge;
-
-/** \brief True if the vertex \p v is one of our special vertices. */
-template <typename GraphT>
+
+/** \brief True if the vertex \p v is one of our special vertices. */
+template <typename GraphT>
bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) {
- return g[v].index < N_SPECIALS;
-}
-
-/**
- * \brief Clears all non-special vertices and edges from the graph.
- *
- * Note: not the same as the BGL's clear() function, which removes all vertices
- * and edges.
- */
-void clear_graph(NGHolder &h);
-
-/*
- * \brief Clear and remove all of the vertices pointed to by the given iterator
- * range.
- *
- * If renumber is false, no renumbering of vertex indices is done.
- *
- * Note: should not be called with iterators that will be invalidated by vertex
- * removal (such as NFAGraph::vertex_iterator).
- */
-template <class Iter>
-void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
- if (begin == end) {
- return;
- }
-
- for (Iter it = begin; it != end; ++it) {
- NFAVertex v = *it;
- if (!is_special(v, h)) {
- clear_vertex(v, h);
- remove_vertex(v, h);
- } else {
- assert(0);
- }
- }
-
- if (renumber) {
+ return g[v].index < N_SPECIALS;
+}
+
+/**
+ * \brief Clears all non-special vertices and edges from the graph.
+ *
+ * Note: not the same as the BGL's clear() function, which removes all vertices
+ * and edges.
+ */
+void clear_graph(NGHolder &h);
+
+/*
+ * \brief Clear and remove all of the vertices pointed to by the given iterator
+ * range.
+ *
+ * If renumber is false, no renumbering of vertex indices is done.
+ *
+ * Note: should not be called with iterators that will be invalidated by vertex
+ * removal (such as NFAGraph::vertex_iterator).
+ */
+template <class Iter>
+void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
+ if (begin == end) {
+ return;
+ }
+
+ for (Iter it = begin; it != end; ++it) {
+ NFAVertex v = *it;
+ if (!is_special(v, h)) {
+ clear_vertex(v, h);
+ remove_vertex(v, h);
+ } else {
+ assert(0);
+ }
+ }
+
+ if (renumber) {
renumber_edges(h);
renumber_vertices(h);
- }
-}
-
-/** \brief Clear and remove all of the vertices pointed to by the vertex
- * descriptors in the given container.
- *
- * This is a convenience wrapper around the iterator variant above.
- */
-template <class Container>
-void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) {
- remove_vertices(c.begin(), c.end(), h, renumber);
-}
-
-/*
- * \brief Clear and remove all of the edges pointed to by the given iterator
- * range.
- *
- * If renumber is false, no renumbering of vertex indices is done.
- *
- * Note: should not be called with iterators that will be invalidated by vertex
- * removal (such as NFAGraph::edge_iterator).
- */
-template <class Iter>
-void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
- if (begin == end) {
- return;
- }
-
- for (Iter it = begin; it != end; ++it) {
- const NFAEdge &e = *it;
- remove_edge(e, h);
- }
-
- if (renumber) {
+ }
+}
+
+/** \brief Clear and remove all of the vertices pointed to by the vertex
+ * descriptors in the given container.
+ *
+ * This is a convenience wrapper around the iterator variant above.
+ */
+template <class Container>
+void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) {
+ remove_vertices(c.begin(), c.end(), h, renumber);
+}
+
+/*
+ * \brief Clear and remove all of the edges pointed to by the given iterator
+ * range.
+ *
+ * If renumber is false, no renumbering of vertex indices is done.
+ *
+ * Note: should not be called with iterators that will be invalidated by vertex
+ * removal (such as NFAGraph::edge_iterator).
+ */
+template <class Iter>
+void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
+ if (begin == end) {
+ return;
+ }
+
+ for (Iter it = begin; it != end; ++it) {
+ const NFAEdge &e = *it;
+ remove_edge(e, h);
+ }
+
+ if (renumber) {
renumber_edges(h);
- }
-}
-
+ }
+}
+
#define DEFAULT_TOP 0U
-/** \brief Clear and remove all of the edges pointed to by the edge descriptors
- * in the given container.
- *
- * This is a convenience wrapper around the iterator variant above.
- */
-template <class Container>
-void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
- remove_edges(c.begin(), c.end(), h, renumber);
-}
-
+/** \brief Clear and remove all of the edges pointed to by the edge descriptors
+ * in the given container.
+ *
+ * This is a convenience wrapper around the iterator variant above.
+ */
+template <class Container>
+void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
+ remove_edges(c.begin(), c.end(), h, renumber);
+}
+
inline
-bool is_triggered(const NGHolder &g) {
- return is_triggered(g.kind);
-}
-
+bool is_triggered(const NGHolder &g) {
+ return is_triggered(g.kind);
+}
+
inline
-bool generates_callbacks(const NGHolder &g) {
- return generates_callbacks(g.kind);
-}
+bool generates_callbacks(const NGHolder &g) {
+ return generates_callbacks(g.kind);
+}
inline
bool has_managed_reports(const NGHolder &g) {
@@ -239,6 +239,6 @@ bool inspects_states_for_accepts(const NGHolder &g) {
return inspects_states_for_accepts(g.kind);
}
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp
index 35a09d0ea2..3e013ad5cc 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp
@@ -1,78 +1,78 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Loose equality testing for NGHolder graphs.
- *
- * Loose equality check for holders' graph structure and vertex_index,
- * vertex_char_reach and (optionally reports).
- */
-#include "ng_is_equal.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Loose equality testing for NGHolder graphs.
+ *
+ * Loose equality check for holders' graph structure and vertex_index,
+ * vertex_char_reach and (optionally reports).
+ */
+#include "ng_is_equal.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-struct check_report {
- virtual ~check_report() {}
- virtual bool operator()(const flat_set<ReportID> &reports_a,
- const flat_set<ReportID> &reports_b) const = 0;
-};
-
-struct full_check_report : public check_report {
- bool operator()(const flat_set<ReportID> &reports_a,
- const flat_set<ReportID> &reports_b) const override {
- return reports_a == reports_b;
- }
-};
-
-struct equiv_check_report : public check_report {
- equiv_check_report(ReportID a_in, ReportID b_in)
- : a_rep(a_in), b_rep(b_in) {}
-
- bool operator()(const flat_set<ReportID> &reports_a,
- const flat_set<ReportID> &reports_b) const override {
- return contains(reports_a, a_rep) == contains(reports_b, b_rep);
- }
-private:
- ReportID a_rep;
- ReportID b_rep;
-};
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+struct check_report {
+ virtual ~check_report() {}
+ virtual bool operator()(const flat_set<ReportID> &reports_a,
+ const flat_set<ReportID> &reports_b) const = 0;
+};
+
+struct full_check_report : public check_report {
+ bool operator()(const flat_set<ReportID> &reports_a,
+ const flat_set<ReportID> &reports_b) const override {
+ return reports_a == reports_b;
+ }
+};
+
+struct equiv_check_report : public check_report {
+ equiv_check_report(ReportID a_in, ReportID b_in)
+ : a_rep(a_in), b_rep(b_in) {}
+
+ bool operator()(const flat_set<ReportID> &reports_a,
+ const flat_set<ReportID> &reports_b) const override {
+ return contains(reports_a, a_rep) == contains(reports_b, b_rep);
+ }
+private:
+ ReportID a_rep;
+ ReportID b_rep;
+};
/** Comparison functor used to sort by vertex_index. */
template<typename Graph>
@@ -91,141 +91,141 @@ template<typename Graph>
static
VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) {
return VertexIndexOrdering<Graph>(g);
+}
+
}
-}
-
-static
-bool is_equal_i(const NGHolder &a, const NGHolder &b,
- const check_report &check_rep) {
- assert(hasCorrectlyNumberedVertices(a));
- assert(hasCorrectlyNumberedVertices(b));
-
- size_t num_verts = num_vertices(a);
- if (num_verts != num_vertices(b)) {
- return false;
- }
-
- vector<NFAVertex> vert_a;
- vector<NFAVertex> vert_b;
- vector<NFAVertex> adj_a;
- vector<NFAVertex> adj_b;
-
- vert_a.reserve(num_verts);
- vert_b.reserve(num_verts);
- adj_a.reserve(num_verts);
- adj_b.reserve(num_verts);
-
- insert(&vert_a, vert_a.end(), vertices(a));
- insert(&vert_b, vert_b.end(), vertices(b));
-
- sort(vert_a.begin(), vert_a.end(), make_index_ordering(a));
- sort(vert_b.begin(), vert_b.end(), make_index_ordering(b));
-
- for (size_t i = 0; i < vert_a.size(); i++) {
- NFAVertex va = vert_a[i];
- NFAVertex vb = vert_b[i];
+static
+bool is_equal_i(const NGHolder &a, const NGHolder &b,
+ const check_report &check_rep) {
+ assert(hasCorrectlyNumberedVertices(a));
+ assert(hasCorrectlyNumberedVertices(b));
+
+ size_t num_verts = num_vertices(a);
+ if (num_verts != num_vertices(b)) {
+ return false;
+ }
+
+ vector<NFAVertex> vert_a;
+ vector<NFAVertex> vert_b;
+ vector<NFAVertex> adj_a;
+ vector<NFAVertex> adj_b;
+
+ vert_a.reserve(num_verts);
+ vert_b.reserve(num_verts);
+ adj_a.reserve(num_verts);
+ adj_b.reserve(num_verts);
+
+ insert(&vert_a, vert_a.end(), vertices(a));
+ insert(&vert_b, vert_b.end(), vertices(b));
+
+ sort(vert_a.begin(), vert_a.end(), make_index_ordering(a));
+ sort(vert_b.begin(), vert_b.end(), make_index_ordering(b));
+
+ for (size_t i = 0; i < vert_a.size(); i++) {
+ NFAVertex va = vert_a[i];
+ NFAVertex vb = vert_b[i];
DEBUG_PRINTF("vertex %zu\n", a[va].index);
-
- // Vertex index must be the same.
- if (a[va].index != b[vb].index) {
- DEBUG_PRINTF("bad index\n");
- return false;
- }
-
- // Reach must be the same.
- if (a[va].char_reach != b[vb].char_reach) {
- DEBUG_PRINTF("bad reach\n");
- return false;
- }
-
- if (!check_rep(a[va].reports, b[vb].reports)) {
- DEBUG_PRINTF("bad reports\n");
- return false;
- }
-
- // Other vertex properties may vary.
-
- /* Check successors */
- adj_a.clear();
- adj_b.clear();
- insert(&adj_a, adj_a.end(), adjacent_vertices(va, a));
- insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b));
-
- if (adj_a.size() != adj_b.size()) {
- DEBUG_PRINTF("bad adj\n");
- return false;
- }
-
- sort(adj_a.begin(), adj_a.end(), make_index_ordering(a));
- sort(adj_b.begin(), adj_b.end(), make_index_ordering(b));
-
- for (size_t j = 0; j < adj_a.size(); j++) {
- if (a[adj_a[j]].index != b[adj_b[j]].index) {
- DEBUG_PRINTF("bad adj\n");
- return false;
- }
- }
- }
-
- /* check top for edges out of start */
+
+ // Vertex index must be the same.
+ if (a[va].index != b[vb].index) {
+ DEBUG_PRINTF("bad index\n");
+ return false;
+ }
+
+ // Reach must be the same.
+ if (a[va].char_reach != b[vb].char_reach) {
+ DEBUG_PRINTF("bad reach\n");
+ return false;
+ }
+
+ if (!check_rep(a[va].reports, b[vb].reports)) {
+ DEBUG_PRINTF("bad reports\n");
+ return false;
+ }
+
+ // Other vertex properties may vary.
+
+ /* Check successors */
+ adj_a.clear();
+ adj_b.clear();
+ insert(&adj_a, adj_a.end(), adjacent_vertices(va, a));
+ insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b));
+
+ if (adj_a.size() != adj_b.size()) {
+ DEBUG_PRINTF("bad adj\n");
+ return false;
+ }
+
+ sort(adj_a.begin(), adj_a.end(), make_index_ordering(a));
+ sort(adj_b.begin(), adj_b.end(), make_index_ordering(b));
+
+ for (size_t j = 0; j < adj_a.size(); j++) {
+ if (a[adj_a[j]].index != b[adj_b[j]].index) {
+ DEBUG_PRINTF("bad adj\n");
+ return false;
+ }
+ }
+ }
+
+ /* check top for edges out of start */
vector<pair<u32, flat_set<u32>>> top_a;
vector<pair<u32, flat_set<u32>>> top_b;
-
- for (const auto &e : out_edges_range(a.start, a)) {
+
+ for (const auto &e : out_edges_range(a.start, a)) {
top_a.emplace_back(a[target(e, a)].index, a[e].tops);
- }
- for (const auto &e : out_edges_range(b.start, b)) {
+ }
+ for (const auto &e : out_edges_range(b.start, b)) {
top_b.emplace_back(b[target(e, b)].index, b[e].tops);
- }
-
- sort(top_a.begin(), top_a.end());
- sort(top_b.begin(), top_b.end());
-
- if (top_a != top_b) {
- DEBUG_PRINTF("bad top\n");
- return false;
- }
-
- DEBUG_PRINTF("good\n");
- return true;
-}
-
-/** \brief loose hash of an NGHolder; equal if is_equal would return true. */
-u64a hash_holder(const NGHolder &g) {
- size_t rv = 0;
-
- for (auto v : vertices_range(g)) {
+ }
+
+ sort(top_a.begin(), top_a.end());
+ sort(top_b.begin(), top_b.end());
+
+ if (top_a != top_b) {
+ DEBUG_PRINTF("bad top\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("good\n");
+ return true;
+}
+
+/** \brief loose hash of an NGHolder; equal if is_equal would return true. */
+u64a hash_holder(const NGHolder &g) {
+ size_t rv = 0;
+
+ for (auto v : vertices_range(g)) {
hash_combine(rv, g[v].index);
hash_combine(rv, g[v].char_reach);
-
- for (auto w : adjacent_vertices_range(v, g)) {
+
+ for (auto w : adjacent_vertices_range(v, g)) {
hash_combine(rv, g[w].index);
- }
- }
-
- return rv;
-}
-
-bool is_equal(const NGHolder &a, const NGHolder &b) {
- DEBUG_PRINTF("testing %p %p\n", &a, &b);
-
- if (&a == &b) {
- return true;
- }
-
- return is_equal_i(a, b, full_check_report());
-}
-
-bool is_equal(const NGHolder &a, ReportID a_rep,
- const NGHolder &b, ReportID b_rep) {
- DEBUG_PRINTF("testing %p %p\n", &a, &b);
-
- if (&a == &b && a_rep == b_rep) {
- return true;
- }
-
- return is_equal_i(a, b, equiv_check_report(a_rep, b_rep));
-}
-
-} // namespace ue2
+ }
+ }
+
+ return rv;
+}
+
+bool is_equal(const NGHolder &a, const NGHolder &b) {
+ DEBUG_PRINTF("testing %p %p\n", &a, &b);
+
+ if (&a == &b) {
+ return true;
+ }
+
+ return is_equal_i(a, b, full_check_report());
+}
+
+bool is_equal(const NGHolder &a, ReportID a_rep,
+ const NGHolder &b, ReportID b_rep) {
+ DEBUG_PRINTF("testing %p %p\n", &a, &b);
+
+ if (&a == &b && a_rep == b_rep) {
+ return true;
+ }
+
+ return is_equal_i(a, b, equiv_check_report(a_rep, b_rep));
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h
index d8046270ff..baddc494b0 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Loose equality testing for NGHolder graphs.
- *
- * Loose equality check for holders' graph structure and vertex_index,
- * vertex_char_reach and (optionally reports).
- */
-
-#ifndef NG_IS_EQUAL_H
-#define NG_IS_EQUAL_H
-
-#include "ue2common.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class NGHolder;
-
-bool is_equal(const NGHolder &a, const NGHolder &b);
-bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r);
-
-u64a hash_holder(const NGHolder &g);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Loose equality testing for NGHolder graphs.
+ *
+ * Loose equality check for holders' graph structure and vertex_index,
+ * vertex_char_reach and (optionally reports).
+ */
+
+#ifndef NG_IS_EQUAL_H
+#define NG_IS_EQUAL_H
+
+#include "ue2common.h"
+
+#include <memory>
+
+namespace ue2 {
+
+class NGHolder;
+
+bool is_equal(const NGHolder &a, const NGHolder &b);
+bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r);
+
+u64a hash_holder(const NGHolder &g);
+
// Util Functors
struct NGHolderHasher {
size_t operator()(const std::shared_ptr<const NGHolder> &h) const {
@@ -59,13 +59,13 @@ struct NGHolderHasher {
}
};
-struct NGHolderEqual {
- bool operator()(const std::shared_ptr<const NGHolder> &a,
- const std::shared_ptr<const NGHolder> &b) const {
- return is_equal(*a, *b);
- }
-};
-
-} // namespace ue2
-
-#endif // NG_IS_EQUAL_H
+struct NGHolderEqual {
+ bool operator()(const std::shared_ptr<const NGHolder> &a,
+ const std::shared_ptr<const NGHolder> &b) const {
+ return is_equal(*a, *b);
+ }
+};
+
+} // namespace ue2
+
+#endif // NG_IS_EQUAL_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp
index d8ba503ce6..e6526a2414 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp
@@ -1,349 +1,349 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Large Bounded Repeat (LBR) engine build code.
- */
-
-#include "ng_lbr.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_repeat.h"
-#include "ng_reports.h"
+ * \brief Large Bounded Repeat (LBR) engine build code.
+ */
+
+#include "ng_lbr.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_repeat.h"
+#include "ng_reports.h"
#include "nfa/castlecompile.h"
-#include "nfa/lbr_internal.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/repeatcompile.h"
+#include "nfa/lbr_internal.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/repeatcompile.h"
#include "nfa/shufticompile.h"
#include "nfa/trufflecompile.h"
-#include "util/alloc.h"
-#include "util/bitutils.h" // for lg2
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/dump_charclass.h"
+#include "util/alloc.h"
+#include "util/bitutils.h" // for lg2
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/dump_charclass.h"
#include "util/report_manager.h"
-#include "util/verify_types.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
-u32 depth_to_u32(const depth &d) {
- assert(d.is_reachable());
- if (d.is_infinite()) {
- return REPEAT_INF;
- }
-
- u32 d_val = d;
- assert(d_val < REPEAT_INF);
- return d_val;
-}
-
-template<class LbrStruct> static
-u64a* getTable(NFA *nfa) {
- char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
- sizeof(RepeatInfo);
- ptr = ROUNDUP_PTR(ptr, alignof(u64a));
- return (u64a *)ptr;
-}
-
-template <class LbrStruct> static
-void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
- const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
- assert(nfa);
-
- RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
-
- DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
- repeatTypeName(rtype), repeatMin.str().c_str(),
- repeatMax.str().c_str());
-
- // Fill the lbr_common structure first. Note that the RepeatInfo structure
- // directly follows the LbrStruct.
- const u32 info_offset = sizeof(LbrStruct);
- c->repeatInfoOffset = info_offset;
- c->report = report;
-
- RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
- info->type = verify_u8(rtype);
- info->repeatMin = depth_to_u32(repeatMin);
- info->repeatMax = depth_to_u32(repeatMax);
- info->stateSize = rsi.stateSize;
- info->packedCtrlSize = rsi.packedCtrlSize;
- info->horizon = rsi.horizon;
- info->minPeriod = minPeriod;
- copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
- info->patchCount = rsi.patchCount;
- info->patchSize = rsi.patchSize;
- info->encodingSize = rsi.encodingSize;
- info->patchesOffset = rsi.patchesOffset;
-
- // Fill the NFA structure.
- nfa->nPositions = repeatMin;
- nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
- nfa->scratchStateSize = (u32)sizeof(lbr_state);
- nfa->minWidth = verify_u32(repeatMin);
- nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
-
- // Fill the lbr table for sparse lbr model.
- if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
- u64a *table = getTable<LbrStruct>(nfa);
- // Adjust table length according to the optimal patch length.
- size_t len = nfa->length;
- assert((u32)repeatMax >= rsi.patchSize);
- len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
- nfa->length = verify_u32(len);
- info->length = verify_u32(sizeof(RepeatInfo)
- + sizeof(u64a) * (rsi.patchSize + 1));
- copy_bytes(table, rsi.table);
- }
-}
-
-template <class LbrStruct> static
+#include "util/verify_types.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+u32 depth_to_u32(const depth &d) {
+ assert(d.is_reachable());
+ if (d.is_infinite()) {
+ return REPEAT_INF;
+ }
+
+ u32 d_val = d;
+ assert(d_val < REPEAT_INF);
+ return d_val;
+}
+
+template<class LbrStruct> static
+u64a* getTable(NFA *nfa) {
+ char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
+ sizeof(RepeatInfo);
+ ptr = ROUNDUP_PTR(ptr, alignof(u64a));
+ return (u64a *)ptr;
+}
+
+template <class LbrStruct> static
+void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
+ const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
+ assert(nfa);
+
+ RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
+
+ DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
+ repeatTypeName(rtype), repeatMin.str().c_str(),
+ repeatMax.str().c_str());
+
+ // Fill the lbr_common structure first. Note that the RepeatInfo structure
+ // directly follows the LbrStruct.
+ const u32 info_offset = sizeof(LbrStruct);
+ c->repeatInfoOffset = info_offset;
+ c->report = report;
+
+ RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
+ info->type = verify_u8(rtype);
+ info->repeatMin = depth_to_u32(repeatMin);
+ info->repeatMax = depth_to_u32(repeatMax);
+ info->stateSize = rsi.stateSize;
+ info->packedCtrlSize = rsi.packedCtrlSize;
+ info->horizon = rsi.horizon;
+ info->minPeriod = minPeriod;
+ copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
+ info->patchCount = rsi.patchCount;
+ info->patchSize = rsi.patchSize;
+ info->encodingSize = rsi.encodingSize;
+ info->patchesOffset = rsi.patchesOffset;
+
+ // Fill the NFA structure.
+ nfa->nPositions = repeatMin;
+ nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
+ nfa->scratchStateSize = (u32)sizeof(lbr_state);
+ nfa->minWidth = verify_u32(repeatMin);
+ nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
+
+ // Fill the lbr table for sparse lbr model.
+ if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
+ u64a *table = getTable<LbrStruct>(nfa);
+ // Adjust table length according to the optimal patch length.
+ size_t len = nfa->length;
+ assert((u32)repeatMax >= rsi.patchSize);
+ len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
+ nfa->length = verify_u32(len);
+ info->length = verify_u32(sizeof(RepeatInfo)
+ + sizeof(u64a) * (rsi.patchSize + 1));
+ copy_bytes(table, rsi.table);
+ }
+}
+
+template <class LbrStruct> static
bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype,
const depth &repeatMax) {
- size_t tableLen = 0;
- if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
- tableLen = sizeof(u64a) * (repeatMax + 1);
- }
- size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
- tableLen + sizeof(u64a);
+ size_t tableLen = 0;
+ if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
+ tableLen = sizeof(u64a) * (repeatMax + 1);
+ }
+ size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
+ tableLen + sizeof(u64a);
auto nfa = make_zeroed_bytecode_ptr<NFA>(len);
- nfa->type = verify_u8(nfa_type);
- nfa->length = verify_u32(len);
- return nfa;
-}
-
-static
+ nfa->type = verify_u8(nfa_type);
+ nfa->length = verify_u32(len);
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- if (!cr.all()) {
- return nullptr;
- }
-
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ if (!cr.all()) {
+ return nullptr;
+ }
+
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax);
- struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
-
- fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
- DEBUG_PRINTF("built dot lbr\n");
- return nfa;
-}
-
-static
+ struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
+
+ fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
+ DEBUG_PRINTF("built dot lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- const CharReach escapes(~cr);
-
- if (escapes.count() != 1) {
- return nullptr;
- }
-
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ const CharReach escapes(~cr);
+
+ if (escapes.count() != 1) {
+ return nullptr;
+ }
+
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax);
- struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
- lv->c = escapes.find_first();
-
- fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
- DEBUG_PRINTF("built verm lbr\n");
- return nfa;
-}
-
-static
+ struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
+ lv->c = escapes.find_first();
+
+ fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
+ DEBUG_PRINTF("built verm lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- const CharReach escapes(cr);
-
- if (escapes.count() != 1) {
- return nullptr;
- }
-
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ const CharReach escapes(cr);
+
+ if (escapes.count() != 1) {
+ return nullptr;
+ }
+
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax);
- struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
- lv->c = escapes.find_first();
-
- fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
- DEBUG_PRINTF("built negated verm lbr\n");
- return nfa;
-}
-
-static
+ struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
+ lv->c = escapes.find_first();
+
+ fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
+ DEBUG_PRINTF("built negated verm lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax);
- struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
-
- fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
+ struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
+
+ fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
- return nullptr;
- }
-
- DEBUG_PRINTF("built shuf lbr\n");
- return nfa;
-}
-
-static
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("built shuf lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax);
- struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
-
- fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
+ struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
+
+ fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2);
-
- DEBUG_PRINTF("built truffle lbr\n");
- return nfa;
-}
-
-static
+
+ DEBUG_PRINTF("built truffle lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
- repeatMin.str().c_str(), repeatMax.str().c_str(),
- describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
- report);
- assert(repeatMin <= repeatMax);
- assert(repeatMax.is_reachable());
-
+ DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
+ repeatMin.str().c_str(), repeatMax.str().c_str(),
+ describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
+ report);
+ assert(repeatMin <= repeatMax);
+ assert(repeatMax.is_reachable());
+
auto nfa =
buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
-
- if (!nfa) {
- nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
- if (!nfa) {
- nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
- if (!nfa) {
- nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
- if (!nfa) {
- nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
-
- if (!nfa) {
- assert(0);
- return nullptr;
- }
-
- return nfa;
-}
-
+
+ if (!nfa) {
+ nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+ if (!nfa) {
+ nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+ if (!nfa) {
+ nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+ if (!nfa) {
+ nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+
+ if (!nfa) {
+ assert(0);
+ return nullptr;
+ }
+
+ return nfa;
+}
+
bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
const vector<vector<CharReach>> &triggers,
const CompileContext &cc,
const ReportManager &rm) {
- if (!cc.grey.allowLbr) {
- return nullptr;
- }
-
+ if (!cc.grey.allowLbr) {
+ return nullptr;
+ }
+
if (proto.repeats.size() != 1) {
return nullptr;
}
const PureRepeat &repeat = proto.repeats.begin()->second;
- assert(!repeat.reach.none());
-
- if (repeat.reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- return nullptr;
- }
-
- bool is_reset;
- u32 min_period = minPeriod(triggers, repeat.reach, &is_reset);
-
- if (depth(min_period) > repeat.bounds.max) {
- DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
- is_reset = true;
- }
-
- ReportID report = *repeat.reports.begin();
+ assert(!repeat.reach.none());
+
+ if (repeat.reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ return nullptr;
+ }
+
+ bool is_reset;
+ u32 min_period = minPeriod(triggers, repeat.reach, &is_reset);
+
+ if (depth(min_period) > repeat.bounds.max) {
+ DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
+ is_reset = true;
+ }
+
+ ReportID report = *repeat.reports.begin();
if (has_managed_reports(proto.kind)) {
report = rm.getProgramOffset(report);
}
-
- DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
- return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max,
- min_period, is_reset, report);
-}
-
-/** \brief Construct an LBR engine from the given graph \p g. */
+
+ DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
+ return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max,
+ min_period, is_reset, report);
+}
+
+/** \brief Construct an LBR engine from the given graph \p g. */
bytecode_ptr<NFA> constructLBR(const NGHolder &g,
const vector<vector<CharReach>> &triggers,
const CompileContext &cc,
const ReportManager &rm) {
- if (!cc.grey.allowLbr) {
- return nullptr;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(g, repeat)) {
+ if (!cc.grey.allowLbr) {
+ return nullptr;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(g, repeat)) {
+ return nullptr;
+ }
+ if (repeat.reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
return nullptr;
- }
- if (repeat.reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- return nullptr;
- }
-
+ }
+
CastleProto proto(g.kind, repeat);
return constructLBR(proto, triggers, cc, rm);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h
index c181dbb9e7..55a77fcd1e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h
@@ -1,71 +1,71 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Large Bounded Repeat (LBR) engine build code.
- */
-
-#ifndef NG_LBR_H
-#define NG_LBR_H
-
-#include "ue2common.h"
+ * \brief Large Bounded Repeat (LBR) engine build code.
+ */
+
+#ifndef NG_LBR_H
+#define NG_LBR_H
+
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
-
-#include <memory>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-class ReportManager;
+
+#include <memory>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+class ReportManager;
struct CastleProto;
-struct CompileContext;
-struct Grey;
-
-/** \brief Construct an LBR engine from the given graph \p g. */
+struct CompileContext;
+struct Grey;
+
+/** \brief Construct an LBR engine from the given graph \p g. */
bytecode_ptr<NFA>
-constructLBR(const NGHolder &g,
- const std::vector<std::vector<CharReach>> &triggers,
+constructLBR(const NGHolder &g,
+ const std::vector<std::vector<CharReach>> &triggers,
const CompileContext &cc, const ReportManager &rm);
-
+
/**
* \brief Construct an LBR engine from the given CastleProto, which should
* contain only one repeat.
*/
bytecode_ptr<NFA>
constructLBR(const CastleProto &proto,
- const std::vector<std::vector<CharReach>> &triggers,
+ const std::vector<std::vector<CharReach>> &triggers,
const CompileContext &cc, const ReportManager &rm);
-
-} // namespace ue2
-
-#endif // NG_LBR_H
+
+} // namespace ue2
+
+#endif // NG_LBR_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp
index 2f0a55eab9..0f939f122f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp
@@ -1,198 +1,198 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Limex NFA construction code.
- */
-
-#include "ng_limex.h"
-
-#include "grey.h"
-#include "ng_equivalence.h"
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_repeat.h"
-#include "ng_reports.h"
-#include "ng_restructuring.h"
-#include "ng_squash.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "ue2common.h"
-#include "nfa/limex_compile.h"
-#include "nfa/limex_limits.h"
-#include "nfa/nfa_internal.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph_range.h"
+ * \brief Limex NFA construction code.
+ */
+
+#include "ng_limex.h"
+
+#include "grey.h"
+#include "ng_equivalence.h"
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_repeat.h"
+#include "ng_reports.h"
+#include "ng_restructuring.h"
+#include "ng_squash.h"
+#include "ng_util.h"
+#include "ng_width.h"
+#include "ue2common.h"
+#include "nfa/limex_compile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/nfa_internal.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph_range.h"
#include "util/report_manager.h"
#include "util/flat_containers.h"
-#include "util/verify_types.h"
-
+#include "util/verify_types.h"
+
#include <algorithm>
-#include <map>
+#include <map>
#include <unordered_map>
#include <unordered_set>
-#include <vector>
-
+#include <vector>
+
#include <boost/range/adaptor/map.hpp>
-using namespace std;
+using namespace std;
using boost::adaptors::map_values;
using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-#ifndef NDEBUG
-// Some sanity checking for the graph; returns false if something is wrong.
-// Only used in assertions.
-static
-bool sanityCheckGraph(const NGHolder &g,
+
+namespace ue2 {
+
+#ifndef NDEBUG
+// Some sanity checking for the graph; returns false if something is wrong.
+// Only used in assertions.
+static
+bool sanityCheckGraph(const NGHolder &g,
const unordered_map<NFAVertex, u32> &state_ids) {
unordered_set<u32> seen_states;
-
- for (auto v : vertices_range(g)) {
- // Non-specials should have non-empty reachability.
- if (!is_special(v, g)) {
- if (g[v].char_reach.none()) {
+
+ for (auto v : vertices_range(g)) {
+ // Non-specials should have non-empty reachability.
+ if (!is_special(v, g)) {
+ if (g[v].char_reach.none()) {
DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index);
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
// Vertices with edges to accept or acceptEod must have reports and
// other vertices must not have them.
- if (is_match_vertex(v, g) && v != g.accept) {
- if (g[v].reports.empty()) {
+ if (is_match_vertex(v, g) && v != g.accept) {
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index);
- return false;
- }
+ return false;
+ }
} else if (!g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has reports but no accept edge\n",
g[v].index);
return false;
- }
-
- // Participant vertices should have distinct state indices.
- if (!contains(state_ids, v)) {
+ }
+
+ // Participant vertices should have distinct state indices.
+ if (!contains(state_ids, v)) {
DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index);
- return false;
- }
- u32 s = state_ids.at(v);
- if (s != NO_STATE && !seen_states.insert(s).second) {
+ return false;
+ }
+ u32 s = state_ids.at(v);
+ if (s != NO_STATE && !seen_states.insert(s).second) {
DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s);
- return false;
- }
- }
-
- return true;
-}
-#endif
-
-static
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
+static
unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g,
const vector<BoundedRepeatData> &repeats) {
auto squashMap = findSquashers(g);
- filterSquashers(g, squashMap);
-
- /* We also filter out the cyclic states representing bounded repeats, as
+ filterSquashers(g, squashMap);
+
+ /* We also filter out the cyclic states representing bounded repeats, as
* they are not really cyclic -- they may turn off unexpectedly. */
- for (const auto &br : repeats) {
+ for (const auto &br : repeats) {
if (br.repeatMax.is_finite()) {
squashMap.erase(br.cyclic);
}
- }
+ }
return squashMap;
-}
-
-/**
- * \brief Drop edges from start to vertices that also have an edge from
- * startDs.
- *
- * Note that this also includes the (start, startDs) edge, which is not
- * necessary for actual NFA implementation (and is actually something we don't
- * want to affect state numbering, etc).
- */
-static
-void dropRedundantStartEdges(NGHolder &g) {
- remove_out_edge_if(g.start, [&](const NFAEdge &e) {
- return edge(g.startDs, target(e, g), g).second;
- }, g);
-
- // Ensure that we always remove (start, startDs), even if startDs has had
- // its self-loop removed as an optimization.
- remove_edge(g.start, g.startDs, g);
-}
-
-static
+}
+
+/**
+ * \brief Drop edges from start to vertices that also have an edge from
+ * startDs.
+ *
+ * Note that this also includes the (start, startDs) edge, which is not
+ * necessary for actual NFA implementation (and is actually something we don't
+ * want to affect state numbering, etc).
+ */
+static
+void dropRedundantStartEdges(NGHolder &g) {
+ remove_out_edge_if(g.start, [&](const NFAEdge &e) {
+ return edge(g.startDs, target(e, g), g).second;
+ }, g);
+
+ // Ensure that we always remove (start, startDs), even if startDs has had
+ // its self-loop removed as an optimization.
+ remove_edge(g.start, g.startDs, g);
+}
+
+static
CharReach calcTopVertexReach(const flat_set<u32> &tops,
const map<u32, CharReach> &top_reach) {
CharReach top_cr;
for (u32 t : tops) {
- if (contains(top_reach, t)) {
+ if (contains(top_reach, t)) {
top_cr |= top_reach.at(t);
- } else {
- top_cr = CharReach::dot();
+ } else {
+ top_cr = CharReach::dot();
break;
- }
+ }
}
return top_cr;
}
-
+
static
NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops,
const flat_set<NFAVertex> &succs,
const map<u32, CharReach> &top_reach) {
assert(!succs.empty());
assert(!tops.empty());
-
+
bool reporter = false;
-
+
NFAVertex u = add_vertex(g[g.start], g);
CharReach top_cr = calcTopVertexReach(tops, top_reach);
g[u].char_reach = top_cr;
-
+
for (auto v : succs) {
if (v == g.accept || v == g.acceptEod) {
reporter = true;
}
add_edge(u, v, g);
}
-
+
// Only retain reports (which we copied on add_vertex above) for new top
// vertices connected to accepts.
if (!reporter) {
g[u].reports.clear();
}
-
+
return u;
}
@@ -208,11 +208,11 @@ void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs,
if (best == top_succs.end()
|| it->second.size() < best->second.size()) {
best = it;
- }
- }
+ }
+ }
assert(best != top_succs.end());
assert(!best->second.empty()); /* should already been pruned */
-
+
*picked_tops = { best->first };
*picked_succs = best->second;
} else {
@@ -224,16 +224,16 @@ void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs,
|| (it->second.size() == best->second.size()
&& it->second < best->second)) {
best = it;
- }
- }
+ }
+ }
assert(best != succ_tops.end());
assert(!best->second.empty()); /* should already been pruned */
*picked_succs = { best->first };
*picked_tops = best->second;
- }
+ }
}
-
+
static
void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
const map<u32, flat_set<NFAVertex>> &top_succs,
@@ -462,153 +462,153 @@ void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out,
}
assert(unhandled_top_succs.empty());
- // We are completely replacing the start vertex, so clear its reports.
- clear_out_edges(g.start, g);
- add_edge(g.start, g.startDs, g);
- g[g.start].reports.clear();
-}
-
-static
-set<NFAVertex> findZombies(const NGHolder &h,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ // We are completely replacing the start vertex, so clear its reports.
+ clear_out_edges(g.start, g);
+ add_edge(g.start, g.startDs, g);
+ g[g.start].reports.clear();
+}
+
+static
+set<NFAVertex> findZombies(const NGHolder &h,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
const unordered_map<NFAVertex, u32> &state_ids,
- const CompileContext &cc) {
- set<NFAVertex> zombies;
- if (!cc.grey.allowZombies) {
- return zombies;
- }
-
- // We only use zombie masks in streaming mode.
- if (!cc.streaming) {
- return zombies;
- }
-
- if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
+ const CompileContext &cc) {
+ set<NFAVertex> zombies;
+ if (!cc.grey.allowZombies) {
+ return zombies;
+ }
+
+ // We only use zombie masks in streaming mode.
+ if (!cc.streaming) {
+ return zombies;
+ }
+
+ if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
DEBUG_PRINTF("cannot be made undead - bad reports\n");
- return zombies;
- }
-
- for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
- assert(h[u].reports.size() == 1);
- for (auto v : adjacent_vertices_range(u, h)) {
- if (edge(v, h.accept, h).second
- && h[v].char_reach.all()) {
- if (!contains(br_cyclic, v)) {
- goto ok;
- }
-
- const BoundedRepeatSummary &sum = br_cyclic.at(v);
-
- if (u == v && sum.repeatMax.is_infinite()) {
- goto ok;
- }
-
- }
- }
- DEBUG_PRINTF("does not go to dot accept\n");
- return zombies;
- ok:;
- }
-
- for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
- if (state_ids.at(v) != NO_STATE) {
- zombies.insert(v);
- }
- }
- return zombies;
-}
-
-static
+ return zombies;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
+ assert(h[u].reports.size() == 1);
+ for (auto v : adjacent_vertices_range(u, h)) {
+ if (edge(v, h.accept, h).second
+ && h[v].char_reach.all()) {
+ if (!contains(br_cyclic, v)) {
+ goto ok;
+ }
+
+ const BoundedRepeatSummary &sum = br_cyclic.at(v);
+
+ if (u == v && sum.repeatMax.is_infinite()) {
+ goto ok;
+ }
+
+ }
+ }
+ DEBUG_PRINTF("does not go to dot accept\n");
+ return zombies;
+ ok:;
+ }
+
+ for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
+ if (state_ids.at(v) != NO_STATE) {
+ zombies.insert(v);
+ }
+ }
+ return zombies;
+}
+
+static
void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) {
- vector<NFAVertex> ordering;
- for (auto &e : state_ids) {
- if (e.second == NO_STATE) {
- continue;
- }
- ordering.push_back(e.first);
- }
-
- // Sort in reverse order by state ID.
- sort(ordering.begin(), ordering.end(),
- [&state_ids](NFAVertex a, NFAVertex b) {
- return state_ids.at(a) > state_ids.at(b);
- });
-
- u32 stateNum = 0;
-
- for (const auto &v : ordering) {
- DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum);
- state_ids[v] = stateNum++;
- }
-}
-
-static
-map<u32, CharReach>
-findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) {
- map<u32, CharReach> top_reach;
-
- for (const auto &m : triggers) {
- const auto top = m.first;
- CharReach cr;
- for (const auto &trigger : m.second) {
- if (trigger.empty()) {
- // We don't know anything about this trigger. Assume it can
- // have any reach.
- cr.setall();
- break;
- }
- cr |= *trigger.rbegin();
- }
-
- top_reach.emplace(top, cr);
- }
-
- return top_reach;
-}
-
-static
-unique_ptr<NGHolder>
-prepareGraph(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- bool impl_test_only, const CompileContext &cc,
+ vector<NFAVertex> ordering;
+ for (auto &e : state_ids) {
+ if (e.second == NO_STATE) {
+ continue;
+ }
+ ordering.push_back(e.first);
+ }
+
+ // Sort in reverse order by state ID.
+ sort(ordering.begin(), ordering.end(),
+ [&state_ids](NFAVertex a, NFAVertex b) {
+ return state_ids.at(a) > state_ids.at(b);
+ });
+
+ u32 stateNum = 0;
+
+ for (const auto &v : ordering) {
+ DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum);
+ state_ids[v] = stateNum++;
+ }
+}
+
+static
+map<u32, CharReach>
+findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) {
+ map<u32, CharReach> top_reach;
+
+ for (const auto &m : triggers) {
+ const auto top = m.first;
+ CharReach cr;
+ for (const auto &trigger : m.second) {
+ if (trigger.empty()) {
+ // We don't know anything about this trigger. Assume it can
+ // have any reach.
+ cr.setall();
+ break;
+ }
+ cr |= *trigger.rbegin();
+ }
+
+ top_reach.emplace(top, cr);
+ }
+
+ return top_reach;
+}
+
+static
+unique_ptr<NGHolder>
+prepareGraph(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ bool impl_test_only, const CompileContext &cc,
unordered_map<NFAVertex, u32> &state_ids,
vector<BoundedRepeatData> &repeats,
map<u32, set<NFAVertex>> &tops) {
- assert(is_triggered(h_in) || fixed_depth_tops.empty());
-
- unique_ptr<NGHolder> h = cloneHolder(h_in);
-
- // Bounded repeat handling.
- analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming,
- impl_test_only, cc.grey);
-
- // If we're building a rose/suffix, do the top dance.
+ assert(is_triggered(h_in) || fixed_depth_tops.empty());
+
+ unique_ptr<NGHolder> h = cloneHolder(h_in);
+
+ // Bounded repeat handling.
+ analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming,
+ impl_test_only, cc.grey);
+
+ // If we're building a rose/suffix, do the top dance.
flat_set<NFAVertex> topVerts;
- if (is_triggered(*h)) {
- makeTopStates(*h, tops, findTopReach(triggers));
+ if (is_triggered(*h)) {
+ makeTopStates(*h, tops, findTopReach(triggers));
for (const auto &vv : tops | map_values) {
insert(&topVerts, vv);
}
- }
-
- dropRedundantStartEdges(*h);
-
- // Do state numbering
+ }
+
+ dropRedundantStartEdges(*h);
+
+ // Do state numbering
state_ids = numberStates(*h, topVerts);
-
- // In debugging, we sometimes like to reverse the state numbering to stress
- // the NFA construction code.
- if (cc.grey.numberNFAStatesWrong) {
- reverseStateOrdering(state_ids);
- }
-
- assert(sanityCheckGraph(*h, state_ids));
- return h;
-}
-
-static
+
+ // In debugging, we sometimes like to reverse the state numbering to stress
+ // the NFA construction code.
+ if (cc.grey.numberNFAStatesWrong) {
+ reverseStateOrdering(state_ids);
+ }
+
+ assert(sanityCheckGraph(*h, state_ids));
+ return h;
+}
+
+static
void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) {
for (const auto &v : vertices_range(h)) {
auto &reports = h[v].reports;
@@ -629,234 +629,234 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) {
static
bytecode_ptr<NFA>
-constructNFA(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
bool compress_state, bool do_accel, bool impl_test_only,
bool &fast, u32 hint, const CompileContext &cc) {
if (!has_managed_reports(h_in)) {
- rm = nullptr;
- } else {
- assert(rm);
- }
-
+ rm = nullptr;
+ } else {
+ assert(rm);
+ }
+
unordered_map<NFAVertex, u32> state_ids;
- vector<BoundedRepeatData> repeats;
+ vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
- unique_ptr<NGHolder> h
- = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc,
- state_ids, repeats, tops);
-
- // Quick exit: if we've got an embarrassment of riches, i.e. more states
- // than we can implement in our largest NFA model, bail here.
+ unique_ptr<NGHolder> h
+ = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc,
+ state_ids, repeats, tops);
+
+ // Quick exit: if we've got an embarrassment of riches, i.e. more states
+ // than we can implement in our largest NFA model, bail here.
u32 numStates = countStates(state_ids);
- if (numStates > NFA_MAX_STATES) {
- DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
- return nullptr;
- }
-
- map<NFAVertex, BoundedRepeatSummary> br_cyclic;
- for (const auto &br : repeats) {
- br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
- }
-
+ if (numStates > NFA_MAX_STATES) {
+ DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
+ return nullptr;
+ }
+
+ map<NFAVertex, BoundedRepeatSummary> br_cyclic;
+ for (const auto &br : repeats) {
+ br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
+ }
+
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
-
- // build map of squashed and squashers
- if (cc.grey.squashNFA) {
+
+ // build map of squashed and squashers
+ if (cc.grey.squashNFA) {
squashMap = findSquashStates(*h, repeats);
-
- if (rm && cc.grey.highlanderSquash) {
- reportSquashMap = findHighlanderSquashers(*h, *rm);
- }
- }
-
- set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
-
+
+ if (rm && cc.grey.highlanderSquash) {
+ reportSquashMap = findHighlanderSquashers(*h, *rm);
+ }
+ }
+
+ set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
+
if (has_managed_reports(*h)) {
assert(rm);
remapReportsToPrograms(*h, *rm);
}
- if (!cc.streaming || !cc.grey.compressNFAState) {
- compress_state = false;
- }
-
- return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
+ if (!cc.streaming || !cc.grey.compressNFAState) {
+ compress_state = false;
+ }
+
+ return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
zombies, do_accel, compress_state, fast, hint, cc);
-}
-
+}
+
bytecode_ptr<NFA>
-constructNFA(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
bool compress_state, bool &fast, const CompileContext &cc) {
- const u32 hint = INVALID_NFA;
- const bool do_accel = cc.grey.accelerateNFA;
- const bool impl_test_only = false;
- return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
+ const u32 hint = INVALID_NFA;
+ const bool do_accel = cc.grey.accelerateNFA;
+ const bool impl_test_only = false;
+ return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
do_accel, impl_test_only, fast, hint, cc);
-}
-
-#ifndef RELEASE_BUILD
-// Variant that allows a hint to be specified.
+}
+
+#ifndef RELEASE_BUILD
+// Variant that allows a hint to be specified.
bytecode_ptr<NFA>
-constructNFA(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
bool compress_state, bool &fast, u32 hint, const CompileContext &cc) {
- const bool do_accel = cc.grey.accelerateNFA;
- const bool impl_test_only = false;
+ const bool do_accel = cc.grey.accelerateNFA;
+ const bool impl_test_only = false;
return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
do_accel, impl_test_only, fast, hint, cc);
-}
-#endif // RELEASE_BUILD
-
-static
+}
+#endif // RELEASE_BUILD
+
+static
bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
const CompileContext &cc) {
- // Make a mutable copy of the graph that we can renumber etc.
- NGHolder h;
- cloneHolder(h, h_in);
- assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
-
- // Do state numbering.
+ // Make a mutable copy of the graph that we can renumber etc.
+ NGHolder h;
+ cloneHolder(h, h_in);
+ assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
+
+ // Do state numbering.
auto state_ids = numberStates(h, {});
-
- // Quick exit: if we've got an embarrassment of riches, i.e. more states
- // than we can implement in our largest NFA model, bail here.
+
+ // Quick exit: if we've got an embarrassment of riches, i.e. more states
+ // than we can implement in our largest NFA model, bail here.
u32 numStates = countStates(state_ids);
- if (numStates > NFA_MAX_STATES) {
- DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
- return nullptr;
- }
-
- assert(sanityCheckGraph(h, state_ids));
-
+ if (numStates > NFA_MAX_STATES) {
+ DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
+ return nullptr;
+ }
+
+ assert(sanityCheckGraph(h, state_ids));
+
map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */
- set<NFAVertex> zombies;
- vector<BoundedRepeatData> repeats;
+ set<NFAVertex> zombies;
+ vector<BoundedRepeatData> repeats;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
UNUSED bool fast = false;
-
- return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
+
+ return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
zombies, false, false, fast, hint, cc);
-}
-
+}
+
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in,
const CompileContext &cc) {
- u32 hint = INVALID_NFA; // no hint
- return constructReversedNFA_i(h_in, hint, cc);
-}
-
-#ifndef RELEASE_BUILD
-// Variant that allows a hint to be specified.
+ u32 hint = INVALID_NFA; // no hint
+ return constructReversedNFA_i(h_in, hint, cc);
+}
+
+#ifndef RELEASE_BUILD
+// Variant that allows a hint to be specified.
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint,
const CompileContext &cc) {
- return constructReversedNFA_i(h_in, hint, cc);
-}
-#endif // RELEASE_BUILD
-
-u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc) {
+ return constructReversedNFA_i(h_in, hint, cc);
+}
+#endif // RELEASE_BUILD
+
+u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc) {
if (!cc.grey.allowLimExNFA) {
return false;
}
assert(!can_never_match(g));
- // Quick check: we can always implement an NFA with less than NFA_MAX_STATES
- // states. Note that top masks can generate extra states, so we account for
- // those here too.
+ // Quick check: we can always implement an NFA with less than NFA_MAX_STATES
+ // states. Note that top masks can generate extra states, so we account for
+ // those here too.
if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) {
- return true;
- }
-
+ return true;
+ }
+
if (!has_managed_reports(g)) {
- rm = nullptr;
- } else {
- assert(rm);
- }
-
- // The BEST way to tell if an NFA is implementable is to implement it!
- const bool impl_test_only = true;
- const map<u32, u32> fixed_depth_tops; // empty
- const map<u32, vector<vector<CharReach>>> triggers; // empty
-
- /* Perform the first part of the construction process and see if the
- * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
- * implement it as an NFA. */
-
+ rm = nullptr;
+ } else {
+ assert(rm);
+ }
+
+ // The BEST way to tell if an NFA is implementable is to implement it!
+ const bool impl_test_only = true;
+ const map<u32, u32> fixed_depth_tops; // empty
+ const map<u32, vector<vector<CharReach>>> triggers; // empty
+
+ /* Perform the first part of the construction process and see if the
+ * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
+ * implement it as an NFA. */
+
unordered_map<NFAVertex, u32> state_ids;
- vector<BoundedRepeatData> repeats;
+ vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
- unique_ptr<NGHolder> h
- = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
- state_ids, repeats, tops);
- assert(h);
+ unique_ptr<NGHolder> h
+ = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
+ state_ids, repeats, tops);
+ assert(h);
u32 numStates = countStates(state_ids);
- if (numStates <= NFA_MAX_STATES) {
- return numStates;
- }
-
- return 0;
-}
-
-void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
- const CompileContext &cc) {
- NGHolder g_pristine;
- cloneHolder(g_pristine, g);
-
- reduceGraphEquivalences(g, cc);
-
- removeRedundancy(g, som);
-
+ if (numStates <= NFA_MAX_STATES) {
+ return numStates;
+ }
+
+ return 0;
+}
+
+void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
+ const CompileContext &cc) {
+ NGHolder g_pristine;
+ cloneHolder(g_pristine, g);
+
+ reduceGraphEquivalences(g, cc);
+
+ removeRedundancy(g, som);
+
if (rm && has_managed_reports(g)) {
- pruneHighlanderDominated(g, *rm);
- }
-
- if (!isImplementableNFA(g, rm, cc)) {
- DEBUG_PRINTF("reductions made graph unimplementable, roll back\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- }
-}
-
-u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc) {
+ pruneHighlanderDominated(g, *rm);
+ }
+
+ if (!isImplementableNFA(g, rm, cc)) {
+ DEBUG_PRINTF("reductions made graph unimplementable, roll back\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ }
+}
+
+u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc) {
if (!has_managed_reports(g)) {
- rm = nullptr;
- } else {
- assert(rm);
- }
-
- const bool impl_test_only = true;
- const map<u32, u32> fixed_depth_tops; // empty
- const map<u32, vector<vector<CharReach>>> triggers; // empty
-
+ rm = nullptr;
+ } else {
+ assert(rm);
+ }
+
+ const bool impl_test_only = true;
+ const map<u32, u32> fixed_depth_tops; // empty
+ const map<u32, vector<vector<CharReach>>> triggers; // empty
+
unordered_map<NFAVertex, u32> state_ids;
- vector<BoundedRepeatData> repeats;
+ vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
- unique_ptr<NGHolder> h
- = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
- state_ids, repeats, tops);
-
+ unique_ptr<NGHolder> h
+ = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
+ state_ids, repeats, tops);
+
if (!h || countStates(state_ids) > NFA_MAX_STATES) {
- DEBUG_PRINTF("not constructible\n");
- return NFA_MAX_ACCEL_STATES + 1;
- }
-
- assert(h->kind == g.kind);
-
- // Should have no bearing on accel calculation, so we leave these empty.
- const set<NFAVertex> zombies;
+ DEBUG_PRINTF("not constructible\n");
+ return NFA_MAX_ACCEL_STATES + 1;
+ }
+
+ assert(h->kind == g.kind);
+
+ // Should have no bearing on accel calculation, so we leave these empty.
+ const set<NFAVertex> zombies;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
-
- return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
- tops, zombies, cc);
-}
-
-} // namespace ue2
+
+ return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
+ tops, zombies, cc);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h
index 7eba2eff06..58a05ecb3e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h
@@ -1,147 +1,147 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Limex NFA construction code.
- */
-
-#ifndef NG_LIMEX_H
-#define NG_LIMEX_H
-
-#include "ue2common.h"
-#include "som/som.h"
+ * \brief Limex NFA construction code.
+ */
+
+#ifndef NG_LIMEX_H
+#define NG_LIMEX_H
+
+#include "ue2common.h"
+#include "som/som.h"
#include "util/bytecode_ptr.h"
-
-#include <map>
-#include <memory>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class CharReach;
-class NG;
-class NGHolder;
-class ReportManager;
-struct CompileContext;
-
+
+#include <map>
+#include <memory>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class CharReach;
+class NG;
+class NGHolder;
+class ReportManager;
+struct CompileContext;
+
/**
* \brief Determine if the given graph is implementable as an NFA.
- *
- * Returns zero if the NFA is not implementable (usually because it has too
- * many states for any of our models). Otherwise returns the number of states.
- *
- * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
- * NFA_INFIX use unmanaged rose-local reports.
- */
-u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc);
-
+ *
+ * Returns zero if the NFA is not implementable (usually too many states for
+ * any model). Otherwise non-zero: the state count, or 1 for small graphs.
+ *
+ * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
+ * NFA_INFIX use unmanaged rose-local reports.
+ */
+u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc);
+
/**
* \brief Late-stage graph reductions.
- *
- * This will call \ref removeRedundancy and apply its changes to the given
+ *
+ * This will call \ref removeRedundancy and apply its changes to the given
* holder only if it is implementable afterwards.
*/
void reduceImplementableGraph(NGHolder &g, som_type som,
const ReportManager *rm,
- const CompileContext &cc);
-
-/**
- * \brief For a given graph, count the number of accel states it will have in
- * an implementation.
- *
- * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an
- * implementation would not be constructible.
- */
-u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc);
-
+ const CompileContext &cc);
+
+/**
+ * \brief For a given graph, count the number of accel states it will have in
+ * an implementation.
+ *
+ * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an
+ * implementation would not be constructible.
+ */
+u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc);
+
/**
* \brief Construct an NFA from the given graph.
- *
- * Returns zero if the NFA is not implementable (usually because it has too
- * many states for any of our models). Otherwise returns the number of states.
- *
- * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
- * NFA_INFIX use unmanaged rose-local reports.
- *
- * Note: this variant of the function allows a model to be specified with the
- * \a hint parameter.
- */
+ *
+ * Returns the constructed NFA. Presumably returns an empty pointer if the NFA
+ * is not implementable (e.g. too many states for any model) - TODO confirm.
+ *
+ * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
+ * NFA_INFIX use unmanaged rose-local reports.
+ *
+ * Note: this variant takes no model hint; the variant with a \a hint parameter
+ * is only declared when RELEASE_BUILD is not defined.
+ */
bytecode_ptr<NFA>
-constructNFA(const NGHolder &g, const ReportManager *rm,
- const std::map<u32, u32> &fixed_depth_tops,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &g, const ReportManager *rm,
+ const std::map<u32, u32> &fixed_depth_tops,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
bool compress_state, bool &fast, const CompileContext &cc);
-
+
/**
* \brief Build a reverse NFA from the graph given, which should have already
- * been reversed.
- *
- * Used for reverse NFAs used in SOM mode.
- */
+ * been reversed.
+ *
+ * Used for reverse NFAs used in SOM mode.
+ */
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h,
const CompileContext &cc);
-
-#ifndef RELEASE_BUILD
-
+
+#ifndef RELEASE_BUILD
+
/**
* \brief Construct an NFA (with model type hint) from the given graph.
- *
- * Returns zero if the NFA is not implementable (usually because it has too
- * many states for any of our models). Otherwise returns the number of states.
- *
- * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
- * NFA_INFIX use unmanaged rose-local reports.
- *
- * Note: this variant of the function allows a model to be specified with the
- * \a hint parameter.
- */
+ *
+ * Returns the constructed NFA. Presumably returns an empty pointer if the NFA
+ * is not implementable (e.g. too many states for any model) - TODO confirm.
+ *
+ * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
+ * NFA_INFIX use unmanaged rose-local reports.
+ *
+ * Note: this variant of the function allows a model to be specified with the
+ * \a hint parameter.
+ */
bytecode_ptr<NFA>
-constructNFA(const NGHolder &g, const ReportManager *rm,
- const std::map<u32, u32> &fixed_depth_tops,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &g, const ReportManager *rm,
+ const std::map<u32, u32> &fixed_depth_tops,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
bool compress_state, bool &fast, u32 hint, const CompileContext &cc);
-
+
/**
* \brief Build a reverse NFA (with model type hint) from the graph given,
- * which should have already been reversed.
- *
- * Used for reverse NFAs used in SOM mode.
- */
+ * which should have already been reversed.
+ *
+ * Used for reverse NFAs used in SOM mode.
+ */
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, u32 hint,
const CompileContext &cc);
-
-#endif // RELEASE_BUILD
-
-} // namespace ue2
-
-#endif // NG_METEOR_H
+
+#endif // RELEASE_BUILD
+
+} // namespace ue2
+
+#endif // NG_LIMEX_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
index f1f829f2c1..ca393131bc 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
@@ -1,141 +1,141 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA acceleration analysis code.
- */
-#include "ng_limex_accel.h"
-
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ng_util.h"
-#include "ue2common.h"
-
-#include "nfa/accel.h"
-
-#include "util/bitutils.h" // for CASE_CLEAR
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA acceleration analysis code.
+ */
+#include "ng_limex_accel.h"
+
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ng_util.h"
+#include "ue2common.h"
+
+#include "nfa/accel.h"
+
+#include "util/bitutils.h" // for CASE_CLEAR
+#include "util/charreach.h"
#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/small_vector.h"
#include "util/target_info.h"
-
-#include <algorithm>
-#include <map>
-
+
+#include <algorithm>
+#include <map>
+
#include <boost/range/adaptor/map.hpp>
-using namespace std;
+using namespace std;
using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-#define WIDE_FRIEND_MIN 200
-
-static
-void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
- const flat_set<NFAVertex> &cands,
- const flat_set<NFAVertex> &preds,
- flat_set<NFAVertex> *next_cands,
- flat_set<NFAVertex> *next_preds,
- flat_set<NFAVertex> *friends) {
- for (auto v : cands) {
- if (contains(preds, v)) {
- continue;
- }
-
- const CharReach &acr = g[v].char_reach;
+
+namespace ue2 {
+
+#define WIDE_FRIEND_MIN 200
+
+static
+void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
+ const flat_set<NFAVertex> &cands,
+ const flat_set<NFAVertex> &preds,
+ flat_set<NFAVertex> *next_cands,
+ flat_set<NFAVertex> *next_preds,
+ flat_set<NFAVertex> *friends) {
+ for (auto v : cands) {
+ if (contains(preds, v)) {
+ continue;
+ }
+
+ const CharReach &acr = g[v].char_reach;
DEBUG_PRINTF("checking %zu\n", g[v].index);
-
- if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
- DEBUG_PRINTF("bad reach %zu\n", acr.count());
- continue;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!contains(preds, u)) {
- DEBUG_PRINTF("bad pred\n");
- goto next_cand;
- }
- }
-
- next_preds->insert(v);
- insert(next_cands, adjacent_vertices(v, g));
-
+
+ if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
+ DEBUG_PRINTF("bad reach %zu\n", acr.count());
+ continue;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!contains(preds, u)) {
+ DEBUG_PRINTF("bad pred\n");
+ goto next_cand;
+ }
+ }
+
+ next_preds->insert(v);
+ insert(next_cands, adjacent_vertices(v, g));
+
DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
- friends->insert(v);
- next_cand:;
- }
-}
-
-void findAccelFriends(const NGHolder &g, NFAVertex v,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- u32 offset, flat_set<NFAVertex> *friends) {
- /* A friend of an accel state is a successor state which can only be on when
- * the accel is on. This requires that it has a subset of the accel state's
- * preds and a charreach which is a subset of the accel state.
- *
- * A friend can be safely ignored when accelerating provided there is
- * sufficient back-off. A friend is useful if it has a wide reach.
- */
-
- /* BR cyclic states which may go stale cannot have friends as they may
- * suddenly turn off leading their so-called friends stranded and alone.
- * TODO: restrict to only stale going BR cyclics
- */
- if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
- return;
- }
-
- u32 friend_depth = offset + 1;
-
- flat_set<NFAVertex> preds;
- insert(&preds, inv_adjacent_vertices(v, g));
- const CharReach &cr = g[v].char_reach;
-
- flat_set<NFAVertex> cands;
- insert(&cands, adjacent_vertices(v, g));
-
- flat_set<NFAVertex> next_preds;
- flat_set<NFAVertex> next_cands;
- for (u32 i = 0; i < friend_depth; i++) {
- findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
- friends);
- preds.insert(next_preds.begin(), next_preds.end());
- next_preds.clear();
- cands.swap(next_cands);
- next_cands.clear();
- }
-}
-
-static
+ friends->insert(v);
+ next_cand:;
+ }
+}
+
+void findAccelFriends(const NGHolder &g, NFAVertex v,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ u32 offset, flat_set<NFAVertex> *friends) {
+ /* A friend of an accel state is a successor state which can only be on when
+ * the accel is on. This requires that it has a subset of the accel state's
+ * preds and a charreach which is a subset of the accel state.
+ *
+ * A friend can be safely ignored when accelerating provided there is
+ * sufficient back-off. A friend is useful if it has a wide reach.
+ */
+
+ /* BR cyclic states which may go stale cannot have friends as they may
+     * suddenly turn off, leaving their so-called friends stranded and alone.
+ * TODO: restrict to only stale going BR cyclics
+ */
+ if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
+ return;
+ }
+
+ u32 friend_depth = offset + 1;
+
+ flat_set<NFAVertex> preds;
+ insert(&preds, inv_adjacent_vertices(v, g));
+ const CharReach &cr = g[v].char_reach;
+
+ flat_set<NFAVertex> cands;
+ insert(&cands, adjacent_vertices(v, g));
+
+ flat_set<NFAVertex> next_preds;
+ flat_set<NFAVertex> next_cands;
+ for (u32 i = 0; i < friend_depth; i++) {
+ findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
+ friends);
+ preds.insert(next_preds.begin(), next_preds.end());
+ next_preds.clear();
+ cands.swap(next_cands);
+ next_cands.clear();
+ }
+}
+
+static
void findPaths(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr,
vector<vector<CharReach>> *paths,
@@ -149,30 +149,30 @@ void findPaths(const NGHolder &g, NFAVertex v,
paths->push_back({});
if (!generates_callbacks(g) || v == g.acceptEod) {
paths->back().push_back(CharReach()); /* red tape options */
- }
+ }
return;
- }
-
+ }
+
/* for the escape 'literals' we want to use the minimal cr so we
* can be more selective */
const CharReach &cr = refined_cr[g[v].index];
-
+
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|| hasSelfLoop(v, g)) {
/* give up on pushing past this point */
paths->push_back({cr});
return;
- }
-
+ }
+
vector<vector<CharReach>> curr;
- for (auto w : adjacent_vertices_range(v, g)) {
+ for (auto w : adjacent_vertices_range(v, g)) {
if (contains(forbidden, w)) {
/* path has looped back to one of the active+boring acceleration
* states. We can ignore this path if we have sufficient back-
* off. */
paths->push_back({cr});
- continue;
- }
+ continue;
+ }
u32 new_depth = depth - 1;
do {
@@ -183,55 +183,55 @@ void findPaths(const NGHolder &g, NFAVertex v,
for (auto &c : curr) {
c.push_back(cr);
paths->push_back(std::move(c));
- }
- }
-}
-
+ }
+ }
+}
+
namespace {
struct SAccelScheme {
SAccelScheme(CharReach cr_in, u32 offset_in)
: cr(std::move(cr_in)), offset(offset_in) {
assert(offset <= MAX_ACCEL_DEPTH);
- }
-
+ }
+
SAccelScheme() {}
-
+
bool operator<(const SAccelScheme &b) const {
const SAccelScheme &a = *this;
const size_t a_count = cr.count(), b_count = b.cr.count();
if (a_count != b_count) {
return a_count < b_count;
- }
-
+ }
+
/* TODO: give bonus if one is a 'caseless' character */
ORDER_CHECK(offset);
ORDER_CHECK(cr);
- return false;
- }
-
+ return false;
+ }
+
CharReach cr = CharReach::dot();
u32 offset = MAX_ACCEL_DEPTH + 1;
};
-}
-
+}
+
/**
* \brief Limit on the number of (recursive) calls to findBestInternal().
*/
static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
-static
+static
void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
vector<vector<CharReach>>::const_iterator pe,
size_t *num_calls, const SAccelScheme &curr,
SAccelScheme *best) {
assert(curr.offset <= MAX_ACCEL_DEPTH);
-
+
if (++(*num_calls) > MAX_FINDBEST_CALLS) {
DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
return;
- }
-
+ }
+
DEBUG_PRINTF("paths left %zu\n", pe - pb);
if (pb == pe) {
if (curr < *best) {
@@ -241,10 +241,10 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
best->offset);
}
return;
- }
-
+ }
+
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
-
+
small_vector<SAccelScheme, 10> priority_path;
priority_path.reserve(pb->size());
u32 i = 0;
@@ -255,8 +255,8 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
continue;
}
priority_path.push_back(move(as));
- }
-
+ }
+
sort(priority_path.begin(), priority_path.end());
for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
auto jt = next(it);
@@ -267,9 +267,9 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
}
priority_path.erase(next(it), jt);
DEBUG_PRINTF("||%zu\n", it->cr.count());
- }
+ }
DEBUG_PRINTF("---\n");
-
+
for (const SAccelScheme &in : priority_path) {
DEBUG_PRINTF("in: count %zu\n", in.cr.count());
if (*best < in) {
@@ -277,14 +277,14 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
continue;
}
findBestInternal(pb + 1, pe, num_calls, in, best);
-
+
if (curr.cr == best->cr) {
return; /* could only get better by offset */
}
- }
-}
-
-static
+ }
+}
+
+static
SAccelScheme findBest(const vector<vector<CharReach>> &paths,
const CharReach &terminating) {
SAccelScheme curr(terminating, 0U);
@@ -296,52 +296,52 @@ SAccelScheme findBest(const vector<vector<CharReach>> &paths,
best.cr.count(), describeClass(best.cr).c_str(), best.offset);
return best;
}
-
+
namespace {
struct DAccelScheme {
DAccelScheme(CharReach cr_in, u32 offset_in)
: double_cr(std::move(cr_in)), double_offset(offset_in) {
assert(double_offset <= MAX_ACCEL_DEPTH);
}
-
+
bool operator<(const DAccelScheme &b) const {
const DAccelScheme &a = *this;
-
+
size_t a_dcount = a.double_cr.count();
size_t b_dcount = b.double_cr.count();
-
+
assert(!a.double_byte.empty() || a_dcount || a.double_offset);
assert(!b.double_byte.empty() || b_dcount || b.double_offset);
-
+
if (a_dcount != b_dcount) {
return a_dcount < b_dcount;
}
-
+
if (!a_dcount) {
bool cd_a = buildDvermMask(a.double_byte);
bool cd_b = buildDvermMask(b.double_byte);
if (cd_a != cd_b) {
return cd_a > cd_b;
- }
- }
-
+ }
+ }
+
ORDER_CHECK(double_byte.size());
ORDER_CHECK(double_offset);
-
+
/* TODO: give bonus if one is a 'caseless' character */
ORDER_CHECK(double_byte);
ORDER_CHECK(double_cr);
-
+
return false;
- }
-
+ }
+
flat_set<pair<u8, u8>> double_byte;
CharReach double_cr;
u32 double_offset = 0;
};
-}
-
-static
+}
+
+static
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
const CharReach &cr_2_in, u32 offset_in) {
cr_1 &= ~as.double_cr;
@@ -352,29 +352,29 @@ DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
DEBUG_PRINTF("empty first element\n");
ENSURE_AT_LEAST(&as.double_offset, offset);
return as;
- }
+ }
if (cr_2_in != cr_2 || cr_2.none()) {
offset = offset_in + 1;
- }
-
+ }
+
size_t two_count = cr_1.count() * cr_2.count();
-
+
DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
if (!two_count) {
DEBUG_PRINTF("empty element\n");
ENSURE_AT_LEAST(&as.double_offset, offset);
return as;
- }
-
+ }
+
if (two_count > DOUBLE_SHUFTI_LIMIT) {
if (cr_2.count() < cr_1.count()) {
as.double_cr |= cr_2;
offset = offset_in + 1;
} else {
as.double_cr |= cr_1;
- }
+ }
} else {
for (auto i = cr_1.find_first(); i != CharReach::npos;
i = cr_1.find_next(i)) {
@@ -382,145 +382,145 @@ DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
j = cr_2.find_next(j)) {
as.double_byte.emplace(i, j);
}
- }
- }
-
+ }
+ }
+
ENSURE_AT_LEAST(&as.double_offset, offset);
DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
as.double_byte.size(), as.double_cr.count(), as.double_offset);
return as;
-}
-
-static
+}
+
+static
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
- vector<vector<CharReach> >::const_iterator pe,
+ vector<vector<CharReach> >::const_iterator pe,
const DAccelScheme &curr, DAccelScheme *best) {
assert(curr.double_offset <= MAX_ACCEL_DEPTH);
- DEBUG_PRINTF("paths left %zu\n", pe - pb);
+ DEBUG_PRINTF("paths left %zu\n", pe - pb);
DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
curr.double_byte.size(), curr.double_cr.count(),
curr.double_offset);
- if (pb == pe) {
+ if (pb == pe) {
if (curr < *best) {
*best = curr;
DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
best->double_byte.size(), best->double_cr.count(),
best->double_offset);
}
- return;
- }
-
- DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
-
+ return;
+ }
+
+ DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
+
small_vector<DAccelScheme, 10> priority_path;
priority_path.reserve(pb->size());
- u32 i = 0;
+ u32 i = 0;
for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
- ++p, i++) {
+ ++p, i++) {
DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
if (*best < as) {
DEBUG_PRINTF("worse\n");
continue;
}
priority_path.push_back(move(as));
- }
-
- sort(priority_path.begin(), priority_path.end());
+ }
+
+ sort(priority_path.begin(), priority_path.end());
DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
best->double_byte.size(), best->double_cr.count(),
best->double_offset);
-
+
for (const DAccelScheme &in : priority_path) {
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
in.double_byte.size(), in.double_cr.count(),
in.double_offset);
if (*best < in) {
- DEBUG_PRINTF("worse\n");
- continue;
- }
+ DEBUG_PRINTF("worse\n");
+ continue;
+ }
findDoubleBest(pb + 1, pe, in, best);
- }
-}
-
-#ifdef DEBUG
-static
+ }
+}
+
+#ifdef DEBUG
+static
void dumpPaths(const vector<vector<CharReach>> &paths) {
for (const auto &path : paths) {
- DEBUG_PRINTF("path: [");
+ DEBUG_PRINTF("path: [");
for (const auto &cr : path) {
- printf(" [");
+ printf(" [");
describeClass(stdout, cr, 20, CC_OUT_TEXT);
- printf("]");
- }
- printf(" ]\n");
- }
-}
-#endif
-
-static
+ printf("]");
+ }
+ printf(" ]\n");
+ }
+}
+#endif
+
+static
void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
- /* paths segments which are a superset of an earlier segment should never be
- * picked as an acceleration segment -> to improve processing just replace
- * with dot */
+ /* paths segments which are a superset of an earlier segment should never be
+ * picked as an acceleration segment -> to improve processing just replace
+ * with dot */
for (auto &p : paths) {
for (auto it = p.begin(); it != p.end(); ++it) {
for (auto jt = next(it); jt != p.end(); ++jt) {
- if (it->isSubsetOf(*jt)) {
- *jt = CharReach::dot();
- }
- }
- }
- }
-}
-
-static
+ if (it->isSubsetOf(*jt)) {
+ *jt = CharReach::dot();
+ }
+ }
+ }
+ }
+}
+
+static
void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
- /* try to unify paths which only differ in the last segment */
+ /* try to unify paths which only differ in the last segment */
for (vector<vector<CharReach> >::iterator p = paths.begin();
p != paths.end() && p + 1 != paths.end();) {
- vector<CharReach> &a = *p;
- vector<CharReach> &b = *(p + 1);
-
- if (a.size() != b.size()) {
- ++p;
- continue;
- }
-
- u32 i = 0;
- for (; i < a.size() - 1; i++) {
- if (a[i] != b[i]) {
- break;
- }
- }
- if (i == a.size() - 1) {
- /* we can unify these paths */
- a[i] |= b[i];
+ vector<CharReach> &a = *p;
+ vector<CharReach> &b = *(p + 1);
+
+ if (a.size() != b.size()) {
+ ++p;
+ continue;
+ }
+
+ u32 i = 0;
+ for (; i < a.size() - 1; i++) {
+ if (a[i] != b[i]) {
+ break;
+ }
+ }
+ if (i == a.size() - 1) {
+ /* we can unify these paths */
+ a[i] |= b[i];
paths.erase(p + 1);
- } else {
- ++p;
- }
- }
-}
-
-static
+ } else {
+ ++p;
+ }
+ }
+}
+
+static
void improvePaths(vector<vector<CharReach> > &paths) {
-#ifdef DEBUG
- DEBUG_PRINTF("orig paths\n");
+#ifdef DEBUG
+ DEBUG_PRINTF("orig paths\n");
dumpPaths(paths);
-#endif
- blowoutPathsLessStrictSegment(paths);
-
+#endif
+ blowoutPathsLessStrictSegment(paths);
+
sort(paths.begin(), paths.end());
-
- unifyPathsLastSegment(paths);
-
-#ifdef DEBUG
- DEBUG_PRINTF("opt paths\n");
+
+ unifyPathsLastSegment(paths);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("opt paths\n");
dumpPaths(paths);
-#endif
-}
-
+#endif
+}
+
#define MAX_DOUBLE_ACCEL_PATHS 10
static
@@ -611,227 +611,227 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
return rv;
}
-AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
- const vector<CharReach> &refined_cr,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
+ const vector<CharReach> &refined_cr,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
bool allow_wide, bool look_for_double_byte) {
- CharReach terminating;
- for (auto v : verts) {
- if (!hasSelfLoop(v, g)) {
- DEBUG_PRINTF("no self loop\n");
- return AccelScheme(); /* invalid scheme */
- }
-
- // check that this state is reachable on most characters
- terminating |= ~g[v].char_reach;
- }
-
- DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
- size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
- : ACCEL_MAX_STOP_CHAR;
- if (terminating.count() > limit) {
- return AccelScheme(); /* invalid scheme */
- }
-
+ CharReach terminating;
+ for (auto v : verts) {
+ if (!hasSelfLoop(v, g)) {
+ DEBUG_PRINTF("no self loop\n");
+ return AccelScheme(); /* invalid scheme */
+ }
+
+ // check that this state is reachable on most characters
+ terminating |= ~g[v].char_reach;
+ }
+
+ DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
+ size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
+ : ACCEL_MAX_STOP_CHAR;
+ if (terminating.count() > limit) {
+ return AccelScheme(); /* invalid scheme */
+ }
+
vector<vector<CharReach>> paths;
- flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
-
- /* Note: we can not in general (TODO: ignore when possible) ignore entries
- * into the bounded repeat cyclic states as that is when the magic happens
- */
+ flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
+
+ /* Note: we can not in general (TODO: ignore when possible) ignore entries
+ * into the bounded repeat cyclic states as that is when the magic happens
+ */
for (auto v : br_cyclic | map_keys) {
- /* TODO: can allow if repeatMin <= 1 ? */
+ /* TODO: can allow if repeatMin <= 1 ? */
ignore_vert_set.erase(v);
- }
-
- for (auto v : verts) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v) {
- findPaths(g, w, refined_cr, &paths, ignore_vert_set,
- MAX_ACCEL_DEPTH);
- }
- }
- }
-
- /* paths built wrong: reverse them */
+ }
+
+ for (auto v : verts) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v) {
+ findPaths(g, w, refined_cr, &paths, ignore_vert_set,
+ MAX_ACCEL_DEPTH);
+ }
+ }
+ }
+
+ /* paths built wrong: reverse them */
for (auto &path : paths) {
reverse(path.begin(), path.end());
- }
-
+ }
+
return findBestAccelScheme(std::move(paths), terminating,
look_for_double_byte);
-}
-
-NFAVertex get_sds_or_proxy(const NGHolder &g) {
- DEBUG_PRINTF("looking for sds proxy\n");
- if (proper_out_degree(g.startDs, g)) {
- return g.startDs;
- }
-
+}
+
+NFAVertex get_sds_or_proxy(const NGHolder &g) {
+ DEBUG_PRINTF("looking for sds proxy\n");
+ if (proper_out_degree(g.startDs, g)) {
+ return g.startDs;
+ }
+
NFAVertex v = NGHolder::null_vertex();
- for (auto w : adjacent_vertices_range(g.start, g)) {
- if (w != g.startDs) {
- if (!v) {
- v = w;
- } else {
- return g.startDs;
- }
- }
- }
-
- if (!v) {
- return g.startDs;
- }
-
- while (true) {
- if (hasSelfLoop(v, g)) {
+ for (auto w : adjacent_vertices_range(g.start, g)) {
+ if (w != g.startDs) {
+ if (!v) {
+ v = w;
+ } else {
+ return g.startDs;
+ }
+ }
+ }
+
+ if (!v) {
+ return g.startDs;
+ }
+
+ while (true) {
+ if (hasSelfLoop(v, g)) {
DEBUG_PRINTF("woot %zu\n", g[v].index);
- return v;
- }
- if (out_degree(v, g) != 1) {
- break;
- }
- NFAVertex u = getSoleDestVertex(g, v);
- if (!g[u].char_reach.all()) {
- break;
- }
- v = u;
- }
-
- return g.startDs;
-}
-
-/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
-bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
- const vector<CharReach> &refined_cr,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- AccelScheme *as, bool allow_wide) {
- // For a state to be accelerable, our current criterion is that it be a
- // large character class with a self-loop and narrow set of possible other
- // successors (i.e. no special successors, union of successor reachability
- // is small).
- if (!hasSelfLoop(v, g)) {
- return false;
- }
-
- // check that this state is reachable on most characters
- /* we want to use the maximal reach here (in the graph) */
- CharReach terminating = g[v].char_reach;
- terminating.flip();
-
+ return v;
+ }
+ if (out_degree(v, g) != 1) {
+ break;
+ }
+ NFAVertex u = getSoleDestVertex(g, v);
+ if (!g[u].char_reach.all()) {
+ break;
+ }
+ v = u;
+ }
+
+ return g.startDs;
+}
+
+/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
+bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
+ const vector<CharReach> &refined_cr,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ AccelScheme *as, bool allow_wide) {
+ // For a state to be accelerable, our current criterion is that it be a
+ // large character class with a self-loop and narrow set of possible other
+ // successors (i.e. no special successors, union of successor reachability
+ // is small).
+ if (!hasSelfLoop(v, g)) {
+ return false;
+ }
+
+ // check that this state is reachable on most characters
+ /* we want to use the maximal reach here (in the graph) */
+ CharReach terminating = g[v].char_reach;
+ terminating.flip();
+
DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
- g[v].index, terminating.count(),
- allow_wide ? " (w)" : "");
-
- size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
- : ACCEL_MAX_STOP_CHAR;
- if (terminating.count() > limit) {
- DEBUG_PRINTF("too leaky\n");
- return false;
- }
-
- flat_set<NFAVertex> curr, next;
-
- insert(&curr, adjacent_vertices(v, g));
- curr.erase(v); // erase self-loop
-
- // We consider offsets of zero through three; this is fairly arbitrary at
- // present and could probably be increased (FIXME)
- /* WARNING: would/could do horrible things to compile time */
- bool stop = false;
- vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
- unsigned int depth;
- for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
- CharReach &cr = depthReach[depth];
- for (auto t : curr) {
- if (is_special(t, g)) {
- // We've bumped into the edge of the graph, so we should stop
- // searching.
- // Exception: iff our cyclic state is not a dot, than we can
- // safely accelerate towards an EOD accept.
-
- /* Exception: nfas that don't generate callbacks so accepts are
- * fine too */
- if (t == g.accept && !generates_callbacks(g)) {
- stop = true; // don't search beyond this depth
- continue;
- } else if (t == g.accept) {
- goto depth_done;
- }
-
- assert(t == g.acceptEod);
- stop = true; // don't search beyond this depth
- } else {
- // Non-special vertex
- insert(&next, adjacent_vertices(t, g));
- /* for the escape 'literals' we want to use the minimal cr so we
- * can be more selective */
- cr |= refined_cr[g[t].index];
- }
- }
-
- cr |= terminating;
- DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());
-
- curr.swap(next);
- next.clear();
- }
-
-depth_done:
-
- if (depth == 0) {
- return false;
- }
-
- DEBUG_PRINTF("selecting from depth 0..%u\n", depth);
-
- /* Look for the most awesome acceleration evar */
- for (unsigned int i = 0; i < depth; i++) {
- if (depthReach[i].none()) {
- DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
+ g[v].index, terminating.count(),
+ allow_wide ? " (w)" : "");
+
+ size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
+ : ACCEL_MAX_STOP_CHAR;
+ if (terminating.count() > limit) {
+ DEBUG_PRINTF("too leaky\n");
+ return false;
+ }
+
+ flat_set<NFAVertex> curr, next;
+
+ insert(&curr, adjacent_vertices(v, g));
+ curr.erase(v); // erase self-loop
+
+ // We consider offsets of zero through three; this is fairly arbitrary at
+ // present and could probably be increased (FIXME)
+ /* WARNING: would/could do horrible things to compile time */
+ bool stop = false;
+ vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
+ unsigned int depth;
+ for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
+ CharReach &cr = depthReach[depth];
+ for (auto t : curr) {
+ if (is_special(t, g)) {
+ // We've bumped into the edge of the graph, so we should stop
+ // searching.
+ // Exception: iff our cyclic state is not a dot, than we can
+ // safely accelerate towards an EOD accept.
+
+ /* Exception: nfas that don't generate callbacks so accepts are
+ * fine too */
+ if (t == g.accept && !generates_callbacks(g)) {
+ stop = true; // don't search beyond this depth
+ continue;
+ } else if (t == g.accept) {
+ goto depth_done;
+ }
+
+ assert(t == g.acceptEod);
+ stop = true; // don't search beyond this depth
+ } else {
+ // Non-special vertex
+ insert(&next, adjacent_vertices(t, g));
+ /* for the escape 'literals' we want to use the minimal cr so we
+ * can be more selective */
+ cr |= refined_cr[g[t].index];
+ }
+ }
+
+ cr |= terminating;
+ DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());
+
+ curr.swap(next);
+ next.clear();
+ }
+
+depth_done:
+
+ if (depth == 0) {
+ return false;
+ }
+
+ DEBUG_PRINTF("selecting from depth 0..%u\n", depth);
+
+ /* Look for the most awesome acceleration evar */
+ for (unsigned int i = 0; i < depth; i++) {
+ if (depthReach[i].none()) {
+ DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
*as = AccelScheme();
as->offset = i;
as->cr = CharReach();
- return true;
- }
- }
-
- // First, loop over our depths and see if we have a suitable 2-byte
- // caseful vermicelli option: this is the (second) fastest accel we have
- if (depth > 1) {
- for (unsigned int i = 0; i < (depth - 1); i++) {
- const CharReach &cra = depthReach[i];
- const CharReach &crb = depthReach[i + 1];
- if ((cra.count() == 1 && crb.count() == 1)
- || (cra.count() == 2 && crb.count() == 2
- && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
- DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
+ return true;
+ }
+ }
+
+ // First, loop over our depths and see if we have a suitable 2-byte
+ // caseful vermicelli option: this is the (second) fastest accel we have
+ if (depth > 1) {
+ for (unsigned int i = 0; i < (depth - 1); i++) {
+ const CharReach &cra = depthReach[i];
+ const CharReach &crb = depthReach[i + 1];
+ if ((cra.count() == 1 && crb.count() == 1)
+ || (cra.count() == 2 && crb.count() == 2
+ && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
+ DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
*as = AccelScheme();
as->offset = i;
- return true;
- }
- }
- }
-
- // Second option: a two-byte shufti (i.e. less than eight 2-byte
- // literals)
- if (depth > 1) {
- for (unsigned int i = 0; i < (depth - 1); i++) {
+ return true;
+ }
+ }
+ }
+
+ // Second option: a two-byte shufti (i.e. less than eight 2-byte
+ // literals)
+ if (depth > 1) {
+ for (unsigned int i = 0; i < (depth - 1); i++) {
if (depthReach[i].count() * depthReach[i+1].count()
<= DOUBLE_SHUFTI_LIMIT) {
- DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
+ DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
*as = AccelScheme();
as->offset = i;
- return true;
- }
- }
- }
-
+ return true;
+ }
+ }
+ }
+
// Look for offset accel schemes verm/shufti;
- vector<NFAVertex> verts(1, v);
+ vector<NFAVertex> verts(1, v);
*as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
- DEBUG_PRINTF("as width %zu\n", as->cr.count());
- return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("as width %zu\n", as->cr.count());
+ return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h
index f6f7f1b3cb..766cfabbe6 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA acceleration analysis code.
- */
-
-#ifndef NG_LIMEX_ACCEL_H
-#define NG_LIMEX_ACCEL_H
-
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA acceleration analysis code.
+ */
+
+#ifndef NG_LIMEX_ACCEL_H
+#define NG_LIMEX_ACCEL_H
+
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ue2common.h"
#include "nfa/accelcompile.h"
#include "util/accel_scheme.h"
-#include "util/charreach.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
-#include "util/order_check.h"
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-/* compile time accel defs */
-#define MAX_MERGED_ACCEL_STOPS 200
-#define ACCEL_MAX_STOP_CHAR 24
-#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
-
+#include "util/order_check.h"
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+/* compile time accel defs */
+#define MAX_MERGED_ACCEL_STOPS 200
+#define ACCEL_MAX_STOP_CHAR 24
+#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
+
// forward-declaration of CompileContext
struct CompileContext;
-void findAccelFriends(const NGHolder &g, NFAVertex v,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+void findAccelFriends(const NGHolder &g, NFAVertex v,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
u32 offset, flat_set<NFAVertex> *friends);
-
+
#define DOUBLE_SHUFTI_LIMIT 20
-
-NFAVertex get_sds_or_proxy(const NGHolder &g);
-
-AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
- const std::vector<CharReach> &refined_cr,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+
+NFAVertex get_sds_or_proxy(const NGHolder &g);
+
+AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
+ const std::vector<CharReach> &refined_cr,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
bool allow_wide, bool look_for_double_byte = false);
-
+
AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths,
const CharReach &terminating,
bool look_for_double_byte = false);
@@ -75,12 +75,12 @@ AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths,
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). If a
* single byte accel scheme is found it is placed into *as
*/
-bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
- const std::vector<CharReach> &refined_cr,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- AccelScheme *as, bool allow_wide);
-
-
-} // namespace ue2
-
-#endif
+bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
+ const std::vector<CharReach> &refined_cr,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ AccelScheme *as, bool allow_wide);
+
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
index d25ac43e87..3b8c17eaf9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
@@ -1,87 +1,87 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal analysis and scoring.
- */
-#include "ng_literal_analysis.h"
-
-#include "ng_holder.h"
-#include "ng_split.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "rose/rose_common.h"
-#include "util/compare.h"
-#include "util/depth.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal analysis and scoring.
+ */
+#include "ng_literal_analysis.h"
+
+#include "ng_holder.h"
+#include "ng_split.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "rose/rose_common.h"
+#include "util/compare.h"
+#include "util/depth.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/ue2_graph.h"
-#include "util/ue2string.h"
-
-#include <algorithm>
-#include <fstream>
-#include <queue>
-
-#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Maximum number of paths to generate. */
-static const u32 MAX_WIDTH = 11;
-
-/** Scoring adjustment for 'uniqueness' in literal. */
-static const u64a WEIGHT_OF_UNIQUENESS = 250;
-
-namespace {
-
-/* Small literal graph type used for the suffix tree used in
- * compressAndScore. */
-
-struct LitGraphVertexProps {
+#include "util/ue2string.h"
+
+#include <algorithm>
+#include <fstream>
+#include <queue>
+
+#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Maximum number of paths to generate. */
+static const u32 MAX_WIDTH = 11;
+
+/** Scoring adjustment for 'uniqueness' in literal. */
+static const u64a WEIGHT_OF_UNIQUENESS = 250;
+
+namespace {
+
+/* Small literal graph type used for the suffix tree used in
+ * compressAndScore. */
+
+struct LitGraphVertexProps {
LitGraphVertexProps() = default;
explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {}
- ue2_literal::elem c; // string element (char + bool)
+ ue2_literal::elem c; // string element (char + bool)
size_t index = 0; // managed by ue2_graph
-};
-
-struct LitGraphEdgeProps {
+};
+
+struct LitGraphEdgeProps {
LitGraphEdgeProps() = default;
- explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
- u64a score = NO_LITERAL_AT_EDGE_SCORE;
+ explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
+ u64a score = NO_LITERAL_AT_EDGE_SCORE;
size_t index = 0; // managed by ue2_graph
-};
-
+};
+
struct LitGraph
: public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> {
-
+
LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {}
const vertex_descriptor root;
@@ -91,399 +91,399 @@ struct LitGraph
typedef LitGraph::vertex_descriptor LitVertex;
typedef LitGraph::edge_descriptor LitEdge;
-typedef pair<LitVertex, NFAVertex> VertexPair;
-typedef std::queue<VertexPair> LitVertexQ;
-
-} // namespace
-
-#ifdef DUMP_SUPPORT
-
-/** \brief Dump the literal graph in Graphviz format. */
-static UNUSED
+typedef pair<LitVertex, NFAVertex> VertexPair;
+typedef std::queue<VertexPair> LitVertexQ;
+
+} // namespace
+
+#ifdef DUMP_SUPPORT
+
+/** \brief Dump the literal graph in Graphviz format. */
+static UNUSED
void dumpGraph(const char *filename, const LitGraph &lg) {
- ofstream fout(filename);
-
- fout << "digraph G {" << endl;
-
- for (auto v : vertices_range(lg)) {
+ ofstream fout(filename);
+
+ fout << "digraph G {" << endl;
+
+ for (auto v : vertices_range(lg)) {
fout << lg[v].index;
if (v == lg.root) {
- fout << "[label=\"ROOT\"];";
+ fout << "[label=\"ROOT\"];";
} else if (v == lg.sink) {
- fout << "[label=\"SINK\"];";
- } else {
- ue2_literal s;
- s.push_back(lg[v].c);
- fout << "[label=\"" << dumpString(s) << "\"];";
- }
- fout << endl;
- }
-
- for (const auto &e : edges_range(lg)) {
- LitVertex u = source(e, lg), v = target(e, lg);
+ fout << "[label=\"SINK\"];";
+ } else {
+ ue2_literal s;
+ s.push_back(lg[v].c);
+ fout << "[label=\"" << dumpString(s) << "\"];";
+ }
+ fout << endl;
+ }
+
+ for (const auto &e : edges_range(lg)) {
+ LitVertex u = source(e, lg), v = target(e, lg);
fout << lg[u].index << " -> " << lg[v].index << "[label=\""
<< lg[e].score << "\"]"
<< ";" << endl;
- }
-
- fout << "}" << endl;
-}
-
-#endif // DUMP_SUPPORT
-
-static
-bool allowExpand(size_t numItems, size_t totalPathsSoFar) {
- if (numItems == 0) {
- return false;
- }
-
- if (numItems + totalPathsSoFar > MAX_WIDTH) {
- return false;
- }
-
- return true;
-}
-
-static
+ }
+
+ fout << "}" << endl;
+}
+
+#endif // DUMP_SUPPORT
+
+static
+bool allowExpand(size_t numItems, size_t totalPathsSoFar) {
+ if (numItems == 0) {
+ return false;
+ }
+
+ if (numItems + totalPathsSoFar > MAX_WIDTH) {
+ return false;
+ }
+
+ return true;
+}
+
+static
LitVertex addToLitGraph(LitGraph &lg, LitVertex pred,
const ue2_literal::elem &c) {
- // Check if we already have this in the graph.
- for (auto v : adjacent_vertices_range(pred, lg)) {
+ // Check if we already have this in the graph.
+ for (auto v : adjacent_vertices_range(pred, lg)) {
if (v == lg.sink) {
- continue;
- }
- if (lg[v].c == c) {
- return v;
- }
- }
-
- LitVertex lv = add_vertex(LitGraphVertexProps(c), lg);
- add_edge(pred, lv, lg);
- return lv;
-}
-
-static
+ continue;
+ }
+ if (lg[v].c == c) {
+ return v;
+ }
+ }
+
+ LitVertex lv = add_vertex(LitGraphVertexProps(c), lg);
+ add_edge(pred, lv, lg);
+ return lv;
+}
+
+static
void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred,
const CharReach &cr, NFAVertex v) {
for (size_t i = cr.find_first(); i != CharReach::npos;
i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
-
- bool nocase = myislower(i) && cr.test(mytoupper(i));
- ue2_literal::elem c((char)i, nocase);
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+
+ bool nocase = myislower(i) && cr.test(mytoupper(i));
+ ue2_literal::elem c((char)i, nocase);
LitVertex lv = addToLitGraph(lg, pred, c);
- workQ.push(VertexPair(lv, v));
- }
-}
-
-static
+ workQ.push(VertexPair(lv, v));
+ }
+}
+
+static
void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g,
const NFAEdge &e) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
- const CharReach &cr = g[v].char_reach;
-
- if (!allowExpand(cr.count(), 0)) {
- return;
- }
-
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+ const CharReach &cr = g[v].char_reach;
+
+ if (!allowExpand(cr.count(), 0)) {
+ return;
+ }
+
addToQueue(workQ, lg, lg.root, cr, u);
-}
-
-static
-u32 crCardinality(const CharReach &cr) {
- // Special-case for handling dots, much faster than running the find_next
- // loop below.
- if (cr.all()) {
- return 230; // [^A-Z]
- }
-
- u32 rv = 0;
+}
+
+static
+u32 crCardinality(const CharReach &cr) {
+ // Special-case for handling dots, much faster than running the find_next
+ // loop below.
+ if (cr.all()) {
+ return 230; // [^A-Z]
+ }
+
+ u32 rv = 0;
for (size_t i = cr.find_first(); i != CharReach::npos;
i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
- rv++;
- }
-
- return rv;
-}
-
-/** Filter out literals that include other literals as suffixes. We do this by
- * identifying vertices connected to the sink and removing their other
- * out-edges. */
-static
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+ rv++;
+ }
+
+ return rv;
+}
+
+/** Filter out literals that include other literals as suffixes. We do this by
+ * identifying vertices connected to the sink and removing their other
+ * out-edges. */
+static
void filterLitGraph(LitGraph &lg) {
for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) {
remove_out_edge_if(v, [&lg](const LitEdge &e) {
return target(e, lg) != lg.sink;
- }, lg);
- }
-
- // We could do a DFS-and-prune here, if we wanted. Right now, we just
- // handle it in extractLiterals by throwing away paths that don't run all
- // the way from sink to root.
-}
-
-/** Extracts all the literals from the given literal graph. Walks the graph
- * from each predecessor of the sink (note: it's a suffix tree except for this
- * convenience) towards the source, storing each string as we go. */
-static
+ }, lg);
+ }
+
+ // We could do a DFS-and-prune here, if we wanted. Right now, we just
+ // handle it in extractLiterals by throwing away paths that don't run all
+ // the way from sink to root.
+}
+
+/** Extracts all the literals from the given literal graph. Walks the graph
+ * from each predecessor of the sink (note: it's a suffix tree except for this
+ * convenience) towards the source, storing each string as we go. */
+static
void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) {
- ue2_literal lit;
-
+ ue2_literal lit;
+
for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) {
- lit.clear();
+ lit.clear();
while (u != lg.root) {
- lit.push_back(lg[u].c);
- assert(in_degree(u, lg) <= 1);
- LitGraph::inv_adjacency_iterator ai2, ae2;
- tie(ai2, ae2) = inv_adjacent_vertices(u, lg);
- if (ai2 == ae2) {
- // Path has been cut, time for the next literal.
- goto next_literal;
- }
- u = *ai2;
- }
- s.insert(lit);
-next_literal:
- ;
- }
-}
-
-#ifndef NDEBUG
-static
-bool hasSuffixLiterals(const set<ue2_literal> &s) {
- for (auto it = s.begin(), ite = s.end(); it != ite; ++it) {
- for (auto jt = std::next(it); jt != ite; ++jt) {
- if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) {
- DEBUG_PRINTF("'%s' and '%s' have suffix issues\n",
- dumpString(*it).c_str(),
- dumpString(*jt).c_str());
- return true;
- }
- }
- }
- return false;
-}
-#endif
-
-static
-void processWorkQueue(const NGHolder &g, const NFAEdge &e,
- set<ue2_literal> &s) {
- if (is_special(target(e, g), g)) {
- return;
- }
-
- LitGraph lg;
-
- LitVertexQ workQ;
+ lit.push_back(lg[u].c);
+ assert(in_degree(u, lg) <= 1);
+ LitGraph::inv_adjacency_iterator ai2, ae2;
+ tie(ai2, ae2) = inv_adjacent_vertices(u, lg);
+ if (ai2 == ae2) {
+ // Path has been cut, time for the next literal.
+ goto next_literal;
+ }
+ u = *ai2;
+ }
+ s.insert(lit);
+next_literal:
+ ;
+ }
+}
+
+#ifndef NDEBUG
+static
+bool hasSuffixLiterals(const set<ue2_literal> &s) {
+ for (auto it = s.begin(), ite = s.end(); it != ite; ++it) {
+ for (auto jt = std::next(it); jt != ite; ++jt) {
+ if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) {
+ DEBUG_PRINTF("'%s' and '%s' have suffix issues\n",
+ dumpString(*it).c_str(),
+ dumpString(*jt).c_str());
+ return true;
+ }
+ }
+ }
+ return false;
+}
+#endif
+
+static
+void processWorkQueue(const NGHolder &g, const NFAEdge &e,
+ set<ue2_literal> &s) {
+ if (is_special(target(e, g), g)) {
+ return;
+ }
+
+ LitGraph lg;
+
+ LitVertexQ workQ;
initWorkQueue(workQ, lg, g, e);
-
- while (!workQ.empty()) {
- const LitVertex lv = workQ.front().first;
- const NFAVertex &t = workQ.front().second;
- const CharReach &cr = g[t].char_reach;
-
- u32 cr_card = crCardinality(cr);
- size_t numItems = cr_card * in_degree(t, g);
+
+ while (!workQ.empty()) {
+ const LitVertex lv = workQ.front().first;
+ const NFAVertex &t = workQ.front().second;
+ const CharReach &cr = g[t].char_reach;
+
+ u32 cr_card = crCardinality(cr);
+ size_t numItems = cr_card * in_degree(t, g);
size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1;
-
- if (g[t].index == NODE_START) {
- // reached start, add to literal set
+
+ if (g[t].index == NODE_START) {
+ // reached start, add to literal set
add_edge_if_not_present(lv, lg.sink, lg);
- goto next_work_elem;
- }
-
- // Expand next vertex
- if (allowExpand(numItems, committed_count)) {
- for (auto u : inv_adjacent_vertices_range(t, g)) {
+ goto next_work_elem;
+ }
+
+ // Expand next vertex
+ if (allowExpand(numItems, committed_count)) {
+ for (auto u : inv_adjacent_vertices_range(t, g)) {
addToQueue(workQ, lg, lv, cr, u);
- }
- goto next_work_elem;
- }
-
- // Expand this vertex
- if (allowExpand(cr_card, committed_count)) {
- for (size_t i = cr.find_first(); i != CharReach::npos;
- i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
-
- bool nocase = myislower(i) && cr.test(mytoupper(i));
- ue2_literal::elem c((char)i, nocase);
+ }
+ goto next_work_elem;
+ }
+
+ // Expand this vertex
+ if (allowExpand(cr_card, committed_count)) {
+ for (size_t i = cr.find_first(); i != CharReach::npos;
+ i = cr.find_next(i)) {
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+
+ bool nocase = myislower(i) && cr.test(mytoupper(i));
+ ue2_literal::elem c((char)i, nocase);
LitVertex lt = addToLitGraph(lg, lv, c);
add_edge_if_not_present(lt, lg.sink, lg);
- }
- goto next_work_elem;
- }
-
- // add to literal set
+ }
+ goto next_work_elem;
+ }
+
+ // add to literal set
add_edge_if_not_present(lv, lg.sink, lg);
- next_work_elem:
- workQ.pop();
- }
-
+ next_work_elem:
+ workQ.pop();
+ }
+
filterLitGraph(lg);
//dumpGraph("litgraph.dot", lg);
extractLiterals(lg, s);
-
- // Our literal set should contain no literal that is a suffix of another.
- assert(!hasSuffixLiterals(s));
-
+
+ // Our literal set should contain no literal that is a suffix of another.
+ assert(!hasSuffixLiterals(s));
+
DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index,
- g[source(e, g)].index, g[target(e, g)].index, s.size());
-}
-
+ g[source(e, g)].index, g[target(e, g)].index, s.size());
+}
+
bool bad_mixed_sensitivity(const ue2_literal &s) {
/* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end,
* we should be able to handle it */
return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH;
}
-static
-u64a litUniqueness(const string &s) {
- CharReach seen(s);
- return seen.count();
-}
-
-/** Count the significant bits of this literal (i.e. seven for nocase alpha,
- * eight for everything else). */
-static
-u64a litCountBits(const ue2_literal &lit) {
- u64a n = 0;
- for (const auto &c : lit) {
- n += c.nocase ? 7 : 8;
- }
- return n;
-}
-
-/** Returns a fairly arbitrary score for the given literal, used to compare the
- * suitability of different candidates. */
-static
-u64a scoreLiteral(const ue2_literal &s) {
- // old scoring scheme: SUM(s in S: 1/s.len()^2)
- // now weight (currently 75/25) with number of unique chars
- // in the string
- u64a len = litCountBits(s);
- u64a lenUnique = litUniqueness(s.get_string()) * 8;
-
- u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len +
- WEIGHT_OF_UNIQUENESS * lenUnique;
- weightedLen /= 8;
-
- DEBUG_PRINTF("scored literal '%s' %llu\n",
- escapeString(s.get_string()).c_str(), weightedLen);
-
- return weightedLen;
-}
-
-
-/**
- * calculateScore has the following properties:
- * - score of literal is the same as the score of the reversed literal;
- * - score of substring of literal is worse than the original literal's score;
- * - score of any literal should be non-zero.
- */
-static
-u64a calculateScore(const ue2_literal &s) {
- if (s.empty()) {
- return NO_LITERAL_AT_EDGE_SCORE;
- }
-
- u64a weightedLen = scoreLiteral(s);
-
- DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen);
- u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen);
-
- if (!rv) {
- rv = 1;
- }
- DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv);
- return rv;
-}
-
-/** Adds a literal in reverse order, building up a suffix tree. */
-static
+static
+u64a litUniqueness(const string &s) {
+ CharReach seen(s);
+ return seen.count();
+}
+
+/** Count the significant bits of this literal (i.e. seven for nocase alpha,
+ * eight for everything else). */
+static
+u64a litCountBits(const ue2_literal &lit) {
+ u64a n = 0;
+ for (const auto &c : lit) {
+ n += c.nocase ? 7 : 8;
+ }
+ return n;
+}
+
+/** Returns a fairly arbitrary score for the given literal, used to compare the
+ * suitability of different candidates. */
+static
+u64a scoreLiteral(const ue2_literal &s) {
+ // old scoring scheme: SUM(s in S: 1/s.len()^2)
+ // now weight (currently 75/25) with number of unique chars
+ // in the string
+ u64a len = litCountBits(s);
+ u64a lenUnique = litUniqueness(s.get_string()) * 8;
+
+ u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len +
+ WEIGHT_OF_UNIQUENESS * lenUnique;
+ weightedLen /= 8;
+
+ DEBUG_PRINTF("scored literal '%s' %llu\n",
+ escapeString(s.get_string()).c_str(), weightedLen);
+
+ return weightedLen;
+}
+
+
+/**
+ * calculateScore has the following properties:
+ * - score of literal is the same as the score of the reversed literal;
+ * - score of substring of literal is worse than the original literal's score;
+ * - score of any literal should be non-zero.
+ */
+static
+u64a calculateScore(const ue2_literal &s) {
+ if (s.empty()) {
+ return NO_LITERAL_AT_EDGE_SCORE;
+ }
+
+ u64a weightedLen = scoreLiteral(s);
+
+ DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen);
+ u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen);
+
+ if (!rv) {
+ rv = 1;
+ }
+ DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv);
+ return rv;
+}
+
+/** Adds a literal in reverse order, building up a suffix tree. */
+static
void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) {
- DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str());
- ue2_literal suffix;
+ DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str());
+ ue2_literal suffix;
LitVertex v = lg.root;
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- suffix.push_back(*it);
- LitVertex w;
- for (auto v2 : adjacent_vertices_range(v, lg)) {
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ suffix.push_back(*it);
+ LitVertex w;
+ for (auto v2 : adjacent_vertices_range(v, lg)) {
if (v2 != lg.sink && lg[v2].c == *it) {
- w = v2;
- goto next_char;
- }
- }
- w = add_vertex(LitGraphVertexProps(*it), lg);
- add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg);
-next_char:
- v = w;
- }
-
- // Wire the last vertex to the sink.
+ w = v2;
+ goto next_char;
+ }
+ }
+ w = add_vertex(LitGraphVertexProps(*it), lg);
+ add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg);
+next_char:
+ v = w;
+ }
+
+ // Wire the last vertex to the sink.
add_edge(v, lg.sink, lg);
-}
-
-static
-void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg,
+}
+
+static
+void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg,
set<ue2_literal> &s) {
- for (const auto &e : cutset) {
+ for (const auto &e : cutset) {
LitVertex u = source(e, lg);
LitVertex v = target(e, lg);
- ue2_literal lit;
- lit.push_back(lg[v].c);
+ ue2_literal lit;
+ lit.push_back(lg[v].c);
while (u != lg.root) {
- lit.push_back(lg[u].c);
- assert(in_degree(u, lg) == 1);
- LitGraph::inv_adjacency_iterator ai, ae;
- tie(ai, ae) = inv_adjacent_vertices(u, lg);
- if (ai == ae) {
- // Path has been cut, time for the next literal.
- goto next_literal;
- }
- u = *ai;
- }
- DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str());
- s.insert(lit);
-next_literal:
- ;
- }
-}
-
-#ifdef DEBUG
-static UNUSED
+ lit.push_back(lg[u].c);
+ assert(in_degree(u, lg) == 1);
+ LitGraph::inv_adjacency_iterator ai, ae;
+ tie(ai, ae) = inv_adjacent_vertices(u, lg);
+ if (ai == ae) {
+ // Path has been cut, time for the next literal.
+ goto next_literal;
+ }
+ u = *ai;
+ }
+ DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str());
+ s.insert(lit);
+next_literal:
+ ;
+ }
+}
+
+#ifdef DEBUG
+static UNUSED
const char *describeColor(small_color c) {
- switch (c) {
+ switch (c) {
case small_color::white:
- return "white";
+ return "white";
case small_color::gray:
- return "gray";
+ return "gray";
case small_color::black:
- return "black";
- default:
- return "unknown";
- }
-}
-#endif
-
-/**
- * The BGL's boykov_kolmogorov_max_flow requires that all edges have their
+ return "black";
+ default:
+ return "unknown";
+ }
+}
+#endif
+
+/**
+ * The BGL's boykov_kolmogorov_max_flow requires that all edges have their
* reverse edge in the graph. This function adds them, returning a vector
* mapping edge index to reverse edge. Note: LitGraph should be a DAG so there
* should be no existing reverse_edges.
- */
-static
+ */
+static
vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
const size_t edge_count = num_edges(lg);
vector<LitEdge> fwd_edges;
@@ -491,137 +491,137 @@ vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
for (const auto &e : edges_range(lg)) {
fwd_edges.push_back(e);
}
-
+
vector<LitEdge> rev_map(2 * edge_count);
-
+
for (const auto &e : fwd_edges) {
LitVertex u = source(e, lg);
LitVertex v = target(e, lg);
-
+
assert(!edge(v, u, lg).second);
-
+
LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first;
rev_map[lg[e].index] = rev;
rev_map[lg[rev].index] = e;
- }
-
+ }
+
return rev_map;
-}
-
-static
+}
+
+static
void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) {
- cutset.clear();
-
+ cutset.clear();
+
//dumpGraph("litgraph.dot", lg);
-
+
assert(!in_degree(lg.root, lg));
assert(!out_degree(lg.sink, lg));
size_t num_real_edges = num_edges(lg);
-
- // Add reverse edges for the convenience of the BGL's max flow algorithm.
+
+ // Add reverse edges for the convenience of the BGL's max flow algorithm.
vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg);
-
+
const auto v_index_map = get(&LitGraphVertexProps::index, lg);
const auto e_index_map = get(&LitGraphEdgeProps::index, lg);
- const size_t num_verts = num_vertices(lg);
+ const size_t num_verts = num_vertices(lg);
auto colors = make_small_color_map(lg);
- vector<s32> distances(num_verts);
- vector<LitEdge> predecessors(num_verts);
+ vector<s32> distances(num_verts);
+ vector<LitEdge> predecessors(num_verts);
vector<u64a> residuals(num_edges(lg));
-
- UNUSED u64a flow = boykov_kolmogorov_max_flow(lg,
- get(&LitGraphEdgeProps::score, lg),
+
+ UNUSED u64a flow = boykov_kolmogorov_max_flow(lg,
+ get(&LitGraphEdgeProps::score, lg),
make_iterator_property_map(residuals.begin(), e_index_map),
make_iterator_property_map(rev_edges.begin(), e_index_map),
- make_iterator_property_map(predecessors.begin(), v_index_map),
+ make_iterator_property_map(predecessors.begin(), v_index_map),
colors,
- make_iterator_property_map(distances.begin(), v_index_map),
+ make_iterator_property_map(distances.begin(), v_index_map),
v_index_map, lg.root, lg.sink);
- DEBUG_PRINTF("done, flow = %llu\n", flow);
-
+ DEBUG_PRINTF("done, flow = %llu\n", flow);
+
/* remove reverse edges */
remove_edge_if([&](const LitEdge &e) {
return lg[e].index >= num_real_edges;
}, lg);
-
- vector<LitEdge> white_cut, black_cut;
- u64a white_flow = 0, black_flow = 0;
-
- for (const auto &e : edges_range(lg)) {
- const LitVertex u = source(e, lg), v = target(e, lg);
+
+ vector<LitEdge> white_cut, black_cut;
+ u64a white_flow = 0, black_flow = 0;
+
+ for (const auto &e : edges_range(lg)) {
+ const LitVertex u = source(e, lg), v = target(e, lg);
const auto ucolor = get(colors, u);
const auto vcolor = get(colors, v);
-
+
DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index,
describeColor(ucolor), lg[v].index, describeColor(vcolor),
- lg[e].score);
-
+ lg[e].score);
+
if (ucolor != small_color::white && vcolor == small_color::white) {
assert(v != lg.sink);
- white_cut.push_back(e);
- white_flow += lg[e].score;
- }
+ white_cut.push_back(e);
+ white_flow += lg[e].score;
+ }
if (ucolor == small_color::black && vcolor != small_color::black) {
assert(v != lg.sink);
- black_cut.push_back(e);
- black_flow += lg[e].score;
- }
- }
-
- DEBUG_PRINTF("white flow = %llu, black flow = %llu\n",
- white_flow, black_flow);
- assert(white_flow && black_flow);
-
- if (white_flow <= black_flow) {
- DEBUG_PRINTF("selected white cut\n");
- cutset.swap(white_cut);
- } else {
- DEBUG_PRINTF("selected black cut\n");
- cutset.swap(black_cut);
- }
-
- DEBUG_PRINTF("min cut has %zu edges\n", cutset.size());
- assert(!cutset.empty());
-}
-
-/** Takes a set of literals and derives a better one from them, returning its
- * score. Literals with a common suffix S will be replaced with S. (for
- * example, {foobar, fooobar} -> {oobar}).
- */
-u64a compressAndScore(set<ue2_literal> &s) {
- if (s.empty()) {
- return NO_LITERAL_AT_EDGE_SCORE;
- }
-
- if (s.size() == 1) {
- return calculateScore(*s.begin());
- }
-
- UNUSED u64a initialScore = scoreSet(s);
- DEBUG_PRINTF("begin, initial literals have score %llu\n",
- initialScore);
-
- LitGraph lg;
-
- for (const auto &lit : s) {
+ black_cut.push_back(e);
+ black_flow += lg[e].score;
+ }
+ }
+
+ DEBUG_PRINTF("white flow = %llu, black flow = %llu\n",
+ white_flow, black_flow);
+ assert(white_flow && black_flow);
+
+ if (white_flow <= black_flow) {
+ DEBUG_PRINTF("selected white cut\n");
+ cutset.swap(white_cut);
+ } else {
+ DEBUG_PRINTF("selected black cut\n");
+ cutset.swap(black_cut);
+ }
+
+ DEBUG_PRINTF("min cut has %zu edges\n", cutset.size());
+ assert(!cutset.empty());
+}
+
+/** Takes a set of literals and derives a better one from them, returning its
+ * score. Literals with a common suffix S will be replaced with S. (for
+ * example, {foobar, fooobar} -> {oobar}).
+ */
+u64a compressAndScore(set<ue2_literal> &s) {
+ if (s.empty()) {
+ return NO_LITERAL_AT_EDGE_SCORE;
+ }
+
+ if (s.size() == 1) {
+ return calculateScore(*s.begin());
+ }
+
+ UNUSED u64a initialScore = scoreSet(s);
+ DEBUG_PRINTF("begin, initial literals have score %llu\n",
+ initialScore);
+
+ LitGraph lg;
+
+ for (const auto &lit : s) {
addReversedLiteral(lit, lg);
- }
-
- DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n",
- num_vertices(lg), num_edges(lg));
-
- vector<LitEdge> cutset;
+ }
+
+ DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n",
+ num_vertices(lg), num_edges(lg));
+
+ vector<LitEdge> cutset;
findMinCut(lg, cutset);
-
- s.clear();
+
+ s.clear();
extractLiterals(cutset, lg, s);
-
- u64a score = scoreSet(s);
- DEBUG_PRINTF("compressed score is %llu\n", score);
- assert(score <= initialScore);
- return score;
-}
-
+
+ u64a score = scoreSet(s);
+ DEBUG_PRINTF("compressed score is %llu\n", score);
+ assert(score <= initialScore);
+ return score;
+}
+
/* like compressAndScore, but replaces long mixed sensitivity literals with
* something weaker. */
u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
@@ -664,191 +664,191 @@ u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
return compressAndScore(lits);
}
-u64a scoreSet(const set<ue2_literal> &s) {
- if (s.empty()) {
- return NO_LITERAL_AT_EDGE_SCORE;
- }
-
- u64a score = 1ULL;
-
- for (const auto &lit : s) {
- score += calculateScore(lit);
- }
-
- return score;
-}
-
-set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) {
- set<ue2_literal> s;
- processWorkQueue(g, e, s);
- return s;
-}
-
-set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
- bool only_first_encounter) {
- set<ue2_literal> s;
-
- if (is_special(v, g)) {
- return s;
- }
-
- set<ue2_literal> ls;
-
- for (const auto &e : in_edges_range(v, g)) {
- if (source(e, g) == v && only_first_encounter) {
- continue; /* ignore self loop on root vertex as we are interested in
- * the first time we visit the vertex on the way to
- * accept. In fact, we can ignore any back edges - but
- * they would require a bit of effort to discover. */
- }
-
- ls = getLiteralSet(g, e);
- if (ls.empty()) {
- s.clear();
- return s;
- } else {
- s.insert(ls.begin(), ls.end());
- }
- }
-
- return s;
-}
-
+u64a scoreSet(const set<ue2_literal> &s) {
+ if (s.empty()) {
+ return NO_LITERAL_AT_EDGE_SCORE;
+ }
+
+ u64a score = 1ULL;
+
+ for (const auto &lit : s) {
+ score += calculateScore(lit);
+ }
+
+ return score;
+}
+
+set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) {
+ set<ue2_literal> s;
+ processWorkQueue(g, e, s);
+ return s;
+}
+
+set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
+ bool only_first_encounter) {
+ set<ue2_literal> s;
+
+ if (is_special(v, g)) {
+ return s;
+ }
+
+ set<ue2_literal> ls;
+
+ for (const auto &e : in_edges_range(v, g)) {
+ if (source(e, g) == v && only_first_encounter) {
+ continue; /* ignore self loop on root vertex as we are interested in
+ * the first time we visit the vertex on the way to
+ * accept. In fact, we can ignore any back edges - but
+ * they would require a bit of effort to discover. */
+ }
+
+ ls = getLiteralSet(g, e);
+ if (ls.empty()) {
+ s.clear();
+ return s;
+ } else {
+ s.insert(ls.begin(), ls.end());
+ }
+ }
+
+ return s;
+}
+
vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) {
- assert(hasCorrectlyNumberedEdges(g));
-
- vector<u64a> scores(num_edges(g));
-
- for (const auto &e : edges_range(g)) {
- u32 eidx = g[e].index;
- assert(eidx < scores.size());
+ assert(hasCorrectlyNumberedEdges(g));
+
+ vector<u64a> scores(num_edges(g));
+
+ for (const auto &e : edges_range(g)) {
+ u32 eidx = g[e].index;
+ assert(eidx < scores.size());
if (contains(known_bad, e)) {
scores[eidx] = NO_LITERAL_AT_EDGE_SCORE;
} else {
set<ue2_literal> ls = getLiteralSet(g, e);
scores[eidx] = compressAndScore(ls);
}
- }
-
- return scores;
-}
-
+ }
+
+ return scores;
+}
+
bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
NGHolder *rhs) {
DEBUG_PRINTF("looking for leading floating literal\n");
set<NFAVertex> s_succ;
insert(&s_succ, adjacent_vertices(g.start, g));
-
+
set<NFAVertex> sds_succ;
insert(&sds_succ, adjacent_vertices(g.startDs, g));
-
+
bool floating = is_subset_of(s_succ, sds_succ);
if (!floating) {
DEBUG_PRINTF("not floating\n");
return false;
}
-
+
sds_succ.erase(g.startDs);
if (sds_succ.size() != 1) {
DEBUG_PRINTF("branchy root\n");
return false;
}
-
+
NFAVertex u = g.startDs;
NFAVertex v = *sds_succ.begin();
-
- while (true) {
+
+ while (true) {
DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
-
- assert(v != g.acceptEod && v != g.accept);
-
- const CharReach &cr = g[v].char_reach;
- if (cr.count() != 1 && !cr.isCaselessChar()) {
- break;
- }
-
- // Rose can only handle mixed-sensitivity literals up to the max mask
- // length.
- if (lit_out->length() >= MAX_MASK2_WIDTH) {
- if (mixed_sensitivity(*lit_out)) {
- DEBUG_PRINTF("long and mixed sensitivity\n");
- break;
- }
- if (ourisalpha((char)cr.find_first())) {
- if (cr.isCaselessChar() != lit_out->any_nocase()) {
- DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n",
- (char)cr.find_first());
- break;
- }
- }
- }
-
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
- DEBUG_PRINTF("connection to accept\n");
- break;
- }
-
- lit_out->push_back(cr.find_first(), cr.isCaselessChar());
- u = v;
-
- if (out_degree(v, g) != 1) {
- DEBUG_PRINTF("out_degree != 1\n");
- break;
- }
-
- v = *adjacent_vertices(v, g).first;
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
- * to case accept (large cycles),
- * ensures term */
- break;
- }
- }
-
- if (lit_out->empty()) {
- return false;
- }
- assert(u != g.startDs);
-
+
+ assert(v != g.acceptEod && v != g.accept);
+
+ const CharReach &cr = g[v].char_reach;
+ if (cr.count() != 1 && !cr.isCaselessChar()) {
+ break;
+ }
+
+ // Rose can only handle mixed-sensitivity literals up to the max mask
+ // length.
+ if (lit_out->length() >= MAX_MASK2_WIDTH) {
+ if (mixed_sensitivity(*lit_out)) {
+ DEBUG_PRINTF("long and mixed sensitivity\n");
+ break;
+ }
+ if (ourisalpha((char)cr.find_first())) {
+ if (cr.isCaselessChar() != lit_out->any_nocase()) {
+ DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n",
+ (char)cr.find_first());
+ break;
+ }
+ }
+ }
+
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ DEBUG_PRINTF("connection to accept\n");
+ break;
+ }
+
+ lit_out->push_back(cr.find_first(), cr.isCaselessChar());
+ u = v;
+
+ if (out_degree(v, g) != 1) {
+ DEBUG_PRINTF("out_degree != 1\n");
+ break;
+ }
+
+ v = *adjacent_vertices(v, g).first;
+
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
+ * to case accept (large cycles),
+ * ensures term */
+ break;
+ }
+ }
+
+ if (lit_out->empty()) {
+ return false;
+ }
+ assert(u != g.startDs);
+
unordered_map<NFAVertex, NFAVertex> rhs_map;
vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g));
- splitRHS(g, pivots, rhs, &rhs_map);
-
- DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(),
- lit_out->length());
- assert(is_triggered(*rhs));
- return true;
-}
-
-bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
- if (in_degree(g.acceptEod, g) != 1) {
- return false;
- }
-
- NFAVertex v = getSoleSourceVertex(g, g.accept);
-
- if (!v) {
- return false;
- }
-
- set<ue2_literal> s = getLiteralSet(g, v, false);
-
- if (s.size() != 1) {
- return false;
- }
-
- const ue2_literal &lit = *s.begin();
-
- if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) {
- DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n");
- return false;
- }
-
- *lit_out = lit;
- return true;
-}
-
+ splitRHS(g, pivots, rhs, &rhs_map);
+
+ DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(),
+ lit_out->length());
+ assert(is_triggered(*rhs));
+ return true;
+}
+
+bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
+ if (in_degree(g.acceptEod, g) != 1) {
+ return false;
+ }
+
+ NFAVertex v = getSoleSourceVertex(g, g.accept);
+
+ if (!v) {
+ return false;
+ }
+
+ set<ue2_literal> s = getLiteralSet(g, v, false);
+
+ if (s.size() != 1) {
+ return false;
+ }
+
+ const ue2_literal &lit = *s.begin();
+
+ if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) {
+ DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n");
+ return false;
+ }
+
+ *lit_out = lit;
+ return true;
+}
+
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
NFAVertex v = g.accept;
@@ -894,4 +894,4 @@ bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
return true;
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h
index 6bb8755610..943a6d33c9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h
@@ -1,62 +1,62 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal analysis and scoring.
- */
-
-#ifndef NG_LITERAL_ANALYSIS_H
-#define NG_LITERAL_ANALYSIS_H
-
-#include <set>
-#include <vector>
-
-#include "ng_holder.h"
-#include "util/ue2string.h"
-
-namespace ue2 {
-
-#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal analysis and scoring.
+ */
+
+#ifndef NG_LITERAL_ANALYSIS_H
+#define NG_LITERAL_ANALYSIS_H
+
+#include <set>
+#include <vector>
+
+#include "ng_holder.h"
+#include "util/ue2string.h"
+
+namespace ue2 {
+
+#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL
#define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */
-
-class NGHolder;
-
-/**
- * Fetch the literal set for a given vertex, returning it in \p s. Note: does
- * NOT take into account any constraints due to streaming mode requirements.
- *
- * if only_first_encounter is requested, the output set may drop literals
- * generated by revisiting the destination vertex.
- */
-std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
- bool only_first_encounter = true);
-std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e);
-
+
+class NGHolder;
+
+/**
+ * Fetch the literal set for a given vertex, returning it in \p s. Note: does
+ * NOT take into account any constraints due to streaming mode requirements.
+ *
+ * if only_first_encounter is requested, the output set may drop literals
+ * generated by revisiting the destination vertex.
+ */
+std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
+ bool only_first_encounter = true);
+std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e);
+
/**
* Returns true if we are unable to use a mixed sensitivity literal in rose (as
* our literal matchers are generally either case sensitive or not).
@@ -68,31 +68,31 @@ bool bad_mixed_sensitivity(const ue2_literal &s);
/**
* Score all the edges in the given graph, returning them in \p scores indexed
- * by edge_index. */
+ * by edge_index. */
std::vector<u64a> scoreEdges(const NGHolder &h,
const flat_set<NFAEdge> &known_bad = {});
-
-/** Returns a score for a literal set. Lower scores are better. */
-u64a scoreSet(const std::set<ue2_literal> &s);
-
-/** Compress a literal set to fewer literals. */
-u64a compressAndScore(std::set<ue2_literal> &s);
-
+
+/** Returns a score for a literal set. Lower scores are better. */
+u64a scoreSet(const std::set<ue2_literal> &s);
+
+/** Compress a literal set to fewer literals. */
+u64a compressAndScore(std::set<ue2_literal> &s);
+
/**
* Compress a literal set to fewer literals and replace any long mixed
* sensitivity literals with supported literals.
*/
u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s);
-bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
- NGHolder *rhs);
-
-bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out);
-
+bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
+ NGHolder *rhs);
+
+bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out);
+
/** \brief Returns true if the given literal is the only thing in the graph,
* from (start or startDs) to accept. */
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp
index 4d3965dfe2..4e085d9913 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp
@@ -1,227 +1,227 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal Component Splitting. Identifies literals that span the
- * graph and moves them into Rose.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal Component Splitting. Identifies literals that span the
+ * graph and moves them into Rose.
+ */
#include "ng_literal_component.h"
-#include "grey.h"
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "ue2common.h"
+#include "grey.h"
+#include "ng.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "rose/rose_build.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/ue2string.h"
-
+#include "rose/rose_build.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/ue2string.h"
+
#include <unordered_set>
-using namespace std;
-
-namespace ue2 {
-
-static
+using namespace std;
+
+namespace ue2 {
+
+static
bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase,
bool &casefixed) {
- const CharReach &cr = g[v].char_reach;
- const size_t num = cr.count();
- if (num > 2) {
- return false; // char class
- }
-
- if (!casefixed) {
- if (num == 2 && cr.isCaselessChar()) {
- nocase = true;
- casefixed = true;
- return true;
- } else if (num == 1) {
- if (cr.isAlpha()) {
- nocase = false;
- casefixed = true;
- }
- // otherwise, still acceptable but we can't fix caselessness yet
- return true;
- }
- } else {
- // nocase property is fixed
- if (nocase) {
- if ((num == 2 && cr.isCaselessChar()) ||
- (num == 1 && !cr.isAlpha())) {
- return true;
- }
- } else {
- return (num == 1);
- }
- }
-
- return false;
-}
-
-static
-void addToString(string &s, const NGHolder &g, NFAVertex v) {
- const CharReach &cr = g[v].char_reach;
- assert(cr.count() == 1 || cr.isCaselessChar());
-
- char c = (char)cr.find_first();
- s.push_back(c);
-}
-
-static
+ const CharReach &cr = g[v].char_reach;
+ const size_t num = cr.count();
+ if (num > 2) {
+ return false; // char class
+ }
+
+ if (!casefixed) {
+ if (num == 2 && cr.isCaselessChar()) {
+ nocase = true;
+ casefixed = true;
+ return true;
+ } else if (num == 1) {
+ if (cr.isAlpha()) {
+ nocase = false;
+ casefixed = true;
+ }
+ // otherwise, still acceptable but we can't fix caselessness yet
+ return true;
+ }
+ } else {
+ // nocase property is fixed
+ if (nocase) {
+ if ((num == 2 && cr.isCaselessChar()) ||
+ (num == 1 && !cr.isAlpha())) {
+ return true;
+ }
+ } else {
+ return (num == 1);
+ }
+ }
+
+ return false;
+}
+
+static
+void addToString(string &s, const NGHolder &g, NFAVertex v) {
+ const CharReach &cr = g[v].char_reach;
+ assert(cr.count() == 1 || cr.isCaselessChar());
+
+ char c = (char)cr.find_first();
+ s.push_back(c);
+}
+
+static
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
- set<NFAVertex> &dead) {
+ set<NFAVertex> &dead) {
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
- bool nocase = false, casefixed = false;
-
- assert(!is_special(v, g));
-
- size_t reqInDegree;
- if (anchored) {
- reqInDegree = 1;
- assert(edge(g.start, v, g).second);
- } else {
- reqInDegree = 2;
- assert(edge(g.start, v, g).second);
- assert(edge(g.startDs, v, g).second);
- }
+ bool nocase = false, casefixed = false;
+
+ assert(!is_special(v, g));
+
+ size_t reqInDegree;
+ if (anchored) {
+ reqInDegree = 1;
+ assert(edge(g.start, v, g).second);
+ } else {
+ reqInDegree = 2;
+ assert(edge(g.start, v, g).second);
+ assert(edge(g.startDs, v, g).second);
+ }
if (in_degree(v, g) > reqInDegree) {
- DEBUG_PRINTF("extra in-edges\n");
- return false;
- }
-
- if (!isLiteralChar(g, v, nocase, casefixed)) {
- DEBUG_PRINTF("not literal\n");
- return false;
- }
-
- string literal;
- addToString(literal, g, v);
-
- // Remaining vertices must come in a chain, each with one in-edge and one
- // out-edge only.
- NFAVertex u;
- while (1) {
- if (out_degree(v, g) != 1) {
- DEBUG_PRINTF("branches, not literal\n");
- return false;
- }
-
- u = v; // previous vertex
- v = *(adjacent_vertices(v, g).first);
-
+ DEBUG_PRINTF("extra in-edges\n");
+ return false;
+ }
+
+ if (!isLiteralChar(g, v, nocase, casefixed)) {
+ DEBUG_PRINTF("not literal\n");
+ return false;
+ }
+
+ string literal;
+ addToString(literal, g, v);
+
+ // Remaining vertices must come in a chain, each with one in-edge and one
+ // out-edge only.
+ NFAVertex u;
+ while (1) {
+ if (out_degree(v, g) != 1) {
+ DEBUG_PRINTF("branches, not literal\n");
+ return false;
+ }
+
+ u = v; // previous vertex
+ v = *(adjacent_vertices(v, g).first);
+
DEBUG_PRINTF("loop, v=%zu\n", g[v].index);
-
- if (is_special(v, g)) {
- if (v == g.accept || v == g.acceptEod) {
- break; // OK
- } else {
- assert(0); // start?
- return false;
- }
- } else {
- // Ordinary, must be literal
- if (!isLiteralChar(g, v, nocase, casefixed)) {
- DEBUG_PRINTF("not literal\n");
- return false;
- }
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("branches, not literal\n");
- return false;
- }
- }
-
- addToString(literal, g, v);
- }
-
- // Successfully found a literal; there might be multiple report IDs, in
- // which case we add all the reports.
- assert(!is_special(u, g));
- bool eod = v == g.acceptEod;
- assert(eod || v == g.accept);
-
- DEBUG_PRINTF("success: found %s literal '%s'\n",
- anchored ? "anchored" : "unanchored",
- escapeString(literal).c_str());
-
- // Literals of length 1 are better served going through later optimisation
- // passes, where they might be combined together into a character class.
- if (literal.length() == 1) {
- DEBUG_PRINTF("skipping literal of length 1\n");
- return false;
- }
-
- ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports);
-
- // Remove the terminal vertex. Later, we rely on pruneUseless to remove the
- // other vertices in this chain, since they'll no longer lead to an accept.
- dead.insert(u);
-
- return true;
-}
-
-/** \brief Split off literals. True if any changes were made to the graph. */
+
+ if (is_special(v, g)) {
+ if (v == g.accept || v == g.acceptEod) {
+ break; // OK
+ } else {
+ assert(0); // start?
+ return false;
+ }
+ } else {
+ // Ordinary, must be literal
+ if (!isLiteralChar(g, v, nocase, casefixed)) {
+ DEBUG_PRINTF("not literal\n");
+ return false;
+ }
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("branches, not literal\n");
+ return false;
+ }
+ }
+
+ addToString(literal, g, v);
+ }
+
+ // Successfully found a literal; there might be multiple report IDs, in
+ // which case we add all the reports.
+ assert(!is_special(u, g));
+ bool eod = v == g.acceptEod;
+ assert(eod || v == g.accept);
+
+ DEBUG_PRINTF("success: found %s literal '%s'\n",
+ anchored ? "anchored" : "unanchored",
+ escapeString(literal).c_str());
+
+ // Literals of length 1 are better served going through later optimisation
+ // passes, where they might be combined together into a character class.
+ if (literal.length() == 1) {
+ DEBUG_PRINTF("skipping literal of length 1\n");
+ return false;
+ }
+
+ ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports);
+
+ // Remove the terminal vertex. Later, we rely on pruneUseless to remove the
+ // other vertices in this chain, since they'll no longer lead to an accept.
+ dead.insert(u);
+
+ return true;
+}
+
+/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGHolder &g) {
if (!ng.cc.grey.allowLiteral) {
- return false;
- }
-
- bool changed = false;
- set<NFAVertex> dead;
-
+ return false;
+ }
+
+ bool changed = false;
+ set<NFAVertex> dead;
+
unordered_set<NFAVertex> unanchored; // for faster lookup.
- insert(&unanchored, adjacent_vertices(g.startDs, g));
-
- // Anchored literals.
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (!is_special(v, g) && !contains(unanchored, v)) {
- changed |= splitOffLiteral(ng, g, v, true, dead);
- }
- }
-
- // Unanchored literals.
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (!is_special(v, g)) {
- changed |= splitOffLiteral(ng, g, v, false, dead);
- }
- }
-
- if (changed) {
- remove_vertices(dead, g);
- pruneUseless(g);
- return true;
- }
-
- return false;
-}
-
-} // namespace ue2
+ insert(&unanchored, adjacent_vertices(g.startDs, g));
+
+ // Anchored literals.
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (!is_special(v, g) && !contains(unanchored, v)) {
+ changed |= splitOffLiteral(ng, g, v, true, dead);
+ }
+ }
+
+ // Unanchored literals.
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (!is_special(v, g)) {
+ changed |= splitOffLiteral(ng, g, v, false, dead);
+ }
+ }
+
+ if (changed) {
+ remove_vertices(dead, g);
+ pruneUseless(g);
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h
index 1f284ce367..0cd8422ae7 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal Component Splitting. Identifies literals that span the
- * graph and moves them into Rose.
- */
-
-#ifndef NG_LITERAL_COMPONENT_H
-#define NG_LITERAL_COMPONENT_H
-
-namespace ue2 {
-
-class NG;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal Component Splitting. Identifies literals that span the
+ * graph and moves them into Rose.
+ */
+
+#ifndef NG_LITERAL_COMPONENT_H
+#define NG_LITERAL_COMPONENT_H
+
+namespace ue2 {
+
+class NG;
class NGHolder;
-
-/** \brief Split off literals. True if any changes were made to the graph. */
+
+/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGHolder &g);
-
-} // namespace ue2
-
-#endif // NG_LITERAL_COMPONENT_H
+
+} // namespace ue2
+
+#endif // NG_LITERAL_COMPONENT_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp
index 61a31dbf34..5d2f4ca5df 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp
@@ -1,252 +1,252 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis for literals decorated by leading/trailing assertions or
- * character classes.
- */
-#include "ng_literal_decorated.h"
-
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_util.h"
-#include "rose/rose_build.h"
-#include "rose/rose_in_graph.h"
-#include "rose/rose_in_util.h"
-#include "util/compile_context.h"
-#include "util/dump_charclass.h"
-#include "util/make_unique.h"
-
-#include <algorithm>
-#include <memory>
-#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** \brief Max fixed-width paths to generate from a graph. */
-static constexpr size_t MAX_PATHS = 10;
-
-/** \brief Max degree for any non-special vertex in the graph. */
-static constexpr size_t MAX_VERTEX_DEGREE = 6;
-
-using Path = vector<NFAVertex>;
-
-} // namespace
-
-static
-bool findPaths(const NGHolder &g, vector<Path> &paths) {
- vector<NFAVertex> order = getTopoOrdering(g);
-
- vector<size_t> read_count(num_vertices(g));
- vector<vector<Path>> built(num_vertices(g));
-
- for (auto it = order.rbegin(); it != order.rend(); ++it) {
- NFAVertex v = *it;
- auto &out = built[g[v].index];
- assert(out.empty());
-
- read_count[g[v].index] = out_degree(v, g);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis for literals decorated by leading/trailing assertions or
+ * character classes.
+ */
+#include "ng_literal_decorated.h"
+
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_util.h"
+#include "rose/rose_build.h"
+#include "rose/rose_in_graph.h"
+#include "rose/rose_in_util.h"
+#include "util/compile_context.h"
+#include "util/dump_charclass.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <memory>
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** \brief Max fixed-width paths to generate from a graph. */
+static constexpr size_t MAX_PATHS = 10;
+
+/** \brief Max degree for any non-special vertex in the graph. */
+static constexpr size_t MAX_VERTEX_DEGREE = 6;
+
+using Path = vector<NFAVertex>;
+
+} // namespace
+
+static
+bool findPaths(const NGHolder &g, vector<Path> &paths) {
+ vector<NFAVertex> order = getTopoOrdering(g);
+
+ vector<size_t> read_count(num_vertices(g));
+ vector<vector<Path>> built(num_vertices(g));
+
+ for (auto it = order.rbegin(); it != order.rend(); ++it) {
+ NFAVertex v = *it;
+ auto &out = built[g[v].index];
+ assert(out.empty());
+
+ read_count[g[v].index] = out_degree(v, g);
+
DEBUG_PRINTF("setting read_count to %zu for %zu\n",
- read_count[g[v].index], g[v].index);
-
- if (v == g.start || v == g.startDs) {
- out.push_back({v});
- continue;
- }
-
- // The paths to v are the paths to v's predecessors, with v added to
- // the end of each.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- // We have a stylized connection from start -> startDs, but we
- // don't need anchored and unanchored versions of the same path.
- if (u == g.start && edge(g.startDs, v, g).second) {
- continue;
- }
-
- // Similarly, avoid the accept->acceptEod edge.
- if (u == g.accept) {
- assert(v == g.acceptEod);
- continue;
- }
-
- assert(!built[g[u].index].empty());
- assert(read_count[g[u].index]);
-
- for (const auto &p : built[g[u].index]) {
- out.push_back(p);
- out.back().push_back(v);
-
- if (out.size() > MAX_PATHS) {
- // All these paths should eventually end up at a sink, so
- // we've blown past our limit.
- DEBUG_PRINTF("path limit exceeded\n");
- return false;
- }
- }
-
- read_count[g[u].index]--;
- if (!read_count[g[u].index]) {
+ read_count[g[v].index], g[v].index);
+
+ if (v == g.start || v == g.startDs) {
+ out.push_back({v});
+ continue;
+ }
+
+ // The paths to v are the paths to v's predecessors, with v added to
+ // the end of each.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ // We have a stylized connection from start -> startDs, but we
+ // don't need anchored and unanchored versions of the same path.
+ if (u == g.start && edge(g.startDs, v, g).second) {
+ continue;
+ }
+
+ // Similarly, avoid the accept->acceptEod edge.
+ if (u == g.accept) {
+ assert(v == g.acceptEod);
+ continue;
+ }
+
+ assert(!built[g[u].index].empty());
+ assert(read_count[g[u].index]);
+
+ for (const auto &p : built[g[u].index]) {
+ out.push_back(p);
+ out.back().push_back(v);
+
+ if (out.size() > MAX_PATHS) {
+ // All these paths should eventually end up at a sink, so
+ // we've blown past our limit.
+ DEBUG_PRINTF("path limit exceeded\n");
+ return false;
+ }
+ }
+
+ read_count[g[u].index]--;
+ if (!read_count[g[u].index]) {
DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index);
- built[g[u].index].clear();
- built[g[u].index].shrink_to_fit();
- }
- }
- }
-
- insert(&paths, paths.end(), built[NODE_ACCEPT]);
- insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
-
- DEBUG_PRINTF("%zu paths generated\n", paths.size());
-
- return paths.size() <= MAX_PATHS;
-}
-
-static
-bool hasLargeDegreeVertex(const NGHolder &g) {
- for (const auto &v : vertices_range(g)) {
- if (is_special(v, g)) { // specials can have large degree
- continue;
- }
+ built[g[u].index].clear();
+ built[g[u].index].shrink_to_fit();
+ }
+ }
+ }
+
+ insert(&paths, paths.end(), built[NODE_ACCEPT]);
+ insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
+
+ DEBUG_PRINTF("%zu paths generated\n", paths.size());
+
+ return paths.size() <= MAX_PATHS;
+}
+
+static
+bool hasLargeDegreeVertex(const NGHolder &g) {
+ for (const auto &v : vertices_range(g)) {
+ if (is_special(v, g)) { // specials can have large degree
+ continue;
+ }
if (degree(v, g) > MAX_VERTEX_DEGREE) {
DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index,
degree(v, g));
- return true;
- }
- }
- return false;
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-static UNUSED
-string dumpPath(const NGHolder &g, const Path &path) {
- ostringstream oss;
- for (const auto &v : path) {
- switch (g[v].index) {
- case NODE_START:
- oss << "<start>";
- break;
- case NODE_START_DOTSTAR:
- oss << "<startDs>";
- break;
- case NODE_ACCEPT:
- oss << "<accept>";
- break;
- case NODE_ACCEPT_EOD:
- oss << "<acceptEod>";
- break;
- default:
- oss << describeClass(g[v].char_reach);
- break;
- }
- }
- return oss.str();
-}
-#endif
-
-struct PathMask {
- PathMask(const NGHolder &g, const Path &path)
- : is_anchored(path.front() == g.start),
- is_eod(path.back() == g.acceptEod) {
- assert(path.size() >= 2);
- mask.reserve(path.size() - 2);
- for (const auto &v : path) {
- if (is_special(v, g)) {
- continue;
- }
- mask.push_back(g[v].char_reach);
- }
-
- // Reports are attached to the second-to-last vertex.
+ return true;
+ }
+ }
+ return false;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dumpPath(const NGHolder &g, const Path &path) {
+ ostringstream oss;
+ for (const auto &v : path) {
+ switch (g[v].index) {
+ case NODE_START:
+ oss << "<start>";
+ break;
+ case NODE_START_DOTSTAR:
+ oss << "<startDs>";
+ break;
+ case NODE_ACCEPT:
+ oss << "<accept>";
+ break;
+ case NODE_ACCEPT_EOD:
+ oss << "<acceptEod>";
+ break;
+ default:
+ oss << describeClass(g[v].char_reach);
+ break;
+ }
+ }
+ return oss.str();
+}
+#endif
+
+struct PathMask {
+ PathMask(const NGHolder &g, const Path &path)
+ : is_anchored(path.front() == g.start),
+ is_eod(path.back() == g.acceptEod) {
+ assert(path.size() >= 2);
+ mask.reserve(path.size() - 2);
+ for (const auto &v : path) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ mask.push_back(g[v].char_reach);
+ }
+
+ // Reports are attached to the second-to-last vertex.
NFAVertex u = *std::next(path.rbegin());
reports = g[u].reports;
- assert(!reports.empty());
- }
-
- vector<CharReach> mask;
+ assert(!reports.empty());
+ }
+
+ vector<CharReach> mask;
flat_set<ReportID> reports;
- bool is_anchored;
- bool is_eod;
-};
-
-bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
- const CompileContext &cc) {
- if (!cc.grey.allowDecoratedLiteral) {
- return false;
- }
-
- if (!isAcyclic(g)) {
- DEBUG_PRINTF("not acyclic\n");
- return false;
- }
-
+ bool is_anchored;
+ bool is_eod;
+};
+
+bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
+ const CompileContext &cc) {
+ if (!cc.grey.allowDecoratedLiteral) {
+ return false;
+ }
+
+ if (!isAcyclic(g)) {
+ DEBUG_PRINTF("not acyclic\n");
+ return false;
+ }
+
if (!hasNarrowReachVertex(g)) {
DEBUG_PRINTF("no narrow reach vertices\n");
return false;
}
- if (hasLargeDegreeVertex(g)) {
- DEBUG_PRINTF("large degree\n");
- return false;
- }
-
- vector<Path> paths;
- if (!findPaths(g, paths)) {
- DEBUG_PRINTF("couldn't split into a small number of paths\n");
- return false;
- }
-
- assert(!paths.empty());
- assert(paths.size() <= MAX_PATHS);
-
- vector<PathMask> masks;
- masks.reserve(paths.size());
-
- for (const auto &path : paths) {
- DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str());
- PathMask pm(g, path);
- if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored,
- pm.is_eod)) {
- DEBUG_PRINTF("failed validation\n");
- return false;
- }
- masks.push_back(move(pm));
- }
-
- for (const auto &pm : masks) {
- rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod);
- }
-
- DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
- return true;
-}
-
-} // namespace ue2
+ if (hasLargeDegreeVertex(g)) {
+ DEBUG_PRINTF("large degree\n");
+ return false;
+ }
+
+ vector<Path> paths;
+ if (!findPaths(g, paths)) {
+ DEBUG_PRINTF("couldn't split into a small number of paths\n");
+ return false;
+ }
+
+ assert(!paths.empty());
+ assert(paths.size() <= MAX_PATHS);
+
+ vector<PathMask> masks;
+ masks.reserve(paths.size());
+
+ for (const auto &path : paths) {
+ DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str());
+ PathMask pm(g, path);
+ if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored,
+ pm.is_eod)) {
+ DEBUG_PRINTF("failed validation\n");
+ return false;
+ }
+ masks.push_back(move(pm));
+ }
+
+ for (const auto &pm : masks) {
+ rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod);
+ }
+
+ DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h
index ff18c7d746..603679e809 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h
@@ -1,52 +1,52 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis for literals decorated by leading/trailing assertions or
- * character classes.
- */
-
-#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H
-#define NFAGRAPH_NG_LITERAL_DECORATED_H
-
-namespace ue2 {
-
-class RoseBuild;
-class NGHolder;
-struct CompileContext;
-
-/**
- * \brief If the graph contains only a decorated literal, feed it to the Rose
- * builder. Returns true on success.
- */
-bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NFAGRAPH_NG_LITERAL_DECORATED_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis for literals decorated by leading/trailing assertions or
+ * character classes.
+ */
+
+#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H
+#define NFAGRAPH_NG_LITERAL_DECORATED_H
+
+namespace ue2 {
+
+class RoseBuild;
+class NGHolder;
+struct CompileContext;
+
+/**
+ * \brief If the graph contains only a decorated literal, feed it to the Rose
+ * builder. Returns true on success.
+ */
+bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NFAGRAPH_NG_LITERAL_DECORATED_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp
index 4ce5dc153b..7d84aabe30 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp
@@ -1,352 +1,352 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for McClellan DFA.
- */
-#include "ng_mcclellan.h"
-
-#include "grey.h"
-#include "nfa/dfa_min.h"
-#include "nfa/rdfa.h"
-#include "ng_holder.h"
-#include "ng_mcclellan_internal.h"
-#include "ng_squash.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/bitfield.h"
-#include "util/determinise.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for McClellan DFA.
+ */
+#include "ng_mcclellan.h"
+
+#include "grey.h"
+#include "nfa/dfa_min.h"
+#include "nfa/rdfa.h"
+#include "ng_holder.h"
+#include "ng_mcclellan_internal.h"
+#include "ng_squash.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/bitfield.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/hash.h"
#include "util/hash_dynamic_bitset.h"
-#include "util/make_unique.h"
-#include "util/report_manager.h"
-
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
+#include "util/make_unique.h"
+#include "util/report_manager.h"
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
#include <unordered_map>
-#include <vector>
-
-#include <boost/dynamic_bitset.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-#define FINAL_DFA_STATE_LIMIT 16383
-#define DFA_STATE_LIMIT 1024
-#define NFA_STATE_LIMIT 256
-
-u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
- array<u16, ALPHABET_SIZE> &alpha,
- array<u16, ALPHABET_SIZE> &unalpha) {
- u16 i = 0;
- for (; i < esets.size(); i++) {
- const CharReach &cr = esets[i];
-
-#ifdef DEBUG
- DEBUG_PRINTF("eq set: ");
- for (size_t s = cr.find_first(); s != CharReach::npos;
- s = cr.find_next(s)) {
- printf("%02hhx ", (u8)s);
- }
- printf("-> %u\n", i);
-#endif
- u16 leader = cr.find_first();
- for (size_t s = cr.find_first(); s != CharReach::npos;
- s = cr.find_next(s)) {
- alpha[s] = i;
- }
- unalpha[i] = leader;
- }
-
- for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) {
- alpha[j] = i;
- unalpha[i] = j;
- }
-
- return i; // alphabet size
-}
-
-void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha,
- array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) {
- vector<CharReach> esets(1, CharReach::dot());
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
-
- for (size_t i = 0; i < esets.size(); i++) {
- if (esets[i].count() == 1) {
- continue;
- }
-
- CharReach t = cr & esets[i];
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
- // for deterministic compiles
- sort(esets.begin(), esets.end());
-
- assert(alphasize);
- *alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
-}
-
-static
-bool allExternalReports(const ReportManager &rm,
- const flat_set<ReportID> &reports) {
- for (auto report_id : reports) {
- if (!isExternalReport(rm.getReport(report_id))) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c,
- const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) {
- return dstates[c].next[alpha[s]];
-}
-
-void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
- dstate_id_t *out_table) {
- for (u32 i = 0; i < ALPHABET_SIZE; i++) {
- out_table[i] = successor(n.states, state, n.alpha_remap, i);
- }
-}
-
-template<typename stateset>
-static
+#include <vector>
+
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+using boost::dynamic_bitset;
+
+namespace ue2 {
+
+#define FINAL_DFA_STATE_LIMIT 16383
+#define DFA_STATE_LIMIT 1024
+#define NFA_STATE_LIMIT 256
+
+u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
+ array<u16, ALPHABET_SIZE> &alpha,
+ array<u16, ALPHABET_SIZE> &unalpha) {
+ u16 i = 0;
+ for (; i < esets.size(); i++) {
+ const CharReach &cr = esets[i];
+
+#ifdef DEBUG
+ DEBUG_PRINTF("eq set: ");
+ for (size_t s = cr.find_first(); s != CharReach::npos;
+ s = cr.find_next(s)) {
+ printf("%02hhx ", (u8)s);
+ }
+ printf("-> %u\n", i);
+#endif
+ u16 leader = cr.find_first();
+ for (size_t s = cr.find_first(); s != CharReach::npos;
+ s = cr.find_next(s)) {
+ alpha[s] = i;
+ }
+ unalpha[i] = leader;
+ }
+
+ for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) {
+ alpha[j] = i;
+ unalpha[i] = j;
+ }
+
+ return i; // alphabet size
+}
+
+void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha,
+ array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) {
+ vector<CharReach> esets(1, CharReach::dot());
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ if (esets[i].count() == 1) {
+ continue;
+ }
+
+ CharReach t = cr & esets[i];
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+ // for deterministic compiles
+ sort(esets.begin(), esets.end());
+
+ assert(alphasize);
+ *alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+}
+
+static
+bool allExternalReports(const ReportManager &rm,
+ const flat_set<ReportID> &reports) {
+ for (auto report_id : reports) {
+ if (!isExternalReport(rm.getReport(report_id))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c,
+ const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) {
+ return dstates[c].next[alpha[s]];
+}
+
+void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
+ dstate_id_t *out_table) {
+ for (u32 i = 0; i < ALPHABET_SIZE; i++) {
+ out_table[i] = successor(n.states, state, n.alpha_remap, i);
+ }
+}
+
+template<typename stateset>
+static
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
- stateset *init, stateset *init_deep,
- vector<NFAVertex> *v_by_index) {
- for (auto v : vertices_range(g)) {
+ stateset *init, stateset *init_deep,
+ vector<NFAVertex> *v_by_index) {
+ for (auto v : vertices_range(g)) {
if (contains(unused, v)) {
- continue;
- }
-
- u32 vert_id = g[v].index;
- assert(vert_id < init->size());
-
- if (is_any_start(v, g)) {
- init->set(vert_id);
- if (hasSelfLoop(v, g) || is_triggered(g)) {
- DEBUG_PRINTF("setting %u\n", vert_id);
- init_deep->set(vert_id);
- }
- }
- }
-
- v_by_index->clear();
+ continue;
+ }
+
+ u32 vert_id = g[v].index;
+ assert(vert_id < init->size());
+
+ if (is_any_start(v, g)) {
+ init->set(vert_id);
+ if (hasSelfLoop(v, g) || is_triggered(g)) {
+ DEBUG_PRINTF("setting %u\n", vert_id);
+ init_deep->set(vert_id);
+ }
+ }
+ }
+
+ v_by_index->clear();
v_by_index->resize(num_vertices(g), NGHolder::null_vertex());
-
- for (auto v : vertices_range(g)) {
- u32 vert_id = g[v].index;
+
+ for (auto v : vertices_range(g)) {
+ u32 vert_id = g[v].index;
assert((*v_by_index)[vert_id] == NGHolder::null_vertex());
- (*v_by_index)[vert_id] = v;
- }
-
- if (is_triggered(g)) {
- *init_deep = *init;
- }
-}
-
-template<typename StateSet>
+ (*v_by_index)[vert_id] = v;
+ }
+
+ if (is_triggered(g)) {
+ *init_deep = *init;
+ }
+}
+
+template<typename StateSet>
void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused,
- StateSet *accept, StateSet *acceptEod) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ StateSet *accept, StateSet *acceptEod) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (contains(unused, v)) {
continue;
- }
+ }
accept->set(g[v].index);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
if (contains(unused, v)) {
continue;
- }
+ }
acceptEod->set(g[v].index);
- }
-}
-
-static
-bool canPruneEdgesFromAccept(const ReportManager &rm, const NGHolder &g) {
- bool seen = false;
- u32 ekey = 0;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- for (auto report_id : g[v].reports) {
- const Report &ir = rm.getReport(report_id);
-
- if (!isSimpleExhaustible(ir)) {
- return false;
- }
-
- if (!seen) {
- seen = true;
- ekey = ir.ekey;
- } else if (ekey != ir.ekey) {
- return false;
- }
- }
- }
-
- /* need to check accept eod does not have any unseen reports as well */
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- for (auto report_id : g[v].reports) {
- const Report &ir = rm.getReport(report_id);
-
- if (!isSimpleExhaustible(ir)) {
- return false;
- }
-
- if (!seen) {
- seen = true;
- ekey = ir.ekey;
- } else if (ekey != ir.ekey) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-static
-bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers,
- vector<CharReach>::const_reverse_iterator itb,
- vector<CharReach>::const_reverse_iterator ite) {
- for (const auto &trigger : all_triggers) {
- vector<CharReach>::const_reverse_iterator it = itb;
- vector<CharReach>::const_reverse_iterator kt = trigger.rbegin();
- for (; it != ite && kt != trigger.rend(); ++it, ++kt) {
- if ((*it & *kt).none()) {
- /* this trigger does not match the overhang, try next */
- goto try_next_trigger;
- }
- }
-
- return true;
- try_next_trigger:;
- }
-
- return false; /* no trigger matches the over hang */
-}
-
-static
-bool triggerAllowed(const NGHolder &g, const NFAVertex v,
- const vector<vector<CharReach> > &all_triggers,
- const vector<CharReach> &trigger) {
+ }
+}
+
+static
+bool canPruneEdgesFromAccept(const ReportManager &rm, const NGHolder &g) {
+ bool seen = false;
+ u32 ekey = 0;
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ for (auto report_id : g[v].reports) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (!isSimpleExhaustible(ir)) {
+ return false;
+ }
+
+ if (!seen) {
+ seen = true;
+ ekey = ir.ekey;
+ } else if (ekey != ir.ekey) {
+ return false;
+ }
+ }
+ }
+
+ /* need to check accept eod does not have any unseen reports as well */
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ for (auto report_id : g[v].reports) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (!isSimpleExhaustible(ir)) {
+ return false;
+ }
+
+ if (!seen) {
+ seen = true;
+ ekey = ir.ekey;
+ } else if (ekey != ir.ekey) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static
+bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers,
+ vector<CharReach>::const_reverse_iterator itb,
+ vector<CharReach>::const_reverse_iterator ite) {
+ for (const auto &trigger : all_triggers) {
+ vector<CharReach>::const_reverse_iterator it = itb;
+ vector<CharReach>::const_reverse_iterator kt = trigger.rbegin();
+ for (; it != ite && kt != trigger.rend(); ++it, ++kt) {
+ if ((*it & *kt).none()) {
+ /* this trigger does not match the overhang, try next */
+ goto try_next_trigger;
+ }
+ }
+
+ return true;
+ try_next_trigger:;
+ }
+
+ return false; /* no trigger matches the over hang */
+}
+
+static
+bool triggerAllowed(const NGHolder &g, const NFAVertex v,
+ const vector<vector<CharReach> > &all_triggers,
+ const vector<CharReach> &trigger) {
flat_set<NFAVertex> curr({v});
flat_set<NFAVertex> next;
-
- for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) {
- next.clear();
-
- for (auto u : curr) {
- assert(u != g.startDs); /* triggered graphs should not use sds */
- if (u == g.start) {
- if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) {
- return true;
- }
- continue;
- }
-
- if ((g[u].char_reach & *it).none()) {
- continue;
- }
- insert(&next, inv_adjacent_vertices(u, g));
- }
-
- if (next.empty()) {
- return false;
- }
-
- next.swap(curr);
- }
-
- return true;
-}
-
+
+ for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) {
+ next.clear();
+
+ for (auto u : curr) {
+ assert(u != g.startDs); /* triggered graphs should not use sds */
+ if (u == g.start) {
+ if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) {
+ return true;
+ }
+ continue;
+ }
+
+ if ((g[u].char_reach & *it).none()) {
+ continue;
+ }
+ insert(&next, inv_adjacent_vertices(u, g));
+ }
+
+ if (next.empty()) {
+ return false;
+ }
+
+ next.swap(curr);
+ }
+
+ return true;
+}
+
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
- bool single_trigger,
- const vector<vector<CharReach>> &triggers,
- dynamic_bitset<> *out) {
- if (single_trigger) {
- return; /* no live states can lead to new states */
- }
-
- for (auto v : vertices_range(g)) {
+ bool single_trigger,
+ const vector<vector<CharReach>> &triggers,
+ dynamic_bitset<> *out) {
+ if (single_trigger) {
+ return; /* no live states can lead to new states */
+ }
+
+ for (auto v : vertices_range(g)) {
if (contains(unused, v)) {
- continue;
- }
- for (const auto &trigger : triggers) {
- if (triggerAllowed(g, v, triggers, trigger)) {
+ continue;
+ }
+ for (const auto &trigger : triggers) {
+ if (triggerAllowed(g, v, triggers, trigger)) {
DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index);
out->set(g[v].index);
- break;
- }
- }
- }
-
- assert(out->test(g[g.start].index));
-}
-
-namespace {
-
+ break;
+ }
+ }
+ }
+
+ assert(out->test(g[g.start].index));
+}
+
+namespace {
+
template<typename Automaton_Traits>
class Automaton_Base {
-public:
+public:
using StateSet = typename Automaton_Traits::StateSet;
using StateMap = typename Automaton_Traits::StateMap;
-
+
Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in,
bool single_trigger,
const vector<vector<CharReach>> &triggers, bool prunable_in)
@@ -362,117 +362,117 @@ public:
prunable(prunable_in) {
populateInit(graph, unused, &init, &initDS, &v_by_index);
populateAccepts(graph, unused, &accept, &acceptEod);
-
- start_anchored = DEAD_STATE + 1;
- if (initDS == init) {
- start_floating = start_anchored;
- } else if (initDS.any()) {
- start_floating = start_anchored + 1;
- } else {
- start_floating = DEAD_STATE;
- }
-
- calculateAlphabet(graph, alpha, unalpha, &alphasize);
-
- for (const auto &sq : findSquashers(graph)) {
- NFAVertex v = sq.first;
- u32 vert_id = graph[v].index;
- squash.set(vert_id);
+
+ start_anchored = DEAD_STATE + 1;
+ if (initDS == init) {
+ start_floating = start_anchored;
+ } else if (initDS.any()) {
+ start_floating = start_anchored + 1;
+ } else {
+ start_floating = DEAD_STATE;
+ }
+
+ calculateAlphabet(graph, alpha, unalpha, &alphasize);
+
+ for (const auto &sq : findSquashers(graph)) {
+ NFAVertex v = sq.first;
+ u32 vert_id = graph[v].index;
+ squash.set(vert_id);
squash_mask[vert_id]
= Automaton_Traits::copy_states(std::move(sq.second),
numStates);
- }
-
- cr_by_index = populateCR(graph, v_by_index, alpha);
- if (is_triggered(graph)) {
+ }
+
+ cr_by_index = populateCR(graph, v_by_index, alpha);
+ if (is_triggered(graph)) {
dynamic_bitset<> temp(numStates);
markToppableStarts(graph, unused, single_trigger, triggers,
&temp);
toppable = Automaton_Traits::copy_states(std::move(temp),
numStates);
- }
- }
-
-public:
- void transition(const StateSet &in, StateSet *next) {
- transition_graph(*this, v_by_index, in, next);
- }
-
- const vector<StateSet> initial() {
+ }
+ }
+
+public:
+ void transition(const StateSet &in, StateSet *next) {
+ transition_graph(*this, v_by_index, in, next);
+ }
+
+ const vector<StateSet> initial() {
vector<StateSet> rv = {init};
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(initDS);
- }
- return rv;
- }
-
-private:
- void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
- StateSet acc = in & (eod ? acceptEod : accept);
- for (size_t i = acc.find_first(); i != StateSet::npos;
- i = acc.find_next(i)) {
- NFAVertex v = v_by_index[i];
- DEBUG_PRINTF("marking report\n");
- const auto &my_reports = graph[v].reports;
- rv.insert(my_reports.begin(), my_reports.end());
- }
- }
-
-public:
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, false, rv);
- }
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, true, rv);
- }
-
- bool canPrune(const flat_set<ReportID> &test_reports) const {
- if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
- return false;
- }
- return allExternalReports(*rm, test_reports);
- }
-
-private:
- const ReportManager *rm;
-public:
- const NGHolder &graph;
- u32 numStates;
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(initDS);
+ }
+ return rv;
+ }
+
+private:
+ void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
+ StateSet acc = in & (eod ? acceptEod : accept);
+ for (size_t i = acc.find_first(); i != StateSet::npos;
+ i = acc.find_next(i)) {
+ NFAVertex v = v_by_index[i];
+ DEBUG_PRINTF("marking report\n");
+ const auto &my_reports = graph[v].reports;
+ rv.insert(my_reports.begin(), my_reports.end());
+ }
+ }
+
+public:
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, false, rv);
+ }
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, true, rv);
+ }
+
+ bool canPrune(const flat_set<ReportID> &test_reports) const {
+ if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
+ return false;
+ }
+ return allExternalReports(*rm, test_reports);
+ }
+
+private:
+ const ReportManager *rm;
+public:
+ const NGHolder &graph;
+ u32 numStates;
const flat_set<NFAVertex> unused;
- vector<NFAVertex> v_by_index;
- vector<CharReach> cr_by_index; /* pre alpha'ed */
- StateSet init;
- StateSet initDS;
- StateSet squash; /* states which allow us to mask out other states */
- StateSet accept;
- StateSet acceptEod;
- StateSet toppable; /* states which are allowed to be on when a top arrives,
- * triggered dfas only */
+ vector<NFAVertex> v_by_index;
+ vector<CharReach> cr_by_index; /* pre alpha'ed */
+ StateSet init;
+ StateSet initDS;
+ StateSet squash; /* states which allow us to mask out other states */
+ StateSet accept;
+ StateSet acceptEod;
+ StateSet toppable; /* states which are allowed to be on when a top arrives,
+ * triggered dfas only */
StateSet dead;
- map<u32, StateSet> squash_mask;
- bool prunable;
- array<u16, ALPHABET_SIZE> alpha;
- array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
-
- u16 start_anchored;
- u16 start_floating;
-};
-
+ map<u32, StateSet> squash_mask;
+ bool prunable;
+ array<u16, ALPHABET_SIZE> alpha;
+ array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+
+ u16 start_anchored;
+ u16 start_floating;
+};
+
struct Big_Traits {
using StateSet = dynamic_bitset<>;
using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
-
+
static StateSet init_states(u32 num) {
return StateSet(num);
}
-
+
static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) {
assert(in.size() == num);
return in;
}
};
-
+
class Automaton_Big : public Automaton_Base<Big_Traits> {
public:
Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in,
@@ -481,42 +481,42 @@ public:
: Automaton_Base(rm_in, graph_in, single_trigger, triggers,
prunable_in) {}
};
-
+
struct Graph_Traits {
using StateSet = bitfield<NFA_STATE_LIMIT>;
using StateMap = unordered_map<StateSet, dstate_id_t>;
-
+
static StateSet init_states(UNUSED u32 num) {
assert(num <= NFA_STATE_LIMIT);
return StateSet();
- }
-
+ }
+
static StateSet copy_states(const dynamic_bitset<> &in, u32 num) {
StateSet out = init_states(num);
- for (size_t i = in.find_first(); i != in.npos && i < out.size();
- i = in.find_next(i)) {
- out.set(i);
- }
- return out;
- }
+ for (size_t i = in.find_first(); i != in.npos && i < out.size();
+ i = in.find_next(i)) {
+ out.set(i);
+ }
+ return out;
+ }
};
-
+
class Automaton_Graph : public Automaton_Base<Graph_Traits> {
-public:
+public:
Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
bool single_trigger,
const vector<vector<CharReach>> &triggers, bool prunable_in)
: Automaton_Base(rm_in, graph_in, single_trigger, triggers,
prunable_in) {}
};
-
+
} // namespace
-
+
static
bool startIsRedundant(const NGHolder &g) {
set<NFAVertex> start;
set<NFAVertex> startDs;
-
+
insert(&start, adjacent_vertices(g.start, g));
insert(&startDs, adjacent_vertices(g.startDs, g));
@@ -527,42 +527,42 @@ flat_set<NFAVertex> getRedundantStarts(const NGHolder &g) {
flat_set<NFAVertex> dead;
if (startIsRedundant(g)) {
dead.insert(g.start);
- }
+ }
if (proper_out_degree(g.startDs, g) == 0) {
dead.insert(g.startDs);
- }
+ }
return dead;
}
-
+
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
const ReportManager *rm, bool single_trigger,
- const vector<vector<CharReach>> &triggers,
- const Grey &grey, bool finalChance) {
- if (!grey.allowMcClellan) {
- return nullptr;
- }
-
+ const vector<vector<CharReach>> &triggers,
+ const Grey &grey, bool finalChance) {
+ if (!grey.allowMcClellan) {
+ return nullptr;
+ }
+
DEBUG_PRINTF("attempting to build %s mcclellan\n",
to_string(graph.kind).c_str());
- assert(allMatchStatesHaveReports(graph));
-
+ assert(allMatchStatesHaveReports(graph));
+
bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);
assert(rm || !has_managed_reports(graph));
if (!has_managed_reports(graph)) {
- rm = nullptr;
- }
-
- assert(triggers.empty() == !is_triggered(graph));
-
- /* We must be getting desperate if it is an outfix, so use the final chance
- * state limit logic */
- u32 state_limit
- = (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT
- : DFA_STATE_LIMIT;
-
- const u32 numStates = num_vertices(graph);
- DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
-
+ rm = nullptr;
+ }
+
+ assert(triggers.empty() == !is_triggered(graph));
+
+ /* We must be getting desperate if it is an outfix, so use the final chance
+ * state limit logic */
+ u32 state_limit
+ = (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT
+ : DFA_STATE_LIMIT;
+
+ const u32 numStates = num_vertices(graph);
+ DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
+
if (numStates > FINAL_DFA_STATE_LIMIT) {
DEBUG_PRINTF("rejecting nfa as too many vertices\n");
return nullptr;
@@ -570,47 +570,47 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
auto rdfa = ue2::make_unique<raw_dfa>(graph.kind);
- if (numStates <= NFA_STATE_LIMIT) {
- /* Fast path. Automaton_Graph uses a bitfield internally to represent
- * states and is quicker than Automaton_Big. */
+ if (numStates <= NFA_STATE_LIMIT) {
+ /* Fast path. Automaton_Graph uses a bitfield internally to represent
+ * states and is quicker than Automaton_Big. */
Automaton_Graph n(rm, graph, single_trigger, triggers, prunable);
if (!determinise(n, rdfa->states, state_limit)) {
- DEBUG_PRINTF("state limit exceeded\n");
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
- } else {
- /* Slow path. Too many states to use Automaton_Graph. */
+ DEBUG_PRINTF("state limit exceeded\n");
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+ } else {
+ /* Slow path. Too many states to use Automaton_Graph. */
Automaton_Big n(rm, graph, single_trigger, triggers, prunable);
if (!determinise(n, rdfa->states, state_limit)) {
- DEBUG_PRINTF("state limit exceeded\n");
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
- }
-
- minimize_hopcroft(*rdfa, grey);
-
- DEBUG_PRINTF("after determinised into %zu states, building impl dfa "
- "(a,f) = (%hu,%hu)\n", rdfa->states.size(),
- rdfa->start_anchored, rdfa->start_floating);
-
- return rdfa;
-}
-
-unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
- const Grey &grey) {
- assert(!is_triggered(g));
- vector<vector<CharReach>> triggers;
- return buildMcClellan(g, rm, false, triggers, grey);
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("state limit exceeded\n");
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+ }
+
+ minimize_hopcroft(*rdfa, grey);
+
+ DEBUG_PRINTF("after determinised into %zu states, building impl dfa "
+ "(a,f) = (%hu,%hu)\n", rdfa->states.size(),
+ rdfa->start_anchored, rdfa->start_floating);
+
+ return rdfa;
+}
+
+unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
+ const Grey &grey) {
+ assert(!is_triggered(g));
+ vector<vector<CharReach>> triggers;
+ return buildMcClellan(g, rm, false, triggers, grey);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h
index 1a4042ce66..8183a0d2e7 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h
@@ -1,81 +1,81 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for McClellan DFA.
- */
-
-#ifndef NG_MCCLELLAN_H
-#define NG_MCCLELLAN_H
-
-#include "ue2common.h"
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-class ReportManager;
-struct Grey;
-struct raw_dfa;
-
-/**
- * \brief Determinises an NFA Graph into a raw_dfa.
- *
- * \param g
- * The NGHolder.
- * \param rm
- * A pointer to the ReportManager, if managed reports are used (e.g.
- * for outfixes/suffixes). Otherwise nullptr.
- * \param single_trigger
- * True if it is known that the nfa will only ever be trigger once.
- * \param triggers
- * Representing when tops may arrive. Only used by NFA_INFIX and
- * NFA_SUFFIX, should be empty for other types.
- * \param grey
- * Grey box object.
- * \param finalChance
- * Allows us to build bigger DFAs as the only alternative is an outfix.
- *
- * \return A raw_dfa, or nullptr on failure (state limit blown).
- */
-std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
- const ReportManager *rm, bool single_trigger,
- const std::vector<std::vector<CharReach>> &triggers,
- const Grey &grey, bool finalChance = false);
-
-/** Convenience wrapper for non-triggered engines */
-std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
- const ReportManager *rm,
- const Grey &grey);
-
-} // namespace ue2
-
-#endif // NG_MCCLELLAN_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for McClellan DFA.
+ */
+
+#ifndef NG_MCCLELLAN_H
+#define NG_MCCLELLAN_H
+
+#include "ue2common.h"
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+class ReportManager;
+struct Grey;
+struct raw_dfa;
+
+/**
+ * \brief Determinises an NFA Graph into a raw_dfa.
+ *
+ * \param g
+ * The NGHolder.
+ * \param rm
+ * A pointer to the ReportManager, if managed reports are used (e.g.
+ * for outfixes/suffixes). Otherwise nullptr.
+ * \param single_trigger
+ * True if it is known that the nfa will only ever be trigger once.
+ * \param triggers
+ * Representing when tops may arrive. Only used by NFA_INFIX and
+ * NFA_SUFFIX, should be empty for other types.
+ * \param grey
+ * Grey box object.
+ * \param finalChance
+ * Allows us to build bigger DFAs as the only alternative is an outfix.
+ *
+ * \return A raw_dfa, or nullptr on failure (state limit blown).
+ */
+std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
+ const ReportManager *rm, bool single_trigger,
+ const std::vector<std::vector<CharReach>> &triggers,
+ const Grey &grey, bool finalChance = false);
+
+/** Convenience wrapper for non-triggered engines */
+std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
+ const ReportManager *rm,
+ const Grey &grey);
+
+} // namespace ue2
+
+#endif // NG_MCCLELLAN_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h
index f069d7336f..bfe030b0aa 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shared build code for DFAs (McClellan, Haig).
- */
-
-#ifndef NG_MCCLELLAN_INTERNAL_H
-#define NG_MCCLELLAN_INTERNAL_H
-
-#include "ue2common.h"
-#include "nfa/mcclellancompile.h"
-#include "nfagraph/ng_holder.h"
-#include "util/charreach.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shared build code for DFAs (McClellan, Haig).
+ */
+
+#ifndef NG_MCCLELLAN_INTERNAL_H
+#define NG_MCCLELLAN_INTERNAL_H
+
+#include "ue2common.h"
+#include "nfa/mcclellancompile.h"
+#include "nfagraph/ng_holder.h"
+#include "util/charreach.h"
+#include "util/graph_range.h"
#include "util/flat_containers.h"
-
-#include <boost/dynamic_bitset.hpp>
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-struct raw_dfa;
-
-/** Fills alpha, unalpha and returns alphabet size. */
-u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
- std::array<u16, ALPHABET_SIZE> &alpha,
- std::array<u16, ALPHABET_SIZE> &unalpha);
-
-/** \brief Calculates an alphabet remapping based on the symbols which the
- * graph discriminates on. Throws in some special DFA symbols as well. */
-void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha,
- std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize);
-
-void getFullTransitionFromState(const raw_dfa &n, u16 state,
- u16 *out_table);
-
-/** produce a map of states on which it is valid to receive tops */
+
+#include <boost/dynamic_bitset.hpp>
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+struct raw_dfa;
+
+/** Fills alpha, unalpha and returns alphabet size. */
+u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
+ std::array<u16, ALPHABET_SIZE> &alpha,
+ std::array<u16, ALPHABET_SIZE> &unalpha);
+
+/** \brief Calculates an alphabet remapping based on the symbols which the
+ * graph discriminates on. Throws in some special DFA symbols as well. */
+void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha,
+ std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize);
+
+void getFullTransitionFromState(const raw_dfa &n, u16 state,
+ u16 *out_table);
+
+/** produce a map of states on which it is valid to receive tops */
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
- bool single_trigger,
- const std::vector<std::vector<CharReach>> &triggers,
- boost::dynamic_bitset<> *out);
-
+ bool single_trigger,
+ const std::vector<std::vector<CharReach>> &triggers,
+ boost::dynamic_bitset<> *out);
+
/**
* \brief Returns a set of start vertices that will not participate in an
* implementation of this graph. These are either starts with no successors or
@@ -75,75 +75,75 @@ void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
*/
flat_set<NFAVertex> getRedundantStarts(const NGHolder &g);
-template<typename autom>
-void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
- const typename autom::StateSet &in,
- typename autom::StateSet *next) {
- typedef typename autom::StateSet StateSet;
- const NGHolder &graph = nfa.graph;
+template<typename autom>
+void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
+ const typename autom::StateSet &in,
+ typename autom::StateSet *next) {
+ typedef typename autom::StateSet StateSet;
+ const NGHolder &graph = nfa.graph;
const auto &unused = nfa.unused;
- const auto &alpha = nfa.alpha;
- const StateSet &squash = nfa.squash;
- const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
- const std::vector<CharReach> &cr_by_index = nfa.cr_by_index;
-
- for (symbol_t s = 0; s < nfa.alphasize; s++) {
- next[s].reset();
- }
-
- /* generate top transitions, false -> top = selfloop */
- bool top_allowed = is_triggered(graph);
-
- StateSet succ = nfa.dead;
- for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
- NFAVertex u = vByStateId[i];
-
- for (const auto &v : adjacent_vertices_range(u, graph)) {
+ const auto &alpha = nfa.alpha;
+ const StateSet &squash = nfa.squash;
+ const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
+ const std::vector<CharReach> &cr_by_index = nfa.cr_by_index;
+
+ for (symbol_t s = 0; s < nfa.alphasize; s++) {
+ next[s].reset();
+ }
+
+ /* generate top transitions, false -> top = selfloop */
+ bool top_allowed = is_triggered(graph);
+
+ StateSet succ = nfa.dead;
+ for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
+ NFAVertex u = vByStateId[i];
+
+ for (const auto &v : adjacent_vertices_range(u, graph)) {
if (contains(unused, v)) {
- continue;
- }
- succ.set(graph[v].index);
- }
-
- if (top_allowed && !nfa.toppable.test(i)) {
- /* we don't need to generate a top at this location as we are in
- * an nfa state which cannot be on when a trigger arrives. */
- top_allowed = false;
- }
- }
-
- StateSet active_squash = succ & squash;
- if (active_squash.any()) {
- for (size_t j = active_squash.find_first(); j != active_squash.npos;
- j = active_squash.find_next(j)) {
- succ &= squash_mask.find(j)->second;
- }
- }
-
- for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) {
- const CharReach &cr = cr_by_index[j];
- for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
- next[s].set(j); /* already alpha'ed */
- }
- }
-
- next[alpha[TOP]] = in;
-
- if (top_allowed) {
- /* we don't add in the anchored starts as the only case as the only
- * time it is appropriate is if no characters have been consumed.*/
- next[alpha[TOP]] |= nfa.initDS;
-
- active_squash = next[alpha[TOP]] & squash;
- if (active_squash.any()) {
- for (size_t j = active_squash.find_first(); j != active_squash.npos;
- j = active_squash.find_next(j)) {
- next[alpha[TOP]] &= squash_mask.find(j)->second;
- }
- }
- }
-}
-
-} // namespace ue2
-
-#endif
+ continue;
+ }
+ succ.set(graph[v].index);
+ }
+
+ if (top_allowed && !nfa.toppable.test(i)) {
+ /* we don't need to generate a top at this location as we are in
+ * an nfa state which cannot be on when a trigger arrives. */
+ top_allowed = false;
+ }
+ }
+
+ StateSet active_squash = succ & squash;
+ if (active_squash.any()) {
+ for (size_t j = active_squash.find_first(); j != active_squash.npos;
+ j = active_squash.find_next(j)) {
+ succ &= squash_mask.find(j)->second;
+ }
+ }
+
+ for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) {
+ const CharReach &cr = cr_by_index[j];
+ for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
+ next[s].set(j); /* already alpha'ed */
+ }
+ }
+
+ next[alpha[TOP]] = in;
+
+ if (top_allowed) {
+ /* we don't add in the anchored starts, as the only time that is
+ * appropriate is if no characters have been consumed. */
+ next[alpha[TOP]] |= nfa.initDS;
+
+ active_squash = next[alpha[TOP]] & squash;
+ if (active_squash.any()) {
+ for (size_t j = active_squash.find_first(); j != active_squash.npos;
+ j = active_squash.find_next(j)) {
+ next[alpha[TOP]] &= squash_mask.find(j)->second;
+ }
+ }
+ }
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp
index 8aaaf99fde..b1c8b9b001 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp
@@ -1,556 +1,556 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous optimisations.
- *
- * We sometimes see patterns of the form:
- *
- * /^.*<[^<]*foobaz/s
- *
- * This is bad for Rose as the escapes from the cyclic state are the same as
- * the trigger. However, we can transform this into:
- *
- * /^.*<.*foobaz/s
- *
- * ... as the first dot star can eat all but the last '<'.
- *
- * Slightly more formally:
- *
- * Given a cyclic state v with character reachability v_cr and proper preds
- * {p1 .. pn} with character reachability {p1_cr .. pn_cr}.
- *
- * let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr)
- *
- * v_cr can be replaced with v_cr' without changing the behaviour of the system
- * if:
- *
- * for any given proper pred pi: if pi is set in the nfa then after consuming
- * any symbol in v_cr', pi will still be set in the nfa and every successor of
- * v is a successor of pi.
- *
- * The easiest way for this condition to be satisfied is for each proper pred
- * pi to have all its preds all have an edge to a pred of pi with a character
- * reachability containing v_cr'. There are, however, other ways to establish
- * the condition holds.
- *
- * Note: a similar transformation can be applied in reverse, details left as an
- * exercise for the interested reader. */
-#include "ng_misc_opt.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/charreach.h"
-#include "util/container.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous optimisations.
+ *
+ * We sometimes see patterns of the form:
+ *
+ * /^.*<[^<]*foobaz/s
+ *
+ * This is bad for Rose as the escapes from the cyclic state are the same as
+ * the trigger. However, we can transform this into:
+ *
+ * /^.*<.*foobaz/s
+ *
+ * ... as the first dot star can eat all but the last '<'.
+ *
+ * Slightly more formally:
+ *
+ * Given a cyclic state v with character reachability v_cr and proper preds
+ * {p1 .. pn} with character reachability {p1_cr .. pn_cr}.
+ *
+ * let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr)
+ *
+ * v_cr can be replaced with v_cr' without changing the behaviour of the system
+ * if:
+ *
+ * for any given proper pred pi: if pi is set in the nfa then after consuming
+ * any symbol in v_cr', pi will still be set in the nfa and every successor of
+ * v is a successor of pi.
+ *
+ * The easiest way for this condition to be satisfied is for every pred of each
+ * proper pred pi to have an edge to a pred of pi with a character reachability
+ * containing v_cr'. There are, however, other ways to establish that the
+ * condition holds.
+ *
+ * Note: a similar transformation can be applied in reverse, details left as an
+ * exercise for the interested reader. */
+#include "ng_misc_opt.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/charreach.h"
+#include "util/container.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/flat_containers.h"
-#include "ue2common.h"
-
+#include "ue2common.h"
+
#include <boost/dynamic_bitset.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/filtered_graph.hpp>
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
using boost::make_filtered_graph;
-
-namespace ue2 {
-
-static
-void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering,
- vector<NFAVertex> *cand) {
- for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
- NFAVertex v = *it;
-
- if (is_special(v, g)
- || !hasSelfLoop(v, g)
- || g[v].char_reach.all()) {
- continue;
- }
-
- // For `v' to be a candidate, its predecessors must all have the same
- // successor set as `v'.
-
+
+namespace ue2 {
+
+static
+void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering,
+ vector<NFAVertex> *cand) {
+ for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
+ NFAVertex v = *it;
+
+ if (is_special(v, g)
+ || !hasSelfLoop(v, g)
+ || g[v].char_reach.all()) {
+ continue;
+ }
+
+ // For `v' to be a candidate, its predecessors must all have the same
+ // successor set as `v'.
+
auto succ_v = succs(v, g);
flat_set<NFAVertex> succ_u;
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- succ_u.clear();
- succ(g, u, &succ_u);
- if (succ_v != succ_u) {
- goto next_cand;
- }
- }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ succ_u.clear();
+ succ(g, u, &succ_u);
+ if (succ_v != succ_u) {
+ goto next_cand;
+ }
+ }
DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index);
- cand->push_back(v);
- next_cand:;
- }
-}
-
-static
-void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering,
- vector<NFAVertex> *cand) {
- for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
- NFAVertex v = *it;
-
- if (is_special(v, g)
- || !hasSelfLoop(v, g)
- || g[v].char_reach.all()) {
- continue;
- }
-
- // For `v' to be a candidate, its predecessors must all have the same
- // successor set as `v'.
-
+ cand->push_back(v);
+ next_cand:;
+ }
+}
+
+static
+void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering,
+ vector<NFAVertex> *cand) {
+ for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
+ NFAVertex v = *it;
+
+ if (is_special(v, g)
+ || !hasSelfLoop(v, g)
+ || g[v].char_reach.all()) {
+ continue;
+ }
+
+ // For `v' to be a candidate, its successors must all have the same
+ // predecessor set as `v'.
+
auto pred_v = preds(v, g);
flat_set<NFAVertex> pred_u;
-
- for (auto u : adjacent_vertices_range(v, g)) {
- pred_u.clear();
- pred(g, u, &pred_u);
- if (pred_v != pred_u) {
- goto next_cand;
- }
- }
+
+ for (auto u : adjacent_vertices_range(v, g)) {
+ pred_u.clear();
+ pred(g, u, &pred_u);
+ if (pred_v != pred_u) {
+ goto next_cand;
+ }
+ }
DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index);
- cand->push_back(v);
- next_cand:;
- }
-}
-
-/** Find the intersection of the reachability of the predecessors of \p v. */
-static
-void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
- add.setall();
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u != v) {
- add &= g[u].char_reach;
- }
- }
-}
-
-/** Find the intersection of the reachability of the successors of \p v. */
-static
-void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
- add.setall();
- for (auto u : adjacent_vertices_range(v, g)) {
- if (u != v) {
- add &= g[u].char_reach;
- }
- }
-}
-
-/** The sustain set is used to show that once vertex p is on it stays on given
- * the alphabet new_cr. Every vertex pp in the sustain set has the following
- * properties:
- * -# an edge to p
- * -# enough edges to vertices in the sustain set to ensure that a vertex in
- * the sustain set will be on after consuming a character. */
-static
-set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
- bool ignore_starts, const CharReach &new_cr) {
+ cand->push_back(v);
+ next_cand:;
+ }
+}
+
+/** Find the intersection of the reachability of the predecessors of \p v. */
+static
+void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
+ add.setall();
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u != v) {
+ add &= g[u].char_reach;
+ }
+ }
+}
+
+/** Find the intersection of the reachability of the successors of \p v. */
+static
+void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
+ add.setall();
+ for (auto u : adjacent_vertices_range(v, g)) {
+ if (u != v) {
+ add &= g[u].char_reach;
+ }
+ }
+}
+
+/** The sustain set is used to show that once vertex p is on it stays on given
+ * the alphabet new_cr. Every vertex pp in the sustain set has the following
+ * properties:
+ * -# an edge to p
+ * -# enough edges to vertices in the sustain set to ensure that a vertex in
+ * the sustain set will be on after consuming a character. */
+static
+set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
+ bool ignore_starts, const CharReach &new_cr) {
auto cand = preds<set<NFAVertex>>(p, g);
- if (ignore_starts) {
- cand.erase(g.startDs);
- }
- /* remove elements from cand until the sustain set property holds */
- bool changed;
- do {
- DEBUG_PRINTF("|cand| %zu\n", cand.size());
- changed = false;
- set<NFAVertex>::const_iterator it = cand.begin();
- while (it != cand.end()) {
- NFAVertex u = *it;
- ++it;
- CharReach sus_cr;
- for (auto v : adjacent_vertices_range(u, g)) {
- if (contains(cand, v)) {
- sus_cr |= g[v].char_reach;
- }
- }
-
- if (!new_cr.isSubsetOf(sus_cr)) {
- cand.erase(u);
- changed = true;
- }
- }
- } while (changed);
-
- /* Note: it may be possible to find a (larger) sustain set for a smaller
- * new_cr */
- return cand;
-}
-
-/** Finds the reverse version of the sustain set.. whatever that means. */
-static
-set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p,
- const CharReach &new_cr) {
+ if (ignore_starts) {
+ cand.erase(g.startDs);
+ }
+ /* remove elements from cand until the sustain set property holds */
+ bool changed;
+ do {
+ DEBUG_PRINTF("|cand| %zu\n", cand.size());
+ changed = false;
+ set<NFAVertex>::const_iterator it = cand.begin();
+ while (it != cand.end()) {
+ NFAVertex u = *it;
+ ++it;
+ CharReach sus_cr;
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (contains(cand, v)) {
+ sus_cr |= g[v].char_reach;
+ }
+ }
+
+ if (!new_cr.isSubsetOf(sus_cr)) {
+ cand.erase(u);
+ changed = true;
+ }
+ }
+ } while (changed);
+
+ /* Note: it may be possible to find a (larger) sustain set for a smaller
+ * new_cr */
+ return cand;
+}
+
+/** Finds the reverse version of the sustain set.. whatever that means. */
+static
+set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p,
+ const CharReach &new_cr) {
auto cand = succs<set<NFAVertex>>(p, g);
- /* remove elements from cand until the sustain set property holds */
- bool changed;
- do {
- changed = false;
- set<NFAVertex>::const_iterator it = cand.begin();
- while (it != cand.end()) {
- NFAVertex u = *it;
- ++it;
- CharReach sus_cr;
- for (auto v : inv_adjacent_vertices_range(u, g)) {
- if (contains(cand, v)) {
- sus_cr |= g[v].char_reach;
- }
- }
-
- if (!new_cr.isSubsetOf(sus_cr)) {
- cand.erase(u);
- changed = true;
- }
- }
- } while (changed);
-
- /* Note: it may be possible to find a (larger) sustain set for a smaller
- * new_cr */
- return cand;
-}
-
-static
-bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) {
+ /* remove elements from cand until the sustain set property holds */
+ bool changed;
+ do {
+ changed = false;
+ set<NFAVertex>::const_iterator it = cand.begin();
+ while (it != cand.end()) {
+ NFAVertex u = *it;
+ ++it;
+ CharReach sus_cr;
+ for (auto v : inv_adjacent_vertices_range(u, g)) {
+ if (contains(cand, v)) {
+ sus_cr |= g[v].char_reach;
+ }
+ }
+
+ if (!new_cr.isSubsetOf(sus_cr)) {
+ cand.erase(u);
+ changed = true;
+ }
+ }
+ } while (changed);
+
+ /* Note: it may be possible to find a (larger) sustain set for a smaller
+ * new_cr */
+ return cand;
+}
+
+static
+bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) {
DEBUG_PRINTF("considering vertex %zu\n", g[v].index);
- const CharReach &v_cr = g[v].char_reach;
-
- CharReach add;
- predCRIntersection(g, v, add);
-
- add |= v_cr;
-
- if (add == v_cr) {
- DEBUG_PRINTF("no benefit\n");
- return false;
- }
-
- DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
-
- for (auto p : inv_adjacent_vertices_range(v, g)) {
- if (p == v) {
- continue;
- }
+ const CharReach &v_cr = g[v].char_reach;
+
+ CharReach add;
+ predCRIntersection(g, v, add);
+
+ add |= v_cr;
+
+ if (add == v_cr) {
+ DEBUG_PRINTF("no benefit\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
+
+ for (auto p : inv_adjacent_vertices_range(v, g)) {
+ if (p == v) {
+ continue;
+ }
DEBUG_PRINTF("looking at pred %zu\n", g[p].index);
-
- bool ignore_sds = som; /* if we are tracking som, entries into a state
- from sds are significant. */
-
- set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add);
- DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
- if (sustain.empty()) {
- DEBUG_PRINTF("yawn\n");
- }
-
- for (auto pp : inv_adjacent_vertices_range(p, g)) {
- /* we need to ensure that whenever pp sets p, that a member of the
- sustain set is set. Note: p's cr may be not be a subset of
- new_cr */
- CharReach sustain_cr;
- for (auto pv : adjacent_vertices_range(pp, g)) {
- if (contains(sustain, pv)) {
- sustain_cr |= g[pv].char_reach;
- }
- }
- if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
- DEBUG_PRINTF("unable to establish that preds are forced on\n");
- return false;
- }
- }
- }
-
- /* the cr can be increased */
- g[v].char_reach = add;
+
+ bool ignore_sds = som; /* if we are tracking som, entries into a state
+ from sds are significant. */
+
+ set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add);
+ DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
+ if (sustain.empty()) {
+ DEBUG_PRINTF("yawn\n");
+ }
+
+ for (auto pp : inv_adjacent_vertices_range(p, g)) {
+ /* we need to ensure that whenever pp sets p, that a member of the
+ sustain set is set. Note: p's cr may not be a subset of
+ new_cr */
+ CharReach sustain_cr;
+ for (auto pv : adjacent_vertices_range(pp, g)) {
+ if (contains(sustain, pv)) {
+ sustain_cr |= g[pv].char_reach;
+ }
+ }
+ if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
+ DEBUG_PRINTF("unable to establish that preds are forced on\n");
+ return false;
+ }
+ }
+ }
+
+ /* the cr can be increased */
+ g[v].char_reach = add;
DEBUG_PRINTF("vertex %zu was widened\n", g[v].index);
- return true;
-}
-
-static
-bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) {
+ return true;
+}
+
+static
+bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) {
DEBUG_PRINTF("considering vertex %zu\n", g[v].index);
- const CharReach &v_cr = g[v].char_reach;
-
- CharReach add;
- succCRIntersection(g, v, add);
-
- add |= v_cr;
-
- if (add == v_cr) {
- DEBUG_PRINTF("no benefit\n");
- return false;
- }
-
- DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
-
- for (auto p : adjacent_vertices_range(v, g)) {
- if (p == v) {
- continue;
- }
+ const CharReach &v_cr = g[v].char_reach;
+
+ CharReach add;
+ succCRIntersection(g, v, add);
+
+ add |= v_cr;
+
+ if (add == v_cr) {
+ DEBUG_PRINTF("no benefit\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
+
+ for (auto p : adjacent_vertices_range(v, g)) {
+ if (p == v) {
+ continue;
+ }
DEBUG_PRINTF("looking at succ %zu\n", g[p].index);
-
- set<NFAVertex> sustain = findSustainSet_rev(g, p, add);
- DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
- if (sustain.empty()) {
- DEBUG_PRINTF("yawn\n");
- }
-
- for (auto pp : adjacent_vertices_range(p, g)) {
- /* we need to ensure something - see fwd ver */
- CharReach sustain_cr;
- for (auto pv : inv_adjacent_vertices_range(pp, g)) {
- if (contains(sustain, pv)) {
- sustain_cr |= g[pv].char_reach;
- }
- }
- if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
- DEBUG_PRINTF("unable to establish that succs are thingy\n");
- return false;
- }
- }
- }
-
- /* the cr can be increased */
- g[v].char_reach = add;
+
+ set<NFAVertex> sustain = findSustainSet_rev(g, p, add);
+ DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
+ if (sustain.empty()) {
+ DEBUG_PRINTF("yawn\n");
+ }
+
+ for (auto pp : adjacent_vertices_range(p, g)) {
+ /* we need to ensure something - see fwd ver */
+ CharReach sustain_cr;
+ for (auto pv : inv_adjacent_vertices_range(pp, g)) {
+ if (contains(sustain, pv)) {
+ sustain_cr |= g[pv].char_reach;
+ }
+ }
+ if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
+ DEBUG_PRINTF("unable to establish that succs are thingy\n");
+ return false;
+ }
+ }
+ }
+
+ /* the cr can be increased */
+ g[v].char_reach = add;
DEBUG_PRINTF("vertex %zu was widened\n", g[v].index);
- return true;
-}
-
-static
-bool enlargeCyclicCR(NGHolder &g, som_type som,
- const vector<NFAVertex> &ordering) {
- DEBUG_PRINTF("hello\n");
-
- vector<NFAVertex> candidates;
- findCandidates(g, ordering, &candidates);
-
- bool rv = false;
- for (auto v : candidates) {
- rv |= enlargeCyclicVertex(g, som, v);
- }
-
- return rv;
-}
-
-static
-bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) {
- DEBUG_PRINTF("olleh\n");
-
- vector<NFAVertex> candidates;
- findCandidates_rev(g, ordering, &candidates);
-
- bool rv = false;
- for (auto v : candidates) {
- rv |= enlargeCyclicVertex_rev(g, v);
- }
-
- return rv;
-}
-
-bool improveGraph(NGHolder &g, som_type som) {
- /* use a topo ordering so that we can get chains of cyclic states
- * done in one sweep */
-
- const vector<NFAVertex> ordering = getTopoOrdering(g);
-
- return enlargeCyclicCR(g, som, ordering)
- | enlargeCyclicCR_rev(g, ordering);
-}
-
-/** finds a smaller reachability for a state by the reverse transformation of
- * enlargeCyclicCR. */
-CharReach reduced_cr(NFAVertex v, const NGHolder &g,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
+ return true;
+}
+
+static
+bool enlargeCyclicCR(NGHolder &g, som_type som,
+ const vector<NFAVertex> &ordering) {
+ DEBUG_PRINTF("hello\n");
+
+ vector<NFAVertex> candidates;
+ findCandidates(g, ordering, &candidates);
+
+ bool rv = false;
+ for (auto v : candidates) {
+ rv |= enlargeCyclicVertex(g, som, v);
+ }
+
+ return rv;
+}
+
+static
+bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) {
+ DEBUG_PRINTF("olleh\n");
+
+ vector<NFAVertex> candidates;
+ findCandidates_rev(g, ordering, &candidates);
+
+ bool rv = false;
+ for (auto v : candidates) {
+ rv |= enlargeCyclicVertex_rev(g, v);
+ }
+
+ return rv;
+}
+
+bool improveGraph(NGHolder &g, som_type som) {
+ /* use a topo ordering so that we can get chains of cyclic states
+ * done in one sweep */
+
+ const vector<NFAVertex> ordering = getTopoOrdering(g);
+
+ return enlargeCyclicCR(g, som, ordering)
+ | enlargeCyclicCR_rev(g, ordering);
+}
+
+/** finds a smaller reachability for a state by the reverse transformation of
+ * enlargeCyclicCR. */
+CharReach reduced_cr(NFAVertex v, const NGHolder &g,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index);
- CharReach v_cr = g[v].char_reach;
- if (proper_in_degree(v, g) != 1) {
- return v_cr;
- }
-
- NFAVertex pred = getSoleSourceVertex(g, v);
- assert(pred);
-
- /* require pred to be fed by one vertex OR (start + startDS) */
- NFAVertex predpred;
- size_t idp = in_degree(pred, g);
- if (hasSelfLoop(pred, g)) {
- return v_cr; /* not cliche */
- } else if (idp == 1) {
- predpred = getSoleSourceVertex(g, pred);
- } else if (idp == 2
- && edge(g.start, pred, g).second
- && edge(g.startDs, pred, g).second) {
- predpred = g.startDs;
- } else {
- return v_cr; /* not cliche */
- }
-
- assert(predpred);
-
- /* require predpred to be cyclic and its cr to be a superset of
- pred and v */
- if (!hasSelfLoop(predpred, g)) {
- return v_cr; /* not cliche */
- }
-
- if (contains(br_cyclic, predpred)
- && !br_cyclic.at(predpred).unbounded()) {
- return v_cr; /* fake cyclic */
- }
-
- const CharReach &p_cr = g[pred].char_reach;
- const CharReach &pp_cr = g[predpred].char_reach;
- if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
- return v_cr; /* not cliche */
- }
-
- DEBUG_PRINTF("confirming [x]* prop\n");
- /* we require all of v succs to be succ of p */
- set<NFAVertex> v_succ;
- insert(&v_succ, adjacent_vertices(v, g));
- set<NFAVertex> p_succ;
- insert(&p_succ, adjacent_vertices(pred, g));
-
- if (!is_subset_of(v_succ, p_succ)) {
- DEBUG_PRINTF("fail\n");
- return v_cr; /* not cliche */
- }
-
- if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
- /* need to check that reports of v are a subset of p's */
- if (!is_subset_of(g[v].reports,
- g[pred].reports)) {
- DEBUG_PRINTF("fail - reports not subset\n");
- return v_cr; /* not cliche */
- }
- }
-
- DEBUG_PRINTF("woot success\n");
- v_cr &= ~p_cr;
- return v_cr;
-}
-
-vector<CharReach> reduced_cr(const NGHolder &g,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
- assert(hasCorrectlyNumberedVertices(g));
- vector<CharReach> refined_cr(num_vertices(g), CharReach());
-
- for (auto v : vertices_range(g)) {
- u32 v_idx = g[v].index;
- refined_cr[v_idx] = reduced_cr(v, g, br_cyclic);
- }
-
- return refined_cr;
-}
-
-static
-bool anyOutSpecial(NFAVertex v, const NGHolder &g) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (is_special(w, g) && w != v) {
- return true;
- }
- }
- return false;
-}
-
-bool mergeCyclicDotStars(NGHolder &g) {
- set<NFAVertex> verticesToRemove;
- set<NFAEdge> edgesToRemove;
-
- // avoid graphs where startDs is not a free spirit
- if (out_degree(g.startDs, g) > 1) {
- return false;
- }
-
- // check if any of the connected vertices are dots
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (is_special(v, g)) {
- continue;
- }
- const CharReach &cr = g[v].char_reach;
-
- // if this is a cyclic dot
- if (cr.all() && edge(v, v, g).second) {
- // prevent insane graphs
- if (anyOutSpecial(v, g)) {
- continue;
- }
- // we don't know if we're going to remove this vertex yet
- vector<NFAEdge> deadEdges;
-
- // check if all adjacent vertices have edges from start
- for (const auto &e : out_edges_range(v, g)) {
- NFAVertex t = target(e, g);
- // skip self
- if (t == v) {
- continue;
- }
- // skip vertices that don't have edges from start
- if (!edge(g.start, t, g).second) {
- continue;
- }
- // add an edge from startDs to this vertex
- add_edge_if_not_present(g.startDs, t, g);
-
- // mark this edge for removal
- deadEdges.push_back(e);
- }
- // if the number of edges to be removed equals out degree, vertex
- // needs to be removed; else, only remove the edges
- if (deadEdges.size() == proper_out_degree(v, g)) {
- verticesToRemove.insert(v);
- } else {
- edgesToRemove.insert(deadEdges.begin(), deadEdges.end());
- }
- }
- }
-
- if (verticesToRemove.empty() && edgesToRemove.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("removing %zu edges and %zu vertices\n", edgesToRemove.size(),
- verticesToRemove.size());
- remove_edges(edgesToRemove, g);
- remove_vertices(verticesToRemove, g);
- /* some predecessors to the cyclic vertices may no longer be useful (no out
- * edges), so we can remove them */
- pruneUseless(g);
- return true;
-}
-
+ CharReach v_cr = g[v].char_reach;
+ if (proper_in_degree(v, g) != 1) {
+ return v_cr;
+ }
+
+ NFAVertex pred = getSoleSourceVertex(g, v);
+ assert(pred);
+
+ /* require pred to be fed by one vertex OR (start + startDS) */
+ NFAVertex predpred;
+ size_t idp = in_degree(pred, g);
+ if (hasSelfLoop(pred, g)) {
+ return v_cr; /* not cliche */
+ } else if (idp == 1) {
+ predpred = getSoleSourceVertex(g, pred);
+ } else if (idp == 2
+ && edge(g.start, pred, g).second
+ && edge(g.startDs, pred, g).second) {
+ predpred = g.startDs;
+ } else {
+ return v_cr; /* not cliche */
+ }
+
+ assert(predpred);
+
+ /* require predpred to be cyclic and its cr to be a superset of
+ pred and v */
+ if (!hasSelfLoop(predpred, g)) {
+ return v_cr; /* not cliche */
+ }
+
+ if (contains(br_cyclic, predpred)
+ && !br_cyclic.at(predpred).unbounded()) {
+ return v_cr; /* fake cyclic */
+ }
+
+ const CharReach &p_cr = g[pred].char_reach;
+ const CharReach &pp_cr = g[predpred].char_reach;
+ if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
+ return v_cr; /* not cliche */
+ }
+
+ DEBUG_PRINTF("confirming [x]* prop\n");
+ /* we require all of v succs to be succ of p */
+ set<NFAVertex> v_succ;
+ insert(&v_succ, adjacent_vertices(v, g));
+ set<NFAVertex> p_succ;
+ insert(&p_succ, adjacent_vertices(pred, g));
+
+ if (!is_subset_of(v_succ, p_succ)) {
+ DEBUG_PRINTF("fail\n");
+ return v_cr; /* not cliche */
+ }
+
+ if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
+ /* need to check that reports of v are a subset of p's */
+ if (!is_subset_of(g[v].reports,
+ g[pred].reports)) {
+ DEBUG_PRINTF("fail - reports not subset\n");
+ return v_cr; /* not cliche */
+ }
+ }
+
+ DEBUG_PRINTF("woot success\n");
+ v_cr &= ~p_cr;
+ return v_cr;
+}
+
+vector<CharReach> reduced_cr(const NGHolder &g,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
+ assert(hasCorrectlyNumberedVertices(g));
+ vector<CharReach> refined_cr(num_vertices(g), CharReach());
+
+ for (auto v : vertices_range(g)) {
+ u32 v_idx = g[v].index;
+ refined_cr[v_idx] = reduced_cr(v, g, br_cyclic);
+ }
+
+ return refined_cr;
+}
+
+static
+bool anyOutSpecial(NFAVertex v, const NGHolder &g) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (is_special(w, g) && w != v) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool mergeCyclicDotStars(NGHolder &g) {
+ set<NFAVertex> verticesToRemove;
+ set<NFAEdge> edgesToRemove;
+
+ // avoid graphs where startDs is not a free spirit
+ if (out_degree(g.startDs, g) > 1) {
+ return false;
+ }
+
+ // check if any of the connected vertices are dots
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ const CharReach &cr = g[v].char_reach;
+
+ // if this is a cyclic dot
+ if (cr.all() && edge(v, v, g).second) {
+ // prevent insane graphs
+ if (anyOutSpecial(v, g)) {
+ continue;
+ }
+ // we don't know if we're going to remove this vertex yet
+ vector<NFAEdge> deadEdges;
+
+ // check if all adjacent vertices have edges from start
+ for (const auto &e : out_edges_range(v, g)) {
+ NFAVertex t = target(e, g);
+ // skip self
+ if (t == v) {
+ continue;
+ }
+ // skip vertices that don't have edges from start
+ if (!edge(g.start, t, g).second) {
+ continue;
+ }
+ // add an edge from startDs to this vertex
+ add_edge_if_not_present(g.startDs, t, g);
+
+ // mark this edge for removal
+ deadEdges.push_back(e);
+ }
+ // if the number of edges to be removed equals out degree, vertex
+ // needs to be removed; else, only remove the edges
+ if (deadEdges.size() == proper_out_degree(v, g)) {
+ verticesToRemove.insert(v);
+ } else {
+ edgesToRemove.insert(deadEdges.begin(), deadEdges.end());
+ }
+ }
+ }
+
+ if (verticesToRemove.empty() && edgesToRemove.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu edges and %zu vertices\n", edgesToRemove.size(),
+ verticesToRemove.size());
+ remove_edges(edgesToRemove, g);
+ remove_vertices(verticesToRemove, g);
+ /* some predecessors to the cyclic vertices may no longer be useful (no out
+ * edges), so we can remove them */
+ pruneUseless(g);
+ return true;
+}
+
struct PrunePathsInfo {
explicit PrunePathsInfo(const NGHolder &g)
: color_map(make_small_color_map(g)), bad(num_vertices(g)) {}
@@ -725,4 +725,4 @@ bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) {
return changed;
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h
index 5ed089dc05..70bc7741cb 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h
@@ -1,77 +1,77 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous optimisations.
- */
-
-#ifndef NG_MISC_OPT_H
-#define NG_MISC_OPT_H
-
-#include <map>
-#include <vector>
-
-#include "ng_holder.h"
-#include "som/som.h"
-#include "util/depth.h"
-
-namespace ue2 {
-
-/** Small structure describing the bounds on a repeat. */
-struct BoundedRepeatSummary {
- BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {}
- BoundedRepeatSummary(const depth &min_in, const depth &max_in)
- : repeatMin(min_in), repeatMax(max_in) {
- assert(repeatMin <= repeatMax);
- assert(repeatMax.is_reachable());
- }
- bool unbounded(void) const { return repeatMax.is_infinite(); }
-
- depth repeatMin; //!< minimum repeat bound.
- depth repeatMax; //!< maximum repeat bound.
-};
-
-/* returns true if anything changed */
-bool improveGraph(NGHolder &g, som_type som);
-
-/** Sometimes the reach of a vertex is greater than it needs to be to reduce
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous optimisations.
+ */
+
+#ifndef NG_MISC_OPT_H
+#define NG_MISC_OPT_H
+
+#include <map>
+#include <vector>
+
+#include "ng_holder.h"
+#include "som/som.h"
+#include "util/depth.h"
+
+namespace ue2 {
+
+/** Small structure describing the bounds on a repeat. */
+struct BoundedRepeatSummary {
+ BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {}
+ BoundedRepeatSummary(const depth &min_in, const depth &max_in)
+ : repeatMin(min_in), repeatMax(max_in) {
+ assert(repeatMin <= repeatMax);
+ assert(repeatMax.is_reachable());
+ }
+ bool unbounded(void) const { return repeatMax.is_infinite(); }
+
+ depth repeatMin; //!< minimum repeat bound.
+ depth repeatMax; //!< maximum repeat bound.
+};
+
+/* returns true if anything changed */
+bool improveGraph(NGHolder &g, som_type som);
+
+/** Sometimes the reach of a vertex is greater than it needs to be to reduce
* stop chars for the benefit of the rest of our code base (accel, etc). In
* these circumstances, we can treat the reach as the smaller one as
- * the graphs are equivalent. */
-CharReach reduced_cr(NFAVertex v, const NGHolder &g,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
-
-std::vector<CharReach> reduced_cr(const NGHolder &g,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
-
-/** Remove cyclic stars connected to start */
-bool mergeCyclicDotStars(NGHolder &g);
-
+ * the graphs are equivalent. */
+CharReach reduced_cr(NFAVertex v, const NGHolder &g,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
+
+std::vector<CharReach> reduced_cr(const NGHolder &g,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
+
+/** Remove cyclic stars connected to start */
+bool mergeCyclicDotStars(NGHolder &g);
+
/**
* Given a cyclic state 'c' with a broad reach and a later state 'v' that is
* only reachable if c is still on, then any edges to a successor of a direct
@@ -79,6 +79,6 @@ bool mergeCyclicDotStars(NGHolder &g);
*/
bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp
index 780a319f5d..b81b397bd2 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp
@@ -1,220 +1,220 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Network flow (min flow, max cut) algorithms.
- */
-#include "ng_netflow.h"
-
-#include "ng_holder.h"
-#include "ng_literal_analysis.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Network flow (min flow, max cut) algorithms.
+ */
+#include "ng_netflow.h"
+
+#include "ng_holder.h"
+#include "ng_literal_analysis.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
-#include <algorithm>
-#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
-
-using namespace std;
-using boost::default_color_type;
-
-namespace ue2 {
-
-static
-void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge,
- NFAEdge fwd, NFAEdge rev) {
- u32 fwdIndex = g[fwd].index;
- u32 revIndex = g[rev].index;
-
- // Make sure our vector is big enough.
- size_t sz = max(fwdIndex, revIndex) + 1;
- if (reverseEdge.size() < sz) {
- reverseEdge.resize(sz);
- }
-
- // Add entries to list.
- reverseEdge[fwdIndex] = rev;
- reverseEdge[revIndex] = fwd;
-}
-
-/** Add temporary reverse edges to the graph \p g, as they are required by the
- * BGL's boykov_kolmogorov_max_flow algorithm. */
-static
-void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
- vector<u64a> &capacityMap) {
- // We're probably going to need space for 2x edge count.
- const size_t numEdges = num_edges(g);
- reverseEdge.reserve(numEdges * 2);
- capacityMap.reserve(numEdges * 2);
-
- // To avoid walking the graph for _ages_, we build a temporary map of all
- // edges indexed by vertex pair for existence checks.
- map<pair<size_t, size_t>, NFAEdge> allEdges;
- for (const auto &e : edges_range(g)) {
- NFAVertex u = source(e, g), v = target(e, g);
- size_t uidx = g[u].index, vidx = g[v].index;
- allEdges[make_pair(uidx, vidx)] = e;
- }
-
- // Now we walk over all edges and add their reverse edges to the reverseEdge
- // vector, also adding them to the graph when they don't already exist.
- for (const auto &m : allEdges) {
- const NFAEdge &fwd = m.second;
- const size_t uidx = m.first.first, vidx = m.first.second;
-
- auto it = allEdges.find(make_pair(vidx, uidx));
- if (it == allEdges.end()) {
- // No reverse edge, add one.
- NFAVertex u = source(fwd, g), v = target(fwd, g);
+
+#include <algorithm>
+#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
+
+using namespace std;
+using boost::default_color_type;
+
+namespace ue2 {
+
+static
+void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge,
+ NFAEdge fwd, NFAEdge rev) {
+ u32 fwdIndex = g[fwd].index;
+ u32 revIndex = g[rev].index;
+
+ // Make sure our vector is big enough.
+ size_t sz = max(fwdIndex, revIndex) + 1;
+ if (reverseEdge.size() < sz) {
+ reverseEdge.resize(sz);
+ }
+
+ // Add entries to list.
+ reverseEdge[fwdIndex] = rev;
+ reverseEdge[revIndex] = fwd;
+}
+
+/** Add temporary reverse edges to the graph \p g, as they are required by the
+ * BGL's boykov_kolmogorov_max_flow algorithm. */
+static
+void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
+ vector<u64a> &capacityMap) {
+ // We're probably going to need space for 2x edge count.
+ const size_t numEdges = num_edges(g);
+ reverseEdge.reserve(numEdges * 2);
+ capacityMap.reserve(numEdges * 2);
+
+ // To avoid walking the graph for _ages_, we build a temporary map of all
+ // edges indexed by vertex pair for existence checks.
+ map<pair<size_t, size_t>, NFAEdge> allEdges;
+ for (const auto &e : edges_range(g)) {
+ NFAVertex u = source(e, g), v = target(e, g);
+ size_t uidx = g[u].index, vidx = g[v].index;
+ allEdges[make_pair(uidx, vidx)] = e;
+ }
+
+ // Now we walk over all edges and add their reverse edges to the reverseEdge
+ // vector, also adding them to the graph when they don't already exist.
+ for (const auto &m : allEdges) {
+ const NFAEdge &fwd = m.second;
+ const size_t uidx = m.first.first, vidx = m.first.second;
+
+ auto it = allEdges.find(make_pair(vidx, uidx));
+ if (it == allEdges.end()) {
+ // No reverse edge, add one.
+ NFAVertex u = source(fwd, g), v = target(fwd, g);
NFAEdge rev = add_edge(v, u, g);
- it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
- // Add to capacity map.
- u32 revIndex = g[rev].index;
- if (capacityMap.size() < revIndex + 1) {
- capacityMap.resize(revIndex + 1);
- }
- capacityMap[revIndex] = 0;
- }
-
- addReverseEdge(g, reverseEdge, fwd, it->second);
- }
-}
-
-/** Remove all edges with indices >= \p idx. */
-static
-void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) {
- remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g);
- capacityMap.resize(idx);
+ it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
+ // Add to capacity map.
+ u32 revIndex = g[rev].index;
+ if (capacityMap.size() < revIndex + 1) {
+ capacityMap.resize(revIndex + 1);
+ }
+ capacityMap[revIndex] = 0;
+ }
+
+ addReverseEdge(g, reverseEdge, fwd, it->second);
+ }
+}
+
+/** Remove all edges with indices >= \p idx. */
+static
+void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) {
+ remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g);
+ capacityMap.resize(idx);
renumber_edges(g);
-}
-
-/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and
- * colour map (from which we can find the min cut). */
-static
-u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in,
+}
+
+/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and
+ * colour map (from which we can find the min cut). */
+static
+u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in,
decltype(make_small_color_map(NGHolder())) &colorMap) {
- vector<u64a> capacityMap = capacityMap_in;
- NFAVertex src = h.start;
- NFAVertex sink = h.acceptEod;
-
- // netflow relies on these stylised edges, as all starts should be covered
- // by our source and all accepts by our sink.
- assert(edge(h.start, h.startDs, h).second);
- assert(edge(h.accept, h.acceptEod, h).second);
-
- // The boykov_kolmogorov_max_flow algorithm requires us to have reverse
- // edges for all edges in the graph, so we create them here (and remove
- // them after the call).
- const unsigned int numRealEdges = num_edges(h);
- vector<NFAEdge> reverseEdges;
- addReverseEdges(h, reverseEdges, capacityMap);
-
- const unsigned int numTotalEdges = num_edges(h);
- const unsigned int numVertices = num_vertices(h);
-
- vector<u64a> edgeResiduals(numTotalEdges);
- vector<NFAEdge> predecessors(numVertices);
- vector<s32> distances(numVertices);
-
+ vector<u64a> capacityMap = capacityMap_in;
+ NFAVertex src = h.start;
+ NFAVertex sink = h.acceptEod;
+
+ // netflow relies on these stylised edges, as all starts should be covered
+ // by our source and all accepts by our sink.
+ assert(edge(h.start, h.startDs, h).second);
+ assert(edge(h.accept, h.acceptEod, h).second);
+
+ // The boykov_kolmogorov_max_flow algorithm requires us to have reverse
+ // edges for all edges in the graph, so we create them here (and remove
+ // them after the call).
+ const unsigned int numRealEdges = num_edges(h);
+ vector<NFAEdge> reverseEdges;
+ addReverseEdges(h, reverseEdges, capacityMap);
+
+ const unsigned int numTotalEdges = num_edges(h);
+ const unsigned int numVertices = num_vertices(h);
+
+ vector<u64a> edgeResiduals(numTotalEdges);
+ vector<NFAEdge> predecessors(numVertices);
+ vector<s32> distances(numVertices);
+
auto v_index_map = get(vertex_index, h);
auto e_index_map = get(edge_index, h);
-
+
u64a flow = boykov_kolmogorov_max_flow(h,
- make_iterator_property_map(capacityMap.begin(), e_index_map),
- make_iterator_property_map(edgeResiduals.begin(), e_index_map),
- make_iterator_property_map(reverseEdges.begin(), e_index_map),
- make_iterator_property_map(predecessors.begin(), v_index_map),
+ make_iterator_property_map(capacityMap.begin(), e_index_map),
+ make_iterator_property_map(edgeResiduals.begin(), e_index_map),
+ make_iterator_property_map(reverseEdges.begin(), e_index_map),
+ make_iterator_property_map(predecessors.begin(), v_index_map),
colorMap,
- make_iterator_property_map(distances.begin(), v_index_map),
- v_index_map,
- src, sink);
-
- // Remove reverse edges from graph.
- removeEdgesFromIndex(h, capacityMap, numRealEdges);
+ make_iterator_property_map(distances.begin(), v_index_map),
+ v_index_map,
+ src, sink);
+
+ // Remove reverse edges from graph.
+ removeEdgesFromIndex(h, capacityMap, numRealEdges);
assert(num_edges(h) == numRealEdges);
-
- DEBUG_PRINTF("flow = %llu\n", flow);
- return flow;
-}
-
-/** Returns a min cut (in \p cutset) for the graph in \p h. */
-vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) {
- assert(hasCorrectlyNumberedEdges(h));
- assert(hasCorrectlyNumberedVertices(h));
-
+
+ DEBUG_PRINTF("flow = %llu\n", flow);
+ return flow;
+}
+
+/** Returns a min cut (in \p cutset) for the graph in \p h. */
+vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) {
+ assert(hasCorrectlyNumberedEdges(h));
+ assert(hasCorrectlyNumberedVertices(h));
+
auto colors = make_small_color_map(h);
u64a flow = getMaxFlow(h, scores, colors);
-
- vector<NFAEdge> picked_white;
- vector<NFAEdge> picked_black;
- u64a observed_black_flow = 0;
- u64a observed_white_flow = 0;
-
- for (const auto &e : edges_range(h)) {
- NFAVertex from = source(e, h);
- NFAVertex to = target(e, h);
- u64a ec = scores[h[e].index];
- if (ec == 0) {
- continue; // skips, among other things, reverse edges
- }
-
+
+ vector<NFAEdge> picked_white;
+ vector<NFAEdge> picked_black;
+ u64a observed_black_flow = 0;
+ u64a observed_white_flow = 0;
+
+ for (const auto &e : edges_range(h)) {
+ NFAVertex from = source(e, h);
+ NFAVertex to = target(e, h);
+ u64a ec = scores[h[e].index];
+ if (ec == 0) {
+ continue; // skips, among other things, reverse edges
+ }
+
auto fromColor = get(colors, from);
auto toColor = get(colors, to);
-
+
if (fromColor != small_color::white && toColor == small_color::white) {
- assert(ec <= INVALID_EDGE_CAP);
+ assert(ec <= INVALID_EDGE_CAP);
DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n",
- h[from].index, h[to].index, ec);
- observed_white_flow += ec;
- picked_white.push_back(e);
- }
+ h[from].index, h[to].index, ec);
+ observed_white_flow += ec;
+ picked_white.push_back(e);
+ }
if (fromColor == small_color::black && toColor != small_color::black) {
- assert(ec <= INVALID_EDGE_CAP);
+ assert(ec <= INVALID_EDGE_CAP);
DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n",
- h[from].index, h[to].index, ec);
- observed_black_flow += ec;
- picked_black.push_back(e);
- }
- }
-
- DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow,
- observed_black_flow, observed_white_flow);
+ h[from].index, h[to].index, ec);
+ observed_black_flow += ec;
+ picked_black.push_back(e);
+ }
+ }
+
+ DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow,
+ observed_black_flow, observed_white_flow);
if (min(observed_white_flow, observed_black_flow) != flow) {
- DEBUG_PRINTF("bad cut\n");
- }
-
- if (observed_white_flow < observed_black_flow) {
- return picked_white;
- } else {
- return picked_black;
- }
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("bad cut\n");
+ }
+
+ if (observed_white_flow < observed_black_flow) {
+ return picked_white;
+ } else {
+ return picked_black;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h
index d8e00b8e17..9e9b32e2b3 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Network flow (min flow, max cut) algorithms.
- */
-#ifndef NG_NETFLOW_H
-#define NG_NETFLOW_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
-
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-/** Returns a min cut (in \p cutset) for the graph in \p h. */
-std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Network flow (min flow, max cut) algorithms.
+ */
+#ifndef NG_NETFLOW_H
+#define NG_NETFLOW_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
+
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+/** Returns a min cut (in \p cutset) for the graph in \p h. */
+std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp
index 04611872a4..9ad642ad09 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp
@@ -1,240 +1,240 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Prefilter Reductions.
- *
- * This file contains routines for reducing the size of an NFA graph that we
- * know will be used as a prefilter.
- *
- * The approach used is to consider the graph as a chain of region subgraphs,
- * and to reduce the size of the graph by replacing regions with constructs
- * that can be implemented in fewer states.
- *
- * Right now, the approach used is to replace a region with a bounded repeat of
- * vertices (with bounds derived from the min/max width of the region
- * subgraph). These vertices are given the union of the region's character
- * reachability.
- *
- * For regions with bounded max width, this strategy is quite dependent on the
- * LimEx NFA's bounded repeat functionality.
- */
-#include "ng_prefilter.h"
-
-#include "ng_holder.h"
-#include "ng_region.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "ue2common.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
-
-#include <queue>
+ * \brief Prefilter Reductions.
+ *
+ * This file contains routines for reducing the size of an NFA graph that we
+ * know will be used as a prefilter.
+ *
+ * The approach used is to consider the graph as a chain of region subgraphs,
+ * and to reduce the size of the graph by replacing regions with constructs
+ * that can be implemented in fewer states.
+ *
+ * Right now, the approach used is to replace a region with a bounded repeat of
+ * vertices (with bounds derived from the min/max width of the region
+ * subgraph). These vertices are given the union of the region's character
+ * reachability.
+ *
+ * For regions with bounded max width, this strategy is quite dependent on the
+ * LimEx NFA's bounded repeat functionality.
+ */
+#include "ng_prefilter.h"
+
+#include "ng_holder.h"
+#include "ng_region.h"
+#include "ng_util.h"
+#include "ng_width.h"
+#include "ue2common.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+
+#include <queue>
#include <unordered_map>
#include <unordered_set>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-/** Keep attempting to reduce the size of the graph until the number of
- * vertices falls below this value. */
-static const size_t MAX_COMPONENT_VERTICES = 128;
-
-/** Only replace a region with at least this many vertices. */
-static const size_t MIN_REPLACE_VERTICES = 2;
-
-/** Estimate of how many vertices are required to represent a bounded repeat in
- * the implementation NFA. */
-static const size_t BOUNDED_REPEAT_COUNT = 4;
-
-/** Scoring penalty for boundary regions. */
-static const size_t PENALTY_BOUNDARY = 32;
-
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+/** Keep attempting to reduce the size of the graph until the number of
+ * vertices falls below this value. */
+static const size_t MAX_COMPONENT_VERTICES = 128;
+
+/** Only replace a region with at least this many vertices. */
+static const size_t MIN_REPLACE_VERTICES = 2;
+
+/** Estimate of how many vertices are required to represent a bounded repeat in
+ * the implementation NFA. */
+static const size_t BOUNDED_REPEAT_COUNT = 4;
+
+/** Scoring penalty for boundary regions. */
+static const size_t PENALTY_BOUNDARY = 32;
+
/** Regions with max bounds greater than this value will have their max bound
* replaced with inf. */
static const size_t MAX_REPLACE_BOUND = 10000;
-namespace {
-
-/** Information describing a region. */
-struct RegionInfo {
- explicit RegionInfo(u32 id_in) : id(id_in) {}
- u32 id; //!< region id
- deque<NFAVertex> vertices; //!< vertices in the region
- CharReach reach; //!< union of region reach
+namespace {
+
+/** Information describing a region. */
+struct RegionInfo {
+ explicit RegionInfo(u32 id_in) : id(id_in) {}
+ u32 id; //!< region id
+ deque<NFAVertex> vertices; //!< vertices in the region
+ CharReach reach; //!< union of region reach
depth minWidth{0}; //!< min width of region subgraph
depth maxWidth{depth::infinity()}; //!< max width of region subgraph
- bool atBoundary = false; //!< region is next to an accept
-
- // Bigger score is better.
- size_t score() const {
+ bool atBoundary = false; //!< region is next to an accept
+
+ // Bigger score is better.
+ size_t score() const {
// TODO: charreach should be a signal?
- size_t numVertices = vertices.size();
- if (atBoundary) {
- return numVertices - min(PENALTY_BOUNDARY, numVertices);
- } else {
- return numVertices;
- }
- }
-};
-
-/** Comparator used to order regions for consideration in a priority queue. */
-struct RegionInfoQueueComp {
- bool operator()(const RegionInfo &r1, const RegionInfo &r2) const {
- size_t score1 = r1.score(), score2 = r2.score();
- if (score1 != score2) {
- return score1 < score2;
- }
- if (r1.reach.count() != r2.reach.count()) {
- return r1.reach.count() < r2.reach.count();
- }
- return r1.id < r2.id;
- }
-};
-
-} // namespace
-
-static
-void findWidths(const NGHolder &g,
+ size_t numVertices = vertices.size();
+ if (atBoundary) {
+ return numVertices - min(PENALTY_BOUNDARY, numVertices);
+ } else {
+ return numVertices;
+ }
+ }
+};
+
+/** Comparator used to order regions for consideration in a priority queue. */
+struct RegionInfoQueueComp {
+ bool operator()(const RegionInfo &r1, const RegionInfo &r2) const {
+ size_t score1 = r1.score(), score2 = r2.score();
+ if (score1 != score2) {
+ return score1 < score2;
+ }
+ if (r1.reach.count() != r2.reach.count()) {
+ return r1.reach.count() < r2.reach.count();
+ }
+ return r1.id < r2.id;
+ }
+};
+
+} // namespace
+
+static
+void findWidths(const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- RegionInfo &ri) {
- NGHolder rg;
+ RegionInfo &ri) {
+ NGHolder rg;
unordered_map<NFAVertex, NFAVertex> mapping;
- fillHolder(&rg, g, ri.vertices, &mapping);
-
- // Wire our entries to start and our exits to accept.
- for (auto v : ri.vertices) {
- NFAVertex v_new = mapping[v];
+ fillHolder(&rg, g, ri.vertices, &mapping);
+
+ // Wire our entries to start and our exits to accept.
+ for (auto v : ri.vertices) {
+ NFAVertex v_new = mapping[v];
assert(v_new != NGHolder::null_vertex());
-
- if (isRegionEntry(g, v, region_map) &&
- !edge(rg.start, v_new, rg).second) {
- add_edge(rg.start, v_new, rg);
- }
- if (isRegionExit(g, v, region_map) &&
- !edge(v_new, rg.accept, rg).second) {
- add_edge(v_new, rg.accept, rg);
- }
- }
-
- ri.minWidth = findMinWidth(rg);
- ri.maxWidth = findMaxWidth(rg);
-}
-
-// acc can be either h.accept or h.acceptEod.
-static
-void markBoundaryRegions(const NGHolder &h,
+
+ if (isRegionEntry(g, v, region_map) &&
+ !edge(rg.start, v_new, rg).second) {
+ add_edge(rg.start, v_new, rg);
+ }
+ if (isRegionExit(g, v, region_map) &&
+ !edge(v_new, rg.accept, rg).second) {
+ add_edge(v_new, rg.accept, rg);
+ }
+ }
+
+ ri.minWidth = findMinWidth(rg);
+ ri.maxWidth = findMaxWidth(rg);
+}
+
+// acc can be either h.accept or h.acceptEod.
+static
+void markBoundaryRegions(const NGHolder &h,
const unordered_map<NFAVertex, u32> &region_map,
- map<u32, RegionInfo> &regions, NFAVertex acc) {
- for (auto v : inv_adjacent_vertices_range(acc, h)) {
- if (is_special(v, h)) {
- continue;
- }
- u32 id = region_map.at(v);
-
+ map<u32, RegionInfo> &regions, NFAVertex acc) {
+ for (auto v : inv_adjacent_vertices_range(acc, h)) {
+ if (is_special(v, h)) {
+ continue;
+ }
+ u32 id = region_map.at(v);
+
auto ri = regions.find(id);
- if (ri == regions.end()) {
- continue; // Not tracking this region as it's too small.
- }
-
- ri->second.atBoundary = true;
- }
-}
-
-static
-map<u32, RegionInfo> findRegionInfo(const NGHolder &h,
+ if (ri == regions.end()) {
+ continue; // Not tracking this region as it's too small.
+ }
+
+ ri->second.atBoundary = true;
+ }
+}
+
+static
+map<u32, RegionInfo> findRegionInfo(const NGHolder &h,
const unordered_map<NFAVertex, u32> &region_map) {
- map<u32, RegionInfo> regions;
- for (auto v : vertices_range(h)) {
- if (is_special(v, h)) {
- continue;
- }
- u32 id = region_map.at(v);
+ map<u32, RegionInfo> regions;
+ for (auto v : vertices_range(h)) {
+ if (is_special(v, h)) {
+ continue;
+ }
+ u32 id = region_map.at(v);
RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second;
- ri.vertices.push_back(v);
- ri.reach |= h[v].char_reach;
- }
-
- // There's no point tracking more information about regions that we won't
- // consider replacing, so we remove them from the region map.
+ ri.vertices.push_back(v);
+ ri.reach |= h[v].char_reach;
+ }
+
+ // There's no point tracking more information about regions that we won't
+ // consider replacing, so we remove them from the region map.
for (auto it = regions.begin(); it != regions.end();) {
- if (it->second.vertices.size() < MIN_REPLACE_VERTICES) {
- regions.erase(it++);
- } else {
- ++it;
- }
- }
-
- DEBUG_PRINTF("%zu regions\n", regions.size());
-
- markBoundaryRegions(h, region_map, regions, h.accept);
- markBoundaryRegions(h, region_map, regions, h.acceptEod);
-
- // Determine min/max widths.
- for (RegionInfo &ri : regions | map_values) {
- findWidths(h, region_map, ri);
- DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id,
- ri.atBoundary ? "(boundary) " : "",
- ri.minWidth.str().c_str(), ri.maxWidth.str().c_str());
- }
-
- return regions;
-}
-
-static
+ if (it->second.vertices.size() < MIN_REPLACE_VERTICES) {
+ regions.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+
+ DEBUG_PRINTF("%zu regions\n", regions.size());
+
+ markBoundaryRegions(h, region_map, regions, h.accept);
+ markBoundaryRegions(h, region_map, regions, h.acceptEod);
+
+ // Determine min/max widths.
+ for (RegionInfo &ri : regions | map_values) {
+ findWidths(h, region_map, ri);
+ DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id,
+ ri.atBoundary ? "(boundary) " : "",
+ ri.minWidth.str().c_str(), ri.maxWidth.str().c_str());
+ }
+
+ return regions;
+}
+
+static
void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to) {
- for (const auto &e : in_edges_range(from, g)) {
- NFAVertex u = source(e, g);
+ for (const auto &e : in_edges_range(from, g)) {
+ NFAVertex u = source(e, g);
add_edge_if_not_present(u, to, g[e], g);
- }
-}
-
-static
+ }
+}
+
+static
void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) {
- for (const auto &e : out_edges_range(from, g)) {
- NFAVertex t = target(e, g);
- add_edge_if_not_present(to, t, g[e], g);
-
- if (is_any_accept(t, g)) {
- const auto &reports = g[from].reports;
- g[to].reports.insert(reports.begin(), reports.end());
- }
- }
-}
-
-static
+ for (const auto &e : out_edges_range(from, g)) {
+ NFAVertex t = target(e, g);
+ add_edge_if_not_present(to, t, g[e], g);
+
+ if (is_any_accept(t, g)) {
+ const auto &reports = g[from].reports;
+ g[to].reports.insert(reports.begin(), reports.end());
+ }
+ }
+}
+
+static
void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) {
// Set of vertices in region, for quick lookups.
const unordered_set<NFAVertex> rverts(ri.vertices.begin(),
@@ -250,12 +250,12 @@ void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) {
}
static
-void replaceRegion(NGHolder &g, const RegionInfo &ri,
- size_t *verticesAdded, size_t *verticesRemoved) {
- // TODO: more complex replacements.
- assert(ri.vertices.size() >= MIN_REPLACE_VERTICES);
- assert(ri.minWidth.is_finite());
-
+void replaceRegion(NGHolder &g, const RegionInfo &ri,
+ size_t *verticesAdded, size_t *verticesRemoved) {
+ // TODO: more complex replacements.
+ assert(ri.vertices.size() >= MIN_REPLACE_VERTICES);
+ assert(ri.minWidth.is_finite());
+
depth minWidth = ri.minWidth;
depth maxWidth = ri.maxWidth;
@@ -265,129 +265,129 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri,
maxWidth = depth::infinity();
}
- size_t replacementSize;
+ size_t replacementSize;
if (minWidth == maxWidth || maxWidth.is_infinite()) {
replacementSize = minWidth; // {N} or {N,}
- } else {
+ } else {
replacementSize = maxWidth; // {N,M} case
- }
-
- DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(),
- replacementSize);
-
+ }
+
+ DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(),
+ replacementSize);
+
vector<NFAVertex> verts;
verts.reserve(replacementSize);
- for (size_t i = 0; i < replacementSize; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = ri.reach;
- if (i > 0) {
- add_edge(verts.back(), v, g);
- }
- verts.push_back(v);
- }
-
+ for (size_t i = 0; i < replacementSize; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = ri.reach;
+ if (i > 0) {
+ add_edge(verts.back(), v, g);
+ }
+ verts.push_back(v);
+ }
+
if (maxWidth.is_infinite()) {
- add_edge(verts.back(), verts.back(), g);
- }
-
+ add_edge(verts.back(), verts.back(), g);
+ }
+
removeInteriorEdges(g, ri);
-
- for (size_t i = 0; i < replacementSize; i++) {
- NFAVertex v_new = verts[i];
-
- for (auto v_old : ri.vertices) {
- if (i == 0) {
+
+ for (size_t i = 0; i < replacementSize; i++) {
+ NFAVertex v_new = verts[i];
+
+ for (auto v_old : ri.vertices) {
+ if (i == 0) {
copyInEdges(g, v_old, v_new);
- }
- if (i + 1 >= ri.minWidth) {
+ }
+ if (i + 1 >= ri.minWidth) {
copyOutEdges(g, v_old, v_new);
- }
- }
- }
-
- remove_vertices(ri.vertices, g, false);
-
- *verticesAdded = verts.size();
- *verticesRemoved = ri.vertices.size();
-}
-
-namespace {
-struct SourceHasEdgeToAccept {
- explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {}
- bool operator()(const NFAEdge &e) const {
- return edge(source(e, g), g.accept, g).second;
- }
- const NGHolder &g;
-};
-}
-
-static
-void reduceRegions(NGHolder &h) {
- map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h));
-
- RegionInfoQueueComp cmp;
- priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp);
-
- size_t numVertices = 0;
- for (const RegionInfo &ri : regions | map_values) {
- numVertices += ri.vertices.size();
- pq.push(ri);
- }
-
- while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) {
- const RegionInfo &ri = pq.top();
- DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, "
- "widths=[%s,%s]\n",
- ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(),
- ri.score(), ri.minWidth.str().c_str(),
- ri.maxWidth.str().c_str());
-
- size_t verticesAdded = 0;
- size_t verticesRemoved = 0;
- replaceRegion(h, ri, &verticesAdded, &verticesRemoved);
- DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n",
- verticesRemoved, verticesAdded);
-
- // We are trusting that implementation NFAs will be able to use the
- // LimEx bounded repeat code here.
- numVertices -= verticesRemoved;
- numVertices += BOUNDED_REPEAT_COUNT;
-
- DEBUG_PRINTF("numVertices is now %zu\n", numVertices);
- pq.pop();
- }
-
- // We may have vertices that have edges to both accept and acceptEod: in
- // this case, we can optimize for performance by removing the acceptEod
- // edges.
+ }
+ }
+ }
+
+ remove_vertices(ri.vertices, g, false);
+
+ *verticesAdded = verts.size();
+ *verticesRemoved = ri.vertices.size();
+}
+
+namespace {
+struct SourceHasEdgeToAccept {
+ explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {}
+ bool operator()(const NFAEdge &e) const {
+ return edge(source(e, g), g.accept, g).second;
+ }
+ const NGHolder &g;
+};
+}
+
+static
+void reduceRegions(NGHolder &h) {
+ map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h));
+
+ RegionInfoQueueComp cmp;
+ priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp);
+
+ size_t numVertices = 0;
+ for (const RegionInfo &ri : regions | map_values) {
+ numVertices += ri.vertices.size();
+ pq.push(ri);
+ }
+
+ while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) {
+ const RegionInfo &ri = pq.top();
+ DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, "
+ "widths=[%s,%s]\n",
+ ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(),
+ ri.score(), ri.minWidth.str().c_str(),
+ ri.maxWidth.str().c_str());
+
+ size_t verticesAdded = 0;
+ size_t verticesRemoved = 0;
+ replaceRegion(h, ri, &verticesAdded, &verticesRemoved);
+ DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n",
+ verticesRemoved, verticesAdded);
+
+ // We are trusting that implementation NFAs will be able to use the
+ // LimEx bounded repeat code here.
+ numVertices -= verticesRemoved;
+ numVertices += BOUNDED_REPEAT_COUNT;
+
+ DEBUG_PRINTF("numVertices is now %zu\n", numVertices);
+ pq.pop();
+ }
+
+ // We may have vertices that have edges to both accept and acceptEod: in
+ // this case, we can optimize for performance by removing the acceptEod
+ // edges.
remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h);
-}
-
-void prefilterReductions(NGHolder &h, const CompileContext &cc) {
- if (!cc.grey.prefilterReductions) {
- return;
- }
-
- if (num_vertices(h) <= MAX_COMPONENT_VERTICES) {
- DEBUG_PRINTF("graph is already small enough (%zu vertices)\n",
- num_vertices(h));
- return;
- }
-
+}
+
+void prefilterReductions(NGHolder &h, const CompileContext &cc) {
+ if (!cc.grey.prefilterReductions) {
+ return;
+ }
+
+ if (num_vertices(h) <= MAX_COMPONENT_VERTICES) {
+ DEBUG_PRINTF("graph is already small enough (%zu vertices)\n",
+ num_vertices(h));
+ return;
+ }
+
DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n",
num_vertices(h), num_edges(h));
-
+
renumber_vertices(h);
renumber_edges(h);
-
- reduceRegions(h);
-
+
+ reduceRegions(h);
+
renumber_vertices(h);
renumber_edges(h);
DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n",
num_vertices(h), num_edges(h));
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h
index 88cbefd2de..e1f5c13f37 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h
@@ -1,45 +1,45 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Prefilter Reductions.
- */
-
-#ifndef NG_PREFILTER_H
-#define NG_PREFILTER_H
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-void prefilterReductions(NGHolder &h, const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Prefilter Reductions.
+ */
+
+#ifndef NG_PREFILTER_H
+#define NG_PREFILTER_H
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+void prefilterReductions(NGHolder &h, const CompileContext &cc);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
index adda70312f..997f652d0d 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
@@ -1,434 +1,434 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for pruning unreachable vertices or reports from the graph.
- */
-#include "ng_prune.h"
-
-#include "ng_dominators.h"
-#include "ng_holder.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for pruning unreachable vertices or reports from the graph.
+ */
+#include "ng_prune.h"
+
+#include "ng_dominators.h"
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-#include "util/report_manager.h"
-
-#include <deque>
-#include <map>
-
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::default_color_type;
-using boost::reverse_graph;
-
-namespace ue2 {
-
-/** Remove any vertices that can't be reached by traversing the graph in
- * reverse from acceptEod. */
-void pruneUnreachable(NGHolder &g) {
- deque<NFAVertex> dead;
-
+#include "util/report_manager.h"
+
+#include <deque>
+#include <map>
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::default_color_type;
+using boost::reverse_graph;
+
+namespace ue2 {
+
+/** Remove any vertices that can't be reached by traversing the graph in
+ * reverse from acceptEod. */
+void pruneUnreachable(NGHolder &g) {
+ deque<NFAVertex> dead;
+
if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g)
&& edge(g.accept, g.acceptEod, g).second) {
- // Trivial case: there are no in-edges to our accepts (other than
- // accept->acceptEod), so all non-specials are unreachable.
- for (auto v : vertices_range(g)) {
- if (!is_special(v, g)) {
- dead.push_back(v);
- }
- }
- } else {
- // Walk a reverse graph from acceptEod with Boost's depth_first_visit
- // call.
+ // Trivial case: there are no in-edges to our accepts (other than
+ // accept->acceptEod), so all non-specials are unreachable.
+ for (auto v : vertices_range(g)) {
+ if (!is_special(v, g)) {
+ dead.push_back(v);
+ }
+ }
+ } else {
+ // Walk a reverse graph from acceptEod with Boost's depth_first_visit
+ // call.
typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph;
RevNFAGraph revg(g);
-
+
map<RevNFAGraph::vertex_descriptor, default_color_type> colours;
-
- depth_first_visit(revg, g.acceptEod,
- make_dfs_visitor(boost::null_visitor()),
- make_assoc_property_map(colours));
-
- DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
-
- // All non-special vertices that aren't in the colour map (because they
- // weren't reached) can be removed.
- for (auto v : vertices_range(revg)) {
- if (is_special(v, revg)) {
- continue;
- }
- if (!contains(colours, v)) {
- dead.push_back(v);
- }
- }
- }
-
- if (dead.empty()) {
- DEBUG_PRINTF("no unreachable vertices\n");
- return;
- }
-
- remove_vertices(dead, g, false);
- DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
-}
-
-template<class nfag_t>
-static
+
+ depth_first_visit(revg, g.acceptEod,
+ make_dfs_visitor(boost::null_visitor()),
+ make_assoc_property_map(colours));
+
+ DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
+
+ // All non-special vertices that aren't in the colour map (because they
+ // weren't reached) can be removed.
+ for (auto v : vertices_range(revg)) {
+ if (is_special(v, revg)) {
+ continue;
+ }
+ if (!contains(colours, v)) {
+ dead.push_back(v);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ DEBUG_PRINTF("no unreachable vertices\n");
+ return;
+ }
+
+ remove_vertices(dead, g, false);
+ DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
+}
+
+template<class nfag_t>
+static
bool pruneForwardUseless(NGHolder &h, const nfag_t &g,
typename nfag_t::vertex_descriptor s,
decltype(make_small_color_map(NGHolder())) &colors) {
- // Begin with all vertices set to white, as DFV only marks visited
- // vertices.
+ // Begin with all vertices set to white, as DFV only marks visited
+ // vertices.
colors.fill(small_color::white);
-
+
depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors);
-
- vector<NFAVertex> dead;
-
- // All non-special vertices that are still white can be removed.
- for (auto v : vertices_range(g)) {
+
+ vector<NFAVertex> dead;
+
+ // All non-special vertices that are still white can be removed.
+ for (auto v : vertices_range(g)) {
if (!is_special(v, g) && get(colors, v) == small_color::white) {
DEBUG_PRINTF("vertex %zu is unreachable from %zu\n",
- g[v].index, g[s].index);
+ g[v].index, g[s].index);
dead.push_back(NFAVertex(v));
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("removing %zu vertices\n", dead.size());
- remove_vertices(dead, h, false);
- return true;
-}
-
-/** Remove any vertices which can't be reached by traversing the graph forward
- * from start or in reverse from acceptEod. If \p renumber is false, no
- * vertex/edge renumbering is done. */
-void pruneUseless(NGHolder &g, bool renumber) {
- DEBUG_PRINTF("pruning useless vertices\n");
- assert(hasCorrectlyNumberedVertices(g));
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu vertices\n", dead.size());
+ remove_vertices(dead, h, false);
+ return true;
+}
+
+/** Remove any vertices which can't be reached by traversing the graph forward
+ * from start or in reverse from acceptEod. If \p renumber is false, no
+ * vertex/edge renumbering is done. */
+void pruneUseless(NGHolder &g, bool renumber) {
+ DEBUG_PRINTF("pruning useless vertices\n");
+ assert(hasCorrectlyNumberedVertices(g));
auto colors = make_small_color_map(g);
-
+
bool work_done = pruneForwardUseless(g, g, g.start, colors);
work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g),
g.acceptEod, colors);
-
- if (!work_done) {
- return;
- }
-
- if (renumber) {
+
+ if (!work_done) {
+ return;
+ }
+
+ if (renumber) {
renumber_edges(g);
renumber_vertices(g);
- }
-}
-
-/** This code removes any vertices which do not accept any symbols. Any
- * vertices which no longer lie on a path from a start to an accept are also
- * pruned. */
-void pruneEmptyVertices(NGHolder &g) {
- DEBUG_PRINTF("pruning empty vertices\n");
- vector<NFAVertex> dead;
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
- if (cr.none()) {
+ }
+}
+
+/** This code removes any vertices which do not accept any symbols. Any
+ * vertices which no longer lie on a path from a start to an accept are also
+ * pruned. */
+void pruneEmptyVertices(NGHolder &g) {
+ DEBUG_PRINTF("pruning empty vertices\n");
+ vector<NFAVertex> dead;
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+ if (cr.none()) {
DEBUG_PRINTF("empty: %zu\n", g[v].index);
- dead.push_back(v);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_vertices(dead, g);
- pruneUseless(g);
-}
-
-/** Remove any edges from vertices that generate accepts (for Highlander
- * graphs). */
-void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
- // Safety check: all reports must be simple exhaustible reports, or this is
- // not safe. This optimisation should be called early enough that no
- // internal reports have been added.
- for (auto report_id : all_reports(g)) {
- const Report &ir = rm.getReport(report_id);
-
- if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
- !isExternalReport(ir)) {
- DEBUG_PRINTF("report %u is not external highlander with "
- "no bounds\n", report_id);
- return;
- }
- }
-
- vector<NFAEdge> dead;
- for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
- if (is_special(u, g)) {
- continue;
- }
-
- // We can prune any out-edges that aren't accepts
- for (const auto &e : out_edges_range(u, g)) {
- if (!is_any_accept(target(e, g), g)) {
- dead.push_back(e);
- }
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
-}
-
-static
-bool isDominatedByReporter(const NGHolder &g,
+ dead.push_back(v);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_vertices(dead, g);
+ pruneUseless(g);
+}
+
+/** Remove any edges from vertices that generate accepts (for Highlander
+ * graphs). */
+void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
+ // Safety check: all reports must be simple exhaustible reports, or this is
+ // not safe. This optimisation should be called early enough that no
+ // internal reports have been added.
+ for (auto report_id : all_reports(g)) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
+ !isExternalReport(ir)) {
+ DEBUG_PRINTF("report %u is not external highlander with "
+ "no bounds\n", report_id);
+ return;
+ }
+ }
+
+ vector<NFAEdge> dead;
+ for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
+ // We can prune any out-edges that aren't accepts
+ for (const auto &e : out_edges_range(u, g)) {
+ if (!is_any_accept(target(e, g), g)) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+}
+
+static
+bool isDominatedByReporter(const NGHolder &g,
const unordered_map<NFAVertex, NFAVertex> &dom,
- NFAVertex v, ReportID report_id) {
- for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
- NFAVertex u = it->second;
- // Note: reporters with edges only to acceptEod are not considered to
- // dominate.
- if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
+ NFAVertex v, ReportID report_id) {
+ for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
+ NFAVertex u = it->second;
+ // Note: reporters with edges only to acceptEod are not considered to
+ // dominate.
+ if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n",
- g[v].index, g[u].index, report_id);
- return true;
- }
- v = u;
- }
- return false;
-}
-
-/**
- * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
- * itself and (c) only simple exhaustible reports.
- */
-static
-bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
- const ReportManager &rm,
- NFAVertex v) {
- if (!edge(v, v, g).second) {
- return false;
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v && w != g.accept) {
- return false;
- }
- }
-
- for (const auto &report_id : g[v].reports) {
- if (!isSimpleExhaustible(rm.getReport(report_id))) {
- return false;
- }
- }
-
- return true;
-}
-
-void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
- vector<NFAVertex> reporters;
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- for (const auto &report_id : g[v].reports) {
- const Report &r = rm.getReport(report_id);
- if (isSimpleExhaustible(r)) {
- reporters.push_back(v);
- break;
- }
- }
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- for (const auto &report_id : g[v].reports) {
- const Report &r = rm.getReport(report_id);
- if (isSimpleExhaustible(r)) {
- reporters.push_back(v);
- break;
- }
- }
- }
-
- if (reporters.empty()) {
- return;
- }
-
-
+ g[v].index, g[u].index, report_id);
+ return true;
+ }
+ v = u;
+ }
+ return false;
+}
+
+/**
+ * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
+ * itself and (c) only simple exhaustible reports.
+ */
+static
+bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
+ const ReportManager &rm,
+ NFAVertex v) {
+ if (!edge(v, v, g).second) {
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v && w != g.accept) {
+ return false;
+ }
+ }
+
+ for (const auto &report_id : g[v].reports) {
+ if (!isSimpleExhaustible(rm.getReport(report_id))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
+ vector<NFAVertex> reporters;
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ for (const auto &report_id : g[v].reports) {
+ const Report &r = rm.getReport(report_id);
+ if (isSimpleExhaustible(r)) {
+ reporters.push_back(v);
+ break;
+ }
+ }
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ for (const auto &report_id : g[v].reports) {
+ const Report &r = rm.getReport(report_id);
+ if (isSimpleExhaustible(r)) {
+ reporters.push_back(v);
+ break;
+ }
+ }
+ }
+
+ if (reporters.empty()) {
+ return;
+ }
+
+
sort(begin(reporters), end(reporters));
- reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
-
- DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
- reporters.size());
-
- const auto &dom = findDominators(g);
- bool modified = false;
-
- // If a reporter vertex is dominated by another with the same report, we
- // can remove that report; if all reports are removed, we can remove the
- // vertex entirely.
- for (const auto v : reporters) {
- const auto reports = g[v].reports; // copy, as we're going to mutate
- for (const auto &report_id : reports) {
- if (!isSimpleExhaustible(rm.getReport(report_id))) {
- continue;
- }
- if (isDominatedByReporter(g, dom, v, report_id)) {
+ reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
+
+ DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
+ reporters.size());
+
+ const auto &dom = findDominators(g);
+ bool modified = false;
+
+ // If a reporter vertex is dominated by another with the same report, we
+ // can remove that report; if all reports are removed, we can remove the
+ // vertex entirely.
+ for (const auto v : reporters) {
+ const auto reports = g[v].reports; // copy, as we're going to mutate
+ for (const auto &report_id : reports) {
+ if (!isSimpleExhaustible(rm.getReport(report_id))) {
+ continue;
+ }
+ if (isDominatedByReporter(g, dom, v, report_id)) {
DEBUG_PRINTF("removed dominated report %u from vertex %zu\n",
- report_id, g[v].index);
- g[v].reports.erase(report_id);
- }
- }
-
- if (g[v].reports.empty()) {
+ report_id, g[v].index);
+ g[v].reports.erase(report_id);
+ }
+ }
+
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n",
- g[v].index);
- remove_edge(v, g.accept, g);
- remove_edge(v, g.acceptEod, g);
- modified = true;
- }
- }
-
- // If a reporter vertex has a self-loop, but otherwise only leads to accept
- // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
- // the self-loop.
- for (const auto v : reporters) {
- if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
- remove_edge(v, v, g);
- modified = true;
+ g[v].index);
+ remove_edge(v, g.accept, g);
+ remove_edge(v, g.acceptEod, g);
+ modified = true;
+ }
+ }
+
+ // If a reporter vertex has a self-loop, but otherwise only leads to accept
+ // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
+ // the self-loop.
+ for (const auto v : reporters) {
+ if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
+ remove_edge(v, v, g);
+ modified = true;
DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index);
- }
- }
-
- if (!modified) {
- return;
- }
-
- pruneUseless(g);
-
- // We may have only removed self-loops, in which case pruneUseless wouldn't
- // renumber, so we do edge renumbering explicitly here.
+ }
+ }
+
+ if (!modified) {
+ return;
+ }
+
+ pruneUseless(g);
+
+ // We may have only removed self-loops, in which case pruneUseless wouldn't
+ // renumber, so we do edge renumbering explicitly here.
renumber_edges(g);
-}
-
-/** Removes the given Report ID from vertices connected to accept, and then
- * prunes useless vertices that have had their report sets reduced to empty. */
-void pruneReport(NGHolder &g, ReportID report) {
- set<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(g.accept, g)) {
- NFAVertex u = source(e, g);
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.erase(report);
- if (reports.empty()) {
- dead.insert(e);
- }
- }
- }
-
- for (const auto &e : in_edges_range(g.acceptEod, g)) {
- NFAVertex u = source(e, g);
- if (u == g.accept) {
- continue;
- }
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.erase(report);
- if (reports.empty()) {
- dead.insert(e);
- }
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUnreachable(g);
+}
+
+/** Removes the given Report ID from vertices connected to accept, and then
+ * prunes useless vertices that have had their report sets reduced to empty. */
+void pruneReport(NGHolder &g, ReportID report) {
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.erase(report);
+ if (reports.empty()) {
+ dead.insert(e);
+ }
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ if (u == g.accept) {
+ continue;
+ }
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.erase(report);
+ if (reports.empty()) {
+ dead.insert(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUnreachable(g);
renumber_vertices(g);
renumber_edges(g);
-}
-
-/** Removes all Report IDs bar the given one from vertices connected to accept,
- * and then prunes useless vertices that have had their report sets reduced to
- * empty. */
-void pruneAllOtherReports(NGHolder &g, ReportID report) {
- set<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(g.accept, g)) {
- NFAVertex u = source(e, g);
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.clear();
- reports.insert(report);
- } else {
- reports.clear();
- dead.insert(e);
- }
- }
-
- for (const auto &e : in_edges_range(g.acceptEod, g)) {
- NFAVertex u = source(e, g);
- if (u == g.accept) {
- continue;
- }
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.clear();
- reports.insert(report);
- } else {
- reports.clear();
- dead.insert(e);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUnreachable(g);
+}
+
+/** Removes all Report IDs bar the given one from vertices connected to accept,
+ * and then prunes useless vertices that have had their report sets reduced to
+ * empty. */
+void pruneAllOtherReports(NGHolder &g, ReportID report) {
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.clear();
+ reports.insert(report);
+ } else {
+ reports.clear();
+ dead.insert(e);
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ if (u == g.accept) {
+ continue;
+ }
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.clear();
+ reports.insert(report);
+ } else {
+ reports.clear();
+ dead.insert(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUnreachable(g);
renumber_vertices(g);
renumber_edges(g);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.h b/contrib/libs/hyperscan/src/nfagraph/ng_prune.h
index 475953be3c..0dcef7c8d5 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.h
@@ -1,75 +1,75 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for pruning unreachable vertices or reports from the graph.
- */
-
-#ifndef NG_PRUNE_H
-#define NG_PRUNE_H
-
-#include "ue2common.h"
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-
-/** Remove any vertices that can't be reached by traversing the graph in
- * reverse from acceptEod. */
-void pruneUnreachable(NGHolder &g);
-
-/** Remove any vertices which can't be reached by traversing the graph forward
- * from start or in reverse from acceptEod. If \p renumber is false, no
- * vertex/edge renumbering is done. */
-void pruneUseless(NGHolder &g, bool renumber = true);
-
-/** Remove any vertices with empty reachability. */
-void pruneEmptyVertices(NGHolder &g);
-
-/** Remove any edges from vertices that generate accepts (for Highlander
- * graphs). */
-void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm);
-
-/**
- * Prune highlander reports that are dominated by earlier ones in the graph.
- */
-void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm);
-
-/** Removes the given Report ID from vertices connected to accept, and then
- * prunes useless vertices that have had their report sets reduced to empty. */
-void pruneReport(NGHolder &g, ReportID report);
-
-/** Removes all Report IDs bar the given one from vertices connected to accept,
- * and then prunes useless vertices that have had their report sets reduced to
- * empty. */
-void pruneAllOtherReports(NGHolder &g, ReportID report);
-
-} // namespace ue2
-
-#endif // NG_PRUNE_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for pruning unreachable vertices or reports from the graph.
+ */
+
+#ifndef NG_PRUNE_H
+#define NG_PRUNE_H
+
+#include "ue2common.h"
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+
+/** Remove any vertices that can't be reached by traversing the graph in
+ * reverse from acceptEod. */
+void pruneUnreachable(NGHolder &g);
+
+/** Remove any vertices which can't be reached by traversing the graph forward
+ * from start or in reverse from acceptEod. If \p renumber is false, no
+ * vertex/edge renumbering is done. */
+void pruneUseless(NGHolder &g, bool renumber = true);
+
+/** Remove any vertices with empty reachability. */
+void pruneEmptyVertices(NGHolder &g);
+
+/** Remove any edges from vertices that generate accepts (for Highlander
+ * graphs). */
+void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm);
+
+/**
+ * Prune highlander reports that are dominated by earlier ones in the graph.
+ */
+void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm);
+
+/** Removes the given Report ID from vertices connected to accept, and then
+ * prunes useless vertices that have had their report sets reduced to empty. */
+void pruneReport(NGHolder &g, ReportID report);
+
+/** Removes all Report IDs bar the given one from vertices connected to accept,
+ * and then prunes useless vertices that have had their report sets reduced to
+ * empty. */
+void pruneAllOtherReports(NGHolder &g, ReportID report);
+
+} // namespace ue2
+
+#endif // NG_PRUNE_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp
index 984518b0fc..eb1f7114f6 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp
@@ -1,578 +1,578 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Puff construction from NGHolder.
- */
-#include "ng_puff.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_repeat.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/mpvcompile.h"
-#include "rose/rose_build.h"
-#include "util/compile_context.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static const unsigned MIN_PUFF_LENGTH = 16;
-static const unsigned HEAD_BACKOFF = 16;
-
-static
-size_t countChain(const NGHolder &g, NFAVertex v) {
- size_t count = 0;
- while (v) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Puff construction from NGHolder.
+ */
+#include "ng_puff.h"
+
+#include "grey.h"
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_repeat.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/mpvcompile.h"
+#include "rose/rose_build.h"
+#include "util/compile_context.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static const unsigned MIN_PUFF_LENGTH = 16;
+static const unsigned HEAD_BACKOFF = 16;
+
+static
+size_t countChain(const NGHolder &g, NFAVertex v) {
+ size_t count = 0;
+ while (v) {
DEBUG_PRINTF("counting vertex %zu\n", g[v].index);
- if (is_special(v, g)) {
- break;
- }
-
- count++;
- v = getSoleDestVertex(g, v);
- }
- DEBUG_PRINTF("done %zu\n", count);
- return count;
-}
-
-static
-void wireNewAccepts(NGHolder &g, NFAVertex head,
- const flat_set<ReportID> &chain_reports) {
- for (auto u : inv_adjacent_vertices_range(head, g)) {
- if (is_special(u, g)) {
- continue;
- }
-
+ if (is_special(v, g)) {
+ break;
+ }
+
+ count++;
+ v = getSoleDestVertex(g, v);
+ }
+ DEBUG_PRINTF("done %zu\n", count);
+ return count;
+}
+
+static
+void wireNewAccepts(NGHolder &g, NFAVertex head,
+ const flat_set<ReportID> &chain_reports) {
+ for (auto u : inv_adjacent_vertices_range(head, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index);
- assert(!edge(u, g.accept, g).second);
- assert(!edge(u, g.acceptEod, g).second);
- add_edge(u, g.accept, g);
-
- // Replace reports with our chain reports.
- auto &u_reports = g[u].reports;
- u_reports.clear();
- u_reports.insert(chain_reports.begin(), chain_reports.end());
- }
-}
-
-static
-bool isFixedDepth(const NGHolder &g, NFAVertex v) {
- // If the vertex is reachable from startDs, it can't be fixed depth.
+ assert(!edge(u, g.accept, g).second);
+ assert(!edge(u, g.acceptEod, g).second);
+ add_edge(u, g.accept, g);
+
+ // Replace reports with our chain reports.
+ auto &u_reports = g[u].reports;
+ u_reports.clear();
+ u_reports.insert(chain_reports.begin(), chain_reports.end());
+ }
+}
+
+static
+bool isFixedDepth(const NGHolder &g, NFAVertex v) {
+ // If the vertex is reachable from startDs, it can't be fixed depth.
auto depthFromStartDs = calcDepthsFrom(g, g.startDs);
-
- u32 idx = g[v].index;
- const DepthMinMax &ds = depthFromStartDs.at(idx);
- if (ds.min.is_reachable()) {
- DEBUG_PRINTF("vertex reachable from startDs\n");
- return false;
- }
-
+
+ u32 idx = g[v].index;
+ const DepthMinMax &ds = depthFromStartDs.at(idx);
+ if (ds.min.is_reachable()) {
+ DEBUG_PRINTF("vertex reachable from startDs\n");
+ return false;
+ }
+
auto depthFromStart = calcDepthsFrom(g, g.start);
-
- /* we can still consider the head of a puff chain as at fixed depth if
- * it has a self-loop: so we look at all the preds of v (other than v
- * itself) */
-
- assert(v && !is_special(v, g));
-
- u32 count = 0;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // self-loop
- }
- count++;
-
- idx = g[u].index;
- const DepthMinMax &d = depthFromStart.at(idx);
- if (d.min != d.max) {
- return false;
- }
- }
-
- return count != 0; // at least one fixed-depth pred
-}
-
-static
-bool singleStart(const NGHolder &g) {
- set<NFAVertex> seen;
-
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (!is_special(v, g)) {
+
+ /* we can still consider the head of a puff chain as at fixed depth if
+ * it has a self-loop: so we look at all the preds of v (other than v
+ * itself) */
+
+ assert(v && !is_special(v, g));
+
+ u32 count = 0;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // self-loop
+ }
+ count++;
+
+ idx = g[u].index;
+ const DepthMinMax &d = depthFromStart.at(idx);
+ if (d.min != d.max) {
+ return false;
+ }
+ }
+
+ return count != 0; // at least one fixed-depth pred
+}
+
+static
+bool singleStart(const NGHolder &g) {
+ set<NFAVertex> seen;
+
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (!is_special(v, g)) {
DEBUG_PRINTF("saw %zu\n", g[v].index);
- seen.insert(v);
- }
- }
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (!is_special(v, g)) {
+ seen.insert(v);
+ }
+ }
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (!is_special(v, g)) {
DEBUG_PRINTF("saw %zu\n", g[v].index);
- seen.insert(v);
- }
- }
-
- DEBUG_PRINTF("comp has %zu starts\n", seen.size());
-
- return seen.size() == 1;
-}
-
-static
-bool triggerResetsPuff(const NGHolder &g, NFAVertex head) {
- const CharReach puff_escapes = ~g[head].char_reach;
-
- for (auto u : inv_adjacent_vertices_range(head, g)) {
- if (!g[u].char_reach.isSubsetOf(puff_escapes)) {
+ seen.insert(v);
+ }
+ }
+
+ DEBUG_PRINTF("comp has %zu starts\n", seen.size());
+
+ return seen.size() == 1;
+}
+
+static
+bool triggerResetsPuff(const NGHolder &g, NFAVertex head) {
+ const CharReach puff_escapes = ~g[head].char_reach;
+
+ for (auto u : inv_adjacent_vertices_range(head, g)) {
+ if (!g[u].char_reach.isSubsetOf(puff_escapes)) {
DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index,
- g[head].index);
- return false;
- }
- }
-
- DEBUG_PRINTF("reset on trigger\n");
- return true;
-}
-
-/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform)
- * */
-static
-bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) {
+ g[head].index);
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("reset on trigger\n");
+ return true;
+}
+
+/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform)
+ * */
+static
+bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) {
DEBUG_PRINTF("head = %zu\n", g[head].index);
-
- const CharReach &puff_cr = g[head].char_reach;
-
- /* we can use the pred of the head as the base of our check if it the cr
- * matches as if
- * head cr subsetof pred cr: if head is being pushed on then puff must
- * still being pushed on
- * pred cr subsetof head cr: if the puff matches then head must be also
- * always be on if the is connected to a wide enough cyclic
- */
- if (proper_in_degree(head, g) == 1
- && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) {
- head = getSoleSourceVertex(g, head);
+
+ const CharReach &puff_cr = g[head].char_reach;
+
+ /* we can use the pred of the head as the base of our check if it the cr
+ * matches as if
+ * head cr subsetof pred cr: if head is being pushed on then puff must
+ * still being pushed on
+ * pred cr subsetof head cr: if the puff matches then head must be also
+ * always be on if the is connected to a wide enough cyclic
+ */
+ if (proper_in_degree(head, g) == 1
+ && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) {
+ head = getSoleSourceVertex(g, head);
DEBUG_PRINTF("temp new head = %zu\n", g[head].index);
- }
-
- for (auto s : inv_adjacent_vertices_range(head, g)) {
+ }
+
+ for (auto s : inv_adjacent_vertices_range(head, g)) {
DEBUG_PRINTF("s = %zu\n", g[s].index);
- if (!puff_cr.isSubsetOf(g[s].char_reach)) {
+ if (!puff_cr.isSubsetOf(g[s].char_reach)) {
DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index,
g[head].index);
- return false;
- }
-
- if (!hasSelfLoop(s, g) && s != g.start) {
- DEBUG_PRINTF("no self loop\n");
- return false;
- }
-
- if (s == g.start && !edge(g.startDs, head, g).second) {
- DEBUG_PRINTF("not float\n");
- return false;
- }
- }
-
- DEBUG_PRINTF("reset on trigger\n");
- return true;
-}
-
-static
-u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
- NFAVertex pv, bool prefilter) {
- CharReach accept_cr;
- DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first());
-
- if (prefilter) {
- /* a later prefilter stage make weaken the lead up so we can't be sure
- * that all the triggers will be squashing the puffette. */
- return 0;
- }
-
- /* TODO: inspect further back in the pattern */
- for (auto u : inv_adjacent_vertices_range(pv, g)) {
- accept_cr |= g[u].char_reach;
- }
-
- DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
-
- if ((accept_cr & cr).any()) {
- return 0; /* the accept byte doesn't always kill the puffette. TODO:
- * maybe if we look further back we could find something that
- * would kill the puffette... */
- }
- DEBUG_PRINTF("returning squash distance of %u\n", min_width);
- return min_width;
-}
-
-/** Gives a stronger puff trigger when the trigger is connected to a wide
- * cyclic state (aside from sds) */
-static
-void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
- DEBUG_PRINTF("attempting to improve puff trigger\n");
- assert(!nodes->empty());
- const CharReach &puff_cr = g[nodes->back()].char_reach;
- if (puff_cr.all()) {
- return; /* we can't really do much with this one */
- }
-
- /* add the runway */
- DEBUG_PRINTF("backing off - allowing a decent header\n");
- assert(nodes->size() > HEAD_BACKOFF);
- for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) {
- nodes->pop_back();
- }
- *a = nodes->back();
- nodes->pop_back();
-}
-
-static
-void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
- const CharReach &cr, const ReportID report, u32 width,
- bool fixed_depth, bool unbounded, bool auto_restart,
- RoseBuild &rose, ReportManager &rm,
- flat_set<ReportID> &chain_reports, bool prefilter) {
- DEBUG_PRINTF("constructing Puff for report %u\n", report);
+ return false;
+ }
+
+ if (!hasSelfLoop(s, g) && s != g.start) {
+ DEBUG_PRINTF("no self loop\n");
+ return false;
+ }
+
+ if (s == g.start && !edge(g.startDs, head, g).second) {
+ DEBUG_PRINTF("not float\n");
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("reset on trigger\n");
+ return true;
+}
+
+static
+u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
+ NFAVertex pv, bool prefilter) {
+ CharReach accept_cr;
+ DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first());
+
+ if (prefilter) {
+ /* a later prefilter stage make weaken the lead up so we can't be sure
+ * that all the triggers will be squashing the puffette. */
+ return 0;
+ }
+
+ /* TODO: inspect further back in the pattern */
+ for (auto u : inv_adjacent_vertices_range(pv, g)) {
+ accept_cr |= g[u].char_reach;
+ }
+
+ DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
+
+ if ((accept_cr & cr).any()) {
+ return 0; /* the accept byte doesn't always kill the puffette. TODO:
+ * maybe if we look further back we could find something that
+ * would kill the puffette... */
+ }
+ DEBUG_PRINTF("returning squash distance of %u\n", min_width);
+ return min_width;
+}
+
+/** Gives a stronger puff trigger when the trigger is connected to a wide
+ * cyclic state (aside from sds) */
+static
+void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
+ DEBUG_PRINTF("attempting to improve puff trigger\n");
+ assert(!nodes->empty());
+ const CharReach &puff_cr = g[nodes->back()].char_reach;
+ if (puff_cr.all()) {
+ return; /* we can't really do much with this one */
+ }
+
+ /* add the runway */
+ DEBUG_PRINTF("backing off - allowing a decent header\n");
+ assert(nodes->size() > HEAD_BACKOFF);
+ for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) {
+ nodes->pop_back();
+ }
+ *a = nodes->back();
+ nodes->pop_back();
+}
+
+static
+void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
+ const CharReach &cr, const ReportID report, u32 width,
+ bool fixed_depth, bool unbounded, bool auto_restart,
+ RoseBuild &rose, ReportManager &rm,
+ flat_set<ReportID> &chain_reports, bool prefilter) {
+ DEBUG_PRINTF("constructing Puff for report %u\n", report);
DEBUG_PRINTF("a = %zu\n", g[a].index);
-
+
const Report &puff_report = rm.getReport(report);
const bool simple_exhaust = isSimpleExhaustible(puff_report);
- const bool pureAnchored = a == g.start && singleStart(g);
- if (!pureAnchored) {
- if (a == g.startDs || a == g.start) {
- DEBUG_PRINTF("add outfix ar(false)\n");
-
+ const bool pureAnchored = a == g.start && singleStart(g);
+ if (!pureAnchored) {
+ if (a == g.startDs || a == g.start) {
+ DEBUG_PRINTF("add outfix ar(false)\n");
+
raw_puff rp(width, unbounded, report, cr, auto_restart,
simple_exhaust);
- rose.addOutfix(rp);
- return;
- }
-
- DEBUG_PRINTF("add chain tail\n");
- u32 qi = ~0U;
- u32 event = MQE_TOP;
- raw_puff rp(width, unbounded, report, cr);
- rose.addChainTail(rp, &qi, &event);
- assert(qi != ~0U);
- u32 squashDistance = allowedSquashDistance(cr, width, g, puffv,
- prefilter);
-
+ rose.addOutfix(rp);
+ return;
+ }
+
+ DEBUG_PRINTF("add chain tail\n");
+ u32 qi = ~0U;
+ u32 event = MQE_TOP;
+ raw_puff rp(width, unbounded, report, cr);
+ rose.addChainTail(rp, &qi, &event);
+ assert(qi != ~0U);
+ u32 squashDistance = allowedSquashDistance(cr, width, g, puffv,
+ prefilter);
+
Report ir = makeMpvTrigger(event, squashDistance);
- /* only need to trigger once if floatingUnboundedDot */
- bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth;
- if (floatingUnboundedDot) {
- ir.ekey = rm.getUnassociatedExhaustibleKey();
- }
- ReportID id = rm.getInternalId(ir);
- chain_reports.insert(id);
- } else {
- DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart);
- assert(!auto_restart || unbounded);
+ /* only need to trigger once if floatingUnboundedDot */
+ bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth;
+ if (floatingUnboundedDot) {
+ ir.ekey = rm.getUnassociatedExhaustibleKey();
+ }
+ ReportID id = rm.getInternalId(ir);
+ chain_reports.insert(id);
+ } else {
+ DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart);
+ assert(!auto_restart || unbounded);
raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust);
- rose.addOutfix(rp);
- }
-}
-
-static
-bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
- set<NFAVertex> &dead, const CompileContext &cc,
- bool prefilter) {
- DEBUG_PRINTF("hello\n");
- vector<NFAVertex> nodes;
- const CharReach &cr = g[a].char_reach;
- bool isDot = cr.all();
- bool unbounded = false;
- bool exhaustible = can_exhaust(g, rm);
-
- while (true) {
- if (is_special(a, g)) {
- DEBUG_PRINTF("stopped puffing due to special vertex\n");
- break;
- }
-
- if (g[a].char_reach != cr) {
- DEBUG_PRINTF("stopped puffing due to change in character "
- "reachability\n");
- break;
- }
-
- if (proper_in_degree(a, g) != 1) {
- DEBUG_PRINTF("stopped puffing due to in degree != 1\n");
- break;
- }
-
- size_t outDegree = out_degree(a, g);
- if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) {
- DEBUG_PRINTF("stopping puffing due to out degree\n");
- break;
- }
-
- if (hasSelfLoop(a, g)) {
- DEBUG_PRINTF("has self-loop, marking unbounded\n");
- unbounded = true;
- }
-
- nodes.push_back(a);
+ rose.addOutfix(rp);
+ }
+}
+
+static
+bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
+ set<NFAVertex> &dead, const CompileContext &cc,
+ bool prefilter) {
+ DEBUG_PRINTF("hello\n");
+ vector<NFAVertex> nodes;
+ const CharReach &cr = g[a].char_reach;
+ bool isDot = cr.all();
+ bool unbounded = false;
+ bool exhaustible = can_exhaust(g, rm);
+
+ while (true) {
+ if (is_special(a, g)) {
+ DEBUG_PRINTF("stopped puffing due to special vertex\n");
+ break;
+ }
+
+ if (g[a].char_reach != cr) {
+ DEBUG_PRINTF("stopped puffing due to change in character "
+ "reachability\n");
+ break;
+ }
+
+ if (proper_in_degree(a, g) != 1) {
+ DEBUG_PRINTF("stopped puffing due to in degree != 1\n");
+ break;
+ }
+
+ size_t outDegree = out_degree(a, g);
+ if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) {
+ DEBUG_PRINTF("stopping puffing due to out degree\n");
+ break;
+ }
+
+ if (hasSelfLoop(a, g)) {
+ DEBUG_PRINTF("has self-loop, marking unbounded\n");
+ unbounded = true;
+ }
+
+ nodes.push_back(a);
DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index,
- in_degree(a, g));
-
- a = getSoleSourceVertex(g, a);
-
- assert(a); /* already checked that old a had a proper in degree of 1 */
-
- // Snark: we can't handle this case, because we can only handle a
- // single report ID on a vertex
- if (is_match_vertex(a, g)) {
- DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
- if (!nodes.empty()) {
- nodes.pop_back();
- }
- break;
- }
- }
-
- if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) {
- for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) {
- if (is_special(u, g)) {
- DEBUG_PRINTF("pop\n");
- a = nodes.back();
- nodes.pop_back();
- break;
- }
- }
- }
-
- if (a != g.startDs && edge(g.startDs, a, g).second
- && proper_out_degree(a, g) == 1
- && g[a].char_reach == cr) {
- nodes.push_back(a);
- a = g.startDs;
- }
-
- bool auto_restart = false;
-
+ in_degree(a, g));
+
+ a = getSoleSourceVertex(g, a);
+
+ assert(a); /* already checked that old a had a proper in degree of 1 */
+
+ // Snark: we can't handle this case, because we can only handle a
+ // single report ID on a vertex
+ if (is_match_vertex(a, g)) {
+ DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
+ if (!nodes.empty()) {
+ nodes.pop_back();
+ }
+ break;
+ }
+ }
+
+ if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) {
+ for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) {
+ if (is_special(u, g)) {
+ DEBUG_PRINTF("pop\n");
+ a = nodes.back();
+ nodes.pop_back();
+ break;
+ }
+ }
+ }
+
+ if (a != g.startDs && edge(g.startDs, a, g).second
+ && proper_out_degree(a, g) == 1
+ && g[a].char_reach == cr) {
+ nodes.push_back(a);
+ a = g.startDs;
+ }
+
+ bool auto_restart = false;
+
DEBUG_PRINTF("a = %zu\n", g[a].index);
-
- if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) {
+
+ if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) {
DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index);
- if (nodes.size() < MIN_PUFF_LENGTH) {
- return false;
- } else {
- DEBUG_PRINTF("mark unbounded\n");
- unbounded = true;
- a = g.start;
- auto_restart = !isDot;
- }
- }
-
- bool supported = false;
- bool fixed_depth = isFixedDepth(g, nodes.back());
-
- if (exhaustible) {
- supported = true;
- } else if (fixed_depth) {
- supported = true;
- } else if (unbounded) {
- /* any C{n, } can be supported as all ranges will be squashed together
- * only need to track the first */
- supported = true;
- } else if (triggerResetsPuff(g, nodes.back())) {
- supported = true;
- } else if (triggerFloodsPuff(g, nodes.back())) {
- DEBUG_PRINTF("trigger floods puff\n");
- supported = true;
- unbounded = true;
- }
-
- if (!supported) {
- DEBUG_PRINTF("not supported\n");
- return false;
- }
-
- if (cc.grey.puffImproveHead && a != g.start) {
- if (edge(g.startDs, a, g).second) {
- goto skip_improve; /* direct sds cases are better handled by auto
- * restarting puffettes */
- }
-
- if (fixed_depth) {
- goto skip_improve; /* no danger of trigger floods */
- }
-
- /* if we come after something literalish don't bother */
- if (g[a].char_reach.count() <= 2
- && in_degree(a, g) == 1
- && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) {
- goto skip_improve;
- }
-
- if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) {
- return false; /* not enough of the puff left to worth bothering
- about */
- }
-
- improveHead(g, &a, &nodes);
- skip_improve:;
- }
-
- assert(!nodes.empty());
- const auto &reports = g[nodes[0]].reports;
- assert(!reports.empty());
-
- for (auto report : reports) {
- const Report &ir = rm.getReport(report);
- const bool highlander = ir.ekey != INVALID_EKEY;
- if (!unbounded && highlander && !isSimpleExhaustible(ir)) {
- DEBUG_PRINTF("report %u is bounded highlander but not simple "
- "exhaustible\n",
- report);
- return false;
- }
-
- if (ir.type == INTERNAL_ROSE_CHAIN) {
- DEBUG_PRINTF("puffettes cannot be chained together\n");
- return false;
- }
- }
-
- NFAVertex puffv = nodes.back();
+ if (nodes.size() < MIN_PUFF_LENGTH) {
+ return false;
+ } else {
+ DEBUG_PRINTF("mark unbounded\n");
+ unbounded = true;
+ a = g.start;
+ auto_restart = !isDot;
+ }
+ }
+
+ bool supported = false;
+ bool fixed_depth = isFixedDepth(g, nodes.back());
+
+ if (exhaustible) {
+ supported = true;
+ } else if (fixed_depth) {
+ supported = true;
+ } else if (unbounded) {
+ /* any C{n, } can be supported as all ranges will be squashed together
+ * only need to track the first */
+ supported = true;
+ } else if (triggerResetsPuff(g, nodes.back())) {
+ supported = true;
+ } else if (triggerFloodsPuff(g, nodes.back())) {
+ DEBUG_PRINTF("trigger floods puff\n");
+ supported = true;
+ unbounded = true;
+ }
+
+ if (!supported) {
+ DEBUG_PRINTF("not supported\n");
+ return false;
+ }
+
+ if (cc.grey.puffImproveHead && a != g.start) {
+ if (edge(g.startDs, a, g).second) {
+ goto skip_improve; /* direct sds cases are better handled by auto
+ * restarting puffettes */
+ }
+
+ if (fixed_depth) {
+ goto skip_improve; /* no danger of trigger floods */
+ }
+
+ /* if we come after something literalish don't bother */
+ if (g[a].char_reach.count() <= 2
+ && in_degree(a, g) == 1
+ && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) {
+ goto skip_improve;
+ }
+
+ if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) {
+ return false; /* not enough of the puff left to worth bothering
+ about */
+ }
+
+ improveHead(g, &a, &nodes);
+ skip_improve:;
+ }
+
+ assert(!nodes.empty());
+ const auto &reports = g[nodes[0]].reports;
+ assert(!reports.empty());
+
+ for (auto report : reports) {
+ const Report &ir = rm.getReport(report);
+ const bool highlander = ir.ekey != INVALID_EKEY;
+ if (!unbounded && highlander && !isSimpleExhaustible(ir)) {
+ DEBUG_PRINTF("report %u is bounded highlander but not simple "
+ "exhaustible\n",
+ report);
+ return false;
+ }
+
+ if (ir.type == INTERNAL_ROSE_CHAIN) {
+ DEBUG_PRINTF("puffettes cannot be chained together\n");
+ return false;
+ }
+ }
+
+ NFAVertex puffv = nodes.back();
assert(puffv != NGHolder::null_vertex());
- u32 width = countChain(g, nodes.back());
-
- flat_set<ReportID> chain_reports;
-
- for (auto report : reports) {
- constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded,
- auto_restart, rose, rm, chain_reports, prefilter);
- }
-
- if (!chain_reports.empty()) {
- wireNewAccepts(g, puffv, chain_reports);
- }
-
- dead.insert(nodes.begin(), nodes.end());
- return true;
-}
-
-bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
- bool prefilter, const CompileContext &cc) {
- if (!cc.grey.allowPuff) {
- return false;
- }
-
- size_t count = 0;
- set<NFAVertex> dead;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (doComponent(rose, rm, g, v, dead, cc, prefilter)) {
- count++;
- }
- }
-
- if (!dead.empty()) {
- remove_vertices(dead, g);
- pruneUseless(g);
- }
-
- DEBUG_PRINTF("puffs: %zu\n", count);
- return num_vertices(g) <= N_SPECIALS;
-}
-
-bool isPuffable(const NGHolder &g, bool fixed_depth,
- const ReportManager &rm, const Grey &grey) {
- if (!grey.allowPuff) {
- return false;
- }
-
- if (!onlyOneTop(g)) {
- DEBUG_PRINTF("more than one top\n");
- return false;
- }
-
- const set<ReportID> reports = all_reports(g);
- if (reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- return false;
- }
-
- const Report &ir = rm.getReport(*reports.begin());
-
- if (ir.type == INTERNAL_ROSE_CHAIN) {
- DEBUG_PRINTF("puffettes cannot be chained together\n");
- return false;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(g, repeat)) {
- DEBUG_PRINTF("not pure bounded repeat\n");
- return false;
- }
-
- if (repeat.bounds.min == depth(0)) {
- DEBUG_PRINTF("repeat min bound is zero\n");
- return false;
- }
-
- // We can puff if:
- // (a) repeat is {N,}; or
- // (b) repeat is {N} and fixed-depth, or highlander (and will accept the
- // first match)
-
- DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str());
-
- if (repeat.bounds.max.is_infinite()) {
- return true;
- }
-
- if (repeat.bounds.min == repeat.bounds.max) {
- if (fixed_depth) {
- DEBUG_PRINTF("fixed depth\n");
- return true;
- }
-
- const bool highlander = ir.ekey != INVALID_EKEY;
-
- // If we're highlander, we must be simple-exhaustible as well.
- if (highlander && isSimpleExhaustible(ir)) {
- return true;
- }
- }
-
- return false;
-}
-
-} // namespace ue2
+ u32 width = countChain(g, nodes.back());
+
+ flat_set<ReportID> chain_reports;
+
+ for (auto report : reports) {
+ constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded,
+ auto_restart, rose, rm, chain_reports, prefilter);
+ }
+
+ if (!chain_reports.empty()) {
+ wireNewAccepts(g, puffv, chain_reports);
+ }
+
+ dead.insert(nodes.begin(), nodes.end());
+ return true;
+}
+
+bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
+ bool prefilter, const CompileContext &cc) {
+ if (!cc.grey.allowPuff) {
+ return false;
+ }
+
+ size_t count = 0;
+ set<NFAVertex> dead;
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (doComponent(rose, rm, g, v, dead, cc, prefilter)) {
+ count++;
+ }
+ }
+
+ if (!dead.empty()) {
+ remove_vertices(dead, g);
+ pruneUseless(g);
+ }
+
+ DEBUG_PRINTF("puffs: %zu\n", count);
+ return num_vertices(g) <= N_SPECIALS;
+}
+
+bool isPuffable(const NGHolder &g, bool fixed_depth,
+ const ReportManager &rm, const Grey &grey) {
+ if (!grey.allowPuff) {
+ return false;
+ }
+
+ if (!onlyOneTop(g)) {
+ DEBUG_PRINTF("more than one top\n");
+ return false;
+ }
+
+ const set<ReportID> reports = all_reports(g);
+ if (reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ return false;
+ }
+
+ const Report &ir = rm.getReport(*reports.begin());
+
+ if (ir.type == INTERNAL_ROSE_CHAIN) {
+ DEBUG_PRINTF("puffettes cannot be chained together\n");
+ return false;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(g, repeat)) {
+ DEBUG_PRINTF("not pure bounded repeat\n");
+ return false;
+ }
+
+ if (repeat.bounds.min == depth(0)) {
+ DEBUG_PRINTF("repeat min bound is zero\n");
+ return false;
+ }
+
+ // We can puff if:
+ // (a) repeat is {N,}; or
+ // (b) repeat is {N} and fixed-depth, or highlander (and will accept the
+ // first match)
+
+ DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str());
+
+ if (repeat.bounds.max.is_infinite()) {
+ return true;
+ }
+
+ if (repeat.bounds.min == repeat.bounds.max) {
+ if (fixed_depth) {
+ DEBUG_PRINTF("fixed depth\n");
+ return true;
+ }
+
+ const bool highlander = ir.ekey != INVALID_EKEY;
+
+ // If we're highlander, we must be simple-exhaustible as well.
+ if (highlander && isSimpleExhaustible(ir)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.h b/contrib/libs/hyperscan/src/nfagraph/ng_puff.h
index c31e7540ba..af0237a594 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.h
@@ -1,56 +1,56 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Puff construction from NGHolder.
- */
-
-#ifndef NG_PUFF_H
-#define NG_PUFF_H
-
-namespace ue2 {
-
-struct CompileContext;
-struct Grey;
-class RoseBuild;
-class NGHolder;
-class ReportManager;
-
-/** \brief Split off portions of the graph that are implementable as Puff
- * engines. Returns true if the entire graph is consumed. */
-bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
- bool prefilter, const CompileContext &cc);
-
-/** \brief True if the entire graph in \a g could be constructed as a Puff
- * engine. */
-bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm,
- const Grey &grey);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Puff construction from NGHolder.
+ */
+
+#ifndef NG_PUFF_H
+#define NG_PUFF_H
+
+namespace ue2 {
+
+struct CompileContext;
+struct Grey;
+class RoseBuild;
+class NGHolder;
+class ReportManager;
+
+/** \brief Split off portions of the graph that are implementable as Puff
+ * engines. Returns true if the entire graph is consumed. */
+bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
+ bool prefilter, const CompileContext &cc);
+
+/** \brief True if the entire graph in \a g could be constructed as a Puff
+ * engine. */
+bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm,
+ const Grey &grey);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp
index 06b9daeeca..fc46907024 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp
@@ -1,899 +1,899 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph reductions.
- *
- * This code attempts to make the NFA graph smaller by performing a number of
- * local transformations:
- *
- * ### (1) removal of redundant vertices:
- *
- * v is redundant wrt to u if succ(v) is a subset of succ(u)
- * AND pred(v) is a subset of pred(u)
- * AND cr(v) is a subset of cr(u)
- *
- * ### (2) 'diamond' transformation:
- *
- * given succ(v) == succ(u) and pred(v) == pred(u),
- * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
- * and cr(w) = union(cr(v), cr(u))
- *
- * ### (3) locally identifiable left equivalence:
- *
- * given pred(v) == pred(u) (**) and cr(v) == cr(u),
- * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
- * and succ(w) = union(succ(v), succ(u))
- *
- * ### (4) locally identifiable right equivalence:
- *
- * given succ(v) == succ(u) (**) and cr(v) == cr(u),
- * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
- * and pred(w) = union(pred(v), pred(u))
- *
- * NOTE (**): for left and right equivalence, we can also do the transform if
- * set(u) contains u, set(v) contains v and the sets are otherwise equal. This
- * enables equivalent vertices with self-loops to be merged.
- *
- * If v and u raise accepts, they can only be merged if they raise the same
- * report IDs.
- *
- * Transformations are applied repeatedly until the graph stops changing.
- *
- * Note that the final graph may depend on the order in which these
- * transformations are applied. In order to reduce the non-determinism the
- * following order is imposed: (1); (2); (3) + (4).
- */
-#include "ng_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_calc_components.h"
-#include "ng_dominators.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph reductions.
+ *
+ * This code attempts to make the NFA graph smaller by performing a number of
+ * local transformations:
+ *
+ * ### (1) removal of redundant vertices:
+ *
+ * v is redundant wrt to u if succ(v) is a subset of succ(u)
+ * AND pred(v) is a subset of pred(u)
+ * AND cr(v) is a subset of cr(u)
+ *
+ * ### (2) 'diamond' transformation:
+ *
+ * given succ(v) == succ(u) and pred(v) == pred(u),
+ * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
+ * and cr(w) = union(cr(v), cr(u))
+ *
+ * ### (3) locally identifiable left equivalence:
+ *
+ * given pred(v) == pred(u) (**) and cr(v) == cr(u),
+ * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
+ * and succ(w) = union(succ(v), succ(u))
+ *
+ * ### (4) locally identifiable right equivalence:
+ *
+ * given succ(v) == succ(u) (**) and cr(v) == cr(u),
+ * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
+ * and pred(w) = union(pred(v), pred(u))
+ *
+ * NOTE (**): for left and right equivalence, we can also do the transform if
+ * set(u) contains u, set(v) contains v and the sets are otherwise equal. This
+ * enables equivalent vertices with self-loops to be merged.
+ *
+ * If v and u raise accepts, they can only be merged if they raise the same
+ * report IDs.
+ *
+ * Transformations are applied repeatedly until the graph stops changing.
+ *
+ * Note that the final graph may depend on the order in which these
+ * transformations are applied. In order to reduce the non-determinism the
+ * following order is imposed: (1); (2); (3) + (4).
+ */
+#include "ng_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_calc_components.h"
+#include "ng_dominators.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-#include <cassert>
-#include <map>
-#include <set>
-#include <vector>
-
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** Precalculated (and maintained) information about a vertex. */
-class VertexInfo {
-public:
- flat_set<NFAVertex> pred; //!< predecessors of this vertex
- flat_set<NFAVertex> succ; //!< successors of this vertex
- bool isAccept = false; //!< does this vertex lead to accept?
- bool isRemoved = false; //!< have we already removed this vertex?
-
- size_t inDegree() const { return pred.size(); }
- size_t outDegree() const { return succ.size(); }
-};
-
-class VertexInfoMap {
-public:
- explicit VertexInfoMap(const NGHolder &gg)
- : g(gg), infos(num_vertices(gg)) {}
- VertexInfo &operator[](NFAVertex v) {
- u32 i = g[v].index;
- assert(i < infos.size());
- return infos[i];
- }
-
- const VertexInfo &operator[](NFAVertex v) const {
- u32 i = g[v].index;
- assert(i < infos.size());
- return infos[i];
- }
-
-private:
- const NGHolder &g;
- vector<VertexInfo> infos;
-};
-
-} // namespace
-
-/** Populates the info map with their predecessor and successor states, and
- * whether they are accept states. */
-static
-void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
- for (auto v : vertices_range(g)) {
- VertexInfo &info = infoMap[v];
- assert(info.pred.empty() && info.succ.empty());
-
- // Build successor and predecessor sets
- insert(&info.pred, inv_adjacent_vertices(v, g));
- insert(&info.succ, adjacent_vertices(v, g));
-
- // Note whether the vertex is an accept state
- if (!is_special(v, g)) {
- if (contains(info.succ, g.accept)
- || contains(info.succ, g.acceptEod)) {
- info.isAccept = true;
- }
- }
- }
-}
-
-/** Helper function to take the intersection of two sorted vertex sets
- * in-place. */
-static
-void inplaceIntersection(vector<NFAVertex> &vset1,
- const flat_set<NFAVertex> &vset2) {
+#include "util/graph_range.h"
+
+#include <algorithm>
+#include <cassert>
+#include <map>
+#include <set>
+#include <vector>
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** Precalculated (and maintained) information about a vertex. */
+class VertexInfo {
+public:
+ flat_set<NFAVertex> pred; //!< predecessors of this vertex
+ flat_set<NFAVertex> succ; //!< successors of this vertex
+ bool isAccept = false; //!< does this vertex lead to accept?
+ bool isRemoved = false; //!< have we already removed this vertex?
+
+ size_t inDegree() const { return pred.size(); }
+ size_t outDegree() const { return succ.size(); }
+};
+
+class VertexInfoMap {
+public:
+ explicit VertexInfoMap(const NGHolder &gg)
+ : g(gg), infos(num_vertices(gg)) {}
+ VertexInfo &operator[](NFAVertex v) {
+ u32 i = g[v].index;
+ assert(i < infos.size());
+ return infos[i];
+ }
+
+ const VertexInfo &operator[](NFAVertex v) const {
+ u32 i = g[v].index;
+ assert(i < infos.size());
+ return infos[i];
+ }
+
+private:
+ const NGHolder &g;
+ vector<VertexInfo> infos;
+};
+
+} // namespace
+
+/** Populates the info map with their predecessor and successor states, and
+ * whether they are accept states. */
+static
+void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
+ for (auto v : vertices_range(g)) {
+ VertexInfo &info = infoMap[v];
+ assert(info.pred.empty() && info.succ.empty());
+
+ // Build successor and predecessor sets
+ insert(&info.pred, inv_adjacent_vertices(v, g));
+ insert(&info.succ, adjacent_vertices(v, g));
+
+ // Note whether the vertex is an accept state
+ if (!is_special(v, g)) {
+ if (contains(info.succ, g.accept)
+ || contains(info.succ, g.acceptEod)) {
+ info.isAccept = true;
+ }
+ }
+ }
+}
+
+/** Helper function to take the intersection of two sorted vertex sets
+ * in-place. */
+static
+void inplaceIntersection(vector<NFAVertex> &vset1,
+ const flat_set<NFAVertex> &vset2) {
const NFAVertex GONE = NGHolder::null_vertex();
-
- vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
- flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
-
- while ((it != ite) && (jt != jte)) {
- assert(*it != GONE);
-
- if (*it < *jt) {
- // present in vset1 but not in vset2. Set to null, remove in a
- // second pass.
- *it = GONE;
- ++it;
- } else if (*jt < *it) {
- // present in vset2 but not in vset1, skip.
- ++jt;
- } else {
- // present in both sets.
- ++it; ++jt;
- }
- }
-
- // Left overs are only in that set.
- vset1.erase(it, ite);
-
- // Remove nulls created above.
- vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
-}
-
-/** Find the intersection of the successors of our predecessors. */
-static
-void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
- const VertexInfoMap &infoMap,
- vector<NFAVertex> &intersection,
- bool considerSelf = true /* follow self loops */) {
- /* find a good seed for the intersection */
- const flat_set<NFAVertex> *best = nullptr;
- for (auto u : predSet) {
- if (!considerSelf && u == v) {
- continue;
- }
-
- const flat_set<NFAVertex> &succSet = infoMap[u].succ;
- if (!best || succSet.size() <= best->size()) {
- best = &succSet;
-
- // Break out if we've reduced our intersection to [v]
- if (best->size() == 1) {
- assert(*(best->begin()) == v);
- intersection.push_back(v);
- return;
- }
- }
- }
-
- if (best) {
- insert(&intersection, intersection.end(), *best);
- }
-
- for (auto u : predSet) {
- if (!considerSelf && u == v) {
- continue;
- }
-
- inplaceIntersection(intersection, infoMap[u].succ);
-
- // Check: intersection should always be at least size 1
- assert(!intersection.empty());
-
- // Break out if we've reduced our intersection to [v]
- if (intersection.size() == 1) {
- assert(*intersection.begin() == v);
- return;
- }
- }
-}
-
-/** Find the intersection of the predecessors of our successors. */
-static
-void predSuccIntersection(const NFAVertex v,
- const flat_set<NFAVertex> &succSet,
- const VertexInfoMap &infoMap,
- vector<NFAVertex> &intersection,
- bool considerSelf = true /* follow self loops */) {
- /* find a good seed for the intersection */
- const flat_set<NFAVertex> *best = nullptr;
- for (auto w : succSet) {
- if (!considerSelf && w == v) {
- continue;
- }
-
- const flat_set<NFAVertex> &predSet = infoMap[w].pred;
- if (!best || predSet.size() <= best->size()) {
- best = &predSet;
-
- // Break out if we've reduced our intersection to [v]
- if (best->size() == 1) {
- assert(*(best->begin()) == v);
- intersection.push_back(v);
- return;
- }
- }
- }
-
- if (best) {
- insert(&intersection, intersection.end(), *best);
- }
-
- for (auto w : succSet) {
- if (!considerSelf && w == v) {
- continue;
- }
-
- inplaceIntersection(intersection, infoMap[w].pred);
-
- // Check: intersection should always be at least size 1
- assert(!intersection.empty());
-
- // Break out if we've reduced our intersection to [v]
- if (intersection.size() == 1) {
- assert(*intersection.begin() == v);
- return;
- }
- }
-}
-
-/** Update containers to take into account the removal of vertex v. */
-static
-void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
- set<NFAVertex> &removable) {
- VertexInfo &info = infoMap[v];
- assert(!info.isRemoved);
- assert(!contains(removable, v));
- info.isRemoved = true;
- removable.insert(v);
-
- // remove v from its predecessors' successors
- for (auto u : info.pred) {
- infoMap[u].succ.erase(v);
- }
-
- // remove v from its successors' predecessors
- for (auto w : info.succ) {
- infoMap[w].pred.erase(v);
- }
-}
-
-static
-bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
+
+ vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
+ flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
+
+ while ((it != ite) && (jt != jte)) {
+ assert(*it != GONE);
+
+ if (*it < *jt) {
+ // present in vset1 but not in vset2. Set to null, remove in a
+ // second pass.
+ *it = GONE;
+ ++it;
+ } else if (*jt < *it) {
+ // present in vset2 but not in vset1, skip.
+ ++jt;
+ } else {
+ // present in both sets.
+ ++it; ++jt;
+ }
+ }
+
+ // Left overs are only in that set.
+ vset1.erase(it, ite);
+
+ // Remove nulls created above.
+ vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
+}
+
+/** Find the intersection of the successors of our predecessors. */
+static
+void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
+ const VertexInfoMap &infoMap,
+ vector<NFAVertex> &intersection,
+ bool considerSelf = true /* follow self loops */) {
+ /* find a good seed for the intersection */
+ const flat_set<NFAVertex> *best = nullptr;
+ for (auto u : predSet) {
+ if (!considerSelf && u == v) {
+ continue;
+ }
+
+ const flat_set<NFAVertex> &succSet = infoMap[u].succ;
+ if (!best || succSet.size() <= best->size()) {
+ best = &succSet;
+
+ // Break out if we've reduced our intersection to [v]
+ if (best->size() == 1) {
+ assert(*(best->begin()) == v);
+ intersection.push_back(v);
+ return;
+ }
+ }
+ }
+
+ if (best) {
+ insert(&intersection, intersection.end(), *best);
+ }
+
+ for (auto u : predSet) {
+ if (!considerSelf && u == v) {
+ continue;
+ }
+
+ inplaceIntersection(intersection, infoMap[u].succ);
+
+ // Check: intersection should always be at least size 1
+ assert(!intersection.empty());
+
+ // Break out if we've reduced our intersection to [v]
+ if (intersection.size() == 1) {
+ assert(*intersection.begin() == v);
+ return;
+ }
+ }
+}
+
+/** Find the intersection of the predecessors of our successors. */
+static
+void predSuccIntersection(const NFAVertex v,
+ const flat_set<NFAVertex> &succSet,
+ const VertexInfoMap &infoMap,
+ vector<NFAVertex> &intersection,
+ bool considerSelf = true /* follow self loops */) {
+ /* find a good seed for the intersection */
+ const flat_set<NFAVertex> *best = nullptr;
+ for (auto w : succSet) {
+ if (!considerSelf && w == v) {
+ continue;
+ }
+
+ const flat_set<NFAVertex> &predSet = infoMap[w].pred;
+ if (!best || predSet.size() <= best->size()) {
+ best = &predSet;
+
+ // Break out if we've reduced our intersection to [v]
+ if (best->size() == 1) {
+ assert(*(best->begin()) == v);
+ intersection.push_back(v);
+ return;
+ }
+ }
+ }
+
+ if (best) {
+ insert(&intersection, intersection.end(), *best);
+ }
+
+ for (auto w : succSet) {
+ if (!considerSelf && w == v) {
+ continue;
+ }
+
+ inplaceIntersection(intersection, infoMap[w].pred);
+
+ // Check: intersection should always be at least size 1
+ assert(!intersection.empty());
+
+ // Break out if we've reduced our intersection to [v]
+ if (intersection.size() == 1) {
+ assert(*intersection.begin() == v);
+ return;
+ }
+ }
+}
+
+/** Update containers to take into account the removal of vertex v. */
+static
+void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
+ set<NFAVertex> &removable) {
+ VertexInfo &info = infoMap[v];
+ assert(!info.isRemoved);
+ assert(!contains(removable, v));
+ info.isRemoved = true;
+ removable.insert(v);
+
+ // remove v from its predecessors' successors
+ for (auto u : info.pred) {
+ infoMap[u].succ.erase(v);
+ }
+
+ // remove v from its successors' predecessors
+ for (auto w : info.succ) {
+ infoMap[w].pred.erase(v);
+ }
+}
+
+static
+bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
NFAEdge e = edge(g.start, v, g);
return e && !g[e].tops.empty();
-}
-
-/** Transform (1), removal of redundant vertices. */
-static
-bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
- set<NFAVertex> &removable) {
- /* useless merges can be done in any order, no need to take any care with
- * ordering */
-
- // Temporary vectors used for intersections below
- vector<NFAVertex> succPredSet, predSuccSet, intersection;
-
- bool changed = false;
- for (auto v : vertices_range(g)) {
- VertexInfo &info = infoMap[v];
-
- if (info.isRemoved) {
- continue;
- }
-
- assert(!contains(removable, v));
-
- if (is_special(v, g)) {
- continue;
- }
-
- /* we do not need to check for out edge tops - as only specials (start)
- * can have tops and they are already disqualified. */
- if (hasInEdgeTops(g, v)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- if (info.pred.empty() || info.succ.empty()) {
+}
+
+/** Transform (1), removal of redundant vertices. */
+static
+bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
+ set<NFAVertex> &removable) {
+ /* useless merges can be done in any order, no need to take any care with
+ * ordering */
+
+ // Temporary vectors used for intersections below
+ vector<NFAVertex> succPredSet, predSuccSet, intersection;
+
+ bool changed = false;
+ for (auto v : vertices_range(g)) {
+ VertexInfo &info = infoMap[v];
+
+ if (info.isRemoved) {
+ continue;
+ }
+
+ assert(!contains(removable, v));
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ /* we do not need to check for out edge tops - as only specials (start)
+ * can have tops and they are already disqualified. */
+ if (hasInEdgeTops(g, v)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ if (info.pred.empty() || info.succ.empty()) {
DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index);
- assert(0); // non-special states should always have succ/pred lists
- continue;
- }
-
- // The following cases are more complex and rely on the intersection of
- // Succ(Pred(v)) and Pred(Succ(v))
-
- // Compute intersections, operating on the smaller set first
- // Note that we use vectors here, as set_intersection underneath
- // guarantees sorted output, and vectors were quite a bit
- // faster than sets or lists.
-
- succPredSet.clear();
- predSuccSet.clear();
-
- if (info.pred.size() <= info.succ.size()) {
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- // nobody in here but us chickens
- assert(*succPredSet.begin() == v);
- continue;
- }
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- } else {
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- assert(*succPredSet.begin() == v);
- continue;
- }
- }
-
- // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
- intersection.clear();
- set_intersection(succPredSet.begin(), succPredSet.end(),
- predSuccSet.begin(), predSuccSet.end(),
- back_inserter(intersection));
-
- /* Boring if it is just us in the intersection */
- if (intersection.size() < 2) {
- continue;
- }
-
- // Compare char_reach, mark v for removal if any members of
- // the intersection have an equal or greater reach
- const CharReach &currReach = g[v].char_reach;
- const auto &currReports = g[v].reports;
- for (auto t : intersection) {
- const VertexInfo &info2 = infoMap[t];
-
- /* start is never a succ of a state, so will never be in the
- * predsucc/succpred intersection */
- assert(t != g.start);
-
- if (t == v || info2.isRemoved) {
- continue;
- }
-
- // For each candidate C to make V redundant, check:
- // if V is an accept state, C must be an accept state for
- // the same pattern
- // pred(C) is a superset of pred(V)
- // succ(C) is a superset of succ(V)
- // reach(C) is a superset of reach(V)
- //
- // Note: pred/sec tests are covered by the intersections
- // calculated above.
-
- /* note: links to accepts are also tracked in succs */
- if (info.isAccept && currReports != g[t].reports) {
- continue;
- }
-
- if (som) {
- if (t == g.startDs) {
- continue;
- }
- if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
- continue;
- }
- }
-
- /* we do not need to check for out edge tops - as only start
- * can have tops and it has already been ruled out. */
- if (hasInEdgeTops(g, t)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- CharReach &otherReach = g[t].char_reach;
- if (currReach.isSubsetOf(otherReach)) {
+ assert(0); // non-special states should always have succ/pred lists
+ continue;
+ }
+
+ // The following cases are more complex and rely on the intersection of
+ // Succ(Pred(v)) and Pred(Succ(v))
+
+ // Compute intersections, operating on the smaller set first
+ // Note that we use vectors here, as set_intersection underneath
+ // guarantees sorted output, and vectors were quite a bit
+ // faster than sets or lists.
+
+ succPredSet.clear();
+ predSuccSet.clear();
+
+ if (info.pred.size() <= info.succ.size()) {
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ // nobody in here but us chickens
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ } else {
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ }
+
+ // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
+ intersection.clear();
+ set_intersection(succPredSet.begin(), succPredSet.end(),
+ predSuccSet.begin(), predSuccSet.end(),
+ back_inserter(intersection));
+
+ /* Boring if it is just us in the intersection */
+ if (intersection.size() < 2) {
+ continue;
+ }
+
+ // Compare char_reach, mark v for removal if any members of
+ // the intersection have an equal or greater reach
+ const CharReach &currReach = g[v].char_reach;
+ const auto &currReports = g[v].reports;
+ for (auto t : intersection) {
+ const VertexInfo &info2 = infoMap[t];
+
+ /* start is never a succ of a state, so will never be in the
+ * predsucc/succpred intersection */
+ assert(t != g.start);
+
+ if (t == v || info2.isRemoved) {
+ continue;
+ }
+
+ // For each candidate C to make V redundant, check:
+ // if V is an accept state, C must be an accept state for
+ // the same pattern
+ // pred(C) is a superset of pred(V)
+ // succ(C) is a superset of succ(V)
+ // reach(C) is a superset of reach(V)
+ //
+ // Note: pred/sec tests are covered by the intersections
+ // calculated above.
+
+ /* note: links to accepts are also tracked in succs */
+ if (info.isAccept && currReports != g[t].reports) {
+ continue;
+ }
+
+ if (som) {
+ if (t == g.startDs) {
+ continue;
+ }
+ if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
+ continue;
+ }
+ }
+
+ /* we do not need to check for out edge tops - as only start
+ * can have tops and it has already been ruled out. */
+ if (hasInEdgeTops(g, t)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ CharReach &otherReach = g[t].char_reach;
+ if (currReach.isSubsetOf(otherReach)) {
DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
- g[v].index, g[t].index);
- markForRemoval(v, infoMap, removable);
- changed = true;
- break;
- }
- }
- }
-
- return changed;
-}
-
-/** Transform (2), diamond merge pass. */
-static
-bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
- set<NFAVertex> &removable) {
- // Temporary vectors used for intersections below
- vector<NFAVertex> succPredSet, predSuccSet, intersection;
-
- bool changed = false;
- for (auto v : vertices_range(g)) {
- VertexInfo &info = infoMap[v];
-
- if (info.isRemoved) {
- continue;
- }
-
- assert(!contains(removable, v));
-
- if (is_special(v, g)) {
- continue;
- }
-
- /* we do not need to check for out edge tops - as only specials (start)
- * can have tops and they are already disqualified. */
- if (hasInEdgeTops(g, v)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- if (info.pred.empty() || info.succ.empty()) {
- assert(0); // non-special states should always have succ/pred lists
- continue;
- }
-
- // The following cases are more complex and rely on the intersection of
- // Succ(Pred(v)) and Pred(Succ(v))
-
- // Compute intersections, operating on the smaller set first
- // Note that we use vectors here, as set_intersection underneath
- // guarantees sorted output, and vectors were quite a bit faster than
- // sets or lists.
-
- succPredSet.clear();
- predSuccSet.clear();
-
- if (info.pred.size() <= info.succ.size()) {
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- // nobody in here but us chickens
- assert(*succPredSet.begin() == v);
- continue;
- }
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- } else {
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- assert(*succPredSet.begin() == v);
- continue;
- }
- }
-
- // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
- intersection.clear();
- set_intersection(succPredSet.begin(), succPredSet.end(),
- predSuccSet.begin(), predSuccSet.end(),
- back_inserter(intersection));
-
- /* Boring if it is just us in the intersection */
- if (intersection.size() < 2) {
- continue;
- }
-
- const CharReach &currReach = g[v].char_reach;
- const auto &currReports = g[v].reports;
- for (auto t : intersection) {
- const VertexInfo &info2 = infoMap[t];
-
- if (t == v || info2.isRemoved || is_special(t, g)) {
- continue;
- }
-
- /* note: links to accepts are also tracked in succs */
- if (info.isAccept && currReports != g[t].reports) {
- continue;
- }
-
- /* we do not need to check for out edge tops - as only specials
- * (start) can have tops and they are already disqualified. */
- if (hasInEdgeTops(g, t)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- if (som) {
- if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
- continue; // can only merge like with like.
- }
- }
-
- // If in-degree of v == in-degree of target
- // and out-degree of v == out-degree of target
- // (because pred and succ are supersets)
- // then combine charreach of v into target and remove v
- if (info.inDegree() == info2.inDegree()
- && info.outDegree() == info2.outDegree()) {
- // add character reachability of v into target
- CharReach &otherReach = g[t].char_reach;
- otherReach |= currReach;
- // v can be removed
+ g[v].index, g[t].index);
+ markForRemoval(v, infoMap, removable);
+ changed = true;
+ break;
+ }
+ }
+ }
+
+ return changed;
+}
+
+/** Transform (2), diamond merge pass. */
+static
+bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
+ set<NFAVertex> &removable) {
+ // Temporary vectors used for intersections below
+ vector<NFAVertex> succPredSet, predSuccSet, intersection;
+
+ bool changed = false;
+ for (auto v : vertices_range(g)) {
+ VertexInfo &info = infoMap[v];
+
+ if (info.isRemoved) {
+ continue;
+ }
+
+ assert(!contains(removable, v));
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ /* we do not need to check for out edge tops - as only specials (start)
+ * can have tops and they are already disqualified. */
+ if (hasInEdgeTops(g, v)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ if (info.pred.empty() || info.succ.empty()) {
+ assert(0); // non-special states should always have succ/pred lists
+ continue;
+ }
+
+ // The following cases are more complex and rely on the intersection of
+ // Succ(Pred(v)) and Pred(Succ(v))
+
+ // Compute intersections, operating on the smaller set first
+ // Note that we use vectors here, as set_intersection underneath
+ // guarantees sorted output, and vectors were quite a bit faster than
+ // sets or lists.
+
+ succPredSet.clear();
+ predSuccSet.clear();
+
+ if (info.pred.size() <= info.succ.size()) {
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ // nobody in here but us chickens
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ } else {
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ }
+
+ // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
+ intersection.clear();
+ set_intersection(succPredSet.begin(), succPredSet.end(),
+ predSuccSet.begin(), predSuccSet.end(),
+ back_inserter(intersection));
+
+ /* Boring if it is just us in the intersection */
+ if (intersection.size() < 2) {
+ continue;
+ }
+
+ const CharReach &currReach = g[v].char_reach;
+ const auto &currReports = g[v].reports;
+ for (auto t : intersection) {
+ const VertexInfo &info2 = infoMap[t];
+
+ if (t == v || info2.isRemoved || is_special(t, g)) {
+ continue;
+ }
+
+ /* note: links to accepts are also tracked in succs */
+ if (info.isAccept && currReports != g[t].reports) {
+ continue;
+ }
+
+ /* we do not need to check for out edge tops - as only specials
+ * (start) can have tops and they are already disqualified. */
+ if (hasInEdgeTops(g, t)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ if (som) {
+ if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
+ continue; // can only merge like with like.
+ }
+ }
+
+ // If in-degree of v == in-degree of target
+ // and out-degree of v == out-degree of target
+ // (because pred and succ are supersets)
+ // then combine charreach of v into target and remove v
+ if (info.inDegree() == info2.inDegree()
+ && info.outDegree() == info2.outDegree()) {
+ // add character reachability of v into target
+ CharReach &otherReach = g[t].char_reach;
+ otherReach |= currReach;
+ // v can be removed
DEBUG_PRINTF("removing redundant vertex %zu and merging "
"reachability with vertex %zu\n",
- g[v].index, g[t].index);
- markForRemoval(v, infoMap, removable);
- changed = true;
- break;
- }
- }
- }
-
- return changed;
-}
-
-namespace {
-
-struct ReachMismatch {};
-
-class ReachSubsetVisitor : public boost::default_dfs_visitor {
-public:
- explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
-
- template <class Graph, class Vertex>
- void discover_vertex(const Vertex &v, const Graph &g) const {
- if (is_any_start(v, g)) {
- return; // start vertices are OK
- } else if (is_special(v, g)) {
- assert(0);
- throw ReachMismatch(); // other special nodes??
- }
-
- const CharReach &vcr = g[v].char_reach;
- DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
- cr.count());
- if (vcr != (vcr & cr)) {
- throw ReachMismatch();
- }
- }
-
-private:
- const CharReach &cr;
-};
-
-/** Terminator function for DFS used in pathReachSubset. */
-template <class Graph, class Vertex> class VertexIs {
-public:
- explicit VertexIs(const Vertex &v) : vertex(v) {}
- bool operator()(const Vertex &v, const Graph &) const {
- return v == vertex;
- }
-
-private:
- Vertex vertex;
-};
-
-} // namespace
-
-/** Returns true if every vertex on paths leading to edge \p e has reachability
- * which is a subset of the reachability of \p dom */
-static
-bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
- const NGHolder &g) {
- const CharReach &domReach = g[dom].char_reach;
- if (domReach.all()) {
- return true;
- }
-
- NFAVertex start = source(e, g);
+ g[v].index, g[t].index);
+ markForRemoval(v, infoMap, removable);
+ changed = true;
+ break;
+ }
+ }
+ }
+
+ return changed;
+}
+
+namespace {
+
+struct ReachMismatch {};
+
+class ReachSubsetVisitor : public boost::default_dfs_visitor {
+public:
+ explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
+
+ template <class Graph, class Vertex>
+ void discover_vertex(const Vertex &v, const Graph &g) const {
+ if (is_any_start(v, g)) {
+ return; // start vertices are OK
+ } else if (is_special(v, g)) {
+ assert(0);
+ throw ReachMismatch(); // other special nodes??
+ }
+
+ const CharReach &vcr = g[v].char_reach;
+ DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
+ cr.count());
+ if (vcr != (vcr & cr)) {
+ throw ReachMismatch();
+ }
+ }
+
+private:
+ const CharReach &cr;
+};
+
+/** Terminator function for DFS used in pathReachSubset. */
+template <class Graph, class Vertex> class VertexIs {
+public:
+ explicit VertexIs(const Vertex &v) : vertex(v) {}
+ bool operator()(const Vertex &v, const Graph &) const {
+ return v == vertex;
+ }
+
+private:
+ Vertex vertex;
+};
+
+} // namespace
+
+/** Returns true if every vertex on paths leading to edge \p e has reachability
+ * which is a subset of the reachability of \p dom */
+static
+bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
+ const NGHolder &g) {
+ const CharReach &domReach = g[dom].char_reach;
+ if (domReach.all()) {
+ return true;
+ }
+
+ NFAVertex start = source(e, g);
using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
- map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
-
- // Walk the graph backwards from v, examining each node. We fail (return
- // false) if we encounter a node with reach NOT a subset of domReach, and
- // we stop searching at dom.
- try {
+ map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
+
+ // Walk the graph backwards from v, examining each node. We fail (return
+ // false) if we encounter a node with reach NOT a subset of domReach, and
+ // we stop searching at dom.
+ try {
depth_first_visit(RevGraph(g), start,
- ReachSubsetVisitor(domReach),
- make_assoc_property_map(vertexColor),
- VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
- } catch(ReachMismatch&) {
- return false;
- }
-
- return true;
-}
-
-/** Returns true if every vertex on paths leading from edge \p e has
- * reachability which is a subset of the reachability of \p dom */
-static
-bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
- const NGHolder &g) {
- const CharReach &domReach = g[dom].char_reach;
- if (domReach.all()) {
- return true;
- }
-
- NFAVertex start = target(e, g);
+ ReachSubsetVisitor(domReach),
+ make_assoc_property_map(vertexColor),
+ VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
+ } catch(ReachMismatch&) {
+ return false;
+ }
+
+ return true;
+}
+
+/** Returns true if every vertex on paths leading from edge \p e has
+ * reachability which is a subset of the reachability of \p dom */
+static
+bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
+ const NGHolder &g) {
+ const CharReach &domReach = g[dom].char_reach;
+ if (domReach.all()) {
+ return true;
+ }
+
+ NFAVertex start = target(e, g);
map<NFAVertex, boost::default_color_type> vertexColor;
-
- // Walk the graph forward from v, examining each node. We fail (return
- // false) if we encounter a node with reach NOT a subset of domReach, and
- // we stop searching at dom.
- try {
+
+ // Walk the graph forward from v, examining each node. We fail (return
+ // false) if we encounter a node with reach NOT a subset of domReach, and
+ // we stop searching at dom.
+ try {
depth_first_visit(g, start, ReachSubsetVisitor(domReach),
- make_assoc_property_map(vertexColor),
+ make_assoc_property_map(vertexColor),
VertexIs<NGHolder, NFAVertex>(dom));
- } catch(ReachMismatch&) {
- return false;
- }
-
- return true;
-}
-
-static
-bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_special(w, g)) {
- return false;
- }
- }
- return true;
-}
-
-static
-bool allInsSpecial(NFAVertex v, const NGHolder &g) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_special(u, g)) {
- return false;
- }
- }
- return true;
-}
-
-/** Cheaply check whether this graph can't be reduced at all, because it is
- * just a chain of vertices with no other edges. */
-static
-bool isIrreducible(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- // skip specials
- if (is_special(v, g)) {
- continue;
- }
-
- if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
- return false;
- }
- if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
- return false;
- }
- }
-
- /* if calcComponents got sleepy and went home, the above checks don't hold
- * as it assumes there is only one connected component. */
- if (isAlternationOfClasses(g)) {
- return false;
- }
-
- return true;
-}
-
-static
-u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
- u32 count = 0;
-
- cyclic.resize(num_vertices(g));
-
- for (auto v : vertices_range(g)) {
- assert(g[v].index < cyclic.size());
+ } catch(ReachMismatch&) {
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_special(w, g)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+bool allInsSpecial(NFAVertex v, const NGHolder &g) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_special(u, g)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/** Cheaply check whether this graph can't be reduced at all, because it is
+ * just a chain of vertices with no other edges. */
+static
+bool isIrreducible(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ // skip specials
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
+ return false;
+ }
+ if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
+ return false;
+ }
+ }
+
+ /* if calcComponents got sleepy and went home, the above checks don't hold
+ * as it assumes there is only one connected component. */
+ if (isAlternationOfClasses(g)) {
+ return false;
+ }
+
+ return true;
+}
+
+static
+u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
+ u32 count = 0;
+
+ cyclic.resize(num_vertices(g));
+
+ for (auto v : vertices_range(g)) {
+ assert(g[v].index < cyclic.size());
if (hasSelfLoop(v, g)) {
- count++;
+ count++;
cyclic[g[v].index] = true;
- }
- }
-
- return count;
-}
-
-static
-void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
- set<NFAEdge> &dead, som_type som) {
+ }
+ }
+
+ return count;
+}
+
+static
+void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
+ set<NFAEdge> &dead, som_type som) {
auto dominators = findDominators(g);
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- // Path in through a dominator (e.g. '.+a?foobar')
- NFAVertex dom = dominators[v];
- if (dom && cyclic[g[dom].index]
- && edge(dom, v, g).second) {
-
- if (som && dom == g.startDs) {
- continue;
- }
-
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ // Path in through a dominator (e.g. '.+a?foobar')
+ NFAVertex dom = dominators[v];
+ if (dom && cyclic[g[dom].index]
+ && edge(dom, v, g).second) {
+
+ if (som && dom == g.startDs) {
+ continue;
+ }
+
DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic "
"vertex %zu\n", g[v].index, g[dom].index);
-
- // iff all paths through in-edge e of v involve vertices whose
- // reachability is a subset of reach(dom), we can delete edge e.
- for (const auto &e : in_edges_range(v, g)) {
- if (source(e, g) == dom) {
- continue;
- }
-
- if (reversePathReachSubset(e, dom, g)) {
+
+ // iff all paths through in-edge e of v involve vertices whose
+ // reachability is a subset of reach(dom), we can delete edge e.
+ for (const auto &e : in_edges_range(v, g)) {
+ if (source(e, g) == dom) {
+ continue;
+ }
+
+ if (reversePathReachSubset(e, dom, g)) {
DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading "
"paths share dom reach\n",
- g[source(e, g)].index, g[target(e, g)].index);
- dead.insert(e);
- if (source(e, g) == v) {
- cyclic[g[v].index] = false;
- }
- continue;
- }
- }
- }
- }
-}
-
-static
-void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
- set<NFAEdge> &dead) {
+ g[source(e, g)].index, g[target(e, g)].index);
+ dead.insert(e);
+ if (source(e, g) == v) {
+ cyclic[g[v].index] = false;
+ }
+ continue;
+ }
+ }
+ }
+ }
+}
+
+static
+void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
+ set<NFAEdge> &dead) {
auto postdominators = findPostDominators(g);
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- // Path out through a post-dominator (e.g. a?.+foobar')
- NFAVertex postdom = postdominators[v];
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ // Path out through a post-dominator (e.g. a?.+foobar')
+ NFAVertex postdom = postdominators[v];
if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) {
DEBUG_PRINTF("vertex %zu is postdominated by directly-connected "
"cyclic vertex %zu\n", g[v].index, g[postdom].index);
-
- // iff all paths through in-edge e of v involve vertices whose
- // reachability is a subset of reach(dom), we can delete edge e.
- for (const auto &e : out_edges_range(v, g)) {
- if (target(e, g) == postdom) {
- continue;
- }
-
- if (forwardPathReachSubset(e, postdom, g)) {
+
+ // iff all paths through in-edge e of v involve vertices whose
+ // reachability is a subset of reach(dom), we can delete edge e.
+ for (const auto &e : out_edges_range(v, g)) {
+ if (target(e, g) == postdom) {
+ continue;
+ }
+
+ if (forwardPathReachSubset(e, postdom, g)) {
DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing "
"paths share postdom reach\n",
- g[source(e, g)].index, g[target(e, g)].index);
- if (target(e, g) == v) {
- cyclic[g[v].index] = false;
- }
- dead.insert(e);
- continue;
- }
- }
- }
- }
-}
-
-bool removeRedundancy(NGHolder &g, som_type som) {
- DEBUG_PRINTF("rr som = %d\n", (int)som);
+ g[source(e, g)].index, g[target(e, g)].index);
+ if (target(e, g) == v) {
+ cyclic[g[v].index] = false;
+ }
+ dead.insert(e);
+ continue;
+ }
+ }
+ }
+ }
+}
+
+bool removeRedundancy(NGHolder &g, som_type som) {
+ DEBUG_PRINTF("rr som = %d\n", (int)som);
renumber_vertices(g);
-
- // Cheap check: if all the non-special vertices have in-degree one and
- // out-degree one, there's no redundancy in this here graph and we can
- // vamoose.
- if (isIrreducible(g)) {
- return false;
- }
-
- VertexInfoMap infoMap(g);
-
- // Populate maps of successors and predecessors, and accept status
- populateContainers(g, infoMap);
-
- /* Run multiple passes: terminate when a full pass doesn't remove
- * any vertices */
- bool doUseless = true;
- bool doDiamond = true;
- set<NFAVertex> removable;
- while (doUseless || doDiamond) {
- if (doUseless
- && doUselessMergePass(g, som, infoMap, removable)) {
- doDiamond = true;
- }
- doUseless = false;
-
- if (doDiamond
- && doDiamondMergePass(g, som, infoMap, removable)) {
- doUseless = true;
- }
- doDiamond = false;
- }
- DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
- remove_vertices(removable, g);
-
- return !removable.empty();
-}
-
-/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
- * reachability that is a superset of all paths feeding into that edge. */
-bool removeCyclicDominated(NGHolder &g, som_type som) {
- set<NFAEdge> dead;
- vector<bool> cyclic;
- bool changed = false;
-
- findCyclic(g, cyclic);
-
- findCyclicDom(g, cyclic, dead, som);
- if (!dead.empty()) {
- remove_edges(dead, g);
- pruneUseless(g);
- dead.clear();
- cyclic.clear(); // need to recalculate cyclic as ids have changed
- findCyclic(g, cyclic);
- changed = true;
- }
-
- findCyclicPostDom(g, cyclic, dead);
- if (!dead.empty()) {
- remove_edges(dead, g);
- pruneUseless(g);
- dead.clear();
- changed = true;
- }
-
- return changed;
-}
-
-} // namespace ue2
+
+ // Cheap check: if all the non-special vertices have in-degree one and
+ // out-degree one, there's no redundancy in this here graph and we can
+ // vamoose.
+ if (isIrreducible(g)) {
+ return false;
+ }
+
+ VertexInfoMap infoMap(g);
+
+ // Populate maps of successors and predecessors, and accept status
+ populateContainers(g, infoMap);
+
+ /* Run multiple passes: terminate when a full pass doesn't remove
+ * any vertices */
+ bool doUseless = true;
+ bool doDiamond = true;
+ set<NFAVertex> removable;
+ while (doUseless || doDiamond) {
+ if (doUseless
+ && doUselessMergePass(g, som, infoMap, removable)) {
+ doDiamond = true;
+ }
+ doUseless = false;
+
+ if (doDiamond
+ && doDiamondMergePass(g, som, infoMap, removable)) {
+ doUseless = true;
+ }
+ doDiamond = false;
+ }
+ DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
+ remove_vertices(removable, g);
+
+ return !removable.empty();
+}
+
+/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
+ * reachability that is a superset of all paths feeding into that edge. */
+bool removeCyclicDominated(NGHolder &g, som_type som) {
+ set<NFAEdge> dead;
+ vector<bool> cyclic;
+ bool changed = false;
+
+ findCyclic(g, cyclic);
+
+ findCyclicDom(g, cyclic, dead, som);
+ if (!dead.empty()) {
+ remove_edges(dead, g);
+ pruneUseless(g);
+ dead.clear();
+ cyclic.clear(); // need to recalculate cyclic as ids have changed
+ findCyclic(g, cyclic);
+ changed = true;
+ }
+
+ findCyclicPostDom(g, cyclic, dead);
+ if (!dead.empty()) {
+ remove_edges(dead, g);
+ pruneUseless(g);
+ dead.clear();
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h
index 941844d061..617aed6b37 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h
@@ -1,54 +1,54 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph reductions.
- */
-
-#ifndef NG_REDUNDANCY_H
-#define NG_REDUNDANCY_H
-
-#include "som/som.h"
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-/** Attempt to make the NFA graph \p g smaller by performing a number of local
- * transformations. */
-bool removeRedundancy(NGHolder &g, som_type som);
-
-/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
- * reachability that is a superset of all paths feeding into that edge. Returns
- * true if any edges/vertices were removed. */
-bool removeCyclicDominated(NGHolder &g, som_type som);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph reductions.
+ */
+
+#ifndef NG_REDUNDANCY_H
+#define NG_REDUNDANCY_H
+
+#include "som/som.h"
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+/** Attempt to make the NFA graph \p g smaller by performing a number of local
+ * transformations. */
+bool removeRedundancy(NGHolder &g, som_type som);
+
+/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
+ * reachability that is a superset of all paths feeding into that edge. Returns
+ * true if any edges/vertices were removed. */
+bool removeCyclicDominated(NGHolder &g, som_type som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp
index 2675be643f..a879e34695 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp
@@ -1,476 +1,476 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region analysis.
- *
- * Definition: a \a region is a subset of vertices in a graph such that:
- * - the edges entering the region are a cutset of the graph
- * - for every in-edge (u, v) to the region there exist edges (u, w) for all
- * w in {w : w in region and w has an in-edge}
- * - the regions in a graph partition the graph
- *
- * Note:
- * - we partition a graph into the maximal number of regions
- * - similar properties for exit edges should hold as a consequence
- * - graph == sequence of regions
- * - a region is considered to have an epsilon vertex to allow jumps
- * - vertices which only lead to back edges need to be floated up in the topo
- * order
- *
- * Algorithm overview:
- * -# topo-order over the DAG skeleton;
- * -# incrementally add vertices to the current region until the boundary edges
- * form a valid cut-set;
- * -# for each back-edge, if the source and target are in different regions,
- * merge the regions (and all intervening regions) into a common region.
- */
-#include "ng_region.h"
-
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region analysis.
+ *
+ * Definition: a \a region is a subset of vertices in a graph such that:
+ * - the edges entering the region are a cutset of the graph
+ * - for every in-edge (u, v) to the region there exist edges (u, w) for all
+ * w in {w : w in region and w has an in-edge}
+ * - the regions in a graph partition the graph
+ *
+ * Note:
+ * - we partition a graph into the maximal number of regions
+ * - similar properties for exit edges should hold as a consequence
+ * - graph == sequence of regions
+ * - a region is considered to have an epsilon vertex to allow jumps
+ * - vertices which only lead to back edges need to be floated up in the topo
+ * order
+ *
+ * Algorithm overview:
+ * -# topo-order over the DAG skeleton;
+ * -# incrementally add vertices to the current region until the boundary edges
+ * form a valid cut-set;
+ * -# for each back-edge, if the source and target are in different regions,
+ * merge the regions (and all intervening regions) into a common region.
+ */
+#include "ng_region.h"
+
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
-#include <set>
-#include <utility>
-#include <vector>
-
-#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
+
+#include <set>
+#include <utility>
+#include <vector>
+
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
using BackEdgeSet = unordered_set<NFAEdge>;
using AcyclicGraph =
boost::filtered_graph<NGHolder, bad_edge_filter<BackEdgeSet>>;
-
-namespace {
-struct exit_info {
- explicit exit_info(NFAVertex v) : exit(v) {}
-
- NFAVertex exit;
+
+namespace {
+struct exit_info {
+ explicit exit_info(NFAVertex v) : exit(v) {}
+
+ NFAVertex exit;
flat_set<NFAVertex> open;
-};
-}
-
-static
-void checkAndAddExitCandidate(const AcyclicGraph &g,
+};
+}
+
+static
+void checkAndAddExitCandidate(const AcyclicGraph &g,
const unordered_set<NFAVertex> &r, NFAVertex v,
vector<exit_info> &exits) {
exit_info v_exit(v);
auto &open = v_exit.open;
-
- /* find the set of vertices reachable from v which are not in r */
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!contains(r, w)) {
+
+ /* find the set of vertices reachable from v which are not in r */
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!contains(r, w)) {
open.insert(w);
- }
- }
-
+ }
+ }
+
if (!open.empty()) {
DEBUG_PRINTF("exit %zu\n", g[v].index);
exits.push_back(move(v_exit));
- }
-}
-
-static
+ }
+}
+
+static
void findExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r,
vector<exit_info> &exits) {
exits.clear();
- for (auto v : r) {
- checkAndAddExitCandidate(g, r, v, exits);
- }
-}
-
-static
+ for (auto v : r) {
+ checkAndAddExitCandidate(g, r, v, exits);
+ }
+}
+
+static
void refineExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r,
NFAVertex new_v, vector<exit_info> &exits) {
/* new_v is no long an open edge */
for (auto &exit : exits) {
exit.open.erase(new_v);
- }
-
+ }
+
/* no open edges: no longer an exit */
exits.erase(remove_if(exits.begin(), exits.end(),
[&](const exit_info &exit) { return exit.open.empty(); }),
exits.end());
- checkAndAddExitCandidate(g, r, new_v, exits);
-}
-
-/** the set of exits from a candidate region are valid if: FIXME: document
- */
-static
-bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
+ checkAndAddExitCandidate(g, r, new_v, exits);
+}
+
+/** the set of exits from a candidate region are valid if: FIXME: document
+ */
+static
+bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
const flat_set<NFAVertex> &open_jumps) {
- if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
- return true;
- }
- if (exits.size() == 1 && open_jumps.size() == 1) {
+ if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
+ return true;
+ }
+ if (exits.size() == 1 && open_jumps.size() == 1) {
DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index,
- g[exits[0].exit].index);
- if (*open_jumps.begin() == exits[0].exit) {
- return true;
- }
- }
-
- assert(!exits.empty());
- const auto &enters = exits.front().open;
-
- if (!open_jumps.empty() && enters != open_jumps) {
- return false;
- }
-
- for (auto it = begin(exits) + 1; it != end(exits); ++it) {
- if (it->open != enters) {
- return false;
- }
- }
-
- return true;
-}
-
-static
+ g[exits[0].exit].index);
+ if (*open_jumps.begin() == exits[0].exit) {
+ return true;
+ }
+ }
+
+ assert(!exits.empty());
+ const auto &enters = exits.front().open;
+
+ if (!open_jumps.empty() && enters != open_jumps) {
+ return false;
+ }
+
+ for (auto it = begin(exits) + 1; it != end(exits); ++it) {
+ if (it->open != enters) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
void setRegion(const unordered_set<NFAVertex> &r, u32 rid,
unordered_map<NFAVertex, u32> &regions) {
- for (auto v : r) {
- regions[v] = rid;
- }
-}
-
-static
-void buildInitialCandidate(const AcyclicGraph &g,
- vector<NFAVertex>::const_reverse_iterator &it,
- const vector<NFAVertex>::const_reverse_iterator &ite,
+ for (auto v : r) {
+ regions[v] = rid;
+ }
+}
+
+static
+void buildInitialCandidate(const AcyclicGraph &g,
+ vector<NFAVertex>::const_reverse_iterator &it,
+ const vector<NFAVertex>::const_reverse_iterator &ite,
unordered_set<NFAVertex> &candidate,
- /* in exits of prev region;
- * out exits from candidate */
+ /* in exits of prev region;
+ * out exits from candidate */
vector<exit_info> &exits,
flat_set<NFAVertex> &open_jumps) {
- if (it == ite) {
+ if (it == ite) {
candidate.clear();
exits.clear();
- return;
- }
-
+ return;
+ }
+
if (exits.empty()) {
- DEBUG_PRINTF("odd\n");
+ DEBUG_PRINTF("odd\n");
candidate.clear();
DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
candidate.insert(*it);
open_jumps.erase(*it);
checkAndAddExitCandidate(g, candidate, *it, exits);
- ++it;
- return;
- }
-
+ ++it;
+ return;
+ }
+
// Note: findExits() will clear exits, so it's safe to mutate/move its
// elements here.
auto &enters = exits.front().open;
candidate.clear();
-
- for (; it != ite; ++it) {
+
+ for (; it != ite; ++it) {
DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
candidate.insert(*it);
- if (contains(enters, *it)) {
- break;
- }
- }
-
- if (it != ite) {
- enters.erase(*it);
+ if (contains(enters, *it)) {
+ break;
+ }
+ }
+
+ if (it != ite) {
+ enters.erase(*it);
open_jumps = move(enters);
DEBUG_PRINTF("oj size = %zu\n", open_jumps.size());
- ++it;
- } else {
+ ++it;
+ } else {
open_jumps.clear();
- }
-
+ }
+
findExits(g, candidate, exits);
-}
-
-static
-void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
- const vector<NFAVertex> &topo,
+}
+
+static
+void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
+ const vector<NFAVertex> &topo,
unordered_map<NFAVertex, u32> &regions) {
- assert(!topo.empty());
- u32 curr_id = 0;
+ assert(!topo.empty());
+ u32 curr_id = 0;
auto t_it = topo.rbegin();
unordered_set<NFAVertex> candidate;
flat_set<NFAVertex> open_jumps;
DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
- assert(t_it != topo.rend());
- candidate.insert(*t_it++);
+ assert(t_it != topo.rend());
+ candidate.insert(*t_it++);
DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
- assert(t_it != topo.rend());
- candidate.insert(*t_it++);
-
+ assert(t_it != topo.rend());
+ candidate.insert(*t_it++);
+
vector<exit_info> exits;
findExits(g, candidate, exits);
- while (t_it != topo.rend()) {
- assert(!candidate.empty());
-
- if (exitValid(g, exits, open_jumps)) {
- if (contains(candidate, h.accept) && !open_jumps.empty()) {
- /* we have tried to make an optional region containing accept as
- * we have an open jump to eod. This candidate region needs to
- * be put in with the previous region. */
- curr_id--;
- DEBUG_PRINTF("merging in with region %u\n", curr_id);
- } else {
- DEBUG_PRINTF("setting region %u\n", curr_id);
- }
- setRegion(candidate, curr_id++, regions);
+ while (t_it != topo.rend()) {
+ assert(!candidate.empty());
+
+ if (exitValid(g, exits, open_jumps)) {
+ if (contains(candidate, h.accept) && !open_jumps.empty()) {
+ /* we have tried to make an optional region containing accept as
+ * we have an open jump to eod. This candidate region needs to
+ * be put in with the previous region. */
+ curr_id--;
+ DEBUG_PRINTF("merging in with region %u\n", curr_id);
+ } else {
+ DEBUG_PRINTF("setting region %u\n", curr_id);
+ }
+ setRegion(candidate, curr_id++, regions);
buildInitialCandidate(g, t_it, topo.rend(), candidate, exits,
open_jumps);
- } else {
- NFAVertex curr = *t_it;
+ } else {
+ NFAVertex curr = *t_it;
DEBUG_PRINTF("adding %zu to current\n", g[curr].index);
- candidate.insert(curr);
- open_jumps.erase(curr);
+ candidate.insert(curr);
+ open_jumps.erase(curr);
refineExits(g, candidate, *t_it, exits);
- DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(),
- exits.size());
- ++t_it;
- }
- }
- /* assert exits valid */
- setRegion(candidate, curr_id, regions);
-}
-
-static
-void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
- const BackEdgeSet &backEdges,
+ DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(),
+ exits.size());
+ ++t_it;
+ }
+ }
+ /* assert exits valid */
+ setRegion(candidate, curr_id, regions);
+}
+
+static
+void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
+ const BackEdgeSet &backEdges,
unordered_map<NFAVertex, u32> &regions) {
- for (const auto &e : backEdges) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
-
- u32 ru = regions[u];
- u32 rv = regions[v];
- if (ru == rv) {
- continue;
- }
-
+ for (const auto &e : backEdges) {
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+
+ u32 ru = regions[u];
+ u32 rv = regions[v];
+ if (ru == rv) {
+ continue;
+ }
+
DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv,
- g[u].index, ru);
- assert(rv < ru);
-
- for (auto t : topo) {
- u32 r = regions[t];
- if (r <= ru && r > rv) {
- regions[t] = rv;
- } else if (r > ru) {
- regions[t] = rv + r - ru;
- }
- }
- }
-}
-
-static
-void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
- vector<NFAVertex> &topoOrder) {
- // Start is last element of reverse topo ordering.
- auto it = find(topoOrder.begin(), topoOrder.end(), w.start);
- if (it != topoOrder.end() - 1) {
- DEBUG_PRINTF("repositioning start\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- topoOrder.insert(topoOrder.end(), w.start);
- }
-
- // StartDs is second-to-last element of reverse topo ordering.
- it = find(topoOrder.begin(), topoOrder.end(), w.startDs);
- if (it != topoOrder.end() - 2) {
- DEBUG_PRINTF("repositioning start ds\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- topoOrder.insert(topoOrder.end() - 1, w.startDs);
- }
-
- // AcceptEOD is first element of reverse topo ordering.
- it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod);
- if (it != topoOrder.begin()) {
- DEBUG_PRINTF("repositioning accept\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- topoOrder.insert(topoOrder.begin(), w.acceptEod);
- }
-
- // Accept is second element of reverse topo ordering, if it's connected.
- it = find(topoOrder.begin(), topoOrder.end(), w.accept);
- if (it != topoOrder.begin() + 1) {
- DEBUG_PRINTF("repositioning accept\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- if (in_degree(w.accept, acyclic_g) != 0) {
- topoOrder.insert(topoOrder.begin() + 1, w.accept);
- }
- }
-}
-
-static
-void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
+ g[u].index, ru);
+ assert(rv < ru);
+
+ for (auto t : topo) {
+ u32 r = regions[t];
+ if (r <= ru && r > rv) {
+ regions[t] = rv;
+ } else if (r > ru) {
+ regions[t] = rv + r - ru;
+ }
+ }
+ }
+}
+
+static
+void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
+ vector<NFAVertex> &topoOrder) {
+ // Start is last element of reverse topo ordering.
+ auto it = find(topoOrder.begin(), topoOrder.end(), w.start);
+ if (it != topoOrder.end() - 1) {
+ DEBUG_PRINTF("repositioning start\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ topoOrder.insert(topoOrder.end(), w.start);
+ }
+
+ // StartDs is second-to-last element of reverse topo ordering.
+ it = find(topoOrder.begin(), topoOrder.end(), w.startDs);
+ if (it != topoOrder.end() - 2) {
+ DEBUG_PRINTF("repositioning start ds\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ topoOrder.insert(topoOrder.end() - 1, w.startDs);
+ }
+
+ // AcceptEOD is first element of reverse topo ordering.
+ it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod);
+ if (it != topoOrder.begin()) {
+ DEBUG_PRINTF("repositioning accept\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ topoOrder.insert(topoOrder.begin(), w.acceptEod);
+ }
+
+ // Accept is second element of reverse topo ordering, if it's connected.
+ it = find(topoOrder.begin(), topoOrder.end(), w.accept);
+ if (it != topoOrder.begin() + 1) {
+ DEBUG_PRINTF("repositioning accept\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ if (in_degree(w.accept, acyclic_g) != 0) {
+ topoOrder.insert(topoOrder.begin() + 1, w.accept);
+ }
+ }
+}
+
+static
+void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
unordered_set<NFAVertex> sinks;
- for (auto v : vertices_range(acyclic_g)) {
- if (is_special(v, acyclic_g)) {
- continue;
- }
-
- if (isLeafNode(v, acyclic_g)) {
+ for (auto v : vertices_range(acyclic_g)) {
+ if (is_special(v, acyclic_g)) {
+ continue;
+ }
+
+ if (isLeafNode(v, acyclic_g)) {
DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index);
sinks.insert(NFAVertex(v));
- }
- }
-
- if (sinks.empty()) {
- DEBUG_PRINTF("no sinks found\n");
- return;
- }
-
- bool changed;
- do {
- DEBUG_PRINTF("look\n");
- changed = false;
- for (auto v : vertices_range(acyclic_g)) {
+ }
+ }
+
+ if (sinks.empty()) {
+ DEBUG_PRINTF("no sinks found\n");
+ return;
+ }
+
+ bool changed;
+ do {
+ DEBUG_PRINTF("look\n");
+ changed = false;
+ for (auto v : vertices_range(acyclic_g)) {
if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) {
- continue;
- }
-
- for (auto w : adjacent_vertices_range(v, acyclic_g)) {
+ continue;
+ }
+
+ for (auto w : adjacent_vertices_range(v, acyclic_g)) {
if (!contains(sinks, NFAVertex(w))) {
- goto next;
- }
- }
-
+ goto next;
+ }
+ }
+
DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index);
sinks.insert(NFAVertex(v));
- changed = true;
- next:;
- }
- } while (changed);
-
- for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) {
- if (!contains(sinks, *ri)) {
- continue;
- }
- NFAVertex s = *ri;
+ changed = true;
+ next:;
+ }
+ } while (changed);
+
+ for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) {
+ if (!contains(sinks, *ri)) {
+ continue;
+ }
+ NFAVertex s = *ri;
DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index);
unordered_set<NFAVertex> parents;
- for (const auto &e : in_edges_range(s, acyclic_g)) {
+ for (const auto &e : in_edges_range(s, acyclic_g)) {
parents.insert(NFAVertex(source(e, acyclic_g)));
- }
-
- /* vertex has no children not reachable on a back edge, bubble the
- * vertex up the topo order to be near its parents */
- vector<NFAVertex>::reverse_iterator rj = ri;
- --rj;
- while (rj != topoOrder.rbegin() && !contains(parents, *rj)) {
- /* sink is in rj + 1 */
- assert(*(rj + 1) == s);
- DEBUG_PRINTF("lifting\n");
- using std::swap;
- swap(*rj, *(rj + 1));
- --rj;
- }
- }
-}
-
+ }
+
+ /* vertex has no children not reachable on a back edge, bubble the
+ * vertex up the topo order to be near its parents */
+ vector<NFAVertex>::reverse_iterator rj = ri;
+ --rj;
+ while (rj != topoOrder.rbegin() && !contains(parents, *rj)) {
+ /* sink is in rj + 1 */
+ assert(*(rj + 1) == s);
+ DEBUG_PRINTF("lifting\n");
+ using std::swap;
+ swap(*rj, *(rj + 1));
+ --rj;
+ }
+ }
+}
+
using ColorMap = decltype(make_small_color_map(NGHolder()));
-/** Build a reverse topo ordering (with only the specials that are in use). We
- * also want to ensure vertices which only lead to back edges are placed near
- * their parents. */
-static
-vector<NFAVertex> buildTopoOrder(const NGHolder &w,
- const AcyclicGraph &acyclic_g,
+/** Build a reverse topo ordering (with only the specials that are in use). We
+ * also want to ensure vertices which only lead to back edges are placed near
+ * their parents. */
+static
+vector<NFAVertex> buildTopoOrder(const NGHolder &w,
+ const AcyclicGraph &acyclic_g,
ColorMap &colours) {
- vector<NFAVertex> topoOrder;
+ vector<NFAVertex> topoOrder;
topoOrder.reserve(num_vertices(w));
-
+
topological_sort(acyclic_g, back_inserter(topoOrder),
color_map(colours));
-
- reorderSpecials(w, acyclic_g, topoOrder);
-
- if (topoOrder.empty()) {
- return topoOrder;
- }
-
- liftSinks(acyclic_g, topoOrder);
-
- DEBUG_PRINTF("TOPO ORDER\n");
- for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) {
+
+ reorderSpecials(w, acyclic_g, topoOrder);
+
+ if (topoOrder.empty()) {
+ return topoOrder;
+ }
+
+ liftSinks(acyclic_g, topoOrder);
+
+ DEBUG_PRINTF("TOPO ORDER\n");
+ for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) {
DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index);
- }
- DEBUG_PRINTF("----------\n");
-
- return topoOrder;
-}
-
+ }
+ DEBUG_PRINTF("----------\n");
+
+ return topoOrder;
+}
+
unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const u32 numVertices = num_vertices(g);
- DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const u32 numVertices = num_vertices(g);
+ DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices);
+
auto colours = make_small_color_map(g);
-
- // Build an acyclic graph for this NGHolder.
- BackEdgeSet deadEdges;
+
+ // Build an acyclic graph for this NGHolder.
+ BackEdgeSet deadEdges;
depth_first_search(g,
visitor(BackEdges<BackEdgeSet>(deadEdges))
.root_vertex(g.start)
.color_map(colours));
-
+
auto af = make_bad_edge_filter(&deadEdges);
AcyclicGraph acyclic_g(g, af);
-
- // Build a (reverse) topological ordering.
- vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours);
-
- // Everybody starts in region 0.
+
+ // Build a (reverse) topological ordering.
+ vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours);
+
+ // Everybody starts in region 0.
unordered_map<NFAVertex, u32> regions;
- regions.reserve(numVertices);
- for (auto v : vertices_range(g)) {
- regions.emplace(v, 0);
- }
-
- findDagLeaders(g, acyclic_g, topoOrder, regions);
- mergeUnderBackEdges(g, topoOrder, deadEdges, regions);
-
- return regions;
-}
-
-} // namespace ue2
+ regions.reserve(numVertices);
+ for (auto v : vertices_range(g)) {
+ regions.emplace(v, 0);
+ }
+
+ findDagLeaders(g, acyclic_g, topoOrder, regions);
+ mergeUnderBackEdges(g, topoOrder, deadEdges, regions);
+
+ return regions;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.h b/contrib/libs/hyperscan/src/nfagraph/ng_region.h
index a4708a582e..dec8ea7a04 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.h
@@ -1,219 +1,219 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region analysis and utility functions.
- */
-
-#ifndef NG_REGION_H
-#define NG_REGION_H
-
-#include "ng_holder.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region analysis and utility functions.
+ */
+
+#ifndef NG_REGION_H
+#define NG_REGION_H
+
+#include "ng_holder.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-/** \brief Assign a region ID to every vertex in the graph. */
+#include <vector>
+
+namespace ue2 {
+
+/** \brief Assign a region ID to every vertex in the graph. */
std::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g);
-
-/** \brief True if vertices \p a and \p b are in the same region. */
-template <class Graph>
-bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b,
+
+/** \brief True if vertices \p a and \p b are in the same region. */
+template <class Graph>
+bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(contains(region_map, a) && contains(region_map, b));
-
- return region_map.at(a) == region_map.at(b) &&
- is_special(a, g) == is_special(b, g);
-}
-
-/** \brief True if vertex \p b is in a later region than vertex \p a. */
-template <class Graph>
-bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b,
+ assert(contains(region_map, a) && contains(region_map, b));
+
+ return region_map.at(a) == region_map.at(b) &&
+ is_special(a, g) == is_special(b, g);
+}
+
+/** \brief True if vertex \p b is in a later region than vertex \p a. */
+template <class Graph>
+bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(contains(region_map, a) && contains(region_map, b));
-
- u32 aa = g[a].index;
- u32 bb = g[b].index;
-
- if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
- return false;
- }
-
- if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
- return true;
- }
-
- if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
- return true;
- }
- if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
- return false;
- }
-
- return region_map.at(a) < region_map.at(b);
-}
-
-/** \brief True if vertex \p b is in an earlier region than vertex \p a. */
-template <class Graph>
-bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b,
+ assert(contains(region_map, a) && contains(region_map, b));
+
+ u32 aa = g[a].index;
+ u32 bb = g[b].index;
+
+ if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
+ return false;
+ }
+
+ if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
+ return true;
+ }
+
+ if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
+ return true;
+ }
+ if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
+ return false;
+ }
+
+ return region_map.at(a) < region_map.at(b);
+}
+
+/** \brief True if vertex \p b is in an earlier region than vertex \p a. */
+template <class Graph>
+bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(contains(region_map, a) && contains(region_map, b));
-
- u32 aa = g[a].index;
- u32 bb = g[b].index;
-
- if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
- return true;
- }
-
- if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
- return false;
- }
-
- if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
- return false;
- }
- if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
- return true;
- }
-
- return region_map.at(b) < region_map.at(a);
-}
-
-/** \brief True if vertex \p v is an entry vertex for its region. */
-template <class Graph>
-bool isRegionEntry(const Graph &g, NFAVertex v,
+ assert(contains(region_map, a) && contains(region_map, b));
+
+ u32 aa = g[a].index;
+ u32 bb = g[b].index;
+
+ if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
+ return true;
+ }
+
+ if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
+ return false;
+ }
+
+ if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
+ return false;
+ }
+ if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
+ return true;
+ }
+
+ return region_map.at(b) < region_map.at(a);
+}
+
+/** \brief True if vertex \p v is an entry vertex for its region. */
+template <class Graph>
+bool isRegionEntry(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- // Note that some graph types do not have inv_adjacent_vertices, so we must
- // use in_edges here.
- for (const auto &e : in_edges_range(v, g)) {
- if (!inSameRegion(g, v, source(e, g), region_map)) {
- return true;
- }
- }
-
- return false;
-}
-
-/** \brief True if vertex \p v is an exit vertex for its region. */
-template <class Graph>
-bool isRegionExit(const Graph &g, NFAVertex v,
+ // Note that some graph types do not have inv_adjacent_vertices, so we must
+ // use in_edges here.
+ for (const auto &e : in_edges_range(v, g)) {
+ if (!inSameRegion(g, v, source(e, g), region_map)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/** \brief True if vertex \p v is an exit vertex for its region. */
+template <class Graph>
+bool isRegionExit(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!inSameRegion(g, v, w, region_map)) {
- return true;
- }
- }
-
- return false;
-}
-
-/** \brief True if vertex \p v is in a region all on its own. */
-template <class Graph>
-bool isSingletonRegion(const Graph &g, NFAVertex v,
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!inSameRegion(g, v, w, region_map)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/** \brief True if vertex \p v is in a region all on its own. */
+template <class Graph>
+bool isSingletonRegion(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- for (const auto &e : in_edges_range(v, g)) {
- auto u = source(e, g);
- if (u != v && inSameRegion(g, v, u, region_map)) {
- return false;
- }
-
- for (auto w : ue2::adjacent_vertices_range(u, g)) {
- if (w != v && inSameRegion(g, v, w, region_map)) {
- return false;
- }
- }
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v && inSameRegion(g, v, w, region_map)) {
- return false;
- }
-
- for (const auto &e : in_edges_range(w, g)) {
- auto u = source(e, g);
- if (u != v && inSameRegion(g, v, u, region_map)) {
- return false;
- }
- }
-
- return true;
- }
-
- return true;
-}
-
-/**
- * \brief True if the region containing vertex \p v is optional. The vertex \p v
- * should be a region leader.
- */
-template <class Graph>
-bool isOptionalRegion(const Graph &g, NFAVertex v,
+ for (const auto &e : in_edges_range(v, g)) {
+ auto u = source(e, g);
+ if (u != v && inSameRegion(g, v, u, region_map)) {
+ return false;
+ }
+
+ for (auto w : ue2::adjacent_vertices_range(u, g)) {
+ if (w != v && inSameRegion(g, v, w, region_map)) {
+ return false;
+ }
+ }
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v && inSameRegion(g, v, w, region_map)) {
+ return false;
+ }
+
+ for (const auto &e : in_edges_range(w, g)) {
+ auto u = source(e, g);
+ if (u != v && inSameRegion(g, v, u, region_map)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ return true;
+}
+
+/**
+ * \brief True if the region containing vertex \p v is optional. The vertex \p v
+ * should be a region leader.
+ */
+template <class Graph>
+bool isOptionalRegion(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(isRegionEntry(g, v, region_map));
-
+ assert(isRegionEntry(g, v, region_map));
+
DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n",
- region_map.at(v), g[v].index);
-
- // Region zero is never optional.
- assert(contains(region_map, v));
- if (region_map.at(v) == 0) {
- return false;
- }
-
- // Optional if v has a predecessor in an earlier region that has a
- // successor in a later one.
-
- for (const auto &e : in_edges_range(v, g)) {
- auto u = source(e, g);
- if (inSameRegion(g, v, u, region_map)) {
- continue;
- }
+ region_map.at(v), g[v].index);
+
+ // Region zero is never optional.
+ assert(contains(region_map, v));
+ if (region_map.at(v) == 0) {
+ return false;
+ }
+
+ // Optional if v has a predecessor in an earlier region that has a
+ // successor in a later one.
+
+ for (const auto &e : in_edges_range(v, g)) {
+ auto u = source(e, g);
+ if (inSameRegion(g, v, u, region_map)) {
+ continue;
+ }
DEBUG_PRINTF(" searching from u=%zu\n", g[u].index);
-
- assert(inEarlierRegion(g, v, u, region_map));
-
- for (auto w : adjacent_vertices_range(u, g)) {
+
+ assert(inEarlierRegion(g, v, u, region_map));
+
+ for (auto w : adjacent_vertices_range(u, g)) {
DEBUG_PRINTF(" searching to w=%zu\n", g[w].index);
- if (inLaterRegion(g, v, w, region_map)) {
- return true;
- }
- }
- return false;
- }
-
- return false;
-}
-
-} // namespace ue2
-
-#endif
+ if (inLaterRegion(g, v, w, region_map)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ return false;
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp
index 1126d4d6c9..4eecb1f917 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp
@@ -1,270 +1,270 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region Redundancy optimisation pass.
- *
- * Identifies and removes entire regions that are adjacent to a cyclic state
- * with a superset of their character reachability.
- */
-#include "ng_region_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_region.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** Precalculated information about a region. */
-struct RegionInfo {
- NFAVertex entry; //!< arbitrary entry vertex
- CharReach cr; //!< union of the reach of all vertices in region
-};
-
-} // namespace
-
-static
-bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
- const flat_set<ReportID> &expected_reports,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region Redundancy optimisation pass.
+ *
+ * Identifies and removes entire regions that are adjacent to a cyclic state
+ * with a superset of their character reachability.
+ */
+#include "ng_region_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_region.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** Precalculated information about a region. */
+struct RegionInfo {
+ NFAVertex entry; //!< arbitrary entry vertex
+ CharReach cr; //!< union of the reach of all vertices in region
+};
+
+} // namespace
+
+static
+bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
+ const flat_set<ReportID> &expected_reports,
const unordered_map<NFAVertex, u32> &region_map) {
- /* TODO: only check vertices connected to accept/acceptEOD */
- for (auto v : vertices_range(g)) {
- if (region != region_map.at(v)) {
- continue;
- }
-
- if (is_any_accept(v, g)) {
- return true; /* encountering an actual special in the region is
- * possible but definitely unexpected */
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (is_any_accept(w, g) && g[v].reports != expected_reports) {
- return true;
- }
- }
- }
- return false;
-}
-
-static
-void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
- const map<u32, RegionInfo> &info,
+ /* TODO: only check vertices connected to accept/acceptEOD */
+ for (auto v : vertices_range(g)) {
+ if (region != region_map.at(v)) {
+ continue;
+ }
+
+ if (is_any_accept(v, g)) {
+ return true; /* encountering an actual special in the region is
+ * possible but definitely unexpected */
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (is_any_accept(w, g) && g[v].reports != expected_reports) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static
+void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
+ const map<u32, RegionInfo> &info,
const unordered_map<NFAVertex, u32> &region_map,
- set<u32> &deadRegions) {
- u32 region = region_map.at(cyc);
- CharReach cr = h[cyc].char_reach;
- auto reports = h[cyc].reports;
-
+ set<u32> &deadRegions) {
+ u32 region = region_map.at(cyc);
+ CharReach cr = h[cyc].char_reach;
+ auto reports = h[cyc].reports;
+
DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index,
- region);
-
- map<u32, RegionInfo>::const_iterator it;
- while ((it = info.find(++region)) != info.end()) {
- NFAVertex v = it->second.entry;
- const CharReach &region_cr = it->second.cr;
- assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
+ region);
+
+ map<u32, RegionInfo>::const_iterator it;
+ while ((it = info.find(++region)) != info.end()) {
+ NFAVertex v = it->second.entry;
+ const CharReach &region_cr = it->second.cr;
+ assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
DEBUG_PRINTF("checking %zu\n", h[v].index);
-
- if (!region_cr.isSubsetOf(cr)) {
- DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
- break;
- }
-
- if (isOptionalRegion(h, v, region_map)
- && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
+
+ if (!region_cr.isSubsetOf(cr)) {
+ DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
+ break;
+ }
+
+ if (isOptionalRegion(h, v, region_map)
+ && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
DEBUG_PRINTF("cyclic state %zu leads to optional region leader"
" %zu\n", h[cyc].index, h[v].index);
- deadRegions.insert(region);
- } else if (isSingletonRegion(h, v, region_map)) {
- /* we can use this region as straw and suck in optional regions on
- * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
- cr = h[v].char_reach;
- reports = h[v].reports;
- DEBUG_PRINTF("%u is straw\n", region);
- assert(cr.isSubsetOf(h[cyc].char_reach));
- if (hasSelfLoop(v, h)) {
- DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
- remove_edge(v, v, h);
- }
- } else {
- break;
- }
- }
-}
-
-static
-void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
- const map<u32, RegionInfo> &info,
+ deadRegions.insert(region);
+ } else if (isSingletonRegion(h, v, region_map)) {
+ /* we can use this region as straw and suck in optional regions on
+ * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
+ cr = h[v].char_reach;
+ reports = h[v].reports;
+ DEBUG_PRINTF("%u is straw\n", region);
+ assert(cr.isSubsetOf(h[cyc].char_reach));
+ if (hasSelfLoop(v, h)) {
+ DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
+ remove_edge(v, v, h);
+ }
+ } else {
+ break;
+ }
+ }
+}
+
+static
+void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
+ const map<u32, RegionInfo> &info,
const unordered_map<NFAVertex, u32> &region_map,
- set<u32> &deadRegions) {
- u32 region = region_map.at(cyc);
- CharReach cr = h[cyc].char_reach;
- auto reports = h[cyc].reports;
-
+ set<u32> &deadRegions) {
+ u32 region = region_map.at(cyc);
+ CharReach cr = h[cyc].char_reach;
+ auto reports = h[cyc].reports;
+
DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region);
-
- map<u32, RegionInfo>::const_iterator it;
- while ((it = info.find(--region)) != info.end()) {
- NFAVertex v = it->second.entry;
- const CharReach &region_cr = it->second.cr;
- assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
+
+ map<u32, RegionInfo>::const_iterator it;
+ while ((it = info.find(--region)) != info.end()) {
+ NFAVertex v = it->second.entry;
+ const CharReach &region_cr = it->second.cr;
+ assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
DEBUG_PRINTF("checking %zu\n", h[v].index);
-
- if (!region_cr.isSubsetOf(cr)) {
- DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
- break;
- }
-
- if (isOptionalRegion(h, v, region_map)
- && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
+
+ if (!region_cr.isSubsetOf(cr)) {
+ DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
+ break;
+ }
+
+ if (isOptionalRegion(h, v, region_map)
+ && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n",
- h[cyc].index, h[v].index);
- deadRegions.insert(region);
- } else if (isSingletonRegion(h, v, region_map)) {
- /* we can use this region as a reverse straw and suck in optional
- * regions on the other side. This allows us to transform
- * /^a?a{n}.*b/ to /^a{n}.*b/ */
- cr = h[v].char_reach;
- reports = h[v].reports;
- DEBUG_PRINTF("%u is straw\n", region);
- assert(cr.isSubsetOf(h[cyc].char_reach));
- if (hasSelfLoop(v, h)) {
- DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
- remove_edge(v, v, h);
- }
- } else {
- break;
- }
-
- if (!region) { // No wrapping
- break;
- }
- }
-}
-
-static
-map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
+ h[cyc].index, h[v].index);
+ deadRegions.insert(region);
+ } else if (isSingletonRegion(h, v, region_map)) {
+ /* we can use this region as a reverse straw and suck in optional
+ * regions on the other side. This allows us to transform
+ * /^a?a{n}.*b/ to /^a{n}.*b/ */
+ cr = h[v].char_reach;
+ reports = h[v].reports;
+ DEBUG_PRINTF("%u is straw\n", region);
+ assert(cr.isSubsetOf(h[cyc].char_reach));
+ if (hasSelfLoop(v, h)) {
+ DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
+ remove_edge(v, v, h);
+ }
+ } else {
+ break;
+ }
+
+ if (!region) { // No wrapping
+ break;
+ }
+ }
+}
+
+static
+map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map) {
- map<u32, RegionInfo> info;
-
- for (auto v : vertices_range(g)) {
- u32 region = region_map.at(v);
- if (is_special(v, g) || region == 0) {
- continue;
- }
-
- RegionInfo &ri = info[region];
- ri.cr |= g[v].char_reach;
- if (isRegionEntry(g, v, region_map)) {
- ri.entry = v;
- }
- }
-
- return info;
-}
-
-static
-bool hasNoStartAnchoring(const NGHolder &h) {
- for (auto v : adjacent_vertices_range(h.start, h)) {
- if (!edge(h.startDs, v, h).second) {
- return false;
- }
- }
- return true;
-}
-
-void removeRegionRedundancy(NGHolder &g, som_type som) {
- auto region_map = assignRegions(g);
-
- map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);
-
- set<u32> deadRegions;
-
- /* if we are not tracking som, we can treat sds as a cyclic region if there
- * is no anchoring */
- if (!som && hasNoStartAnchoring(g)) {
- processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
- }
-
- // Walk the region mapping, looking for regions that consist of a single
- // cyclic node.
-
- for (const auto &m : info) {
- // Must not have already been removed
- if (contains(deadRegions, m.first)) {
- continue;
- }
-
- NFAVertex v = m.second.entry;
- /* require a singleton cyclic region */
- if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
- continue;
- }
-
- if (som && is_virtual_start(v, g)) {
- continue;
- }
-
- processCyclicStateForward(g, v, info, region_map, deadRegions);
- processCyclicStateReverse(g, v, info, region_map, deadRegions);
- }
-
- if (deadRegions.empty()) {
- return;
- }
-
- vector<NFAVertex> dead;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- u32 region = region_map.at(v);
- if (contains(deadRegions, region)) {
- dead.push_back(v);
- }
- }
-
- if (!dead.empty()) {
- DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
- dead.size(), deadRegions.size());
- remove_vertices(dead, g);
- }
-}
-
-} // namespace ue2
+ map<u32, RegionInfo> info;
+
+ for (auto v : vertices_range(g)) {
+ u32 region = region_map.at(v);
+ if (is_special(v, g) || region == 0) {
+ continue;
+ }
+
+ RegionInfo &ri = info[region];
+ ri.cr |= g[v].char_reach;
+ if (isRegionEntry(g, v, region_map)) {
+ ri.entry = v;
+ }
+ }
+
+ return info;
+}
+
+static
+bool hasNoStartAnchoring(const NGHolder &h) {
+ for (auto v : adjacent_vertices_range(h.start, h)) {
+ if (!edge(h.startDs, v, h).second) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void removeRegionRedundancy(NGHolder &g, som_type som) {
+ auto region_map = assignRegions(g);
+
+ map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);
+
+ set<u32> deadRegions;
+
+ /* if we are not tracking som, we can treat sds as a cyclic region if there
+ * is no anchoring */
+ if (!som && hasNoStartAnchoring(g)) {
+ processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
+ }
+
+ // Walk the region mapping, looking for regions that consist of a single
+ // cyclic node.
+
+ for (const auto &m : info) {
+ // Must not have already been removed
+ if (contains(deadRegions, m.first)) {
+ continue;
+ }
+
+ NFAVertex v = m.second.entry;
+ /* require a singleton cyclic region */
+ if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
+ continue;
+ }
+
+ if (som && is_virtual_start(v, g)) {
+ continue;
+ }
+
+ processCyclicStateForward(g, v, info, region_map, deadRegions);
+ processCyclicStateReverse(g, v, info, region_map, deadRegions);
+ }
+
+ if (deadRegions.empty()) {
+ return;
+ }
+
+ vector<NFAVertex> dead;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ u32 region = region_map.at(v);
+ if (contains(deadRegions, region)) {
+ dead.push_back(v);
+ }
+ }
+
+ if (!dead.empty()) {
+ DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
+ dead.size(), deadRegions.size());
+ remove_vertices(dead, g);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h
index f0b396ca43..c4b4fc958c 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region Redundancy optimisation pass.
- *
- * Identifies and removes entire regions that are adjacent to a cyclic state
- * with a superset of their character reachability.
- */
-
-#ifndef NG_REGION_REDUNDANCY_H
-#define NG_REGION_REDUNDANCY_H
-
-#include "som/som.h"
-
-namespace ue2 {
-
-class NGHolder;
-
-void removeRegionRedundancy(NGHolder &g, som_type som);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region Redundancy optimisation pass.
+ *
+ * Identifies and removes entire regions that are adjacent to a cyclic state
+ * with a superset of their character reachability.
+ */
+
+#ifndef NG_REGION_REDUNDANCY_H
+#define NG_REGION_REDUNDANCY_H
+
+#include "som/som.h"
+
+namespace ue2 {
+
+class NGHolder;
+
+void removeRegionRedundancy(NGHolder &g, som_type som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp
index 1f63ad3c6f..72c7eee3f3 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp
@@ -1,329 +1,329 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded repeat analysis.
- */
-#include "ng_repeat.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_limex_accel.h"
-#include "ng_prune.h"
-#include "ng_reports.h"
-#include "ng_som_util.h"
-#include "ng_util.h"
-#include "nfa/accel.h"
-#include "nfa/limex_limits.h"
-#include "nfa/repeat_internal.h"
-#include "nfa/repeatcompile.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded repeat analysis.
+ */
+#include "ng_repeat.h"
+
+#include "grey.h"
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_limex_accel.h"
+#include "ng_prune.h"
+#include "ng_reports.h"
+#include "ng_som_util.h"
+#include "ng_util.h"
+#include "nfa/accel.h"
+#include "nfa/limex_limits.h"
+#include "nfa/repeat_internal.h"
+#include "nfa/repeatcompile.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/graph_undirected.h"
-#include "util/report_manager.h"
+#include "util/report_manager.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <map>
-#include <queue>
+
+#include <algorithm>
+#include <map>
+#include <queue>
#include <unordered_map>
#include <unordered_set>
-
-#include <boost/graph/connected_components.hpp>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/reverse_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-#include <boost/icl/interval_set.hpp>
-
-using namespace std;
+
+#include <boost/graph/connected_components.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/reverse_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/icl/interval_set.hpp>
+
+using namespace std;
using boost::depth_first_search;
using boost::depth_first_visit;
using boost::make_assoc_property_map;
-
-namespace ue2 {
-
-namespace {
-
+
+namespace ue2 {
+
+namespace {
+
/**
* \brief Filter that retains only edges between vertices with the same
* reachability. Special vertices are dropped.
*/
-template<class Graph>
-struct ReachFilter {
+template<class Graph>
+struct ReachFilter {
ReachFilter() = default;
- explicit ReachFilter(const Graph *g_in) : g(g_in) {}
-
- // Convenience typedefs.
+ explicit ReachFilter(const Graph *g_in) : g(g_in) {}
+
+ // Convenience typedefs.
using Traits = typename boost::graph_traits<Graph>;
using VertexDescriptor = typename Traits::vertex_descriptor;
using EdgeDescriptor = typename Traits::edge_descriptor;
-
+
bool operator()(const VertexDescriptor &v) const {
- assert(g);
- // Disallow special vertices, as otherwise we will try to remove them
- // later.
+ assert(g);
+ // Disallow special vertices, as otherwise we will try to remove them
+ // later.
return !is_special(v, *g);
}
-
+
bool operator()(const EdgeDescriptor &e) const {
assert(g);
- // Vertices must have the same reach.
+ // Vertices must have the same reach.
auto u = source(e, *g), v = target(e, *g);
- const CharReach &cr_u = (*g)[u].char_reach;
- const CharReach &cr_v = (*g)[v].char_reach;
- return cr_u == cr_v;
- }
-
- const Graph *g = nullptr;
-};
-
+ const CharReach &cr_u = (*g)[u].char_reach;
+ const CharReach &cr_v = (*g)[v].char_reach;
+ return cr_u == cr_v;
+ }
+
+ const Graph *g = nullptr;
+};
+
using RepeatGraph = boost::filtered_graph<NGHolder, ReachFilter<NGHolder>,
ReachFilter<NGHolder>>;
-
-struct ReachSubgraph {
- vector<NFAVertex> vertices;
+
+struct ReachSubgraph {
+ vector<NFAVertex> vertices;
depth repeatMin{0};
depth repeatMax{0};
- u32 minPeriod = 1;
- bool is_reset = false;
- enum RepeatType historyType = REPEAT_RING;
- bool bad = false; // if true, ignore this case
-};
-
-} // namespace
-
-static
-void findInitDepths(const NGHolder &g,
+ u32 minPeriod = 1;
+ bool is_reset = false;
+ enum RepeatType historyType = REPEAT_RING;
+ bool bad = false; // if true, ignore this case
+};
+
+} // namespace
+
+static
+void findInitDepths(const NGHolder &g,
unordered_map<NFAVertex, NFAVertexDepth> &depths) {
auto d = calcDepths(g);
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
size_t idx = g[v].index;
- assert(idx < d.size());
+ assert(idx < d.size());
depths.emplace(v, d[idx]);
- }
-}
-
-static
+ }
+}
+
+static
vector<NFAVertex> buildTopoOrder(const RepeatGraph &g) {
/* Note: RepeatGraph is a filtered version of NGHolder and still has
* NFAVertex as its vertex descriptor */
typedef unordered_set<NFAEdge> EdgeSet;
- EdgeSet deadEdges;
-
- // We don't have indices spanning [0,N] on our filtered graph, so we
- // provide a colour map.
+ EdgeSet deadEdges;
+
+ // We don't have indices spanning [0,N] on our filtered graph, so we
+ // provide a colour map.
unordered_map<NFAVertex, boost::default_color_type> colours;
-
- depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)).
- color_map(make_assoc_property_map(colours)));
+
+ depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)).
+ color_map(make_assoc_property_map(colours)));
auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges));
-
+
vector<NFAVertex> topoOrder;
- topological_sort(acyclic_g, back_inserter(topoOrder),
- color_map(make_assoc_property_map(colours)));
-
- reverse(topoOrder.begin(), topoOrder.end());
+ topological_sort(acyclic_g, back_inserter(topoOrder),
+ color_map(make_assoc_property_map(colours)));
+
+ reverse(topoOrder.begin(), topoOrder.end());
return topoOrder;
-}
-
-static
-void proper_pred(const NGHolder &g, NFAVertex v,
+}
+
+static
+void proper_pred(const NGHolder &g, NFAVertex v,
unordered_set<NFAVertex> &p) {
- pred(g, v, &p);
- p.erase(v); // self-loops
-}
-
-static
-void proper_succ(const NGHolder &g, NFAVertex v,
+ pred(g, v, &p);
+ p.erase(v); // self-loops
+}
+
+static
+void proper_succ(const NGHolder &g, NFAVertex v,
unordered_set<NFAVertex> &s) {
- succ(g, v, &s);
- s.erase(v); // self-loops
-}
-
-static
-bool roguePredecessor(const NGHolder &g, NFAVertex v,
+ succ(g, v, &s);
+ s.erase(v); // self-loops
+}
+
+static
+bool roguePredecessor(const NGHolder &g, NFAVertex v,
const unordered_set<NFAVertex> &involved,
const unordered_set<NFAVertex> &pred) {
- u32 seen = 0;
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(involved, u)) {
- continue;
- }
- if (!contains(pred, u)) {
+ u32 seen = 0;
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(involved, u)) {
+ continue;
+ }
+ if (!contains(pred, u)) {
DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index);
- return true;
- }
-
- seen++;
- }
-
- // We must have edges from either (a) none of our external predecessors, or
- // (b) all of our external predecessors.
- if (!seen) {
- return false;
- }
- return pred.size() != seen;
-}
-
-static
-bool rogueSuccessor(const NGHolder &g, NFAVertex v,
+ return true;
+ }
+
+ seen++;
+ }
+
+ // We must have edges from either (a) none of our external predecessors, or
+ // (b) all of our external predecessors.
+ if (!seen) {
+ return false;
+ }
+ return pred.size() != seen;
+}
+
+static
+bool rogueSuccessor(const NGHolder &g, NFAVertex v,
const unordered_set<NFAVertex> &involved,
const unordered_set<NFAVertex> &succ) {
- u32 seen = 0;
- for (auto w : adjacent_vertices_range(v, g)) {
- if (contains(involved, w)) {
- continue;
- }
-
- if (!contains(succ, w)) {
+ u32 seen = 0;
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (contains(involved, w)) {
+ continue;
+ }
+
+ if (!contains(succ, w)) {
DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index);
- return true;
- }
-
- seen++;
- }
-
- // We must have edges to either (a) none of our external successors, or
- // (b) all of our external successors.
- if (!seen) {
- return false;
- }
- return succ.size() != seen;
-}
-
-static
-bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) {
+ return true;
+ }
+
+ seen++;
+ }
+
+ // We must have edges to either (a) none of our external successors, or
+ // (b) all of our external successors.
+ if (!seen) {
+ return false;
+ }
+ return succ.size() != seen;
+}
+
+static
+bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) {
/* TODO: check that we need this now that we allow multiple tops */
const flat_set<u32> *tops = nullptr;
-
- for (auto v : verts) {
- for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
- if (u != g.start && u != g.startDs) {
- continue; // Only edges from starts have valid top properties.
- }
+
+ for (auto v : verts) {
+ for (const auto &e : in_edges_range(v, g)) {
+ NFAVertex u = source(e, g);
+ if (u != g.start && u != g.startDs) {
+ continue; // Only edges from starts have valid top properties.
+ }
DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index,
g[v].index, g[e].tops.size());
if (!tops) {
tops = &g[e].tops;
} else if (g[e].tops != *tops) {
return true; // More than one set of tops.
- }
- }
- }
-
- return false;
-}
-
-static
-bool vertexIsBad(const NGHolder &g, NFAVertex v,
+ }
+ }
+ }
+
+ return false;
+}
+
+static
+bool vertexIsBad(const NGHolder &g, NFAVertex v,
const unordered_set<NFAVertex> &involved,
const unordered_set<NFAVertex> &tail,
const unordered_set<NFAVertex> &pred,
const unordered_set<NFAVertex> &succ,
- const flat_set<ReportID> &reports) {
+ const flat_set<ReportID> &reports) {
DEBUG_PRINTF("check vertex %zu\n", g[v].index);
-
- // We must drop any vertex that is the target of a back-edge within
- // our subgraph. The tail set contains all vertices that are after v in a
- // topo ordering.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(tail, u)) {
+
+ // We must drop any vertex that is the target of a back-edge within
+ // our subgraph. The tail set contains all vertices that are after v in a
+ // topo ordering.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(tail, u)) {
DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n",
- g[u].index, g[v].index);
- return true;
- }
- }
-
- // If this vertex has an entry from outside our subgraph, it must have
- // edges from *all* the vertices in pred and no other external entries.
- // Similarly for exits.
- if (roguePredecessor(g, v, involved, pred)) {
+ g[u].index, g[v].index);
+ return true;
+ }
+ }
+
+ // If this vertex has an entry from outside our subgraph, it must have
+ // edges from *all* the vertices in pred and no other external entries.
+ // Similarly for exits.
+ if (roguePredecessor(g, v, involved, pred)) {
DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index);
- return true;
- }
-
- if (rogueSuccessor(g, v, involved, succ)) {
+ return true;
+ }
+
+ if (rogueSuccessor(g, v, involved, succ)) {
DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index);
- return true;
- }
-
- // All reporting vertices should have the same reports.
- if (is_match_vertex(v, g) && reports != g[v].reports) {
+ return true;
+ }
+
+ // All reporting vertices should have the same reports.
+ if (is_match_vertex(v, g) && reports != g[v].reports) {
DEBUG_PRINTF("report mismatch to %zu\n", g[v].index);
- return true;
- }
-
- return false;
-}
-
-static
-void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
- const u32 minNumVertices, queue<ReachSubgraph> &q) {
- DEBUG_PRINTF("entry\n");
-
- // We construct a copy of the graph using just the vertices we want, rather
- // than using a filtered_graph -- this way is faster.
- NGHolder verts_g;
+ return true;
+ }
+
+ return false;
+}
+
+static
+void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
+ const u32 minNumVertices, queue<ReachSubgraph> &q) {
+ DEBUG_PRINTF("entry\n");
+
+ // We construct a copy of the graph using just the vertices we want, rather
+ // than using a filtered_graph -- this way is faster.
+ NGHolder verts_g;
unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g
- fillHolder(&verts_g, g, verts, &verts_map);
-
+ fillHolder(&verts_g, g, verts, &verts_map);
+
const auto ug = make_undirected_graph(verts_g);
-
+
unordered_map<NFAVertex, u32> repeatMap;
-
- size_t num = connected_components(ug, make_assoc_property_map(repeatMap));
- DEBUG_PRINTF("found %zu connected repeat components\n", num);
- assert(num > 0);
-
- vector<ReachSubgraph> rs(num);
-
- for (auto v : verts) {
+
+ size_t num = connected_components(ug, make_assoc_property_map(repeatMap));
+ DEBUG_PRINTF("found %zu connected repeat components\n", num);
+ assert(num > 0);
+
+ vector<ReachSubgraph> rs(num);
+
+ for (auto v : verts) {
assert(!is_special(v, g));
auto vu = verts_map.at(v);
- auto rit = repeatMap.find(vu);
- if (rit == repeatMap.end()) {
- continue; /* not part of a repeat */
- }
- u32 comp_id = rit->second;
- assert(comp_id < num);
- rs[comp_id].vertices.push_back(v);
- }
-
- for (const auto &rsi : rs) {
+ auto rit = repeatMap.find(vu);
+ if (rit == repeatMap.end()) {
+ continue; /* not part of a repeat */
+ }
+ u32 comp_id = rit->second;
+ assert(comp_id < num);
+ rs[comp_id].vertices.push_back(v);
+ }
+
+ for (const auto &rsi : rs) {
if (rsi.vertices.empty()) {
// Empty elements can happen when connected_components finds a
// subgraph consisting entirely of specials (which aren't added to
@@ -331,448 +331,448 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
// these, so we skip them.
continue;
}
- DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size());
- if (rsi.vertices.size() >= minNumVertices) {
- DEBUG_PRINTF("enqueuing\n");
- q.push(rsi);
- }
- }
-}
-
-static
-void findFirstReports(const NGHolder &g, const ReachSubgraph &rsi,
- flat_set<ReportID> &reports) {
- for (auto v : rsi.vertices) {
- if (is_match_vertex(v, g)) {
- reports = g[v].reports;
- return;
- }
- }
-}
-
-static
-void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
- const u32 minNumVertices) {
- if (rs.empty()) {
- return;
- }
-
- DEBUG_PRINTF("%zu subgraphs\n", rs.size());
-
- vector<ReachSubgraph> rs_out;
-
- queue<ReachSubgraph> q;
- for (const auto &rsi : rs) {
- if (rsi.vertices.size() < minNumVertices) {
- continue;
- }
- q.push(rsi);
- }
-
- while (!q.empty()) {
- const ReachSubgraph &rsi = q.front();
-
- if (rsi.vertices.size() < minNumVertices) {
- q.pop(); // Too small for consideration as a repeat.
- continue;
- }
-
- DEBUG_PRINTF("subgraph with %zu vertices\n", rsi.vertices.size());
-
- // Check that all the edges from outside have the same tops. TODO: we
- // don't have to throw the whole subgraph out, we could do this check
- // on a per vertex basis.
- if (hasDifferentTops(g, rsi.vertices)) {
- DEBUG_PRINTF("different tops!\n");
- q.pop();
- continue;
- }
-
+ DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size());
+ if (rsi.vertices.size() >= minNumVertices) {
+ DEBUG_PRINTF("enqueuing\n");
+ q.push(rsi);
+ }
+ }
+}
+
+static
+void findFirstReports(const NGHolder &g, const ReachSubgraph &rsi,
+ flat_set<ReportID> &reports) {
+ for (auto v : rsi.vertices) {
+ if (is_match_vertex(v, g)) {
+ reports = g[v].reports;
+ return;
+ }
+ }
+}
+
+static
+void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
+ const u32 minNumVertices) {
+ if (rs.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("%zu subgraphs\n", rs.size());
+
+ vector<ReachSubgraph> rs_out;
+
+ queue<ReachSubgraph> q;
+ for (const auto &rsi : rs) {
+ if (rsi.vertices.size() < minNumVertices) {
+ continue;
+ }
+ q.push(rsi);
+ }
+
+ while (!q.empty()) {
+ const ReachSubgraph &rsi = q.front();
+
+ if (rsi.vertices.size() < minNumVertices) {
+ q.pop(); // Too small for consideration as a repeat.
+ continue;
+ }
+
+ DEBUG_PRINTF("subgraph with %zu vertices\n", rsi.vertices.size());
+
+ // Check that all the edges from outside have the same tops. TODO: we
+ // don't have to throw the whole subgraph out, we could do this check
+ // on a per vertex basis.
+ if (hasDifferentTops(g, rsi.vertices)) {
+ DEBUG_PRINTF("different tops!\n");
+ q.pop();
+ continue;
+ }
+
unordered_set<NFAVertex> involved(rsi.vertices.begin(),
rsi.vertices.end());
unordered_set<NFAVertex> tail(involved); // to look for back-edges.
unordered_set<NFAVertex> pred, succ;
- proper_pred(g, rsi.vertices.front(), pred);
- proper_succ(g, rsi.vertices.back(), succ);
-
- flat_set<ReportID> reports;
- findFirstReports(g, rsi, reports);
-
- bool recalc = false;
- deque<NFAVertex> verts;
-
- for (auto v : rsi.vertices) {
- tail.erase(v); // now contains all vertices _after_ this one.
-
- if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) {
- recalc = true;
- continue;
- }
-
- verts.push_back(v);
- }
-
- if (recalc) {
- if (verts.size() < minNumVertices) {
- DEBUG_PRINTF("subgraph got too small\n");
- q.pop();
- continue;
- }
- splitSubgraph(g, verts, minNumVertices, q);
- } else {
- DEBUG_PRINTF("subgraph is ok\n");
- rs_out.push_back(rsi);
- }
- q.pop();
- }
-
- rs.swap(rs_out);
-}
-
-namespace {
-class DistanceSet {
-private:
- // We use boost::icl to do the heavy lifting.
- typedef boost::icl::closed_interval<u32> ClosedInterval;
- typedef boost::icl::interval_set<u32, std::less, ClosedInterval>
- IntervalSet;
- IntervalSet distances;
-public:
- // Add a distance.
- void insert(u32 d) {
- distances.insert(d);
- }
-
- void add(const DistanceSet &a) {
- distances += a.distances; // union operation
- }
-
- // Increment all the distances by one and add.
- void add_incremented(const DistanceSet &a) {
- for (const auto &d : a.distances) {
- u32 lo = lower(d) + 1;
- u32 hi = upper(d) + 1;
- distances.insert(boost::icl::construct<ClosedInterval>(lo, hi));
- }
- }
-
-#ifdef DEBUG
- void dump() const {
- if (distances.empty()) {
- printf("<empty>");
- return;
- }
-
- for (const auto &d : distances) {
- printf("[%u,%u] ", lower(d), upper(d));
- }
- }
-#endif
-
- // True if this distance set is a single contiguous interval.
- bool is_contiguous() const {
- IntervalSet::const_iterator it = distances.begin();
- if (it == distances.end()) {
- return false;
- }
- ++it;
- return (it == distances.end());
- }
-
- pair<u32, u32> get_range() const {
- assert(is_contiguous());
- return make_pair(lower(distances), upper(distances));
- }
-};
-}
-
-/**
- * Returns true if the given bounds are too large to be implemented with our
- * runtime engines that handle bounded repeats.
- */
-static
-bool tooLargeToImplement(const depth &repeatMin, const depth &repeatMax) {
- if (!repeatMin.is_finite()) {
- DEBUG_PRINTF("non-finite min bound %s\n", repeatMin.str().c_str());
- assert(0); // this is a surprise!
- return true;
- }
-
- if ((u32)repeatMin >= REPEAT_INF) {
- DEBUG_PRINTF("min bound %s too large\n", repeatMin.str().c_str());
- return true;
- }
-
- if (repeatMax.is_finite() && (u32)repeatMax >= REPEAT_INF) {
- DEBUG_PRINTF("finite max bound %s too large\n", repeatMax.str().c_str());
- return true;
- }
-
- return false;
-}
-
-/** Returns false if the graph is not a supported bounded repeat. */
-static
-bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi,
- u32 minNumVertices) {
- DEBUG_PRINTF("reach subgraph has %zu vertices\n", rsi.vertices.size());
-
- if (rsi.vertices.size() < minNumVertices) {
- DEBUG_PRINTF("too small, min is %u\n", minNumVertices);
- return false;
- }
-
- NFAVertex first = rsi.vertices.front();
- NFAVertex last = rsi.vertices.back();
-
+ proper_pred(g, rsi.vertices.front(), pred);
+ proper_succ(g, rsi.vertices.back(), succ);
+
+ flat_set<ReportID> reports;
+ findFirstReports(g, rsi, reports);
+
+ bool recalc = false;
+ deque<NFAVertex> verts;
+
+ for (auto v : rsi.vertices) {
+ tail.erase(v); // now contains all vertices _after_ this one.
+
+ if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) {
+ recalc = true;
+ continue;
+ }
+
+ verts.push_back(v);
+ }
+
+ if (recalc) {
+ if (verts.size() < minNumVertices) {
+ DEBUG_PRINTF("subgraph got too small\n");
+ q.pop();
+ continue;
+ }
+ splitSubgraph(g, verts, minNumVertices, q);
+ } else {
+ DEBUG_PRINTF("subgraph is ok\n");
+ rs_out.push_back(rsi);
+ }
+ q.pop();
+ }
+
+ rs.swap(rs_out);
+}
+
+namespace {
+class DistanceSet {
+private:
+ // We use boost::icl to do the heavy lifting.
+ typedef boost::icl::closed_interval<u32> ClosedInterval;
+ typedef boost::icl::interval_set<u32, std::less, ClosedInterval>
+ IntervalSet;
+ IntervalSet distances;
+public:
+ // Add a distance.
+ void insert(u32 d) {
+ distances.insert(d);
+ }
+
+ void add(const DistanceSet &a) {
+ distances += a.distances; // union operation
+ }
+
+ // Increment all the distances by one and add.
+ void add_incremented(const DistanceSet &a) {
+ for (const auto &d : a.distances) {
+ u32 lo = lower(d) + 1;
+ u32 hi = upper(d) + 1;
+ distances.insert(boost::icl::construct<ClosedInterval>(lo, hi));
+ }
+ }
+
+#ifdef DEBUG
+ void dump() const {
+ if (distances.empty()) {
+ printf("<empty>");
+ return;
+ }
+
+ for (const auto &d : distances) {
+ printf("[%u,%u] ", lower(d), upper(d));
+ }
+ }
+#endif
+
+ // True if this distance set is a single contiguous interval.
+ bool is_contiguous() const {
+ IntervalSet::const_iterator it = distances.begin();
+ if (it == distances.end()) {
+ return false;
+ }
+ ++it;
+ return (it == distances.end());
+ }
+
+ pair<u32, u32> get_range() const {
+ assert(is_contiguous());
+ return make_pair(lower(distances), upper(distances));
+ }
+};
+}
+
+/**
+ * Returns true if the given bounds are too large to be implemented with our
+ * runtime engines that handle bounded repeats.
+ */
+static
+bool tooLargeToImplement(const depth &repeatMin, const depth &repeatMax) {
+ if (!repeatMin.is_finite()) {
+ DEBUG_PRINTF("non-finite min bound %s\n", repeatMin.str().c_str());
+ assert(0); // this is a surprise!
+ return true;
+ }
+
+ if ((u32)repeatMin >= REPEAT_INF) {
+ DEBUG_PRINTF("min bound %s too large\n", repeatMin.str().c_str());
+ return true;
+ }
+
+ if (repeatMax.is_finite() && (u32)repeatMax >= REPEAT_INF) {
+ DEBUG_PRINTF("finite max bound %s too large\n", repeatMax.str().c_str());
+ return true;
+ }
+
+ return false;
+}
+
+/** Returns false if the graph is not a supported bounded repeat. */
+static
+bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi,
+ u32 minNumVertices) {
+ DEBUG_PRINTF("reach subgraph has %zu vertices\n", rsi.vertices.size());
+
+ if (rsi.vertices.size() < minNumVertices) {
+ DEBUG_PRINTF("too small, min is %u\n", minNumVertices);
+ return false;
+ }
+
+ NFAVertex first = rsi.vertices.front();
+ NFAVertex last = rsi.vertices.back();
+
typedef unordered_map<NFAVertex, DistanceSet> DistanceMap;
- DistanceMap dist;
-
- // Initial distance sets.
- for (auto u : inv_adjacent_vertices_range(first, g)) {
- if (u == first) {
- continue; // no self-loops
- }
+ DistanceMap dist;
+
+ // Initial distance sets.
+ for (auto u : inv_adjacent_vertices_range(first, g)) {
+ if (u == first) {
+ continue; // no self-loops
+ }
DEBUG_PRINTF("pred vertex %zu\n", g[u].index);
- dist[u].insert(0);
- }
-
- for (auto v : rsi.vertices) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // no self-loops
- }
-
- auto di = dist.find(u);
- if (di == dist.end()) {
- assert(0);
- return false;
- }
-
- dist[v].add_incremented(di->second);
- }
- }
-
- // Remove pred distances from our map.
- for (auto u : inv_adjacent_vertices_range(first, g)) {
- if (u == first) {
- continue; // no self-loops
- }
- dist.erase(u);
- }
-
- // Calculate final union of distances.
- DistanceSet final_d;
- for (auto v : adjacent_vertices_range(last, g)) {
- if (v == last) {
- continue; // no self-loops
- }
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // no self-loops
- }
- auto di = dist.find(u);
- if (di == dist.end()) {
- continue;
- }
- final_d.add(di->second);
- }
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("final_d dists: ");
- final_d.dump();
- printf("\n");
-#endif
-
- if (!final_d.is_contiguous()) {
- // not handled right now
- DEBUG_PRINTF("not contiguous!\n");
- return false;
- }
-
- pair<u32, u32> range = final_d.get_range();
- if (range.first > depth::max_value() || range.second > depth::max_value()) {
- DEBUG_PRINTF("repeat (%u,%u) not representable with depths\n",
- range.first, range.second);
- return false;
- }
+ dist[u].insert(0);
+ }
+
+ for (auto v : rsi.vertices) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // no self-loops
+ }
+
+ auto di = dist.find(u);
+ if (di == dist.end()) {
+ assert(0);
+ return false;
+ }
+
+ dist[v].add_incremented(di->second);
+ }
+ }
+
+ // Remove pred distances from our map.
+ for (auto u : inv_adjacent_vertices_range(first, g)) {
+ if (u == first) {
+ continue; // no self-loops
+ }
+ dist.erase(u);
+ }
+
+ // Calculate final union of distances.
+ DistanceSet final_d;
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (v == last) {
+ continue; // no self-loops
+ }
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // no self-loops
+ }
+ auto di = dist.find(u);
+ if (di == dist.end()) {
+ continue;
+ }
+ final_d.add(di->second);
+ }
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("final_d dists: ");
+ final_d.dump();
+ printf("\n");
+#endif
+
+ if (!final_d.is_contiguous()) {
+ // not handled right now
+ DEBUG_PRINTF("not contiguous!\n");
+ return false;
+ }
+
+ pair<u32, u32> range = final_d.get_range();
+ if (range.first > depth::max_value() || range.second > depth::max_value()) {
+ DEBUG_PRINTF("repeat (%u,%u) not representable with depths\n",
+ range.first, range.second);
+ return false;
+ }
rsi.repeatMin = depth(range.first);
rsi.repeatMax = depth(range.second);
-
- // If we've got a self-loop anywhere, we've got inf max.
- if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) {
- DEBUG_PRINTF("repeat contains self-loop, setting max to INF\n");
- rsi.repeatMax = depth::infinity();
- }
-
- // If our pattern contains a bounded repeat that we wouldn't be able to
- // implement as runtime, then we have no strategy that leads to
- // implementation -- it's not like falling back to a DFA or other
- // non-repeat engine is going to succeed.
- if (tooLargeToImplement(rsi.repeatMin, rsi.repeatMax)) {
- throw CompileError("Pattern too large.");
- }
-
- return true;
-}
-
-static
-bool allPredsInSubgraph(NFAVertex v, const NGHolder &g,
+
+ // If we've got a self-loop anywhere, we've got inf max.
+ if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) {
+ DEBUG_PRINTF("repeat contains self-loop, setting max to INF\n");
+ rsi.repeatMax = depth::infinity();
+ }
+
+ // If our pattern contains a bounded repeat that we wouldn't be able to
+ // implement as runtime, then we have no strategy that leads to
+ // implementation -- it's not like falling back to a DFA or other
+ // non-repeat engine is going to succeed.
+ if (tooLargeToImplement(rsi.repeatMin, rsi.repeatMax)) {
+ throw CompileError("Pattern too large.");
+ }
+
+ return true;
+}
+
+static
+bool allPredsInSubgraph(NFAVertex v, const NGHolder &g,
const unordered_set<NFAVertex> &involved) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!contains(involved, u)) {
- return false;
- }
- }
- return true;
-}
-
-static
-void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v,
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!contains(involved, u)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v,
const unordered_set<NFAVertex> &involved,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
- vector<NFAVertex> &tugs) {
- if (allPredsInSubgraph(v, g, involved)) {
- // We can transform this vertex into a tug trigger in-place.
+ vector<NFAVertex> &tugs) {
+ if (allPredsInSubgraph(v, g, involved)) {
+ // We can transform this vertex into a tug trigger in-place.
DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n",
- g[v].index);
- add_edge(cyclic, v, g);
- tugs.push_back(v);
- return;
- }
-
- // Some predecessors of v are not in the subgraph, so we need to clone v
- // and split up its in-edges.
- NFAVertex t = clone_vertex(g, v);
- depths[t] = depths[v];
-
+ g[v].index);
+ add_edge(cyclic, v, g);
+ tugs.push_back(v);
+ return;
+ }
+
+ // Some predecessors of v are not in the subgraph, so we need to clone v
+ // and split up its in-edges.
+ NFAVertex t = clone_vertex(g, v);
+ depths[t] = depths[v];
+
DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n",
- g[t].index, g[v].index);
-
- tugs.push_back(t);
- add_edge(cyclic, t, g);
-
- // New vertex gets all of v's successors, including v itself if it's
- // cyclic.
- clone_out_edges(g, v, t);
-}
-
-static
-NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) {
- NFAVertex last = rsi.vertices.back();
- NFAVertex cyclic = clone_vertex(g, last);
- add_edge(cyclic, cyclic, g);
-
+ g[t].index, g[v].index);
+
+ tugs.push_back(t);
+ add_edge(cyclic, t, g);
+
+ // New vertex gets all of v's successors, including v itself if it's
+ // cyclic.
+ clone_out_edges(g, v, t);
+}
+
+static
+NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) {
+ NFAVertex last = rsi.vertices.back();
+ NFAVertex cyclic = clone_vertex(g, last);
+ add_edge(cyclic, cyclic, g);
+
DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index);
- return cyclic;
-}
-
-static
-NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) {
- NFAVertex pos = add_vertex(g);
- NFAVertex first = rsi.vertices.front();
-
- g[pos].char_reach = g[first].char_reach;
-
+ return cyclic;
+}
+
+static
+NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) {
+ NFAVertex pos = add_vertex(g);
+ NFAVertex first = rsi.vertices.front();
+
+ g[pos].char_reach = g[first].char_reach;
+
DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index);
- return pos;
-}
-
-// 2 if v is itself an accept, or 1 if v has an edge to an accept,
-// or 0 otherwise.
-static
-u32 isCloseToAccept(const NGHolder &g, NFAVertex v) {
- if (is_any_accept(v, g)) {
- return 2;
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (is_any_accept(w, g)) {
- return 1;
- }
- }
-
- return 0;
-}
-
-static
-u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) {
- const NFAVertex last = rsi.vertices.back();
- u32 rv = 0;
-
- for (auto v : adjacent_vertices_range(last, g)) {
- rv = max(rv, isCloseToAccept(g, v));
- }
-
- return rv;
-}
-
-static
-void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi,
+ return pos;
+}
+
+// 2 if v is itself an accept, or 1 if v has an edge to an accept,
+// or 0 otherwise.
+static
+u32 isCloseToAccept(const NGHolder &g, NFAVertex v) {
+ if (is_any_accept(v, g)) {
+ return 2;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (is_any_accept(w, g)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static
+u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) {
+ const NFAVertex last = rsi.vertices.back();
+ u32 rv = 0;
+
+ for (auto v : adjacent_vertices_range(last, g)) {
+ rv = max(rv, isCloseToAccept(g, v));
+ }
+
+ return rv;
+}
+
+static
+void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
- vector<NFAVertex> *succs) {
- u32 unpeel = unpeelAmount(g, rsi);
- DEBUG_PRINTF("unpeeling %u vertices\n", unpeel);
-
- while (unpeel) {
- NFAVertex last = rsi.vertices.back();
- NFAVertex first = rsi.vertices.front();
-
- NFAVertex d = clone_vertex(g, last);
- depths[d] = depths[last];
+ vector<NFAVertex> *succs) {
+ u32 unpeel = unpeelAmount(g, rsi);
+ DEBUG_PRINTF("unpeeling %u vertices\n", unpeel);
+
+ while (unpeel) {
+ NFAVertex last = rsi.vertices.back();
+ NFAVertex first = rsi.vertices.front();
+
+ NFAVertex d = clone_vertex(g, last);
+ depths[d] = depths[last];
DEBUG_PRINTF("created vertex %zu\n", g[d].index);
-
- for (auto v : *succs) {
- add_edge(d, v, g);
- }
-
- if (rsi.repeatMin > depth(1)) {
- rsi.repeatMin -= 1;
- } else {
- /* Skip edge for the cyclic state; note that we must clone their
- * edge properties as they may include tops. */
- for (const auto &e : in_edges_range(first, g)) {
- add_edge(source(e, g), d, g[e], g);
- }
- }
-
- succs->clear();
- succs->push_back(d);
-
- rsi.repeatMax -= 1;
-
- assert(rsi.repeatMin > depth(0));
- assert(rsi.repeatMax > depth(0));
-
- unpeel--;
- }
-}
-
-/** Fetch the set of successor vertices of this subgraph. */
-static
-void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi,
- vector<NFAVertex> *succs) {
- assert(!rsi.vertices.empty());
- // Successors come from successors of last vertex.
- NFAVertex last = rsi.vertices.back();
-
- for (auto v : adjacent_vertices_range(last, g)) {
- if (v == last) { /* ignore self loop */
- continue;
- }
- succs->push_back(v);
- }
-}
-
-/** Disconnect the given subgraph from its predecessors and successors in the
- * NFA graph and replace it with a cyclic state. */
-static
-void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
+
+ for (auto v : *succs) {
+ add_edge(d, v, g);
+ }
+
+ if (rsi.repeatMin > depth(1)) {
+ rsi.repeatMin -= 1;
+ } else {
+ /* Skip edge for the cyclic state; note that we must clone their
+ * edge properties as they may include tops. */
+ for (const auto &e : in_edges_range(first, g)) {
+ add_edge(source(e, g), d, g[e], g);
+ }
+ }
+
+ succs->clear();
+ succs->push_back(d);
+
+ rsi.repeatMax -= 1;
+
+ assert(rsi.repeatMin > depth(0));
+ assert(rsi.repeatMax > depth(0));
+
+ unpeel--;
+ }
+}
+
+/** Fetch the set of successor vertices of this subgraph. */
+static
+void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi,
+ vector<NFAVertex> *succs) {
+ assert(!rsi.vertices.empty());
+ // Successors come from successors of last vertex.
+ NFAVertex last = rsi.vertices.back();
+
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (v == last) { /* ignore self loop */
+ continue;
+ }
+ succs->push_back(v);
+ }
+}
+
+/** Disconnect the given subgraph from its predecessors and successors in the
+ * NFA graph and replace it with a cyclic state. */
+static
+void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
vector<BoundedRepeatData> *repeats,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
unordered_set<NFAVertex> &created) {
- assert(!rsi.bad);
+ assert(!rsi.bad);
/* As we may need to unpeel 2 vertices, we need the width to be more than 2.
* This should only happen if the graph did not have a redundancy pass
* performed on it, as vertex count checks would prevent us reaching here.
@@ -780,396 +780,396 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
if (rsi.repeatMax <= depth(2)) {
return;
}
- assert(rsi.repeatMin > depth(0));
- assert(rsi.repeatMax >= rsi.repeatMin);
+ assert(rsi.repeatMin > depth(0));
+ assert(rsi.repeatMax >= rsi.repeatMin);
assert(rsi.repeatMax > depth(2));
-
- DEBUG_PRINTF("entry\n");
-
+
+ DEBUG_PRINTF("entry\n");
+
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
- rsi.vertices.end());
- vector<NFAVertex> succs;
- getSuccessors(g, rsi, &succs);
-
- unpeelNearEnd(g, rsi, depths, &succs);
-
- // Create our replacement cyclic state with the same reachability and
- // report info as the last vertex in our topo-ordered list.
- NFAVertex cyclic = createCyclic(g, rsi);
- created.insert(cyclic);
-
- // One more special vertex is necessary: the positive trigger (same
- // reach as cyclic).
- NFAVertex pos_trigger = createPos(g, rsi);
- created.insert(pos_trigger);
- add_edge(pos_trigger, cyclic, g);
-
- // Update depths for our new vertices.
- NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
- depths[pos_trigger] = depths[first];
- depths[cyclic].fromStart =
- unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
- depths[cyclic].fromStartDotStar = unionDepthMinMax(
- depths[first].fromStartDotStar, depths[last].fromStartDotStar);
-
- // Wire predecessors to positive trigger.
- for (const auto &e : in_edges_range(first, g)) {
- add_edge(source(e, g), pos_trigger, g[e], g);
- }
-
- // Wire cyclic state to tug trigger states built from successors.
- vector<NFAVertex> tugs;
- for (auto v : succs) {
- buildTugTrigger(g, cyclic, v, involved, depths, tugs);
- }
- created.insert(tugs.begin(), tugs.end());
- assert(!tugs.empty());
-
- // Wire pos trigger to tugs if min repeat is one -- this deals with cases
- // where we can get a pos and tug trigger on the same byte.
- if (rsi.repeatMin == depth(1)) {
- for (auto v : tugs) {
- add_edge(pos_trigger, v, g);
- }
- }
-
- // Remove the vertices/edges in the subgraph.
- remove_vertices(rsi.vertices, g, false);
- erase_all(&depths, rsi.vertices);
-
- repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
- rsi.repeatMax, rsi.minPeriod, cyclic,
- pos_trigger, tugs));
-}
-
-/** Variant for Rose-specific graphs that terminate in a sole accept, so we can
- * use a "lazy tug". See UE-1636. */
-static
-void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
- vector<BoundedRepeatData> *repeats,
+ rsi.vertices.end());
+ vector<NFAVertex> succs;
+ getSuccessors(g, rsi, &succs);
+
+ unpeelNearEnd(g, rsi, depths, &succs);
+
+ // Create our replacement cyclic state with the same reachability and
+ // report info as the last vertex in our topo-ordered list.
+ NFAVertex cyclic = createCyclic(g, rsi);
+ created.insert(cyclic);
+
+ // One more special vertex is necessary: the positive trigger (same
+ // reach as cyclic).
+ NFAVertex pos_trigger = createPos(g, rsi);
+ created.insert(pos_trigger);
+ add_edge(pos_trigger, cyclic, g);
+
+ // Update depths for our new vertices.
+ NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
+ depths[pos_trigger] = depths[first];
+ depths[cyclic].fromStart =
+ unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
+ depths[cyclic].fromStartDotStar = unionDepthMinMax(
+ depths[first].fromStartDotStar, depths[last].fromStartDotStar);
+
+ // Wire predecessors to positive trigger.
+ for (const auto &e : in_edges_range(first, g)) {
+ add_edge(source(e, g), pos_trigger, g[e], g);
+ }
+
+ // Wire cyclic state to tug trigger states built from successors.
+ vector<NFAVertex> tugs;
+ for (auto v : succs) {
+ buildTugTrigger(g, cyclic, v, involved, depths, tugs);
+ }
+ created.insert(tugs.begin(), tugs.end());
+ assert(!tugs.empty());
+
+ // Wire pos trigger to tugs if min repeat is one -- this deals with cases
+ // where we can get a pos and tug trigger on the same byte.
+ if (rsi.repeatMin == depth(1)) {
+ for (auto v : tugs) {
+ add_edge(pos_trigger, v, g);
+ }
+ }
+
+ // Remove the vertices/edges in the subgraph.
+ remove_vertices(rsi.vertices, g, false);
+ erase_all(&depths, rsi.vertices);
+
+ repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
+ rsi.repeatMax, rsi.minPeriod, cyclic,
+ pos_trigger, tugs));
+}
+
+/** Variant for Rose-specific graphs that terminate in a sole accept, so we can
+ * use a "lazy tug". See UE-1636. */
+static
+void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
+ vector<BoundedRepeatData> *repeats,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
unordered_set<NFAVertex> &created) {
- assert(!rsi.bad);
- assert(rsi.repeatMin);
- assert(rsi.repeatMax >= rsi.repeatMin);
-
- DEBUG_PRINTF("entry\n");
-
+ assert(!rsi.bad);
+ assert(rsi.repeatMin);
+ assert(rsi.repeatMax >= rsi.repeatMin);
+
+ DEBUG_PRINTF("entry\n");
+
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
rsi.vertices.end());
- vector<NFAVertex> succs;
- getSuccessors(g, rsi, &succs);
-
- // Create our replacement cyclic state with the same reachability and
- // report info as the last vertex in our topo-ordered list.
- NFAVertex cyclic = createCyclic(g, rsi);
- created.insert(cyclic);
-
- // One more special vertex is necessary: the positive trigger (same
- // reach as cyclic).
- NFAVertex pos_trigger = createPos(g, rsi);
- created.insert(pos_trigger);
- add_edge(pos_trigger, cyclic, g);
-
- // Update depths for our new vertices.
- NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
- depths[pos_trigger] = depths[first];
- depths[cyclic].fromStart =
- unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
- depths[cyclic].fromStartDotStar = unionDepthMinMax(
- depths[first].fromStartDotStar, depths[last].fromStartDotStar);
-
- // Wire predecessors to positive trigger.
- for (const auto &e : in_edges_range(first, g)) {
- add_edge(source(e, g), pos_trigger, g[e], g);
- }
-
- // In the rose case, our tug is our cyclic, and it's wired to our
- // successors (which should be just the accept).
- vector<NFAVertex> tugs;
- assert(succs.size() == 1);
- for (auto v : succs) {
- add_edge(cyclic, v, g);
- }
-
- // Wire pos trigger to accept if min repeat is one -- this deals with cases
- // where we can get a pos and tug trigger on the same byte.
- if (rsi.repeatMin == depth(1)) {
- for (auto v : succs) {
- add_edge(pos_trigger, v, g);
- g[pos_trigger].reports = g[cyclic].reports;
- }
- }
-
- // Remove the vertices/edges in the subgraph.
- remove_vertices(rsi.vertices, g, false);
- erase_all(&depths, rsi.vertices);
-
- repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
- rsi.repeatMax, rsi.minPeriod, cyclic,
- pos_trigger, tugs));
-}
-
-static
-bool isCompBigEnough(const RepeatGraph &rg, const u32 minRepeat) {
- // filtered_graph doesn't filter the num_vertices call.
- size_t n = 0;
- RepeatGraph::vertex_iterator vi, ve;
- for (tie(vi, ve) = vertices(rg); vi != ve; ++vi) {
- if (++n >= minRepeat) {
- return true;
- }
- }
- return false;
-}
-
-// Marks the subgraph as bad if it can't be handled.
-static
-void reprocessSubgraph(const NGHolder &h, const Grey &grey,
- ReachSubgraph &rsi) {
- vector<ReachSubgraph> rs(1, rsi);
- checkReachSubgraphs(h, rs, grey.minExtBoundedRepeatSize);
- if (rs.size() != 1) {
- DEBUG_PRINTF("subgraph split into %zu\n", rs.size());
- rsi.bad = true;
- return;
- }
-
- rsi = rs.back(); // Potentially modified.
-
- if (processSubgraph(h, rsi, grey.minExtBoundedRepeatSize)) {
- DEBUG_PRINTF("reprocessed subgraph is {%s,%s} repeat\n",
- rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
- } else {
- DEBUG_PRINTF("reprocessed subgraph is bad\n");
- rsi.bad = true;
- }
-}
-
-/** Remove vertices from the beginning and end of the vertex set that are
- * involved in other repeats as a result of earlier repeat transformations. */
-static
-bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi,
+ vector<NFAVertex> succs;
+ getSuccessors(g, rsi, &succs);
+
+ // Create our replacement cyclic state with the same reachability and
+ // report info as the last vertex in our topo-ordered list.
+ NFAVertex cyclic = createCyclic(g, rsi);
+ created.insert(cyclic);
+
+ // One more special vertex is necessary: the positive trigger (same
+ // reach as cyclic).
+ NFAVertex pos_trigger = createPos(g, rsi);
+ created.insert(pos_trigger);
+ add_edge(pos_trigger, cyclic, g);
+
+ // Update depths for our new vertices.
+ NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
+ depths[pos_trigger] = depths[first];
+ depths[cyclic].fromStart =
+ unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
+ depths[cyclic].fromStartDotStar = unionDepthMinMax(
+ depths[first].fromStartDotStar, depths[last].fromStartDotStar);
+
+ // Wire predecessors to positive trigger.
+ for (const auto &e : in_edges_range(first, g)) {
+ add_edge(source(e, g), pos_trigger, g[e], g);
+ }
+
+ // In the rose case, our tug is our cyclic, and it's wired to our
+ // successors (which should be just the accept).
+ vector<NFAVertex> tugs;
+ assert(succs.size() == 1);
+ for (auto v : succs) {
+ add_edge(cyclic, v, g);
+ }
+
+ // Wire pos trigger to accept if min repeat is one -- this deals with cases
+ // where we can get a pos and tug trigger on the same byte.
+ if (rsi.repeatMin == depth(1)) {
+ for (auto v : succs) {
+ add_edge(pos_trigger, v, g);
+ g[pos_trigger].reports = g[cyclic].reports;
+ }
+ }
+
+ // Remove the vertices/edges in the subgraph.
+ remove_vertices(rsi.vertices, g, false);
+ erase_all(&depths, rsi.vertices);
+
+ repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
+ rsi.repeatMax, rsi.minPeriod, cyclic,
+ pos_trigger, tugs));
+}
+
+static
+bool isCompBigEnough(const RepeatGraph &rg, const u32 minRepeat) {
+ // filtered_graph doesn't filter the num_vertices call.
+ size_t n = 0;
+ RepeatGraph::vertex_iterator vi, ve;
+ for (tie(vi, ve) = vertices(rg); vi != ve; ++vi) {
+ if (++n >= minRepeat) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Marks the subgraph as bad if it can't be handled.
+static
+void reprocessSubgraph(const NGHolder &h, const Grey &grey,
+ ReachSubgraph &rsi) {
+ vector<ReachSubgraph> rs(1, rsi);
+ checkReachSubgraphs(h, rs, grey.minExtBoundedRepeatSize);
+ if (rs.size() != 1) {
+ DEBUG_PRINTF("subgraph split into %zu\n", rs.size());
+ rsi.bad = true;
+ return;
+ }
+
+ rsi = rs.back(); // Potentially modified.
+
+ if (processSubgraph(h, rsi, grey.minExtBoundedRepeatSize)) {
+ DEBUG_PRINTF("reprocessed subgraph is {%s,%s} repeat\n",
+ rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
+ } else {
+ DEBUG_PRINTF("reprocessed subgraph is bad\n");
+ rsi.bad = true;
+ }
+}
+
+/** Remove vertices from the beginning and end of the vertex set that are
+ * involved in other repeats as a result of earlier repeat transformations. */
+static
+bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi,
const unordered_set<NFAVertex> &created) {
- assert(!rsi.bad);
-
- if (created.empty()) {
- return true;
- }
-
- if (rsi.vertices.empty()) {
- return false;
- }
-
- // Peel involved vertices from the front.
- vector<NFAVertex>::iterator zap = rsi.vertices.end();
- for (auto it = rsi.vertices.begin(), ite = rsi.vertices.end(); it != ite;
- ++it) {
- if (!contains(created, *it)) {
- zap = it;
- break;
- } else {
+ assert(!rsi.bad);
+
+ if (created.empty()) {
+ return true;
+ }
+
+ if (rsi.vertices.empty()) {
+ return false;
+ }
+
+ // Peel involved vertices from the front.
+ vector<NFAVertex>::iterator zap = rsi.vertices.end();
+ for (auto it = rsi.vertices.begin(), ite = rsi.vertices.end(); it != ite;
+ ++it) {
+ if (!contains(created, *it)) {
+ zap = it;
+ break;
+ } else {
DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index);
- }
- }
- DEBUG_PRINTF("peeling %zu vertices from front\n",
- distance(rsi.vertices.begin(), zap));
- rsi.vertices.erase(rsi.vertices.begin(), zap);
-
- // Peel involved vertices and vertices with edges to involved vertices from
- // the back; otherwise we may try to transform a POS into a TUG.
- zap = rsi.vertices.begin();
- for (auto it = rsi.vertices.rbegin(), ite = rsi.vertices.rend(); it != ite;
- ++it) {
- if (!contains(created, *it) &&
- !contains_any_of(created, adjacent_vertices(*it, g))) {
- zap = it.base(); // Note: erases everything after it.
- break;
- } else {
+ }
+ }
+ DEBUG_PRINTF("peeling %zu vertices from front\n",
+ distance(rsi.vertices.begin(), zap));
+ rsi.vertices.erase(rsi.vertices.begin(), zap);
+
+ // Peel involved vertices and vertices with edges to involved vertices from
+ // the back; otherwise we may try to transform a POS into a TUG.
+ zap = rsi.vertices.begin();
+ for (auto it = rsi.vertices.rbegin(), ite = rsi.vertices.rend(); it != ite;
+ ++it) {
+ if (!contains(created, *it) &&
+ !contains_any_of(created, adjacent_vertices(*it, g))) {
+ zap = it.base(); // Note: erases everything after it.
+ break;
+ } else {
DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index);
- }
- }
- DEBUG_PRINTF("peeling %zu vertices from back\n",
- distance(zap, rsi.vertices.end()));
- rsi.vertices.erase(zap, rsi.vertices.end());
-
- // If vertices in the middle are involved in other repeats, it's a definite
- // no-no.
- for (auto v : rsi.vertices) {
- if (contains(created, v)) {
+ }
+ }
+ DEBUG_PRINTF("peeling %zu vertices from back\n",
+ distance(zap, rsi.vertices.end()));
+ rsi.vertices.erase(zap, rsi.vertices.end());
+
+ // If vertices in the middle are involved in other repeats, it's a definite
+ // no-no.
+ for (auto v : rsi.vertices) {
+ if (contains(created, v)) {
DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index);
- return false;
- }
- }
-
- reprocessSubgraph(g, grey, rsi);
- return !rsi.bad;
-}
-
-/** For performance reasons, it's nice not to have an exceptional state right
- * next to a startDs state: that way we can do double-byte accel, whereas
- * otherwise the NEG trigger would limit us to single. This might be a good
- * idea to extend to cyclic states, too. */
-static
-void peelStartDotStar(const NGHolder &g,
+ return false;
+ }
+ }
+
+ reprocessSubgraph(g, grey, rsi);
+ return !rsi.bad;
+}
+
+/** For performance reasons, it's nice not to have an exceptional state right
+ * next to a startDs state: that way we can do double-byte accel, whereas
+ * otherwise the NEG trigger would limit us to single. This might be a good
+ * idea to extend to cyclic states, too. */
+static
+void peelStartDotStar(const NGHolder &g,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const Grey &grey, ReachSubgraph &rsi) {
- if (rsi.vertices.size() < 1) {
- return;
- }
-
- NFAVertex first = rsi.vertices.front();
- if (depths.at(first).fromStartDotStar.min == depth(1)) {
+ if (rsi.vertices.size() < 1) {
+ return;
+ }
+
+ NFAVertex first = rsi.vertices.front();
+ if (depths.at(first).fromStartDotStar.min == depth(1)) {
DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index);
- rsi.vertices.erase(rsi.vertices.begin());
- reprocessSubgraph(g, grey, rsi);
- }
-}
-
-static
-void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
- const u32 minNumVertices) {
+ rsi.vertices.erase(rsi.vertices.begin());
+ reprocessSubgraph(g, grey, rsi);
+ }
+}
+
+static
+void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
+ const u32 minNumVertices) {
const ReachFilter<NGHolder> fil(&g);
const RepeatGraph rg(g, fil, fil);
-
- if (!isCompBigEnough(rg, minNumVertices)) {
- DEBUG_PRINTF("component not big enough, bailing\n");
- return;
- }
-
+
+ if (!isCompBigEnough(rg, minNumVertices)) {
+ DEBUG_PRINTF("component not big enough, bailing\n");
+ return;
+ }
+
const auto ug = make_undirected_graph(rg);
-
+
unordered_map<NFAVertex, u32> repeatMap;
-
- unsigned int num;
- num = connected_components(ug, make_assoc_property_map(repeatMap));
- DEBUG_PRINTF("found %u connected repeat components\n", num);
-
- // Now, we build a set of topo-ordered ReachSubgraphs.
+
+ unsigned int num;
+ num = connected_components(ug, make_assoc_property_map(repeatMap));
+ DEBUG_PRINTF("found %u connected repeat components\n", num);
+
+ // Now, we build a set of topo-ordered ReachSubgraphs.
vector<NFAVertex> topoOrder = buildTopoOrder(rg);
-
- rs.resize(num);
-
- for (auto v : topoOrder) {
+
+ rs.resize(num);
+
+ for (auto v : topoOrder) {
auto rit = repeatMap.find(v);
- if (rit == repeatMap.end()) {
- continue; /* not part of a repeat */
- }
- u32 comp_id = rit->second;
- assert(comp_id < num);
- rs[comp_id].vertices.push_back(v);
- }
-
-#ifdef DEBUG
- for (size_t i = 0; i < rs.size(); i++) {
- DEBUG_PRINTF("rs %zu has %zu vertices.\n", i, rs[i].vertices.size());
- }
-#endif
-}
-
-static
-bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) {
- assert(!rsi.vertices.empty());
-
- const NFAVertex first = rsi.vertices.front();
- const NFAVertex last = rsi.vertices.back();
-
- // All of the preds of first must have edges to all the successors of last.
- for (auto u : inv_adjacent_vertices_range(first, g)) {
- for (auto v : adjacent_vertices_range(last, g)) {
- if (!edge(u, v, g).second) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-/* depth info is valid as calculated at entry */
-static
-bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g,
+ if (rit == repeatMap.end()) {
+ continue; /* not part of a repeat */
+ }
+ u32 comp_id = rit->second;
+ assert(comp_id < num);
+ rs[comp_id].vertices.push_back(v);
+ }
+
+#ifdef DEBUG
+ for (size_t i = 0; i < rs.size(); i++) {
+ DEBUG_PRINTF("rs %zu has %zu vertices.\n", i, rs[i].vertices.size());
+ }
+#endif
+}
+
+static
+bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) {
+ assert(!rsi.vertices.empty());
+
+ const NFAVertex first = rsi.vertices.front();
+ const NFAVertex last = rsi.vertices.back();
+
+ // All of the preds of first must have edges to all the successors of last.
+ for (auto u : inv_adjacent_vertices_range(first, g)) {
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (!edge(u, v, g).second) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/* depth info is valid as calculated at entry */
+static
+bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const unordered_set<NFAVertex> &reached_by_fixed_tops) {
- DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n",
- reached_by_fixed_tops.size());
- if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) {
- /* can't do this for infix/suffixes unless we know trigger literals
- * can only occur at one offset */
+ DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n",
+ reached_by_fixed_tops.size());
+ if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) {
+ /* can't do this for infix/suffixes unless we know trigger literals
+ * can only occur at one offset */
DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index);
- return false;
- }
-
- if (depths.at(v).fromStartDotStar.min.is_reachable()) {
- DEBUG_PRINTF("reachable from startDs\n");
- return false;
- }
-
- /* look at preds as v may be cyclic */
- const depth &first = depths.at(v).fromStart.min;
- assert(first.is_reachable());
- if (!first.is_finite()) {
- DEBUG_PRINTF("first not finite\n");
- return false;
- }
- DEBUG_PRINTF("first is at least %s from start\n", first.str().c_str());
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- const depth &u_max_depth = depths.at(u).fromStart.max;
+ return false;
+ }
+
+ if (depths.at(v).fromStartDotStar.min.is_reachable()) {
+ DEBUG_PRINTF("reachable from startDs\n");
+ return false;
+ }
+
+ /* look at preds as v may be cyclic */
+ const depth &first = depths.at(v).fromStart.min;
+ assert(first.is_reachable());
+ if (!first.is_finite()) {
+ DEBUG_PRINTF("first not finite\n");
+ return false;
+ }
+ DEBUG_PRINTF("first is at least %s from start\n", first.str().c_str());
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ const depth &u_max_depth = depths.at(u).fromStart.max;
DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index,
u_max_depth.str().c_str());
- if (u_max_depth != first - depth(1)) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger,
- u32 top) {
- NFAVertex u = g.start;
- for (const auto &cr : trigger) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = cr;
- add_edge(u, v, g);
- if (u == g.start) {
+ if (u_max_depth != first - depth(1)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger,
+ u32 top) {
+ NFAVertex u = g.start;
+ for (const auto &cr : trigger) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = cr;
+ add_edge(u, v, g);
+ if (u == g.start) {
g[edge(u, v, g)].tops.insert(top);
- }
- u = v;
- }
-
+ }
+ u = v;
+ }
+
DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index);
- return u;
-}
-
-/**
- * For triggered graphs, replace the "top" edges from start with the triggers
- * they represent, for the purposes of determining sole entry.
- */
-static
-void addTriggers(NGHolder &g,
- const map<u32, vector<vector<CharReach>>> &triggers) {
- if (!is_triggered(g)) {
- assert(triggers.empty());
- return;
- }
-
- vector<NFAEdge> dead;
- map<u32, vector<NFAVertex>> starts_by_top;
-
- for (const auto &e : out_edges_range(g.start, g)) {
- const NFAVertex &v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
-
+ return u;
+}
+
+/**
+ * For triggered graphs, replace the "top" edges from start with the triggers
+ * they represent, for the purposes of determining sole entry.
+ */
+static
+void addTriggers(NGHolder &g,
+ const map<u32, vector<vector<CharReach>>> &triggers) {
+ if (!is_triggered(g)) {
+ assert(triggers.empty());
+ return;
+ }
+
+ vector<NFAEdge> dead;
+ map<u32, vector<NFAVertex>> starts_by_top;
+
+ for (const auto &e : out_edges_range(g.start, g)) {
+ const NFAVertex &v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
+
const auto &tops = g[e].tops;
-
- // The caller may not have given us complete trigger information. If we
- // don't have any triggers for a particular top, we should just leave
- // it alone.
+
+ // The caller may not have given us complete trigger information. If we
+ // don't have any triggers for a particular top, we should just leave
+ // it alone.
for (u32 top : tops) {
if (!contains(triggers, top)) {
DEBUG_PRINTF("no triggers for top %u\n", top);
@@ -1177,946 +1177,946 @@ void addTriggers(NGHolder &g,
}
starts_by_top[top].push_back(v);
- }
- dead.push_back(e);
+ }
+ dead.push_back(e);
next_edge:;
- }
-
- remove_edges(dead, g);
-
- for (const auto &m : starts_by_top) {
- const auto &top = m.first;
- const auto &starts = m.second;
-
- assert(contains(triggers, top));
- const auto &top_triggers = triggers.at(top);
-
- for (const auto &trigger : top_triggers) {
- NFAVertex u = buildTriggerStates(g, trigger, top);
- for (const auto &v : starts) {
- add_edge_if_not_present(u, v, g);
- }
- }
- }
-}
-
-static
-CharReach predReach(const NGHolder &g, NFAVertex v) {
- CharReach cr;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- cr |= g[u].char_reach;
- }
- return cr;
-}
-
-/**
- * Filter the given vertex map (which maps from vertices in another graph to
- * vertices in subg) so that it only contains vertices that actually exist in
- * subg.
- */
-static
-void filterMap(const NGHolder &subg,
+ }
+
+ remove_edges(dead, g);
+
+ for (const auto &m : starts_by_top) {
+ const auto &top = m.first;
+ const auto &starts = m.second;
+
+ assert(contains(triggers, top));
+ const auto &top_triggers = triggers.at(top);
+
+ for (const auto &trigger : top_triggers) {
+ NFAVertex u = buildTriggerStates(g, trigger, top);
+ for (const auto &v : starts) {
+ add_edge_if_not_present(u, v, g);
+ }
+ }
+ }
+}
+
+static
+CharReach predReach(const NGHolder &g, NFAVertex v) {
+ CharReach cr;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ cr |= g[u].char_reach;
+ }
+ return cr;
+}
+
+/**
+ * Filter the given vertex map (which maps from vertices in another graph to
+ * vertices in subg) so that it only contains vertices that actually exist in
+ * subg.
+ */
+static
+void filterMap(const NGHolder &subg,
unordered_map<NFAVertex, NFAVertex> &vmap) {
NGHolder::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(subg);
+ tie(vi, ve) = vertices(subg);
const unordered_set<NFAVertex> remaining_verts(vi, ve);
-
+
unordered_map<NFAVertex, NFAVertex> fmap; // filtered map
-
- for (const auto &m : vmap) {
- if (contains(remaining_verts, m.second)) {
- fmap.insert(m);
- }
- }
-
- vmap.swap(fmap);
-}
-
-/** Construct a graph for sole entry analysis that only considers paths through
- * the bounded repeat. */
-static
-void buildRepeatGraph(NGHolder &rg,
+
+ for (const auto &m : vmap) {
+ if (contains(remaining_verts, m.second)) {
+ fmap.insert(m);
+ }
+ }
+
+ vmap.swap(fmap);
+}
+
+/** Construct a graph for sole entry analysis that only considers paths through
+ * the bounded repeat. */
+static
+void buildRepeatGraph(NGHolder &rg,
unordered_map<NFAVertex, NFAVertex> &rg_map,
- const NGHolder &g, const ReachSubgraph &rsi,
- const map<u32, vector<vector<CharReach>>> &triggers) {
- cloneHolder(rg, g, &rg_map);
- assert(rg.kind == g.kind);
-
- clear_in_edges(rg.accept, rg);
- clear_in_edges(rg.acceptEod, rg);
- add_edge(rg.accept, rg.acceptEod, rg);
-
- // Find the set of vertices in rg involved in the repeat.
+ const NGHolder &g, const ReachSubgraph &rsi,
+ const map<u32, vector<vector<CharReach>>> &triggers) {
+ cloneHolder(rg, g, &rg_map);
+ assert(rg.kind == g.kind);
+
+ clear_in_edges(rg.accept, rg);
+ clear_in_edges(rg.acceptEod, rg);
+ add_edge(rg.accept, rg.acceptEod, rg);
+
+ // Find the set of vertices in rg involved in the repeat.
unordered_set<NFAVertex> rg_involved;
- for (const auto &v : rsi.vertices) {
- assert(contains(rg_map, v));
- rg_involved.insert(rg_map.at(v));
- }
-
- // Remove all out-edges from repeat vertices that aren't to other repeat
- // vertices, then connect terminal repeat vertices to accept.
- for (const auto &v : rsi.vertices) {
- NFAVertex rv = rg_map.at(v);
- remove_out_edge_if(rv, [&](const NFAEdge &e) {
- return !contains(rg_involved, target(e, rg));
- }, rg);
- if (!has_successor(rv, rg)) { // no interior out-edges
- add_edge(rv, rg.accept, rg);
- }
- }
-
- pruneUseless(rg);
-
- if (is_triggered(rg)) {
- // Add vertices for all our triggers
- addTriggers(rg, triggers);
+ for (const auto &v : rsi.vertices) {
+ assert(contains(rg_map, v));
+ rg_involved.insert(rg_map.at(v));
+ }
+
+ // Remove all out-edges from repeat vertices that aren't to other repeat
+ // vertices, then connect terminal repeat vertices to accept.
+ for (const auto &v : rsi.vertices) {
+ NFAVertex rv = rg_map.at(v);
+ remove_out_edge_if(rv, [&](const NFAEdge &e) {
+ return !contains(rg_involved, target(e, rg));
+ }, rg);
+ if (!has_successor(rv, rg)) { // no interior out-edges
+ add_edge(rv, rg.accept, rg);
+ }
+ }
+
+ pruneUseless(rg);
+
+ if (is_triggered(rg)) {
+ // Add vertices for all our triggers
+ addTriggers(rg, triggers);
renumber_vertices(rg);
-
- // We don't know anything about how often this graph is triggered, so we
- // make the start vertex cyclic for the purposes of this analysis ONLY.
- add_edge(rg.start, rg.start, rg);
- }
-
- filterMap(rg, rg_map);
-
- // All of our repeat vertices should have vertices in rg.
- assert(all_of(begin(rsi.vertices), end(rsi.vertices),
- [&](const NFAVertex &v) { return contains(rg_map, v); }));
-}
-
-/**
- * Construct an input DAG which accepts on all entries to the repeat.
- */
-static
-void buildInputGraph(NGHolder &lhs,
+
+ // We don't know anything about how often this graph is triggered, so we
+ // make the start vertex cyclic for the purposes of this analysis ONLY.
+ add_edge(rg.start, rg.start, rg);
+ }
+
+ filterMap(rg, rg_map);
+
+ // All of our repeat vertices should have vertices in rg.
+ assert(all_of(begin(rsi.vertices), end(rsi.vertices),
+ [&](const NFAVertex &v) { return contains(rg_map, v); }));
+}
+
+/**
+ * Construct an input DAG which accepts on all entries to the repeat.
+ */
+static
+void buildInputGraph(NGHolder &lhs,
unordered_map<NFAVertex, NFAVertex> &lhs_map,
- const NGHolder &g, const NFAVertex first,
- const map<u32, vector<vector<CharReach>>> &triggers) {
+ const NGHolder &g, const NFAVertex first,
+ const map<u32, vector<vector<CharReach>>> &triggers) {
DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index);
- cloneHolder(lhs, g, &lhs_map);
- assert(g.kind == lhs.kind);
- addTriggers(lhs, triggers);
+ cloneHolder(lhs, g, &lhs_map);
+ assert(g.kind == lhs.kind);
+ addTriggers(lhs, triggers);
renumber_vertices(lhs);
-
- // Replace each back-edge (u,v) with an edge (startDs,v), which will
- // generate entries at at least the rate of the loop created by that
- // back-edge.
- set<NFAEdge> dead;
- BackEdges<set<NFAEdge> > backEdgeVisitor(dead);
+
+ // Replace each back-edge (u,v) with an edge (startDs,v), which will
+ // generate entries at at least the rate of the loop created by that
+ // back-edge.
+ set<NFAEdge> dead;
+ BackEdges<set<NFAEdge> > backEdgeVisitor(dead);
depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start));
- for (const auto &e : dead) {
- const NFAVertex u = source(e, lhs), v = target(e, lhs);
- if (u == v) {
- continue; // Self-loops are OK.
- }
-
+ for (const auto &e : dead) {
+ const NFAVertex u = source(e, lhs), v = target(e, lhs);
+ if (u == v) {
+ continue; // Self-loops are OK.
+ }
+
DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n",
lhs[u].index, lhs[v].index, lhs[v].index);
-
- add_edge_if_not_present(lhs.startDs, v, lhs);
- remove_edge(e, lhs);
- }
-
- clear_in_edges(lhs.accept, lhs);
- clear_in_edges(lhs.acceptEod, lhs);
- add_edge(lhs.accept, lhs.acceptEod, lhs);
-
- // Wire the predecessors of the first repeat vertex to accept, then prune.
- NFAVertex lhs_first = lhs_map.at(first);
- for (auto u : inv_adjacent_vertices_range(lhs_first, lhs)) {
- add_edge_if_not_present(u, lhs.accept, lhs);
- }
-
- pruneUseless(lhs);
- filterMap(lhs, lhs_map);
-}
-
-/**
- * Maximum number of vertices in the input DAG to actually allow sole entry
- * calculation (as very large cases make sentClearsTail take a long, long time
- * to complete.)
- */
-static const size_t MAX_SOLE_ENTRY_VERTICES = 10000;
-
-/** True if (1) fixed offset or (2) reentries to this subgraph must involve a
- * character which escapes the repeat, meaning that we only need to store a
- * single offset at runtime. See UE-1361. */
-static
-bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi,
+
+ add_edge_if_not_present(lhs.startDs, v, lhs);
+ remove_edge(e, lhs);
+ }
+
+ clear_in_edges(lhs.accept, lhs);
+ clear_in_edges(lhs.acceptEod, lhs);
+ add_edge(lhs.accept, lhs.acceptEod, lhs);
+
+ // Wire the predecessors of the first repeat vertex to accept, then prune.
+ NFAVertex lhs_first = lhs_map.at(first);
+ for (auto u : inv_adjacent_vertices_range(lhs_first, lhs)) {
+ add_edge_if_not_present(u, lhs.accept, lhs);
+ }
+
+ pruneUseless(lhs);
+ filterMap(lhs, lhs_map);
+}
+
+/**
+ * Maximum number of vertices in the input DAG to actually allow sole entry
+ * calculation (as very large cases make sentClearsTail take a long, long time
+ * to complete.)
+ */
+static const size_t MAX_SOLE_ENTRY_VERTICES = 10000;
+
+/** True if (1) fixed offset or (2) reentries to this subgraph must involve a
+ * character which escapes the repeat, meaning that we only need to store a
+ * single offset at runtime. See UE-1361. */
+static
+bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const unordered_set<NFAVertex> &reached_by_fixed_tops,
- const map<u32, vector<vector<CharReach>>> &triggers) {
- DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(),
- rsi.repeatMax.str().c_str());
- NFAVertex first = rsi.vertices.front();
- const CharReach &repeatReach = g[first].char_reach;
-
- /* trivial case first is at a fixed depth */
- if (entered_at_fixed_offset(first, g, depths, reached_by_fixed_tops)) {
- DEBUG_PRINTF("fixed depth\n");
- return true;
- }
-
- DEBUG_PRINTF("repeat reach is %s\n", describeClass(repeatReach).c_str());
-
- // Nothing can escape a dot repeat.
- if (repeatReach.all()) {
- DEBUG_PRINTF("dot repeat cannot be escaped\n");
- return false;
- }
-
- // Another easy case: if the union of the reach of all entries to the
- // repeat will always escape the repeat, we have sole entry.
- if (predReach(g, first).isSubsetOf(~repeatReach)) {
- DEBUG_PRINTF("pred reach %s, which is subset of repeat escape\n",
- describeClass(predReach(g, first)).c_str());
- return true;
- }
-
- NGHolder rg;
+ const map<u32, vector<vector<CharReach>>> &triggers) {
+ DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(),
+ rsi.repeatMax.str().c_str());
+ NFAVertex first = rsi.vertices.front();
+ const CharReach &repeatReach = g[first].char_reach;
+
+ /* trivial case first is at a fixed depth */
+ if (entered_at_fixed_offset(first, g, depths, reached_by_fixed_tops)) {
+ DEBUG_PRINTF("fixed depth\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("repeat reach is %s\n", describeClass(repeatReach).c_str());
+
+ // Nothing can escape a dot repeat.
+ if (repeatReach.all()) {
+ DEBUG_PRINTF("dot repeat cannot be escaped\n");
+ return false;
+ }
+
+ // Another easy case: if the union of the reach of all entries to the
+ // repeat will always escape the repeat, we have sole entry.
+ if (predReach(g, first).isSubsetOf(~repeatReach)) {
+ DEBUG_PRINTF("pred reach %s, which is subset of repeat escape\n",
+ describeClass(predReach(g, first)).c_str());
+ return true;
+ }
+
+ NGHolder rg;
unordered_map<NFAVertex, NFAVertex> rg_map;
- buildRepeatGraph(rg, rg_map, g, rsi, triggers);
- assert(rg.kind == g.kind);
-
- NGHolder lhs;
+ buildRepeatGraph(rg, rg_map, g, rsi, triggers);
+ assert(rg.kind == g.kind);
+
+ NGHolder lhs;
unordered_map<NFAVertex, NFAVertex> lhs_map;
- buildInputGraph(lhs, lhs_map, g, first, triggers);
- assert(lhs.kind == g.kind);
-
- if (num_vertices(lhs) > MAX_SOLE_ENTRY_VERTICES) {
- DEBUG_PRINTF("too many vertices (%zu) for sole entry test.\n",
- num_vertices(lhs));
- return false;
- }
-
- // Split the repeat graph into two regions: vertices in the LHS input DAG
- // are in one region, vertices in the bounded repeat are in another.
- const u32 lhs_region = 1;
- const u32 repeat_region = 2;
+ buildInputGraph(lhs, lhs_map, g, first, triggers);
+ assert(lhs.kind == g.kind);
+
+ if (num_vertices(lhs) > MAX_SOLE_ENTRY_VERTICES) {
+ DEBUG_PRINTF("too many vertices (%zu) for sole entry test.\n",
+ num_vertices(lhs));
+ return false;
+ }
+
+ // Split the repeat graph into two regions: vertices in the LHS input DAG
+ // are in one region, vertices in the bounded repeat are in another.
+ const u32 lhs_region = 1;
+ const u32 repeat_region = 2;
unordered_map<NFAVertex, u32> region_map;
-
- for (const auto &v : rsi.vertices) {
- assert(!is_special(v, g)); // no specials in repeats
- assert(contains(rg_map, v));
+
+ for (const auto &v : rsi.vertices) {
+ assert(!is_special(v, g)); // no specials in repeats
+ assert(contains(rg_map, v));
DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index);
- region_map.emplace(rg_map.at(v), repeat_region);
- }
-
- for (const auto &v : vertices_range(rg)) {
- if (!contains(region_map, v)) {
+ region_map.emplace(rg_map.at(v), repeat_region);
+ }
+
+ for (const auto &v : vertices_range(rg)) {
+ if (!contains(region_map, v)) {
DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index);
- region_map.emplace(v, lhs_region);
- }
- }
-
- u32 bad_region = 0;
- if (sentClearsTail(rg, region_map, lhs, lhs_region, &bad_region)) {
- DEBUG_PRINTF("input dag clears repeat: sole entry\n");
- return true;
- }
-
- DEBUG_PRINTF("not sole entry\n");
- return false;
-}
-
-namespace {
-
-template<class Graph>
-struct StrawWalker {
- StrawWalker(const NGHolder &h_in, const Graph &g_in,
- const vector<BoundedRepeatData> &all_repeats)
- : h(h_in), g(g_in), repeats(all_repeats) {}
-
- /** True if v is a cyclic that belongs to a bounded repeat (one without an
- * inf max bound). */
- bool isBoundedRepeatCyclic(NFAVertex v) const {
- for (const auto &r : repeats) {
- if (r.repeatMax.is_finite() && r.cyclic == v) {
- return true;
- }
- }
- return false;
- }
-
- NFAVertex step(NFAVertex v) const {
- typename Graph::adjacency_iterator ai, ae;
- tie(ai, ae) = adjacent_vertices(v, g);
- assert(ai != ae);
- NFAVertex next = *ai;
- if (next == v) { // Ignore self loop.
- ++ai;
- if (ai == ae) {
+ region_map.emplace(v, lhs_region);
+ }
+ }
+
+ u32 bad_region = 0;
+ if (sentClearsTail(rg, region_map, lhs, lhs_region, &bad_region)) {
+ DEBUG_PRINTF("input dag clears repeat: sole entry\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("not sole entry\n");
+ return false;
+}
+
+namespace {
+
+template<class Graph>
+struct StrawWalker {
+ StrawWalker(const NGHolder &h_in, const Graph &g_in,
+ const vector<BoundedRepeatData> &all_repeats)
+ : h(h_in), g(g_in), repeats(all_repeats) {}
+
+ /** True if v is a cyclic that belongs to a bounded repeat (one without an
+ * inf max bound). */
+ bool isBoundedRepeatCyclic(NFAVertex v) const {
+ for (const auto &r : repeats) {
+ if (r.repeatMax.is_finite() && r.cyclic == v) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ NFAVertex step(NFAVertex v) const {
+ typename Graph::adjacency_iterator ai, ae;
+ tie(ai, ae) = adjacent_vertices(v, g);
+ assert(ai != ae);
+ NFAVertex next = *ai;
+ if (next == v) { // Ignore self loop.
+ ++ai;
+ if (ai == ae) {
return NGHolder::null_vertex();
- }
- next = *ai;
- }
- ++ai;
- if (ai != ae && *ai == v) { // Ignore self loop
- ++ai;
- }
- if (ai != ae) {
- DEBUG_PRINTF("more than one succ\n");
- set<NFAVertex> succs;
- insert(&succs, adjacent_vertices(v, g));
- succs.erase(v);
- for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
- next = *ai;
+ }
+ next = *ai;
+ }
+ ++ai;
+ if (ai != ae && *ai == v) { // Ignore self loop
+ ++ai;
+ }
+ if (ai != ae) {
+ DEBUG_PRINTF("more than one succ\n");
+ set<NFAVertex> succs;
+ insert(&succs, adjacent_vertices(v, g));
+ succs.erase(v);
+ for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
+ next = *ai;
DEBUG_PRINTF("checking %zu\n", g[next].index);
- if (next == v) {
- continue;
- }
- set<NFAVertex> lsuccs;
- insert(&lsuccs, adjacent_vertices(next, g));
-
- if (lsuccs != succs) {
- continue;
- }
-
- // Ensure that if v is in connected to accept, the reports
- // on `next` much match.
- if (is_match_vertex(v, h) && g[v].reports != g[next].reports) {
- DEBUG_PRINTF("report mismatch\n");
- continue;
- }
-
- return next;
- }
- DEBUG_PRINTF("bailing\n");
+ if (next == v) {
+ continue;
+ }
+ set<NFAVertex> lsuccs;
+ insert(&lsuccs, adjacent_vertices(next, g));
+
+ if (lsuccs != succs) {
+ continue;
+ }
+
+ // Ensure that if v is in connected to accept, the reports
+ // on `next` much match.
+ if (is_match_vertex(v, h) && g[v].reports != g[next].reports) {
+ DEBUG_PRINTF("report mismatch\n");
+ continue;
+ }
+
+ return next;
+ }
+ DEBUG_PRINTF("bailing\n");
return NGHolder::null_vertex();
- }
- return next;
- }
-
- NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const {
+ }
+ return next;
+ }
+
+ NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const {
DEBUG_PRINTF("walk from %zu\n", g[v].index);
unordered_set<NFAVertex> visited;
- straw.clear();
-
- while (!is_special(v, g)) {
+ straw.clear();
+
+ while (!is_special(v, g)) {
DEBUG_PRINTF("checking %zu\n", g[v].index);
- NFAVertex next = step(v);
+ NFAVertex next = step(v);
if (next == NGHolder::null_vertex()) {
- break;
- }
- if (!visited.insert(next).second) {
+ break;
+ }
+ if (!visited.insert(next).second) {
DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index);
- break; /* don't want to get stuck in any complicated loops */
- }
-
- const CharReach &reach_v = g[v].char_reach;
- const CharReach &reach_next = g[next].char_reach;
- if (!reach_v.isSubsetOf(reach_next)) {
+ break; /* don't want to get stuck in any complicated loops */
+ }
+
+ const CharReach &reach_v = g[v].char_reach;
+ const CharReach &reach_next = g[next].char_reach;
+ if (!reach_v.isSubsetOf(reach_next)) {
DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n",
- g[next].index, g[v].index);
- break;
- }
-
- // If this is cyclic with the right reach, we're done. Note that
- // startDs fulfils this requirement.
- if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) {
+ g[next].index, g[v].index);
+ break;
+ }
+
+ // If this is cyclic with the right reach, we're done. Note that
+ // startDs fulfils this requirement.
+ if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) {
DEBUG_PRINTF("found cyclic %zu\n", g[next].index);
- return next;
- }
-
- v = next;
- straw.push_back(v);
- }
-
- straw.clear();
+ return next;
+ }
+
+ v = next;
+ straw.push_back(v);
+ }
+
+ straw.clear();
return NGHolder::null_vertex();
- }
-
-private:
- const NGHolder &h; // underlying graph
- const Graph &g;
- const vector<BoundedRepeatData> &repeats;
-};
-
-} // namespace
-
-static
-NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v,
- const vector<BoundedRepeatData> &all_repeats,
- vector<NFAVertex> &straw) {
+ }
+
+private:
+ const NGHolder &h; // underlying graph
+ const Graph &g;
+ const vector<BoundedRepeatData> &repeats;
+};
+
+} // namespace
+
+static
+NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v,
+ const vector<BoundedRepeatData> &all_repeats,
+ vector<NFAVertex> &straw) {
typedef boost::reverse_graph<NGHolder, const NGHolder &> RevGraph;
const RevGraph revg(g);
-
- auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw);
- reverse(begin(straw), end(straw)); // path comes from cyclic
- return cyclic;
-}
-
-static
-NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v,
- const vector<BoundedRepeatData> &all_repeats,
- vector<NFAVertex> &straw) {
+
+ auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw);
+ reverse(begin(straw), end(straw)); // path comes from cyclic
+ return cyclic;
+}
+
+static
+NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v,
+ const vector<BoundedRepeatData> &all_repeats,
+ vector<NFAVertex> &straw) {
return StrawWalker<NGHolder>(g, g, all_repeats).walk(v, straw);
-}
-
-/** True if entries to this subgraph must pass through a cyclic state with
- * reachability that is a superset of the reach of the repeat, and
- * reachabilities along this path "nest" into the reaches of their
- * predecessors.
- *
- * This is what is called a 'straw' in the region code. */
-static
-bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi,
- const vector<BoundedRepeatData> &all_repeats) {
- // Cope with peeling by following a chain of single vertices backwards
- // until we encounter our cyclic, all of which must have superset reach.
- vector<NFAVertex> straw;
- return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) !=
+}
+
+/** True if entries to this subgraph must pass through a cyclic state with
+ * reachability that is a superset of the reach of the repeat, and
+ * reachabilities along this path "nest" into the reaches of their
+ * predecessors.
+ *
+ * This is what is called a 'straw' in the region code. */
+static
+bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi,
+ const vector<BoundedRepeatData> &all_repeats) {
+ // Cope with peeling by following a chain of single vertices backwards
+ // until we encounter our cyclic, all of which must have superset reach.
+ vector<NFAVertex> straw;
+ return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) !=
NGHolder::null_vertex();
-}
-
-static
-bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi,
- const vector<BoundedRepeatData> &all_repeats) {
- vector<NFAVertex> straw;
- return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) !=
+}
+
+static
+bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi,
+ const vector<BoundedRepeatData> &all_repeats) {
+ vector<NFAVertex> straw;
+ return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) !=
NGHolder::null_vertex();
-}
-
-static
-bool leadsOnlyToAccept(const NGHolder &g, const ReachSubgraph &rsi) {
- const NFAVertex u = rsi.vertices.back();
- for (auto v : adjacent_vertices_range(u, g)) {
- if (v != g.accept) {
- return false;
- }
- }
- assert(out_degree(u, g));
- return true;
-}
-
-static
-bool allSimpleHighlander(const ReportManager &rm,
- const flat_set<ReportID> &reports) {
- assert(!reports.empty());
- for (auto report : reports) {
- if (!isSimpleExhaustible(rm.getReport(report))) {
- return false;
- }
- }
-
- return true;
-}
-
-// Finds a single, fairly unrefined trigger for the repeat by walking backwards
-// and collecting the unioned reach at each step.
-static
-vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) {
- const size_t MAX_TRIGGER_STEPS = 32;
-
- vector<CharReach> trigger;
-
+}
+
+static
+bool leadsOnlyToAccept(const NGHolder &g, const ReachSubgraph &rsi) {
+ const NFAVertex u = rsi.vertices.back();
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (v != g.accept) {
+ return false;
+ }
+ }
+ assert(out_degree(u, g));
+ return true;
+}
+
+static
+bool allSimpleHighlander(const ReportManager &rm,
+ const flat_set<ReportID> &reports) {
+ assert(!reports.empty());
+ for (auto report : reports) {
+ if (!isSimpleExhaustible(rm.getReport(report))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Finds a single, fairly unrefined trigger for the repeat by walking backwards
+// and collecting the unioned reach at each step.
+static
+vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) {
+ const size_t MAX_TRIGGER_STEPS = 32;
+
+ vector<CharReach> trigger;
+
flat_set<NFAVertex> curr, next;
- insert(&curr, inv_adjacent_vertices(v, g));
-
- if (contains(curr, g.start)) {
- DEBUG_PRINTF("start in repeat's immediate preds\n");
- trigger.push_back(CharReach::dot()); // Trigger could be anything!
- return trigger;
- }
-
- for (size_t num_steps = 0; num_steps < MAX_TRIGGER_STEPS; num_steps++) {
- next.clear();
- trigger.push_back(CharReach());
- CharReach &cr = trigger.back();
-
- for (auto v_c : curr) {
- cr |= g[v_c].char_reach;
- insert(&next, inv_adjacent_vertices(v_c, g));
- }
-
- DEBUG_PRINTF("cr[%zu]=%s\n", num_steps, describeClass(cr).c_str());
-
- if (next.empty() || contains(next, g.start)) {
- break;
- }
-
- curr.swap(next);
- }
-
- reverse(trigger.begin(), trigger.end());
- return trigger;
-}
-
-static
-vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g,
- const NFAVertex sink) {
- const size_t MAX_TRIGGER_STEPS = 32;
- const size_t UNIONED_FALLBACK_THRESHOLD = 100;
-
- using Path = deque<NFAVertex>;
-
- vector<vector<CharReach>> triggers;
-
- deque<Path> q; // work queue
- deque<Path> done; // finished paths
-
- size_t max_len = MAX_TRIGGER_STEPS;
-
- // Find a set of paths leading to vertex v by depth first search.
-
- for (auto u : inv_adjacent_vertices_range(sink, g)) {
- if (is_any_start(u, g)) {
- triggers.push_back({}); // empty
- return triggers;
- }
- q.push_back(Path(1, u));
- }
-
- while (!q.empty()) {
- Path &path = q.front();
- NFAVertex v = path.back();
-
- if (path.size() >= max_len) {
- max_len = min(max_len, path.size());
- done.push_back(path);
- goto next_path;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (is_any_start(u, g)) {
- // Found an accept. There's no point expanding this path any
- // further, we're done.
- max_len = min(max_len, path.size());
- done.push_back(path);
- goto next_path;
- }
-
- if (path.size() + 1 >= max_len) {
- done.push_back(path);
- done.back().push_back(u);
- } else {
- q.push_back(path); // copy
- q.back().push_back(u);
- }
- }
-
- next_path:
- q.pop_front();
-
- // If our queue or our finished trigger list gets too large, fall back
- // to generating a single trigger with union reach.
- if (q.size() + done.size() > UNIONED_FALLBACK_THRESHOLD) {
- DEBUG_PRINTF("search too large, fall back to union trigger\n");
- triggers.clear();
- triggers.push_back(getUnionedTrigger(g, sink));
- return triggers;
- }
- }
-
- assert(!done.empty());
-
- // Convert our path list into a set of unique triggers.
+ insert(&curr, inv_adjacent_vertices(v, g));
+
+ if (contains(curr, g.start)) {
+ DEBUG_PRINTF("start in repeat's immediate preds\n");
+ trigger.push_back(CharReach::dot()); // Trigger could be anything!
+ return trigger;
+ }
+
+ for (size_t num_steps = 0; num_steps < MAX_TRIGGER_STEPS; num_steps++) {
+ next.clear();
+ trigger.push_back(CharReach());
+ CharReach &cr = trigger.back();
+
+ for (auto v_c : curr) {
+ cr |= g[v_c].char_reach;
+ insert(&next, inv_adjacent_vertices(v_c, g));
+ }
+
+ DEBUG_PRINTF("cr[%zu]=%s\n", num_steps, describeClass(cr).c_str());
+
+ if (next.empty() || contains(next, g.start)) {
+ break;
+ }
+
+ curr.swap(next);
+ }
+
+ reverse(trigger.begin(), trigger.end());
+ return trigger;
+}
+
+static
+vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g,
+ const NFAVertex sink) {
+ const size_t MAX_TRIGGER_STEPS = 32;
+ const size_t UNIONED_FALLBACK_THRESHOLD = 100;
+
+ using Path = deque<NFAVertex>;
+
+ vector<vector<CharReach>> triggers;
+
+ deque<Path> q; // work queue
+ deque<Path> done; // finished paths
+
+ size_t max_len = MAX_TRIGGER_STEPS;
+
+ // Find a set of paths leading to vertex v by depth first search.
+
+ for (auto u : inv_adjacent_vertices_range(sink, g)) {
+ if (is_any_start(u, g)) {
+ triggers.push_back({}); // empty
+ return triggers;
+ }
+ q.push_back(Path(1, u));
+ }
+
+ while (!q.empty()) {
+ Path &path = q.front();
+ NFAVertex v = path.back();
+
+ if (path.size() >= max_len) {
+ max_len = min(max_len, path.size());
+ done.push_back(path);
+ goto next_path;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (is_any_start(u, g)) {
+ // Found an accept. There's no point expanding this path any
+ // further, we're done.
+ max_len = min(max_len, path.size());
+ done.push_back(path);
+ goto next_path;
+ }
+
+ if (path.size() + 1 >= max_len) {
+ done.push_back(path);
+ done.back().push_back(u);
+ } else {
+ q.push_back(path); // copy
+ q.back().push_back(u);
+ }
+ }
+
+ next_path:
+ q.pop_front();
+
+ // If our queue or our finished trigger list gets too large, fall back
+ // to generating a single trigger with union reach.
+ if (q.size() + done.size() > UNIONED_FALLBACK_THRESHOLD) {
+ DEBUG_PRINTF("search too large, fall back to union trigger\n");
+ triggers.clear();
+ triggers.push_back(getUnionedTrigger(g, sink));
+ return triggers;
+ }
+ }
+
+ assert(!done.empty());
+
+ // Convert our path list into a set of unique triggers.
ue2_unordered_set<vector<CharReach>> unique_triggers;
- for (const auto &path : done) {
- vector<CharReach> reach_path;
- for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) {
- reach_path.push_back(g[*jt].char_reach);
- }
- unique_triggers.insert(reach_path);
- }
-
- insert(&triggers, triggers.end(), unique_triggers);
- sort(triggers.begin(), triggers.end());
- DEBUG_PRINTF("built %zu unique triggers, max_len=%zu\n", triggers.size(),
- max_len);
- return triggers;
-}
-
-static
-void findMinPeriod(const NGHolder &g,
- const map<u32, vector<vector<CharReach>>> &triggers,
- ReachSubgraph &rsi) {
- const auto v = rsi.vertices.front();
- const CharReach &cr = g[v].char_reach;
-
- vector<vector<CharReach>> repeat_triggers;
-
- if (is_triggered(g)) {
- // Construct a temporary copy of the graph that also contains its
- // triggers, potentially lengthening the repeat's triggers.
- NGHolder tg;
- unordered_map<NFAVertex, NFAVertex> tg_map;
- cloneHolder(tg, g, &tg_map);
- addTriggers(tg, triggers);
- assert(contains(tg_map, v));
- repeat_triggers = getRepeatTriggers(tg, tg_map.at(v));
- } else {
- // Not triggered, no need to mutate the graph.
- repeat_triggers = getRepeatTriggers(g, v);
- }
-
- rsi.minPeriod = minPeriod(repeat_triggers, cr, &rsi.is_reset);
- DEBUG_PRINTF("%zu triggers, minPeriod=%u, is_reset=%d\n",
- repeat_triggers.size(), rsi.minPeriod, (int)rsi.is_reset);
-}
-
-static
-void
-selectHistoryScheme(const NGHolder &g, const ReportManager *rm,
- ReachSubgraph &rsi,
+ for (const auto &path : done) {
+ vector<CharReach> reach_path;
+ for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) {
+ reach_path.push_back(g[*jt].char_reach);
+ }
+ unique_triggers.insert(reach_path);
+ }
+
+ insert(&triggers, triggers.end(), unique_triggers);
+ sort(triggers.begin(), triggers.end());
+ DEBUG_PRINTF("built %zu unique triggers, max_len=%zu\n", triggers.size(),
+ max_len);
+ return triggers;
+}
+
+static
+void findMinPeriod(const NGHolder &g,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ ReachSubgraph &rsi) {
+ const auto v = rsi.vertices.front();
+ const CharReach &cr = g[v].char_reach;
+
+ vector<vector<CharReach>> repeat_triggers;
+
+ if (is_triggered(g)) {
+ // Construct a temporary copy of the graph that also contains its
+ // triggers, potentially lengthening the repeat's triggers.
+ NGHolder tg;
+ unordered_map<NFAVertex, NFAVertex> tg_map;
+ cloneHolder(tg, g, &tg_map);
+ addTriggers(tg, triggers);
+ assert(contains(tg_map, v));
+ repeat_triggers = getRepeatTriggers(tg, tg_map.at(v));
+ } else {
+ // Not triggered, no need to mutate the graph.
+ repeat_triggers = getRepeatTriggers(g, v);
+ }
+
+ rsi.minPeriod = minPeriod(repeat_triggers, cr, &rsi.is_reset);
+ DEBUG_PRINTF("%zu triggers, minPeriod=%u, is_reset=%d\n",
+ repeat_triggers.size(), rsi.minPeriod, (int)rsi.is_reset);
+}
+
+static
+void
+selectHistoryScheme(const NGHolder &g, const ReportManager *rm,
+ ReachSubgraph &rsi,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const unordered_set<NFAVertex> &reached_by_fixed_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- const vector<BoundedRepeatData> &all_repeats,
- const bool simple_model_selection) {
- // {N,} cases use the FIRST history mechanism.
- if (rsi.repeatMax.is_infinite()) {
- DEBUG_PRINTF("selected FIRST history\n");
- rsi.historyType = REPEAT_FIRST;
- return;
- }
-
- /* If we have a repeat which only raises a highlander, only the first match
- * matters */
- if (rm && leadsOnlyToAccept(g, rsi)
- && allSimpleHighlander(*rm, g[rsi.vertices.back()].reports)) {
- DEBUG_PRINTF("selected FIRST history (as highlander)\n");
- rsi.historyType = REPEAT_FIRST;
- rsi.repeatMax = depth::infinity(); /* for consistency */
- return;
- }
-
- // {N,M} cases can use the FIRST mechanism if they follow a cyclic which
- // includes their reachability via a "straw" path. (see UE-1589)
- if (hasCyclicSupersetEntryPath(g, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
- "superset of reach\n");
- rsi.historyType = REPEAT_FIRST;
- rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
- return;
- }
-
- // Similarly, {N,M} cases can use the FIRST mechanism if they precede a
- // cyclic which includes their reachability via a "straw" path.
- if (hasCyclicSupersetExitPath(g, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
- "superset of reach\n");
- rsi.historyType = REPEAT_FIRST;
- rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
- return;
- }
-
- // Could have skip edges and therefore be a {0,N} repeat.
- if (rsi.repeatMin == depth(1) && hasSkipEdges(g, rsi)) {
- DEBUG_PRINTF("selected LAST history\n");
- rsi.historyType = REPEAT_LAST;
- return;
- }
-
- // Fill minPeriod, is_reset flags
- findMinPeriod(g, triggers, rsi);
-
- // If we can't re-enter this cyclic state, we have a reset case.
- // This check can be very expensive, so we don't do it if we've been asked
- // for simple model selection.
- if (!simple_model_selection && !rsi.is_reset &&
- hasSoleEntry(g, rsi, depths, reached_by_fixed_tops, triggers)) {
- DEBUG_PRINTF("repeat is sole entry -> reset\n");
- rsi.is_reset = true;
- }
-
- // We can lean on the common selection code for the remainder of our repeat
- // models.
- rsi.historyType = chooseRepeatType(rsi.repeatMin, rsi.repeatMax,
- rsi.minPeriod, rsi.is_reset);
-}
-
-static
-void buildFeeder(NGHolder &g, const BoundedRepeatData &rd,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ const vector<BoundedRepeatData> &all_repeats,
+ const bool simple_model_selection) {
+ // {N,} cases use the FIRST history mechanism.
+ if (rsi.repeatMax.is_infinite()) {
+ DEBUG_PRINTF("selected FIRST history\n");
+ rsi.historyType = REPEAT_FIRST;
+ return;
+ }
+
+ /* If we have a repeat which only raises a highlander, only the first match
+ * matters */
+ if (rm && leadsOnlyToAccept(g, rsi)
+ && allSimpleHighlander(*rm, g[rsi.vertices.back()].reports)) {
+ DEBUG_PRINTF("selected FIRST history (as highlander)\n");
+ rsi.historyType = REPEAT_FIRST;
+ rsi.repeatMax = depth::infinity(); /* for consistency */
+ return;
+ }
+
+ // {N,M} cases can use the FIRST mechanism if they follow a cyclic which
+ // includes their reachability via a "straw" path. (see UE-1589)
+ if (hasCyclicSupersetEntryPath(g, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
+ "superset of reach\n");
+ rsi.historyType = REPEAT_FIRST;
+ rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
+ return;
+ }
+
+ // Similarly, {N,M} cases can use the FIRST mechanism if they precede a
+ // cyclic which includes their reachability via a "straw" path.
+ if (hasCyclicSupersetExitPath(g, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
+ "superset of reach\n");
+ rsi.historyType = REPEAT_FIRST;
+ rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
+ return;
+ }
+
+ // Could have skip edges and therefore be a {0,N} repeat.
+ if (rsi.repeatMin == depth(1) && hasSkipEdges(g, rsi)) {
+ DEBUG_PRINTF("selected LAST history\n");
+ rsi.historyType = REPEAT_LAST;
+ return;
+ }
+
+ // Fill minPeriod, is_reset flags
+ findMinPeriod(g, triggers, rsi);
+
+ // If we can't re-enter this cyclic state, we have a reset case.
+ // This check can be very expensive, so we don't do it if we've been asked
+ // for simple model selection.
+ if (!simple_model_selection && !rsi.is_reset &&
+ hasSoleEntry(g, rsi, depths, reached_by_fixed_tops, triggers)) {
+ DEBUG_PRINTF("repeat is sole entry -> reset\n");
+ rsi.is_reset = true;
+ }
+
+ // We can lean on the common selection code for the remainder of our repeat
+ // models.
+ rsi.historyType = chooseRepeatType(rsi.repeatMin, rsi.repeatMax,
+ rsi.minPeriod, rsi.is_reset);
+}
+
+static
+void buildFeeder(NGHolder &g, const BoundedRepeatData &rd,
unordered_set<NFAVertex> &created,
- const vector<NFAVertex> &straw) {
- if (!g[rd.cyclic].char_reach.all()) {
- // Create another cyclic feeder state with flipped reach. It has an
- // edge from the repeat's cyclic state and pos_trigger, an edge to the
- // straw, and edges from every vertex along the straw.
- NFAVertex feeder = clone_vertex(g, rd.cyclic);
- created.insert(feeder);
- g[feeder].char_reach.flip();
- add_edge(feeder, feeder, g);
- add_edge(rd.pos_trigger, feeder, g);
- add_edge(rd.cyclic, feeder, g);
- add_edge(feeder, straw.front(), g);
-
- // An edge from every vertex in the straw.
- for (auto v : straw) {
- add_edge(v, feeder, g);
- }
-
- // An edge to the feeder from the first vertex in the straw and all of
- // its predecessors (other than the feeder itself, we've already
- // created that edge!)
- for (auto u : inv_adjacent_vertices_range(straw.front(), g)) {
- if (u == feeder) {
- continue;
- }
- add_edge(u, feeder, g);
- }
-
+ const vector<NFAVertex> &straw) {
+ if (!g[rd.cyclic].char_reach.all()) {
+ // Create another cyclic feeder state with flipped reach. It has an
+ // edge from the repeat's cyclic state and pos_trigger, an edge to the
+ // straw, and edges from every vertex along the straw.
+ NFAVertex feeder = clone_vertex(g, rd.cyclic);
+ created.insert(feeder);
+ g[feeder].char_reach.flip();
+ add_edge(feeder, feeder, g);
+ add_edge(rd.pos_trigger, feeder, g);
+ add_edge(rd.cyclic, feeder, g);
+ add_edge(feeder, straw.front(), g);
+
+ // An edge from every vertex in the straw.
+ for (auto v : straw) {
+ add_edge(v, feeder, g);
+ }
+
+ // An edge to the feeder from the first vertex in the straw and all of
+ // its predecessors (other than the feeder itself, we've already
+ // created that edge!)
+ for (auto u : inv_adjacent_vertices_range(straw.front(), g)) {
+ if (u == feeder) {
+ continue;
+ }
+ add_edge(u, feeder, g);
+ }
+
DEBUG_PRINTF("added feeder %zu\n", g[feeder].index);
- } else {
- // No neg trigger means feeder is empty, and unnecessary.
- assert(g[rd.pos_trigger].char_reach.all());
- }
-}
-
-/**
- * If we have a leading first repeat, we can split startDs so that it is not
- * cyclic so that the repeat is only triggered once, rather than every byte. If we
- * perform this transform we must create another cyclic state to retrigger the
- * repeat after we see an escape for the repeat.
- *
- * We do not use the anchored start state to allow us to restart the NFA at a deep
- * offset.
- */
-static
-bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
+ } else {
+ // No neg trigger means feeder is empty, and unnecessary.
+ assert(g[rd.pos_trigger].char_reach.all());
+ }
+}
+
+/**
+ * If we have a leading first repeat, we can split startDs so that it is not
+ * cyclic so that the repeat is only triggered once, rather than every byte. If we
+ * perform this transform we must create another cyclic state to retrigger the
+ * repeat after we see an escape for the repeat.
+ *
+ * We do not use the anchored start state to allow us to restart the NFA at a deep
+ * offset.
+ */
+static
+bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
unordered_set<NFAVertex> &created,
- const vector<BoundedRepeatData> &all_repeats) {
- assert(edge(g.startDs, g.startDs, g).second);
-
- // UE-1617: can rewire FIRST history cases that are preceded by
- // startDs.
- if (rd.type != REPEAT_FIRST) {
- return false;
- }
-
- const CharReach &cyc_cr = g[rd.cyclic].char_reach;
-
- // This transformation is only worth doing if this would allow us to
- // accelerate the cyclic state (UE-2055).
- if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("we wouldn't be able to accel this case\n");
- return false;
- }
-
- vector<NFAVertex> straw;
- NFAVertex pred =
- walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
- if (pred != g.startDs) {
- DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
- return false;
- }
-
- // This transformation is only safe if the straw path from startDs that
- // we've discovered can *only* lead to this repeat, since we're going to
- // remove the self-loop on startDs.
+ const vector<BoundedRepeatData> &all_repeats) {
+ assert(edge(g.startDs, g.startDs, g).second);
+
+ // UE-1617: can rewire FIRST history cases that are preceded by
+ // startDs.
+ if (rd.type != REPEAT_FIRST) {
+ return false;
+ }
+
+ const CharReach &cyc_cr = g[rd.cyclic].char_reach;
+
+ // This transformation is only worth doing if this would allow us to
+ // accelerate the cyclic state (UE-2055).
+ if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("we wouldn't be able to accel this case\n");
+ return false;
+ }
+
+ vector<NFAVertex> straw;
+ NFAVertex pred =
+ walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
+ if (pred != g.startDs) {
+ DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
+ return false;
+ }
+
+ // This transformation is only safe if the straw path from startDs that
+ // we've discovered can *only* lead to this repeat, since we're going to
+ // remove the self-loop on startDs.
if (proper_out_degree(g.startDs, g) > 1) {
- DEBUG_PRINTF("startDs has other successors\n");
- return false;
- }
- for (const auto &v : straw) {
- if (proper_out_degree(v, g) != 1) {
+ DEBUG_PRINTF("startDs has other successors\n");
+ return false;
+ }
+ for (const auto &v : straw) {
+ if (proper_out_degree(v, g) != 1) {
DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n",
- g[v].index);
- return false;
- }
- }
-
- if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("entry is narrow, could be accelerable\n");
- return false;
- }
-
- assert(!straw.empty());
-
- /* If there is overlap between the feeder and the first vertex in the straw
- * fun things happen. TODO: handle fun things happening (requires more
- * edges and more vertices). */
- if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
- DEBUG_PRINTF("straw has `interesting' reach\n");
- return false;
- }
-
- DEBUG_PRINTF("repeat can be improved by removing startDs loop!\n");
-
- // Remove the self-loop on startDs! What a blast!
- remove_edge(g.startDs, g.startDs, g);
-
- // Wire up feeder state to straw.
- buildFeeder(g, rd, created, straw);
-
- return true;
-}
-
-static
-vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
- const vector<NFAVertex> &straw) {
- // Straw runs from startDs to our pos trigger.
- assert(!straw.empty());
- assert(edge(g.startDs, straw.front(), g).second);
- assert(edge(straw.back(), rd.pos_trigger, g).second);
-
- vector<NFAVertex> own_straw;
- for (const auto &v : straw) {
- NFAVertex v2 = clone_vertex(g, v);
- if (hasSelfLoop(v, g)) {
- add_edge(v2, v2, g);
- }
- if (!own_straw.empty()) {
- add_edge(own_straw.back(), v2, g);
- }
- own_straw.push_back(v2);
- }
-
- // Wire our straw to start, not startDs.
- add_edge(g.start, own_straw.front(), g);
-
- // Swap over to using our own straw to get to the POS trigger.
- remove_edge(straw.back(), rd.pos_trigger, g);
- add_edge(own_straw.back(), rd.pos_trigger, g);
-
- return own_straw;
-}
-
-/**
- * Specialized version of improveLeadingRepeat for outfixes, in which we can
- * rewire the straw to start instead of removing the startDs self-loop.
- */
-static
-bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
+ g[v].index);
+ return false;
+ }
+ }
+
+ if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("entry is narrow, could be accelerable\n");
+ return false;
+ }
+
+ assert(!straw.empty());
+
+ /* If there is overlap between the feeder and the first vertex in the straw
+ * fun things happen. TODO: handle fun things happening (requires more
+ * edges and more vertices). */
+ if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
+ DEBUG_PRINTF("straw has `interesting' reach\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("repeat can be improved by removing startDs loop!\n");
+
+ // Remove the self-loop on startDs! What a blast!
+ remove_edge(g.startDs, g.startDs, g);
+
+ // Wire up feeder state to straw.
+ buildFeeder(g, rd, created, straw);
+
+ return true;
+}
+
+static
+vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
+ const vector<NFAVertex> &straw) {
+ // Straw runs from startDs to our pos trigger.
+ assert(!straw.empty());
+ assert(edge(g.startDs, straw.front(), g).second);
+ assert(edge(straw.back(), rd.pos_trigger, g).second);
+
+ vector<NFAVertex> own_straw;
+ for (const auto &v : straw) {
+ NFAVertex v2 = clone_vertex(g, v);
+ if (hasSelfLoop(v, g)) {
+ add_edge(v2, v2, g);
+ }
+ if (!own_straw.empty()) {
+ add_edge(own_straw.back(), v2, g);
+ }
+ own_straw.push_back(v2);
+ }
+
+ // Wire our straw to start, not startDs.
+ add_edge(g.start, own_straw.front(), g);
+
+ // Swap over to using our own straw to get to the POS trigger.
+ remove_edge(straw.back(), rd.pos_trigger, g);
+ add_edge(own_straw.back(), rd.pos_trigger, g);
+
+ return own_straw;
+}
+
+/**
+ * Specialized version of improveLeadingRepeat for outfixes, in which we can
+ * rewire the straw to start instead of removing the startDs self-loop.
+ */
+static
+bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
unordered_set<NFAVertex> &created,
- const vector<BoundedRepeatData> &all_repeats) {
- assert(g.kind == NFA_OUTFIX);
-
- // UE-1617: can rewire FIRST history cases that are preceded by
- // startDs.
- if (rd.type != REPEAT_FIRST) {
- return false;
- }
-
- const CharReach &cyc_cr = g[rd.cyclic].char_reach;
-
- // This transformation is only worth doing if this would allow us to
- // accelerate the cyclic state (UE-2055).
- if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("we wouldn't be able to accel this case\n");
- return false;
- }
-
- vector<NFAVertex> straw;
- NFAVertex pred =
- walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
- if (pred != g.startDs) {
- DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
- return false;
- }
-
- if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("entry is narrow, could be accelerable\n");
- return false;
- }
-
- assert(!straw.empty());
-
- /* If there is overlap between the feeder and the first vertex in the straw
- * fun things happen. TODO: handle fun things happening (requires more
- * edges and more vertices). */
- if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
- DEBUG_PRINTF("straw has `interesting' reach\n");
- return false;
- }
-
- DEBUG_PRINTF("repeat can be improved by rebuilding its entry\n");
-
- const auto own_straw = makeOwnStraw(g, rd, straw);
- insert(&created, own_straw);
-
- // Wire up feeder state to our new straw.
- buildFeeder(g, rd, created, own_straw);
-
- // We may no longer need the original straw.
- pruneUseless(g);
-
- return true;
-}
-
-/** Returns true if doing the bounded repeat transformation on this case
- * results in a smaller NFA model. */
-static
-bool givesBetterModel(const NGHolder &g, const vector<ReachSubgraph> &rs) {
- static const u32 MAX_FAST_STATES = 128; // bigger NFAs are fat and slow.
-
- // We use vertex count as an upper bound for the number of states.
- u32 curr_states = num_vertices(g) - 2; // accepts don't have states
-
- if (curr_states <= MAX_FAST_STATES) {
- return false;
- }
- if (curr_states > NFA_MAX_STATES) {
- return true;
- }
-
- u32 expected_states = curr_states;
- for (const auto &rsi : rs) {
- /* may be off as unpeeling not done yet */
- expected_states += 2; /* cyclic and pos */
- expected_states -= rsi.vertices.size();
- }
-
- return ROUNDUP_N(curr_states, 128) != ROUNDUP_N(expected_states, 128);
-}
-
-/** True if this repeat terminates with a vertex that leads only to accept. */
-static
-bool endsInAccept(const NGHolder &g, const ReachSubgraph &rsi) {
- NFAVertex last = rsi.vertices.back();
- return getSoleDestVertex(g, last) == g.accept;
-}
-
-static
-bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) {
- NFAVertex last = rsi.vertices.back();
- return getSoleDestVertex(g, last) == g.acceptEod;
-}
-
-namespace {
-class pfti_visitor : public boost::default_dfs_visitor {
-public:
+ const vector<BoundedRepeatData> &all_repeats) {
+ assert(g.kind == NFA_OUTFIX);
+
+ // UE-1617: can rewire FIRST history cases that are preceded by
+ // startDs.
+ if (rd.type != REPEAT_FIRST) {
+ return false;
+ }
+
+ const CharReach &cyc_cr = g[rd.cyclic].char_reach;
+
+ // This transformation is only worth doing if this would allow us to
+ // accelerate the cyclic state (UE-2055).
+ if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("we wouldn't be able to accel this case\n");
+ return false;
+ }
+
+ vector<NFAVertex> straw;
+ NFAVertex pred =
+ walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
+ if (pred != g.startDs) {
+ DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
+ return false;
+ }
+
+ if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("entry is narrow, could be accelerable\n");
+ return false;
+ }
+
+ assert(!straw.empty());
+
+ /* If there is overlap between the feeder and the first vertex in the straw
+ * fun things happen. TODO: handle fun things happening (requires more
+ * edges and more vertices). */
+ if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
+ DEBUG_PRINTF("straw has `interesting' reach\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("repeat can be improved by rebuilding its entry\n");
+
+ const auto own_straw = makeOwnStraw(g, rd, straw);
+ insert(&created, own_straw);
+
+ // Wire up feeder state to our new straw.
+ buildFeeder(g, rd, created, own_straw);
+
+ // We may no longer need the original straw.
+ pruneUseless(g);
+
+ return true;
+}
+
+/** Returns true if doing the bounded repeat transformation on this case
+ * results in a smaller NFA model. */
+static
+bool givesBetterModel(const NGHolder &g, const vector<ReachSubgraph> &rs) {
+ static const u32 MAX_FAST_STATES = 128; // bigger NFAs are fat and slow.
+
+ // We use vertex count as an upper bound for the number of states.
+ u32 curr_states = num_vertices(g) - 2; // accepts don't have states
+
+ if (curr_states <= MAX_FAST_STATES) {
+ return false;
+ }
+ if (curr_states > NFA_MAX_STATES) {
+ return true;
+ }
+
+ u32 expected_states = curr_states;
+ for (const auto &rsi : rs) {
+ /* may be off as unpeeling not done yet */
+ expected_states += 2; /* cyclic and pos */
+ expected_states -= rsi.vertices.size();
+ }
+
+ return ROUNDUP_N(curr_states, 128) != ROUNDUP_N(expected_states, 128);
+}
+
+/** True if this repeat terminates with a vertex that leads only to accept. */
+static
+bool endsInAccept(const NGHolder &g, const ReachSubgraph &rsi) {
+ NFAVertex last = rsi.vertices.back();
+ return getSoleDestVertex(g, last) == g.accept;
+}
+
+static
+bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) {
+ NFAVertex last = rsi.vertices.back();
+ return getSoleDestVertex(g, last) == g.acceptEod;
+}
+
+namespace {
+class pfti_visitor : public boost::default_dfs_visitor {
+public:
pfti_visitor(unordered_map<NFAVertex, depth> &top_depths_in,
- const depth &our_depth_in)
- : top_depths(top_depths_in), our_depth(our_depth_in) {}
-
+ const depth &our_depth_in)
+ : top_depths(top_depths_in), our_depth(our_depth_in) {}
+
void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) {
DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index,
- our_depth.str().c_str());
-
- auto it = top_depths.find(v);
- if (it != top_depths.end() && it->second != our_depth) {
- // already seen at a different depth, remove from consideration.
- it->second = depth::infinity();
- } else {
- top_depths[v] = our_depth;
- }
- }
+ our_depth.str().c_str());
+
+ auto it = top_depths.find(v);
+ if (it != top_depths.end() && it->second != our_depth) {
+ // already seen at a different depth, remove from consideration.
+ it->second = depth::infinity();
+ } else {
+ top_depths[v] = our_depth;
+ }
+ }
unordered_map<NFAVertex, depth> &top_depths;
- const depth &our_depth;
-};
-} // namespace
-
-static
-void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops,
- const NGHolder &g,
+ const depth &our_depth;
+};
+} // namespace
+
+static
+void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops,
+ const NGHolder &g,
unordered_set<NFAVertex> *reached_by_fixed_tops) {
- if (fixed_depth_tops.empty()) {
- return; /* we will never find anything */
- }
-
- assert(!proper_out_degree(g.startDs, g));
+ if (fixed_depth_tops.empty()) {
+ return; /* we will never find anything */
+ }
+
+ assert(!proper_out_degree(g.startDs, g));
unordered_map<NFAVertex, depth> top_depths;
auto colours = make_small_color_map(g);
-
- for (const auto &e : out_edges_range(g.start, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
-
- depth td = depth::infinity();
+
+ for (const auto &e : out_edges_range(g.start, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
+
+ depth td = depth::infinity();
for (u32 top : g[e].tops) {
if (!contains(fixed_depth_tops, top)) {
td = depth::infinity();
@@ -2131,417 +2131,417 @@ void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops,
td = depth::infinity();
break;
}
- }
-
+ }
+
DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index,
td.str().c_str());
- /* for each vertex reachable from v update its map to reflect that it is
- * reachable from a top of depth td. */
-
+ /* for each vertex reachable from v update its map to reflect that it is
+ * reachable from a top of depth td. */
+
depth_first_visit(g, v, pfti_visitor(top_depths, td), colours);
- }
-
- for (const auto &v_depth : top_depths) {
- const NFAVertex v = v_depth.first;
- const depth &d = v_depth.second;
- if (d.is_finite()) {
+ }
+
+ for (const auto &v_depth : top_depths) {
+ const NFAVertex v = v_depth.first;
+ const depth &d = v_depth.second;
+ if (d.is_finite()) {
DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n",
- g[v].index, d.str().c_str());
- reached_by_fixed_tops->insert(v);
- }
- }
-}
-
-#ifndef NDEBUG
-/** Assertion use only. Returns true if the given bounded repeats share any
- * vertices, which we don't allow. */
-static
-bool hasOverlappingRepeats(UNUSED const NGHolder &g,
- const vector<BoundedRepeatData> &repeats) {
+ g[v].index, d.str().c_str());
+ reached_by_fixed_tops->insert(v);
+ }
+ }
+}
+
+#ifndef NDEBUG
+/** Assertion use only. Returns true if the given bounded repeats share any
+ * vertices, which we don't allow. */
+static
+bool hasOverlappingRepeats(UNUSED const NGHolder &g,
+ const vector<BoundedRepeatData> &repeats) {
unordered_set<NFAVertex> involved;
-
- for (const auto &br : repeats) {
- if (contains(involved, br.cyclic)) {
+
+ for (const auto &br : repeats) {
+ if (contains(involved, br.cyclic)) {
DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index);
- return true;
- }
- if (contains(involved, br.pos_trigger)) {
+ return true;
+ }
+ if (contains(involved, br.pos_trigger)) {
DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index);
- return true;
- }
- for (auto v : br.tug_triggers) {
- if (contains(involved, v)) {
+ return true;
+ }
+ for (auto v : br.tug_triggers) {
+ if (contains(involved, v)) {
DEBUG_PRINTF("already seen tug %zu\n", g[v].index);
- return true;
- }
- }
-
- involved.insert(br.cyclic);
- involved.insert(br.pos_trigger);
- involved.insert(br.tug_triggers.begin(), br.tug_triggers.end());
- }
-
- return false;
-}
-
-#endif // NDEBUG
-
-/**
- * Identifies so-called "nasty" repeats, in which the reachability of both the
- * repeat itself and its tugs are wide, which means that executing the NFA will
- * likely be bogged down in exception processing.
- */
-static
-bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi,
+ return true;
+ }
+ }
+
+ involved.insert(br.cyclic);
+ involved.insert(br.pos_trigger);
+ involved.insert(br.tug_triggers.begin(), br.tug_triggers.end());
+ }
+
+ return false;
+}
+
+#endif // NDEBUG
+
+/**
+ * Identifies so-called "nasty" repeats, in which the reachability of both the
+ * repeat itself and its tugs are wide, which means that executing the NFA will
+ * likely be bogged down in exception processing.
+ */
+static
+bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi,
const unordered_map<NFAVertex, NFAVertexDepth> &depths) {
- if (num_vertices(g) > NFA_MAX_STATES) {
- // We may have no choice but to implement this repeat to get the graph
- // down to a tractable number of vertices.
- return false;
- }
-
- if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
- DEBUG_PRINTF("would generate a lazy tug, repeat is OK\n");
- return false;
- }
-
- const NFAVertex first = rsi.vertices.front();
- DEBUG_PRINTF("min depth from startds = %s\n",
- depths.at(first).fromStartDotStar.min.str().c_str());
- if (depths.at(first).fromStartDotStar.min > depth(2)) {
- return false;
- }
-
- NFAVertex last = rsi.vertices.back();
- const CharReach &cyclicreach = g[last].char_reach;
- CharReach tugreach;
- for (auto v : adjacent_vertices_range(last, g)) {
- if (v == last || is_special(v, g)) {
- continue;
- }
- tugreach |= g[v].char_reach;
- }
- // Deal with unpeeled cases.
- if (tugreach.none()) {
- tugreach = cyclicreach;
- }
- DEBUG_PRINTF("tugreach.count=%zu, cyclicreach.count=%zu\n",
- tugreach.count(), cyclicreach.count());
- return (tugreach.count() > 200) && (cyclicreach.count() > 200);
-}
-
-void analyseRepeats(NGHolder &g, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- vector<BoundedRepeatData> *repeats, bool streaming,
- bool simple_model_selection, const Grey &grey,
- bool *reformed_start_ds) {
- if (!grey.allowExtendedNFA || !grey.allowLimExNFA) {
- return;
- }
-
- // Quick sanity test.
- assert(allMatchStatesHaveReports(g));
-
-#ifndef NDEBUG
- // So we can assert that the number of tops hasn't changed at the end of
- // this analysis.
+ if (num_vertices(g) > NFA_MAX_STATES) {
+ // We may have no choice but to implement this repeat to get the graph
+ // down to a tractable number of vertices.
+ return false;
+ }
+
+ if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
+ DEBUG_PRINTF("would generate a lazy tug, repeat is OK\n");
+ return false;
+ }
+
+ const NFAVertex first = rsi.vertices.front();
+ DEBUG_PRINTF("min depth from startds = %s\n",
+ depths.at(first).fromStartDotStar.min.str().c_str());
+ if (depths.at(first).fromStartDotStar.min > depth(2)) {
+ return false;
+ }
+
+ NFAVertex last = rsi.vertices.back();
+ const CharReach &cyclicreach = g[last].char_reach;
+ CharReach tugreach;
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (v == last || is_special(v, g)) {
+ continue;
+ }
+ tugreach |= g[v].char_reach;
+ }
+ // Deal with unpeeled cases.
+ if (tugreach.none()) {
+ tugreach = cyclicreach;
+ }
+ DEBUG_PRINTF("tugreach.count=%zu, cyclicreach.count=%zu\n",
+ tugreach.count(), cyclicreach.count());
+ return (tugreach.count() > 200) && (cyclicreach.count() > 200);
+}
+
+void analyseRepeats(NGHolder &g, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ vector<BoundedRepeatData> *repeats, bool streaming,
+ bool simple_model_selection, const Grey &grey,
+ bool *reformed_start_ds) {
+ if (!grey.allowExtendedNFA || !grey.allowLimExNFA) {
+ return;
+ }
+
+ // Quick sanity test.
+ assert(allMatchStatesHaveReports(g));
+
+#ifndef NDEBUG
+ // So we can assert that the number of tops hasn't changed at the end of
+ // this analysis.
const flat_set<u32> allTops = getTops(g);
-#endif
-
- // Later on, we're (a little bit) dependent on depth information for
- // unpeeling and so forth. Note that these depths MUST be maintained when
- // new vertices are added.
+#endif
+
+ // Later on, we're (a little bit) dependent on depth information for
+ // unpeeling and so forth. Note that these depths MUST be maintained when
+ // new vertices are added.
unordered_map<NFAVertex, NFAVertexDepth> depths;
- findInitDepths(g, depths);
-
- // Construct our list of subgraphs with the same reach using BGL magic.
- vector<ReachSubgraph> rs;
- buildReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
-
- // Validate and split subgraphs.
- checkReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
-
- // Identify which subgraphs represent bounded repeats in forms ("cliches")
- // that we accept, and mark the others as bad.
- for (auto &rsi: rs) {
- if (!processSubgraph(g, rsi, grey.minExtBoundedRepeatSize)) {
- rsi.bad = true;
- continue;
- }
-
- DEBUG_PRINTF("rsi min %s=max=%s\n", rsi.repeatMin.str().c_str(),
- rsi.repeatMax.str().c_str());
-
- // Identify repeats with wide cyclic and tug reach which will produce
- // low-performance implementations and avoid doing them.
- if (repeatIsNasty(g, rsi, depths)) {
- DEBUG_PRINTF("marking nasty repeat as bad\n");
- rsi.bad = true;
- }
- }
-
- // Remove bad cases, then sort remaining subgraphs in descending size
- // order.
- rs.erase(remove_if(rs.begin(), rs.end(),
- [](const ReachSubgraph &r) { return r.bad; }),
- rs.end());
- stable_sort(rs.begin(), rs.end(),
- [](const ReachSubgraph &a, const ReachSubgraph &b) {
- return a.vertices.size() > b.vertices.size();
- });
-
- if (!streaming && !givesBetterModel(g, rs)) {
- /* in block mode, there is no state space so we are only looking for
- * performance wins */
- DEBUG_PRINTF("repeat would not reduce NFA model size, skipping\n");
- return;
- }
-
- if (rs.empty()) {
- /* no good repeats */
- return;
- }
-
- // Store a copy of the original, unmodified graph in case we need to revert
- // back: in particular, due to tug cloning it is possible to build a graph
- // that was bigger than the original. See UE-2370. FIXME: smarter analysis
- // could make this unnecessary?
- const unique_ptr<const NGHolder> orig_g(cloneHolder(g));
-
+ findInitDepths(g, depths);
+
+ // Construct our list of subgraphs with the same reach using BGL magic.
+ vector<ReachSubgraph> rs;
+ buildReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
+
+ // Validate and split subgraphs.
+ checkReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
+
+ // Identify which subgraphs represent bounded repeats in forms ("cliches")
+ // that we accept, and mark the others as bad.
+ for (auto &rsi: rs) {
+ if (!processSubgraph(g, rsi, grey.minExtBoundedRepeatSize)) {
+ rsi.bad = true;
+ continue;
+ }
+
+ DEBUG_PRINTF("rsi min %s=max=%s\n", rsi.repeatMin.str().c_str(),
+ rsi.repeatMax.str().c_str());
+
+ // Identify repeats with wide cyclic and tug reach which will produce
+ // low-performance implementations and avoid doing them.
+ if (repeatIsNasty(g, rsi, depths)) {
+ DEBUG_PRINTF("marking nasty repeat as bad\n");
+ rsi.bad = true;
+ }
+ }
+
+ // Remove bad cases, then sort remaining subgraphs in descending size
+ // order.
+ rs.erase(remove_if(rs.begin(), rs.end(),
+ [](const ReachSubgraph &r) { return r.bad; }),
+ rs.end());
+ stable_sort(rs.begin(), rs.end(),
+ [](const ReachSubgraph &a, const ReachSubgraph &b) {
+ return a.vertices.size() > b.vertices.size();
+ });
+
+ if (!streaming && !givesBetterModel(g, rs)) {
+ /* in block mode, there is no state space so we are only looking for
+ * performance wins */
+ DEBUG_PRINTF("repeat would not reduce NFA model size, skipping\n");
+ return;
+ }
+
+ if (rs.empty()) {
+ /* no good repeats */
+ return;
+ }
+
+ // Store a copy of the original, unmodified graph in case we need to revert
+ // back: in particular, due to tug cloning it is possible to build a graph
+ // that was bigger than the original. See UE-2370. FIXME: smarter analysis
+ // could make this unnecessary?
+ const unique_ptr<const NGHolder> orig_g(cloneHolder(g));
+
unordered_set<NFAVertex> reached_by_fixed_tops;
- if (is_triggered(g)) {
- populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops);
- }
-
- // Go to town on the remaining acceptable subgraphs.
+ if (is_triggered(g)) {
+ populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops);
+ }
+
+ // Go to town on the remaining acceptable subgraphs.
unordered_set<NFAVertex> created;
- for (auto &rsi : rs) {
+ for (auto &rsi : rs) {
DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n",
- g[rsi.vertices.front()].index,
- rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
-
- if (!peelSubgraph(g, grey, rsi, created)) {
- DEBUG_PRINTF("peel failed, skipping\n");
- continue;
- }
-
- // Attempt to peel a vertex if we're up against startDs, for
- // performance reasons.
- peelStartDotStar(g, depths, grey, rsi);
-
- // Our peeling passes may have killed off this repeat.
- if (rsi.bad) {
- continue;
- }
-
- selectHistoryScheme(g, rm, rsi, depths, reached_by_fixed_tops, triggers,
- *repeats, simple_model_selection);
-
- if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
- DEBUG_PRINTF("accepty-rosy graph\n");
- replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
- } else if (endsInAcceptEod(g, rsi)) {
- DEBUG_PRINTF("accepty-rosy graph\n");
- replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
- } else {
- replaceSubgraphWithSpecial(g, rsi, repeats, depths, created);
- }
-
- // Some of our analyses require correctly numbered vertices, so we
- // renumber after changes.
+ g[rsi.vertices.front()].index,
+ rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
+
+ if (!peelSubgraph(g, grey, rsi, created)) {
+ DEBUG_PRINTF("peel failed, skipping\n");
+ continue;
+ }
+
+ // Attempt to peel a vertex if we're up against startDs, for
+ // performance reasons.
+ peelStartDotStar(g, depths, grey, rsi);
+
+ // Our peeling passes may have killed off this repeat.
+ if (rsi.bad) {
+ continue;
+ }
+
+ selectHistoryScheme(g, rm, rsi, depths, reached_by_fixed_tops, triggers,
+ *repeats, simple_model_selection);
+
+ if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
+ DEBUG_PRINTF("accepty-rosy graph\n");
+ replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
+ } else if (endsInAcceptEod(g, rsi)) {
+ DEBUG_PRINTF("accepty-rosy graph\n");
+ replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
+ } else {
+ replaceSubgraphWithSpecial(g, rsi, repeats, depths, created);
+ }
+
+ // Some of our analyses require correctly numbered vertices, so we
+ // renumber after changes.
renumber_vertices(g);
- }
-
- bool modified_start_ds = false;
-
- // We may be able to make improvements to the graph for performance
- // reasons. Note that this may do 'orrible things like remove the startDs
- // cycle, this should only happen quite late in the graph lifecycle.
- if (repeats->size() == 1) {
- if (g.kind == NFA_OUTFIX) {
- improveLeadingRepeatOutfix(g, repeats->back(), created, *repeats);
- // (Does not modify startDs, so we don't need to set
- // reformed_start_ds for this case.)
- } else {
- modified_start_ds =
- improveLeadingRepeat(g, repeats->back(), created, *repeats);
- }
- }
-
- if (reformed_start_ds) {
- *reformed_start_ds = modified_start_ds;
- }
-
- if (!repeats->empty()) {
- if (num_vertices(g) > NFA_MAX_STATES) {
- // We've managed to build an unimplementable NFA. Swap back to the
- // original.
- DEBUG_PRINTF("NFA has %zu vertices; swapping back to the "
- "original graph\n", num_vertices(g));
- clear_graph(g);
- assert(orig_g);
- cloneHolder(g, *orig_g);
- repeats->clear();
- }
-
- // Sanity test: we don't want any repeats that share special vertices
- // as our construction code later can't cope with it.
- assert(!hasOverlappingRepeats(g, *repeats));
-
- // We have modified the graph, so we need to ensure that our edges
- // and vertices are correctly numbered.
+ }
+
+ bool modified_start_ds = false;
+
+ // We may be able to make improvements to the graph for performance
+ // reasons. Note that this may do 'orrible things like remove the startDs
+ // cycle, this should only happen quite late in the graph lifecycle.
+ if (repeats->size() == 1) {
+ if (g.kind == NFA_OUTFIX) {
+ improveLeadingRepeatOutfix(g, repeats->back(), created, *repeats);
+ // (Does not modify startDs, so we don't need to set
+ // reformed_start_ds for this case.)
+ } else {
+ modified_start_ds =
+ improveLeadingRepeat(g, repeats->back(), created, *repeats);
+ }
+ }
+
+ if (reformed_start_ds) {
+ *reformed_start_ds = modified_start_ds;
+ }
+
+ if (!repeats->empty()) {
+ if (num_vertices(g) > NFA_MAX_STATES) {
+ // We've managed to build an unimplementable NFA. Swap back to the
+ // original.
+ DEBUG_PRINTF("NFA has %zu vertices; swapping back to the "
+ "original graph\n", num_vertices(g));
+ clear_graph(g);
+ assert(orig_g);
+ cloneHolder(g, *orig_g);
+ repeats->clear();
+ }
+
+ // Sanity test: we don't want any repeats that share special vertices
+ // as our construction code later can't cope with it.
+ assert(!hasOverlappingRepeats(g, *repeats));
+
+ // We have modified the graph, so we need to ensure that our edges
+ // and vertices are correctly numbered.
renumber_vertices(g);
renumber_edges(g);
- // Remove stray report IDs.
- clearReports(g);
- }
-
- // Quick sanity tests.
- assert(allMatchStatesHaveReports(g));
- assert(!is_triggered(g) || getTops(g) == allTops);
-}
-
-/**
- * \brief True if the non-special vertices in the given graph all have the same
- * character reachability.
- */
-static
-bool allOneReach(const NGHolder &g) {
- const CharReach *cr = nullptr;
- for (const auto &v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (!cr) {
- cr = &g[v].char_reach;
- } else {
- if (*cr != g[v].char_reach) {
- return false;
- }
- }
- }
- return true;
-}
-
-bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) {
- assert(allMatchStatesHaveReports(g));
-
- DEBUG_PRINTF("entry\n");
-
- // Must be start anchored.
- assert(edge(g.startDs, g.startDs, g).second);
+ // Remove stray report IDs.
+ clearReports(g);
+ }
+
+ // Quick sanity tests.
+ assert(allMatchStatesHaveReports(g));
+ assert(!is_triggered(g) || getTops(g) == allTops);
+}
+
+/**
+ * \brief True if the non-special vertices in the given graph all have the same
+ * character reachability.
+ */
+static
+bool allOneReach(const NGHolder &g) {
+ const CharReach *cr = nullptr;
+ for (const auto &v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (!cr) {
+ cr = &g[v].char_reach;
+ } else {
+ if (*cr != g[v].char_reach) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) {
+ assert(allMatchStatesHaveReports(g));
+
+ DEBUG_PRINTF("entry\n");
+
+ // Must be start anchored.
+ assert(edge(g.startDs, g.startDs, g).second);
if (out_degree(g.startDs, g) > 1) {
- DEBUG_PRINTF("Unanchored\n");
- return false;
- }
-
- // Must not be EOD-anchored.
- assert(edge(g.accept, g.acceptEod, g).second);
+ DEBUG_PRINTF("Unanchored\n");
+ return false;
+ }
+
+ // Must not be EOD-anchored.
+ assert(edge(g.accept, g.acceptEod, g).second);
if (in_degree(g.acceptEod, g) > 1) {
- DEBUG_PRINTF("EOD anchored\n");
- return false;
- }
-
- // Must have precisely one top.
+ DEBUG_PRINTF("EOD anchored\n");
+ return false;
+ }
+
+ // Must have precisely one top.
if (is_triggered(g) && !onlyOneTop(g)) {
- DEBUG_PRINTF("Too many tops\n");
- return false;
- }
-
- if (!allOneReach(g)) {
- DEBUG_PRINTF("vertices with different reach\n");
- return false;
- }
-
- // We allow this code to report true for any repeat, even for '.*' or '.+'
- // cases.
- const u32 minNumVertices = 1;
-
- vector<ReachSubgraph> rs;
- buildReachSubgraphs(g, rs, minNumVertices);
- checkReachSubgraphs(g, rs, minNumVertices);
- if (rs.size() != 1) {
- DEBUG_PRINTF("too many subgraphs\n");
- return false;
- }
-
- ReachSubgraph &rsi = *rs.begin();
- if (!processSubgraph(g, rsi, minNumVertices)) {
- DEBUG_PRINTF("not a supported repeat\n");
- return false;
- }
-
- if (rsi.vertices.size() + N_SPECIALS != num_vertices(g)) {
- DEBUG_PRINTF("repeat doesn't span graph\n");
- return false;
- }
-
- assert(!rsi.bad);
- assert(rsi.vertices.size() >= minNumVertices);
-
- const NFAVertex v = rsi.vertices.back();
-
- repeat.reach = g[v].char_reach;
- repeat.bounds.min = rsi.repeatMin;
- repeat.bounds.max = rsi.repeatMax;
- insert(&repeat.reports, g[v].reports);
-
- if (isVacuous(g)) {
- // This graph might be a {0,N} or {0,} repeat. For this to be true, we
- // must have found a {1,N} or {1,} repeat and the start vertex must
- // have the same report set as the vertices in the repeat.
- if (repeat.bounds.min == depth(1) &&
- g[g.start].reports == g[v].reports) {
+ DEBUG_PRINTF("Too many tops\n");
+ return false;
+ }
+
+ if (!allOneReach(g)) {
+ DEBUG_PRINTF("vertices with different reach\n");
+ return false;
+ }
+
+ // We allow this code to report true for any repeat, even for '.*' or '.+'
+ // cases.
+ const u32 minNumVertices = 1;
+
+ vector<ReachSubgraph> rs;
+ buildReachSubgraphs(g, rs, minNumVertices);
+ checkReachSubgraphs(g, rs, minNumVertices);
+ if (rs.size() != 1) {
+ DEBUG_PRINTF("too many subgraphs\n");
+ return false;
+ }
+
+ ReachSubgraph &rsi = *rs.begin();
+ if (!processSubgraph(g, rsi, minNumVertices)) {
+ DEBUG_PRINTF("not a supported repeat\n");
+ return false;
+ }
+
+ if (rsi.vertices.size() + N_SPECIALS != num_vertices(g)) {
+ DEBUG_PRINTF("repeat doesn't span graph\n");
+ return false;
+ }
+
+ assert(!rsi.bad);
+ assert(rsi.vertices.size() >= minNumVertices);
+
+ const NFAVertex v = rsi.vertices.back();
+
+ repeat.reach = g[v].char_reach;
+ repeat.bounds.min = rsi.repeatMin;
+ repeat.bounds.max = rsi.repeatMax;
+ insert(&repeat.reports, g[v].reports);
+
+ if (isVacuous(g)) {
+ // This graph might be a {0,N} or {0,} repeat. For this to be true, we
+ // must have found a {1,N} or {1,} repeat and the start vertex must
+ // have the same report set as the vertices in the repeat.
+ if (repeat.bounds.min == depth(1) &&
+ g[g.start].reports == g[v].reports) {
repeat.bounds.min = depth(0);
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- } else {
- DEBUG_PRINTF("not a supported repeat\n");
- return false;
- }
- }
-
- assert(all_reports(g) == set<ReportID>(begin(g[v].reports),
- end(g[v].reports)));
- return true;
-}
-
-void findRepeats(const NGHolder &h, u32 minRepeatVertices,
- vector<GraphRepeatInfo> *repeats_out) {
- // Construct our list of subgraphs with the same reach using BGL magic.
- vector<ReachSubgraph> rs;
- buildReachSubgraphs(h, rs, minRepeatVertices);
- checkReachSubgraphs(h, rs, minRepeatVertices);
-
- for (auto &rsi : rs) {
- if (!processSubgraph(h, rsi, minRepeatVertices)) {
- continue;
- }
-
- DEBUG_PRINTF("rsi min=%s max=%s\n", rsi.repeatMin.str().c_str(),
- rsi.repeatMax.str().c_str());
-
- depth repeatMax = rsi.repeatMax;
-
- vector<BoundedRepeatData> all_repeats; /* we don't mutate the graph in
- * this path */
- if (hasCyclicSupersetEntryPath(h, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
- "superset of reach\n");
- repeatMax = depth::infinity(); /* will continue to pump out matches */
- }
- if (hasCyclicSupersetExitPath(h, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
- "superset of reach\n");
- repeatMax = depth::infinity(); /* will continue to pump out matches */
- }
-
- repeats_out->push_back(GraphRepeatInfo());
- GraphRepeatInfo &ri = repeats_out->back();
- ri.vertices.swap(rsi.vertices);
- ri.repeatMin = rsi.repeatMin;
- ri.repeatMax = repeatMax;
- }
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ } else {
+ DEBUG_PRINTF("not a supported repeat\n");
+ return false;
+ }
+ }
+
+ assert(all_reports(g) == set<ReportID>(begin(g[v].reports),
+ end(g[v].reports)));
+ return true;
+}
+
+void findRepeats(const NGHolder &h, u32 minRepeatVertices,
+ vector<GraphRepeatInfo> *repeats_out) {
+ // Construct our list of subgraphs with the same reach using BGL magic.
+ vector<ReachSubgraph> rs;
+ buildReachSubgraphs(h, rs, minRepeatVertices);
+ checkReachSubgraphs(h, rs, minRepeatVertices);
+
+ for (auto &rsi : rs) {
+ if (!processSubgraph(h, rsi, minRepeatVertices)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("rsi min=%s max=%s\n", rsi.repeatMin.str().c_str(),
+ rsi.repeatMax.str().c_str());
+
+ depth repeatMax = rsi.repeatMax;
+
+ vector<BoundedRepeatData> all_repeats; /* we don't mutate the graph in
+ * this path */
+ if (hasCyclicSupersetEntryPath(h, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
+ "superset of reach\n");
+ repeatMax = depth::infinity(); /* will continue to pump out matches */
+ }
+ if (hasCyclicSupersetExitPath(h, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
+ "superset of reach\n");
+ repeatMax = depth::infinity(); /* will continue to pump out matches */
+ }
+
+ repeats_out->push_back(GraphRepeatInfo());
+ GraphRepeatInfo &ri = repeats_out->back();
+ ri.vertices.swap(rsi.vertices);
+ ri.repeatMin = rsi.repeatMin;
+ ri.repeatMax = repeatMax;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h
index cfd804b7ef..330e33c340 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h
@@ -1,160 +1,160 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded repeat analysis.
- */
-
-#ifndef NG_REPEAT_H
-#define NG_REPEAT_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
-#include "nfa/repeat_internal.h"
-#include "util/depth.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded repeat analysis.
+ */
+
+#ifndef NG_REPEAT_H
+#define NG_REPEAT_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
+#include "nfa/repeat_internal.h"
+#include "util/depth.h"
#include "util/flat_containers.h"
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-struct Grey;
-
-/**
- * \brief Everything you need to know about a bounded repeat that we have
- * transformed.
- */
-struct BoundedRepeatData {
- BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
- u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
- const std::vector<NFAVertex> &tug_in)
- : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
- cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
-
- BoundedRepeatData() = delete; // no default construction allowed.
-
- enum RepeatType type; //!< selected type based on bounds and structure
- depth repeatMin; //!< minimum repeat bound
- depth repeatMax; //!< maximum repeat bound
- u32 minPeriod; //!< min trigger period
- NFAVertex cyclic; //!< cyclic vertex representing repeat in graph
- NFAVertex pos_trigger; //!< positive trigger vertex
- std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
-};
-
-/**
- * \brief Run the bounded repeat analysis and transform the graph where
- * bounded repeats are found.
- *
- * \param h
- * Graph to operate on.
- * \param rm
- * ReportManager, or nullptr if the graph's reports are internal (e.g. for
- * Rose use).
- * \param fixed_depth_tops
- * Map of top to possible trigger depth.
- * \param triggers
- * Map of top to the vector of triggers (i.e. preceding literals/masks)
- * \param repeats
- * Repeat info is filled in for caller here.
- * \param streaming
- * True if we're in streaming mode.
- * \param simple_model_selection
- * Don't perform complex (and slow) model selection analysis, e.g.
- * determining whether the repeat is sole entry.
- * \param grey
- * Grey box object.
- * \param reformed_start_ds
- * If supplied, this will be set to true if the graph was optimised for a
- * leading first repeat, resulting in the output graph having no self-loop
- * on startDs.
- */
-void analyseRepeats(NGHolder &h, const ReportManager *rm,
- const std::map<u32, u32> &fixed_depth_tops,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
- std::vector<BoundedRepeatData> *repeats, bool streaming,
- bool simple_model_selection, const Grey &grey,
- bool *reformed_start_ds = nullptr);
-
-/**
- * \brief Information on repeats in a holder, returned from \ref findRepeats.
- */
-struct GraphRepeatInfo {
- depth repeatMin; /**< minimum bound */
- depth repeatMax; /**< effective max bound */
- std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
-};
-
-/**
- * \brief Provides information on repeats in the graph.
- */
-void findRepeats(const NGHolder &h, u32 minRepeatVertices,
- std::vector<GraphRepeatInfo> *repeats_out);
-
-struct PureRepeat {
- CharReach reach;
- DepthMinMax bounds;
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+struct Grey;
+
+/**
+ * \brief Everything you need to know about a bounded repeat that we have
+ * transformed.
+ */
+struct BoundedRepeatData {
+ BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
+ u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
+ const std::vector<NFAVertex> &tug_in)
+ : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
+ cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
+
+ BoundedRepeatData() = delete; // no default construction allowed.
+
+ enum RepeatType type; //!< selected type based on bounds and structure
+ depth repeatMin; //!< minimum repeat bound
+ depth repeatMax; //!< maximum repeat bound
+ u32 minPeriod; //!< min trigger period
+ NFAVertex cyclic; //!< cyclic vertex representing repeat in graph
+ NFAVertex pos_trigger; //!< positive trigger vertex
+ std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
+};
+
+/**
+ * \brief Run the bounded repeat analysis and transform the graph where
+ * bounded repeats are found.
+ *
+ * \param h
+ * Graph to operate on.
+ * \param rm
+ * ReportManager, or nullptr if the graph's reports are internal (e.g. for
+ * Rose use).
+ * \param fixed_depth_tops
+ * Map of top to possible trigger depth.
+ * \param triggers
+ * Map of top to the vector of triggers (i.e. preceding literals/masks)
+ * \param repeats
+ * Repeat info is filled in for caller here.
+ * \param streaming
+ * True if we're in streaming mode.
+ * \param simple_model_selection
+ * Don't perform complex (and slow) model selection analysis, e.g.
+ * determining whether the repeat is sole entry.
+ * \param grey
+ * Grey box object.
+ * \param reformed_start_ds
+ * If supplied, this will be set to true if the graph was optimised for a
+ * leading first repeat, resulting in the output graph having no self-loop
+ * on startDs.
+ */
+void analyseRepeats(NGHolder &h, const ReportManager *rm,
+ const std::map<u32, u32> &fixed_depth_tops,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+ std::vector<BoundedRepeatData> *repeats, bool streaming,
+ bool simple_model_selection, const Grey &grey,
+ bool *reformed_start_ds = nullptr);
+
+/**
+ * \brief Information on repeats in a holder, returned from \ref findRepeats.
+ */
+struct GraphRepeatInfo {
+ depth repeatMin; /**< minimum bound */
+ depth repeatMax; /**< effective max bound */
+ std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
+};
+
+/**
+ * \brief Provides information on repeats in the graph.
+ */
+void findRepeats(const NGHolder &h, u32 minRepeatVertices,
+ std::vector<GraphRepeatInfo> *repeats_out);
+
+struct PureRepeat {
+ CharReach reach;
+ DepthMinMax bounds;
flat_set<ReportID> reports;
-
- bool operator==(const PureRepeat &a) const {
- return reach == a.reach && bounds == a.bounds && reports == a.reports;
- }
-
- bool operator!=(const PureRepeat &a) const { return !(*this == a); }
-
- bool operator<(const PureRepeat &a) const {
- if (reach != a.reach) {
- return reach < a.reach;
- }
- if (bounds != a.bounds) {
- return bounds < a.bounds;
- }
- return reports < a.reports;
- }
-};
-
-/**
- * \brief Returns true and fills the given PureRepeat structure if the graph is
- * wholly a repeat over a single character class.
- *
- * For example, something like:
- *
- * /^[a-z]{10,20}/
- *
- * - Note: graph must not use SDS or EOD.
- * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
- * bound on the repeat.
- */
-bool isPureRepeat(const NGHolder &h, PureRepeat &r);
-
-} // namespace ue2
-
-#endif // NG_REPEAT_H
+
+ bool operator==(const PureRepeat &a) const {
+ return reach == a.reach && bounds == a.bounds && reports == a.reports;
+ }
+
+ bool operator!=(const PureRepeat &a) const { return !(*this == a); }
+
+ bool operator<(const PureRepeat &a) const {
+ if (reach != a.reach) {
+ return reach < a.reach;
+ }
+ if (bounds != a.bounds) {
+ return bounds < a.bounds;
+ }
+ return reports < a.reports;
+ }
+};
+
+/**
+ * \brief Returns true and fills the given PureRepeat structure if the graph is
+ * wholly a repeat over a single character class.
+ *
+ * For example, something like:
+ *
+ * /^[a-z]{10,20}/
+ *
+ * - Note: graph must not use SDS or EOD.
+ * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
+ * bound on the repeat.
+ */
+bool isPureRepeat(const NGHolder &h, PureRepeat &r);
+
+} // namespace ue2
+
+#endif // NG_REPEAT_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp
index 4e9b498df0..ed85863b08 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions for working with Report ID sets.
- */
-#include "ng_reports.h"
-
-#include "ng_holder.h"
-#include "util/container.h"
-#include "util/compile_context.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-
-using namespace std;
-
-namespace ue2 {
-
-/** Returns the set of all reports in the graph. */
-set<ReportID> all_reports(const NGHolder &g) {
- set<ReportID> rv;
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- insert(&rv, g[v].reports);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- insert(&rv, g[v].reports);
- }
-
- return rv;
-}
-
-/** True if *all* reports in the graph are exhaustible. */
-bool can_exhaust(const NGHolder &g, const ReportManager &rm) {
- for (ReportID report_id : all_reports(g)) {
- if (rm.getReport(report_id).ekey == INVALID_EKEY) {
- return false;
- }
- }
-
- return true;
-}
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions for working with Report ID sets.
+ */
+#include "ng_reports.h"
+
+#include "ng_holder.h"
+#include "util/container.h"
+#include "util/compile_context.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** Returns the set of all reports in the graph. */
+set<ReportID> all_reports(const NGHolder &g) {
+ set<ReportID> rv;
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ insert(&rv, g[v].reports);
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ insert(&rv, g[v].reports);
+ }
+
+ return rv;
+}
+
+/** True if *all* reports in the graph are exhaustible. */
+bool can_exhaust(const NGHolder &g, const ReportManager &rm) {
+ for (ReportID report_id : all_reports(g)) {
+ if (rm.getReport(report_id).ekey == INVALID_EKEY) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
void set_report(NGHolder &g, ReportID internal_report) {
// First, wipe the report IDs on all vertices.
for (auto v : vertices_range(g)) {
@@ -85,22 +85,22 @@ void set_report(NGHolder &g, ReportID internal_report) {
}
}
-/** Derive a maximum offset for the graph from the max_offset values of its
- * reports. Returns MAX_OFFSET for inf. */
-u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) {
- u64a maxOffset = 0;
- set<ReportID> reports = all_reports(g);
- assert(!reports.empty());
-
- for (ReportID report_id : all_reports(g)) {
- const Report &ir = rm.getReport(report_id);
- if (ir.hasBounds()) {
- maxOffset = max(maxOffset, ir.maxOffset);
- } else {
- return MAX_OFFSET;
- }
- }
- return maxOffset;
-}
-
-} // namespace ue2
+/** Derive a maximum offset for the graph from the max_offset values of its
+ * reports. Returns MAX_OFFSET for inf. */
+u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) {
+ u64a maxOffset = 0;
+ set<ReportID> reports = all_reports(g);
+ assert(!reports.empty());
+
+ for (ReportID report_id : all_reports(g)) {
+ const Report &ir = rm.getReport(report_id);
+ if (ir.hasBounds()) {
+ maxOffset = max(maxOffset, ir.maxOffset);
+ } else {
+ return MAX_OFFSET;
+ }
+ }
+ return maxOffset;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h
index 31c9530880..0f1b43c482 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h
@@ -1,61 +1,61 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions for working with Report ID sets.
- */
-
-#ifndef NG_REPORTS_H
-#define NG_REPORTS_H
-
-#include "ue2common.h"
-
-#include <set>
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-
-/** Returns the set of all reports in the graph. */
-std::set<ReportID> all_reports(const NGHolder &g);
-
-/** True if *all* reports in the graph are exhaustible. */
-bool can_exhaust(const NGHolder &g, const ReportManager &rm);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions for working with Report ID sets.
+ */
+
+#ifndef NG_REPORTS_H
+#define NG_REPORTS_H
+
+#include "ue2common.h"
+
+#include <set>
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+
+/** Returns the set of all reports in the graph. */
+std::set<ReportID> all_reports(const NGHolder &g);
+
+/** True if *all* reports in the graph are exhaustible. */
+bool can_exhaust(const NGHolder &g, const ReportManager &rm);
+
/** Replaces all existing reports on the holder with the provided internal
* report id. */
void set_report(NGHolder &g, ReportID internal_report);
-/** Derive a maximum offset for the graph from the max_offset values of its
- * reports. Returns MAX_OFFSET for inf. */
-u64a findMaxOffset(const NGHolder &g, const ReportManager &rm);
-
-} // namespace ue2
-
-#endif // NG_REPORTS_H
+/** Derive a maximum offset for the graph from the max_offset values of its
+ * reports. Returns MAX_OFFSET for inf. */
+u64a findMaxOffset(const NGHolder &g, const ReportManager &rm);
+
+} // namespace ue2
+
+#endif // NG_REPORTS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp
index 704697e57f..c746877678 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief State numbering and late graph restructuring code.
- */
-#include "ng_restructuring.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-#include <cassert>
-
-#include <boost/graph/transpose_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Connect the start vertex to each of the vertices in \p tops. This is useful
- * temporarily for when we need to run a graph algorithm that expects a single
- * source vertex. */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief State numbering and late graph restructuring code.
+ */
+#include "ng_restructuring.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
+
+#include <algorithm>
+#include <cassert>
+
+#include <boost/graph/transpose_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Connect the start vertex to each of the vertices in \p tops. This is useful
+ * temporarily for when we need to run a graph algorithm that expects a single
+ * source vertex. */
static
void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops,
vector<NFAEdge> &tempEdges) {
for (NFAVertex v : tops) {
- assert(!isLeafNode(v, g));
-
+ assert(!isLeafNode(v, g));
+
const NFAEdge &e = add_edge(g.start, v, g);
tempEdges.push_back(e);
- }
-}
-
+ }
+}
+
/**
* Returns true if start's successors (aside from startDs) are subset of
* startDs's proper successors or if start has no successors other than startDs.
*/
-static
+static
bool startIsRedundant(const NGHolder &g) {
/* We ignore startDs as the self-loop may have been stripped as an
* optimisation for repeats (improveLeadingRepeats()). */
@@ -92,130 +92,130 @@ bool startIsRedundant(const NGHolder &g) {
static
void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops,
- vector<NFAVertex> &ordering) {
- // First, wire up our "tops" to start so that we have a single source,
- // which will give a nicer topo order.
+ vector<NFAVertex> &ordering) {
+ // First, wire up our "tops" to start so that we have a single source,
+ // which will give a nicer topo order.
vector<NFAEdge> tempEdges;
wireStartToTops(g, tops, tempEdges);
-
+
renumber_vertices(g);
-
- vector<NFAVertex> temp = getTopoOrdering(g);
-
+
+ vector<NFAVertex> temp = getTopoOrdering(g);
+
remove_edges(tempEdges, g);
-
- // Move {start, startDs} to the end, so they'll be first when we reverse
+
+ // Move {start, startDs} to the end, so they'll be first when we reverse
// the ordering (if they are required).
- temp.erase(remove(temp.begin(), temp.end(), g.startDs));
- temp.erase(remove(temp.begin(), temp.end(), g.start));
+ temp.erase(remove(temp.begin(), temp.end(), g.startDs));
+ temp.erase(remove(temp.begin(), temp.end(), g.start));
if (proper_out_degree(g.startDs, g)) {
temp.push_back(g.startDs);
}
if (!startIsRedundant(g)) {
temp.push_back(g.start);
}
-
- // Walk ordering, remove vertices that shouldn't be participating in state
- // numbering, such as accepts.
- for (auto v : temp) {
- if (is_any_accept(v, g)) {
- continue; // accepts don't need states
- }
-
- ordering.push_back(v);
- }
-
- // Output of topo order was in reverse.
- reverse(ordering.begin(), ordering.end());
-}
-
-// Returns the number of states.
-static
+
+ // Walk ordering, remove vertices that shouldn't be participating in state
+ // numbering, such as accepts.
+ for (auto v : temp) {
+ if (is_any_accept(v, g)) {
+ continue; // accepts don't need states
+ }
+
+ ordering.push_back(v);
+ }
+
+ // Output of topo order was in reverse.
+ reverse(ordering.begin(), ordering.end());
+}
+
+// Returns the number of states.
+static
unordered_map<NFAVertex, u32>
-getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
+getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
unordered_map<NFAVertex, u32> states;
- for (const auto &v : vertices_range(h)) {
- states[v] = NO_STATE;
- }
-
- u32 stateNum = 0;
- for (auto v : ordering) {
+ for (const auto &v : vertices_range(h)) {
+ states[v] = NO_STATE;
+ }
+
+ u32 stateNum = 0;
+ for (auto v : ordering) {
DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum,
- h[v].index);
- states[v] = stateNum++;
- }
- return states;
-}
-
-/** UE-1648: A state with a single successor that happens to be a predecessor
- * can be given any ol' state ID by the topological ordering, so we sink it
- * next to its pred. This enables better merging. */
-static
-void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
- deque<pair<NFAVertex, NFAVertex>> candidates;
-
- auto start = ordering.begin();
- for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
- NFAVertex v = *it;
- if (is_special(v, g)) {
- continue;
- }
-
- if (out_degree(v, g) == 1) {
- NFAVertex t = *(adjacent_vertices(v, g).first);
- if (v == t) {
- continue;
- }
- if (edge(t, v, g).second && find(start, it, t) != ite) {
- candidates.push_back(make_pair(v, t));
- }
- }
- }
-
- for (const auto &cand : candidates) {
- NFAVertex v = cand.first, u = cand.second;
- auto u_it = find(ordering.begin(), ordering.end(), u);
- auto v_it = find(ordering.begin(), ordering.end(), v);
-
- // Only move candidates backwards in the ordering, and only move them
- // when necessary.
- if (u_it >= v_it || distance(u_it, v_it) == 1) {
- continue;
- }
-
+ h[v].index);
+ states[v] = stateNum++;
+ }
+ return states;
+}
+
+/** UE-1648: A state with a single successor that happens to be a predecessor
+ * can be given any ol' state ID by the topological ordering, so we sink it
+ * next to its pred. This enables better merging. */
+static
+void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
+ deque<pair<NFAVertex, NFAVertex>> candidates;
+
+ auto start = ordering.begin();
+ for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
+ NFAVertex v = *it;
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (out_degree(v, g) == 1) {
+ NFAVertex t = *(adjacent_vertices(v, g).first);
+ if (v == t) {
+ continue;
+ }
+ if (edge(t, v, g).second && find(start, it, t) != ite) {
+ candidates.push_back(make_pair(v, t));
+ }
+ }
+ }
+
+ for (const auto &cand : candidates) {
+ NFAVertex v = cand.first, u = cand.second;
+ auto u_it = find(ordering.begin(), ordering.end(), u);
+ auto v_it = find(ordering.begin(), ordering.end(), v);
+
+ // Only move candidates backwards in the ordering, and only move them
+ // when necessary.
+ if (u_it >= v_it || distance(u_it, v_it) == 1) {
+ continue;
+ }
+
DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index);
-
- ordering.erase(v_it);
- ordering.insert(++u_it, v);
- }
-}
-
+
+ ordering.erase(v_it);
+ ordering.insert(++u_it, v);
+ }
+}
+
unordered_map<NFAVertex, u32>
numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) {
- DEBUG_PRINTF("numbering states for holder %p\n", &h);
-
- vector<NFAVertex> ordering;
- getStateOrdering(h, tops, ordering);
-
- optimiseTightLoops(h, ordering);
-
+ DEBUG_PRINTF("numbering states for holder %p\n", &h);
+
+ vector<NFAVertex> ordering;
+ getStateOrdering(h, tops, ordering);
+
+ optimiseTightLoops(h, ordering);
+
return getStateIndices(h, ordering);
-}
-
+}
+
u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) {
- if (state_ids.empty()) {
- return 0;
- }
-
- u32 max_state = 0;
- for (const auto &m : state_ids) {
- if (m.second != NO_STATE) {
- max_state = max(m.second, max_state);
- }
- }
- u32 num_states = max_state + 1;
-
- return num_states;
-}
-
-} // namespace ue2
+ if (state_ids.empty()) {
+ return 0;
+ }
+
+ u32 max_state = 0;
+ for (const auto &m : state_ids) {
+ if (m.second != NO_STATE) {
+ max_state = max(m.second, max_state);
+ }
+ }
+ u32 num_states = max_state + 1;
+
+ return num_states;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h
index 75d19c6294..7c381748fc 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h
@@ -1,64 +1,64 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief State numbering and late graph restructuring code.
*/
-
-/** \file
- * \brief State numbering and late graph restructuring code.
- */
-
-#ifndef NG_RESTRUCTURING_H
-#define NG_RESTRUCTURING_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
+
+#ifndef NG_RESTRUCTURING_H
+#define NG_RESTRUCTURING_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-
+
#include <unordered_map>
-
-namespace ue2 {
-
-/**
- * \brief Special state index value meaning that the vertex will not
- * participate in an (NFA/DFA/etc) implementation.
- */
-static constexpr u32 NO_STATE = ~0;
-
-/**
- * \brief Gives each participating vertex in the graph a unique state index.
- */
+
+namespace ue2 {
+
+/**
+ * \brief Special state index value meaning that the vertex will not
+ * participate in an (NFA/DFA/etc) implementation.
+ */
+static constexpr u32 NO_STATE = ~0;
+
+/**
+ * \brief Gives each participating vertex in the graph a unique state index.
+ */
std::unordered_map<NFAVertex, u32>
numberStates(NGHolder &h, const flat_set<NFAVertex> &tops);
-
-/**
- * \brief Counts the number of states (vertices with state indices) in the
- * graph.
- */
+
+/**
+ * \brief Counts the number of states (vertices with state indices) in the
+ * graph.
+ */
u32 countStates(const std::unordered_map<NFAVertex, u32> &state_ids);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp
index 0f932668c9..bc21d3a13b 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp
@@ -1,299 +1,299 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Reverse acceleration analysis.
- */
-#include "ng_revacc.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ue2common.h"
-#include "nfa/accel.h"
-#include "nfa/nfa_internal.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/graph_range.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Reverse acceleration analysis.
+ */
+#include "ng_revacc.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ue2common.h"
+#include "nfa/accel.h"
+#include "nfa/nfa_internal.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/graph_range.h"
+
#include <set>
-using namespace std;
-
-namespace ue2 {
-
-static
-bool isPseudoNoCaseChar(const CharReach &cr) {
- return cr.count() == 2 && !(cr.find_first() & 32)
- && cr.test(cr.find_first() | 32);
-}
-
-static
-bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
- NFA *nfa) {
- DEBUG_PRINTF("pure eod triggered pattern\n");
-
- /* 2 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
- const CharReach &cr = rev_info.acceptEodReach[i];
- const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
-
- if (!nocase && cr.count() == 1 && cr2.count() == 1) {
- assert(i < minWidth);
- if (i >= minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDEOD;
- nfa->rAccelData.array[0] = (u8)cr.find_first();
- nfa->rAccelData.array[1] = (u8)cr2.find_first();
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
- nfa->rAccelOffset, nfa->rAccelData.dc);
- return true;
- } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
- && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
- assert(i < minWidth);
- if (i >= minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDEOD_NOCASE;
- nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */
- nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
- nfa->rAccelOffset, nfa->rAccelData.dc);
- return true;
- }
- }
- }
-
- single:
- /* 1 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
- const CharReach &cr = rev_info.acceptEodReach[i];
- if (!nocase && cr.count() == 1) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_REOD;
- nfa->rAccelData.c = (u8) cr.find_first();
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel eod %u %02hhx\n",
- nfa->rAccelOffset, nfa->rAccelData.c);
- return true;
- } else if (nocase && isPseudoNoCaseChar(cr)) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_REOD_NOCASE;
- nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
- nfa->rAccelOffset, nfa->rAccelData.c);
- return true;
- }
- }
- }
-
- return false;
-}
-
-static
-bool lookForFloatingSchemes(const RevAccInfo &rev_info,
- const u32 minWidth, NFA *nfa) {
- /* 2 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
- CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
- CharReach cr2 = rev_info.acceptEodReach[i - 1]
- | rev_info.acceptReach[i - 1];
- if (!nocase && cr.count() == 1 && cr2.count() == 1) {
- assert((u8)(i - 1) < minWidth);
- if (i > minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDVERM;
- nfa->rAccelData.array[0] = (u8)cr.find_first();
- nfa->rAccelData.array[1] = (u8)cr2.find_first();
- nfa->rAccelOffset = i;
- DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
- nfa->rAccelOffset, nfa->rAccelData.array[0],
- nfa->rAccelData.array[1]);
- return true;
- } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
- && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
- assert((u8)(i - 1) < minWidth);
- if (i > minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDVERM_NOCASE;
- nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
- nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
- nfa->rAccelOffset = i;
- DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
- nfa->rAccelOffset, nfa->rAccelData.array[0],
- nfa->rAccelData.array[1]);
- return true;
- }
- }
- }
-
- single:
- /* 1 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
- CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
- if (!nocase && cr.count() == 1) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_RVERM;
- nfa->rAccelData.c = (u8)cr.find_first();
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
- nfa->rAccelData.c);
- return true;
- } else if (nocase && isPseudoNoCaseChar(cr)) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_RVERM_NOCASE;
- nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
- nfa->rAccelData.c);
- return true;
- }
- }
- }
-
- return false;
-}
-
-void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
- u32 min_width, bool eod_only) {
- assert(nfa);
-
- if (!rev_info.valid) {
- return;
- }
-
- nfa->rAccelOffset = 1;
-
- assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any());
- if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) {
- DEBUG_PRINTF("expected path to accept\n");
- return;
- }
-
- if (rev_info.acceptReach[0].none()) {
- /* eod only */
-
- if (lookForEodSchemes(rev_info, min_width, nfa)) {
- assert(nfa->rAccelOffset <= min_width);
- return;
- }
- }
-
- if (eod_only) {
- return;
- }
-
- if (!lookForFloatingSchemes(rev_info, min_width, nfa)) {
- DEBUG_PRINTF("failed to accelerate\n");
- }
-}
-
-static
-void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
- vector<CharReach> *reach) {
- set<NFAVertex> vset;
-
- for (auto v : inv_adjacent_vertices_range(terminal, g)) {
- if (!is_special(v, g)) {
- vset.insert(v);
- }
- }
-
- for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
- set<NFAVertex> next;
-
- for (auto v : vset) {
- const CharReach &cr = g[v].char_reach;
- (*reach)[offset] |= cr;
-
- DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
- (*reach)[offset].count());
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == g.start || u == g.startDs) {
- /* kill all subsequent offsets by setting to dot, setting
- * to dot is in someways not accurate as there may be no
- * data at all but neither case can be accelerated */
- for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
- (*reach)[i].setall();
- }
- break;
- } else if (!is_special(u, g)) {
- next.insert(u);
- }
- }
- }
-
- swap(vset, next);
- }
-}
-
-void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
- DEBUG_PRINTF("pop rev info\n");
- populateRevAccelInfo(g, g.accept, &rai.acceptReach);
- populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);
- rai.valid = true;
-}
-
-void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
- DEBUG_PRINTF("merging ra\n");
-
- dest.valid &= vic.valid;
-
- for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
- dest.acceptReach[i] |= vic.acceptReach[i];
- dest.acceptEodReach[i] |= vic.acceptEodReach[i];
- }
-}
-
-RevAccInfo::RevAccInfo(void)
- : valid(false), acceptReach(MAX_RACCEL_OFFSET),
- acceptEodReach(MAX_RACCEL_OFFSET) {}
-
-} // namespace ue2
+using namespace std;
+
+namespace ue2 {
+
+static
+bool isPseudoNoCaseChar(const CharReach &cr) {
+ return cr.count() == 2 && !(cr.find_first() & 32)
+ && cr.test(cr.find_first() | 32);
+}
+
+static
+bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
+ NFA *nfa) {
+ DEBUG_PRINTF("pure eod triggered pattern\n");
+
+ /* 2 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
+ const CharReach &cr = rev_info.acceptEodReach[i];
+ const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
+
+ if (!nocase && cr.count() == 1 && cr2.count() == 1) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDEOD;
+ nfa->rAccelData.array[0] = (u8)cr.find_first();
+ nfa->rAccelData.array[1] = (u8)cr2.find_first();
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
+ nfa->rAccelOffset, nfa->rAccelData.dc);
+ return true;
+ } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
+ && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDEOD_NOCASE;
+ nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */
+ nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
+ nfa->rAccelOffset, nfa->rAccelData.dc);
+ return true;
+ }
+ }
+ }
+
+ single:
+ /* 1 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
+ const CharReach &cr = rev_info.acceptEodReach[i];
+ if (!nocase && cr.count() == 1) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_REOD;
+ nfa->rAccelData.c = (u8) cr.find_first();
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel eod %u %02hhx\n",
+ nfa->rAccelOffset, nfa->rAccelData.c);
+ return true;
+ } else if (nocase && isPseudoNoCaseChar(cr)) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_REOD_NOCASE;
+ nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
+ nfa->rAccelOffset, nfa->rAccelData.c);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static
+bool lookForFloatingSchemes(const RevAccInfo &rev_info,
+ const u32 minWidth, NFA *nfa) {
+ /* 2 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
+ CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
+ CharReach cr2 = rev_info.acceptEodReach[i - 1]
+ | rev_info.acceptReach[i - 1];
+ if (!nocase && cr.count() == 1 && cr2.count() == 1) {
+ assert((u8)(i - 1) < minWidth);
+ if (i > minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDVERM;
+ nfa->rAccelData.array[0] = (u8)cr.find_first();
+ nfa->rAccelData.array[1] = (u8)cr2.find_first();
+ nfa->rAccelOffset = i;
+ DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
+ nfa->rAccelOffset, nfa->rAccelData.array[0],
+ nfa->rAccelData.array[1]);
+ return true;
+ } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
+ && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
+ assert((u8)(i - 1) < minWidth);
+ if (i > minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDVERM_NOCASE;
+ nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
+ nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
+ nfa->rAccelOffset = i;
+ DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
+ nfa->rAccelOffset, nfa->rAccelData.array[0],
+ nfa->rAccelData.array[1]);
+ return true;
+ }
+ }
+ }
+
+ single:
+ /* 1 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
+ CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
+ if (!nocase && cr.count() == 1) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_RVERM;
+ nfa->rAccelData.c = (u8)cr.find_first();
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
+ nfa->rAccelData.c);
+ return true;
+ } else if (nocase && isPseudoNoCaseChar(cr)) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_RVERM_NOCASE;
+ nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
+ nfa->rAccelData.c);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
+ u32 min_width, bool eod_only) {
+ assert(nfa);
+
+ if (!rev_info.valid) {
+ return;
+ }
+
+ nfa->rAccelOffset = 1;
+
+ assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any());
+ if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) {
+ DEBUG_PRINTF("expected path to accept\n");
+ return;
+ }
+
+ if (rev_info.acceptReach[0].none()) {
+ /* eod only */
+
+ if (lookForEodSchemes(rev_info, min_width, nfa)) {
+ assert(nfa->rAccelOffset <= min_width);
+ return;
+ }
+ }
+
+ if (eod_only) {
+ return;
+ }
+
+ if (!lookForFloatingSchemes(rev_info, min_width, nfa)) {
+ DEBUG_PRINTF("failed to accelerate\n");
+ }
+}
+
+static
+void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
+ vector<CharReach> *reach) {
+ set<NFAVertex> vset;
+
+ for (auto v : inv_adjacent_vertices_range(terminal, g)) {
+ if (!is_special(v, g)) {
+ vset.insert(v);
+ }
+ }
+
+ for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
+ set<NFAVertex> next;
+
+ for (auto v : vset) {
+ const CharReach &cr = g[v].char_reach;
+ (*reach)[offset] |= cr;
+
+ DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
+ (*reach)[offset].count());
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == g.start || u == g.startDs) {
+ /* kill all subsequent offsets by setting to dot, setting
+ * to dot is in someways not accurate as there may be no
+ * data at all but neither case can be accelerated */
+ for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
+ (*reach)[i].setall();
+ }
+ break;
+ } else if (!is_special(u, g)) {
+ next.insert(u);
+ }
+ }
+ }
+
+ swap(vset, next);
+ }
+}
+
+void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
+ DEBUG_PRINTF("pop rev info\n");
+ populateRevAccelInfo(g, g.accept, &rai.acceptReach);
+ populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);
+ rai.valid = true;
+}
+
+void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
+ DEBUG_PRINTF("merging ra\n");
+
+ dest.valid &= vic.valid;
+
+ for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
+ dest.acceptReach[i] |= vic.acceptReach[i];
+ dest.acceptEodReach[i] |= vic.acceptEodReach[i];
+ }
+}
+
+RevAccInfo::RevAccInfo(void)
+ : valid(false), acceptReach(MAX_RACCEL_OFFSET),
+ acceptEodReach(MAX_RACCEL_OFFSET) {}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h
index bde54574cb..0ab6a338c2 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h
@@ -1,65 +1,65 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Reverse acceleration analysis.
- */
-
-#ifndef NG_REVACC_H
-#define NG_REVACC_H
-
-#include "util/charreach.h"
-
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class NGHolder;
-
-#define MAX_RACCEL_OFFSET 16
-
-struct RevAccInfo {
- RevAccInfo(void);
- bool valid;
- std::vector<CharReach> acceptReach; /**< bytes which can appear n
- * bytes before a match */
- std::vector<CharReach> acceptEodReach; /**< bytes which can appear n
- * bytes before eod match */
-};
-
-void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info,
- u32 min_width, bool eod_only = false);
-
-void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g);
-void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Reverse acceleration analysis.
+ */
+
+#ifndef NG_REVACC_H
+#define NG_REVACC_H
+
+#include "util/charreach.h"
+
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class NGHolder;
+
+#define MAX_RACCEL_OFFSET 16
+
+struct RevAccInfo {
+ RevAccInfo(void);
+ bool valid;
+ std::vector<CharReach> acceptReach; /**< bytes which can appear n
+ * bytes before a match */
+ std::vector<CharReach> acceptEodReach; /**< bytes which can appear n
+ * bytes before eod match */
+};
+
+void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info,
+ u32 min_width, bool eod_only = false);
+
+void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g);
+void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp
index 86528b4a00..82ee226cec 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp
@@ -1,93 +1,93 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Short Exhaustible Passthroughs.
- *
- * Analysis code for determining whether a graph should be treated specially
- * because it is short and contains exhaustible reports; typically we turn
- * these into outfixes rather than risk them becoming Rose literals.
- *
- * For example, the pattern:
- *
- * /[a-f]/H
- *
- * ... is far better suited to becoming a small outfix that generates one match
- * and goes dead than being split into six one-byte Rose literals that end up
- * in the literal matcher.
- */
-#include "ng_sep.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool checkFromVertex(const NGHolder &g, NFAVertex start) {
- for (auto v : adjacent_vertices_range(start, g)) {
- if (v == g.startDs) {
- continue;
- }
-
- assert(!is_special(v, g)); /* should not be vacuous */
-
- if (!edge(g.startDs, v, g).second) { /* only floating starts */
- return false;
- } else if (out_degree(v, g) == 1
- && edge(v, g.accept, g).second) { /* only floating end */
- ; /* possible sep */
- } else {
- return false;
- }
- }
- return true;
-}
-
-bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) {
- if (!grey.mergeSEP || !can_exhaust(g, rm)) {
- return false;
- }
-
- if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) {
- return false;
- }
-
- assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g));
-
- DEBUG_PRINTF("graph is an SEP\n");
- return true;
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Short Exhaustible Passthroughs.
+ *
+ * Analysis code for determining whether a graph should be treated specially
+ * because it is short and contains exhaustible reports; typically we turn
+ * these into outfixes rather than risk them becoming Rose literals.
+ *
+ * For example, the pattern:
+ *
+ * /[a-f]/H
+ *
+ * ... is far better suited to becoming a small outfix that generates one match
+ * and goes dead than being split into six one-byte Rose literals that end up
+ * in the literal matcher.
+ */
+#include "ng_sep.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool checkFromVertex(const NGHolder &g, NFAVertex start) {
+ for (auto v : adjacent_vertices_range(start, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+
+ assert(!is_special(v, g)); /* should not be vacuous */
+
+ if (!edge(g.startDs, v, g).second) { /* only floating starts */
+ return false;
+ } else if (out_degree(v, g) == 1
+ && edge(v, g.accept, g).second) { /* only floating end */
+ ; /* possible sep */
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) {
+ if (!grey.mergeSEP || !can_exhaust(g, rm)) {
+ return false;
+ }
+
+ if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) {
+ return false;
+ }
+
+ assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g));
+
+ DEBUG_PRINTF("graph is an SEP\n");
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_sep.h b/contrib/libs/hyperscan/src/nfagraph/ng_sep.h
index 4a2bef34f7..d4195c5ef4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_sep.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_sep.h
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Short Exhaustible Passthroughs.
- */
-
-#ifndef NG_SEP_H
-#define NG_SEP_H
-
-namespace ue2 {
-
-struct Grey;
-class NGHolder;
-class ReportManager;
-
-bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Short Exhaustible Passthroughs.
+ */
+
+#ifndef NG_SEP_H
+#define NG_SEP_H
+
+namespace ue2 {
+
+struct Grey;
+class NGHolder;
+class ReportManager;
+
+bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp
index 9c2d9ba38d..9c07f2087c 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp
@@ -1,268 +1,268 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose construction from NGHolder for cases representing small literal
- * sets.
- */
-#include "ng_small_literal_set.h"
-
-#include "grey.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose construction from NGHolder for cases representing small literal
+ * sets.
+ */
+#include "ng_small_literal_set.h"
+
+#include "grey.h"
#include "ng_holder.h"
-#include "ng_util.h"
-#include "rose/rose_build.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
-#include "ue2common.h"
-
-#include <map>
-#include <set>
-#include <vector>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-/** \brief The maximum number of literals to accept per pattern. */
-static const size_t MAX_LITERAL_SET_SIZE = 30;
-
-/**
- * \brief The maximum number of literals to accept per pattern where at least
- * one is weak (has period < MIN_STRONG_PERIOD).
- */
-static const size_t MAX_WEAK_LITERAL_SET_SIZE = 20;
-
-/**
- * \brief The minimum string period to consider a literal "strong" (and not
- * apply the weak size limit).
- */
-static const size_t MIN_STRONG_PERIOD = 3;
-
-namespace {
-
-struct sls_literal {
- bool anchored;
- bool eod;
- ue2_literal s;
-
- explicit sls_literal(bool a) : anchored(a), eod(false) {}
-
- sls_literal append(char c, bool nocase) const {
- sls_literal rv(anchored);
- rv.s = s;
- rv.s.push_back(ue2_literal::elem(c, nocase));
-
- return rv;
- }
-};
-
-static
-bool operator<(const sls_literal &a, const sls_literal &b) {
- ORDER_CHECK(anchored);
- ORDER_CHECK(eod);
- ORDER_CHECK(s);
-
- return false;
-}
-
-} // namespace
-
-static
-bool checkLongMixedSensitivityLiterals(
+#include "ng_util.h"
+#include "rose/rose_build.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
+#include "ue2common.h"
+
+#include <map>
+#include <set>
+#include <vector>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+/** \brief The maximum number of literals to accept per pattern. */
+static const size_t MAX_LITERAL_SET_SIZE = 30;
+
+/**
+ * \brief The maximum number of literals to accept per pattern where at least
+ * one is weak (has period < MIN_STRONG_PERIOD).
+ */
+static const size_t MAX_WEAK_LITERAL_SET_SIZE = 20;
+
+/**
+ * \brief The minimum string period to consider a literal "strong" (and not
+ * apply the weak size limit).
+ */
+static const size_t MIN_STRONG_PERIOD = 3;
+
+namespace {
+
+struct sls_literal {
+ bool anchored;
+ bool eod;
+ ue2_literal s;
+
+ explicit sls_literal(bool a) : anchored(a), eod(false) {}
+
+ sls_literal append(char c, bool nocase) const {
+ sls_literal rv(anchored);
+ rv.s = s;
+ rv.s.push_back(ue2_literal::elem(c, nocase));
+
+ return rv;
+ }
+};
+
+static
+bool operator<(const sls_literal &a, const sls_literal &b) {
+ ORDER_CHECK(anchored);
+ ORDER_CHECK(eod);
+ ORDER_CHECK(s);
+
+ return false;
+}
+
+} // namespace
+
+static
+bool checkLongMixedSensitivityLiterals(
const map<sls_literal, flat_set<ReportID>> &literals) {
- const size_t len = MAX_MASK2_WIDTH;
-
- for (const sls_literal &lit : literals | map_keys) {
- if (mixed_sensitivity(lit.s) && lit.s.length() > len) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-bool findLiterals(const NGHolder &g,
+ const size_t len = MAX_MASK2_WIDTH;
+
+ for (const sls_literal &lit : literals | map_keys) {
+ if (mixed_sensitivity(lit.s) && lit.s.length() > len) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+bool findLiterals(const NGHolder &g,
map<sls_literal, flat_set<ReportID>> *literals) {
- vector<NFAVertex> order = getTopoOrdering(g);
-
- vector<set<sls_literal>> built(num_vertices(g));
- vector<size_t> read_count(num_vertices(g));
-
- for (auto it = order.rbegin(); it != order.rend(); ++it) {
- NFAVertex v = *it;
- set<sls_literal> &out = built[g[v].index];
- read_count[g[v].index] = out_degree(v, g);
-
+ vector<NFAVertex> order = getTopoOrdering(g);
+
+ vector<set<sls_literal>> built(num_vertices(g));
+ vector<size_t> read_count(num_vertices(g));
+
+ for (auto it = order.rbegin(); it != order.rend(); ++it) {
+ NFAVertex v = *it;
+ set<sls_literal> &out = built[g[v].index];
+ read_count[g[v].index] = out_degree(v, g);
+
DEBUG_PRINTF("setting read_count to %zu for %zu\n",
- read_count[g[v].index], g[v].index);
-
- assert(out.empty());
- if (v == g.start) {
- out.insert(sls_literal(true));
- continue;
- } else if (v == g.startDs) {
- out.insert(sls_literal(false));
- continue;
- }
-
- bool eod = v == g.acceptEod;
- bool accept = v == g.accept || v == g.acceptEod;
- const CharReach &cr = g[v].char_reach;
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == g.accept) {
- continue;
- }
-
- if (u == g.start && edge(g.startDs, v, g).second) {
- /* floating start states may have connections to start and
- * startDs - don't create duplicate anchored literals */
- DEBUG_PRINTF("skipping as floating\n");
- continue;
- }
-
- set<sls_literal> &in = built[g[u].index];
+ read_count[g[v].index], g[v].index);
+
+ assert(out.empty());
+ if (v == g.start) {
+ out.insert(sls_literal(true));
+ continue;
+ } else if (v == g.startDs) {
+ out.insert(sls_literal(false));
+ continue;
+ }
+
+ bool eod = v == g.acceptEod;
+ bool accept = v == g.accept || v == g.acceptEod;
+ const CharReach &cr = g[v].char_reach;
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == g.accept) {
+ continue;
+ }
+
+ if (u == g.start && edge(g.startDs, v, g).second) {
+ /* floating start states may have connections to start and
+ * startDs - don't create duplicate anchored literals */
+ DEBUG_PRINTF("skipping as floating\n");
+ continue;
+ }
+
+ set<sls_literal> &in = built[g[u].index];
DEBUG_PRINTF("getting from %zu (%zu reads to go)\n",
- g[u].index, read_count[g[u].index]);
- assert(!in.empty());
- assert(read_count[g[u].index]);
-
- for (const sls_literal &lit : in) {
- if (accept) {
- sls_literal accept_lit = lit; // copy
- accept_lit.eod = eod;
- insert(&(*literals)[accept_lit], g[u].reports);
- continue;
- }
-
- for (size_t c = cr.find_first(); c != cr.npos;
- c = cr.find_next(c)) {
- bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
- && cr.test(mytolower(c));
-
- if (nocase && (char)c == mytolower(c)) {
- continue; /* uppercase already handled us */
- }
-
- out.insert(lit.append((u8)c, nocase));
-
- if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
- DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
- literals->size());
- return false;
- }
- }
- }
-
- read_count[g[u].index]--;
- if (!read_count[g[u].index]) {
+ g[u].index, read_count[g[u].index]);
+ assert(!in.empty());
+ assert(read_count[g[u].index]);
+
+ for (const sls_literal &lit : in) {
+ if (accept) {
+ sls_literal accept_lit = lit; // copy
+ accept_lit.eod = eod;
+ insert(&(*literals)[accept_lit], g[u].reports);
+ continue;
+ }
+
+ for (size_t c = cr.find_first(); c != cr.npos;
+ c = cr.find_next(c)) {
+ bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
+ && cr.test(mytolower(c));
+
+ if (nocase && (char)c == mytolower(c)) {
+ continue; /* uppercase already handled us */
+ }
+
+ out.insert(lit.append((u8)c, nocase));
+
+ if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
+ DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
+ literals->size());
+ return false;
+ }
+ }
+ }
+
+ read_count[g[u].index]--;
+ if (!read_count[g[u].index]) {
DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index);
- in.clear();
- }
- }
- }
-
- return true;
-}
-
-static
+ in.clear();
+ }
+ }
+ }
+
+ return true;
+}
+
+static
size_t min_period(const map<sls_literal, flat_set<ReportID>> &literals) {
- size_t rv = SIZE_MAX;
-
- for (const sls_literal &lit : literals | map_keys) {
- rv = min(rv, minStringPeriod(lit.s));
- }
- DEBUG_PRINTF("min period %zu\n", rv);
- return rv;
-}
-
-// If this component is just a small set of literals and can be handled by
-// Rose, feed it directly into rose.
-bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
- const CompileContext &cc) {
- if (!cc.grey.allowSmallLiteralSet) {
- return false;
- }
-
- if (!isAcyclic(g)) {
- /* literal sets would typically be acyclic... */
- DEBUG_PRINTF("not acyclic\n");
- return false;
- }
-
+ size_t rv = SIZE_MAX;
+
+ for (const sls_literal &lit : literals | map_keys) {
+ rv = min(rv, minStringPeriod(lit.s));
+ }
+ DEBUG_PRINTF("min period %zu\n", rv);
+ return rv;
+}
+
+// If this component is just a small set of literals and can be handled by
+// Rose, feed it directly into rose.
+bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
+ const CompileContext &cc) {
+ if (!cc.grey.allowSmallLiteralSet) {
+ return false;
+ }
+
+ if (!isAcyclic(g)) {
+ /* literal sets would typically be acyclic... */
+ DEBUG_PRINTF("not acyclic\n");
+ return false;
+ }
+
if (!hasNarrowReachVertex(g, MAX_LITERAL_SET_SIZE * 2 + 1)) {
DEBUG_PRINTF("vertex with wide reach found\n");
return false;
}
- DEBUG_PRINTF("looking for literals\n");
-
+ DEBUG_PRINTF("looking for literals\n");
+
map<sls_literal, flat_set<ReportID>> literals;
- if (!findLiterals(g, &literals)) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- assert(!literals.empty());
-
- if (literals.size() > MAX_LITERAL_SET_SIZE) {
- /* try a mask instead */
- DEBUG_PRINTF("too many literals\n");
- return false;
- }
-
- size_t period = min_period(literals);
- if (period < MIN_STRONG_PERIOD &&
- literals.size() > MAX_WEAK_LITERAL_SET_SIZE) {
- DEBUG_PRINTF("too many literals with weak period\n");
- return false;
- }
-
- if (!checkLongMixedSensitivityLiterals(literals)) {
- DEBUG_PRINTF("long mixed\n");
- return false;
- }
-
- DEBUG_PRINTF("adding %zu literals\n", literals.size());
- for (const auto &m : literals) {
- const sls_literal &lit = m.first;
- const auto &reports = m.second;
- rose.add(lit.anchored, lit.eod, lit.s, reports);
- }
-
- return true;
-}
-
-} // namespace ue2
+ if (!findLiterals(g, &literals)) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ assert(!literals.empty());
+
+ if (literals.size() > MAX_LITERAL_SET_SIZE) {
+ /* try a mask instead */
+ DEBUG_PRINTF("too many literals\n");
+ return false;
+ }
+
+ size_t period = min_period(literals);
+ if (period < MIN_STRONG_PERIOD &&
+ literals.size() > MAX_WEAK_LITERAL_SET_SIZE) {
+ DEBUG_PRINTF("too many literals with weak period\n");
+ return false;
+ }
+
+ if (!checkLongMixedSensitivityLiterals(literals)) {
+ DEBUG_PRINTF("long mixed\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("adding %zu literals\n", literals.size());
+ for (const auto &m : literals) {
+ const sls_literal &lit = m.first;
+ const auto &reports = m.second;
+ rose.add(lit.anchored, lit.eod, lit.s, reports);
+ }
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h
index e626627071..0beca09a96 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h
@@ -1,50 +1,50 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose construction from NGHolder for cases representing small literal
- * sets.
- */
-
-#ifndef NG_SMALL_LITERAL_SET_H
-#define NG_SMALL_LITERAL_SET_H
-
-namespace ue2 {
-
-class RoseBuild;
-class NGHolder;
-struct CompileContext;
-
-/** \brief If the graph represents a small set of literals, feed them directly
- * to rose. Returns true if successful. */
-bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NG_SMALL_LITERAL_SET_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose construction from NGHolder for cases representing small literal
+ * sets.
+ */
+
+#ifndef NG_SMALL_LITERAL_SET_H
+#define NG_SMALL_LITERAL_SET_H
+
+namespace ue2 {
+
+class RoseBuild;
+class NGHolder;
+struct CompileContext;
+
+/** \brief If the graph represents a small set of literals, feed them directly
+ * to rose. Returns true if successful. */
+bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h,
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NG_SMALL_LITERAL_SET_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp
index d23ac408b0..7383817ad8 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp
@@ -1,747 +1,747 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief SOM ("Start of Match") analysis.
- */
+ * \brief SOM ("Start of Match") analysis.
+ */
#include "ng_som.h"
-#include "ng.h"
-#include "ng_dump.h"
-#include "ng_equivalence.h"
-#include "ng_execute.h"
-#include "ng_haig.h"
-#include "ng_limex.h"
-#include "ng_literal_analysis.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_reports.h"
-#include "ng_som_add_redundancy.h"
-#include "ng_som_util.h"
-#include "ng_split.h"
-#include "ng_util.h"
+#include "ng.h"
+#include "ng_dump.h"
+#include "ng_equivalence.h"
+#include "ng_execute.h"
+#include "ng_haig.h"
+#include "ng_limex.h"
+#include "ng_literal_analysis.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_region.h"
+#include "ng_reports.h"
+#include "ng_som_add_redundancy.h"
+#include "ng_som_util.h"
+#include "ng_split.h"
+#include "ng_util.h"
#include "ng_violet.h"
-#include "ng_width.h"
-#include "grey.h"
-#include "ue2common.h"
+#include "ng_width.h"
+#include "grey.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
-#include "parser/position.h"
-#include "som/som.h"
-#include "rose/rose_build.h"
-#include "rose/rose_in_util.h"
-#include "util/alloc.h"
-#include "util/compare.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-
-#include <algorithm>
-#include <map>
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
+#include "parser/position.h"
+#include "som/som.h"
+#include "rose/rose_build.h"
+#include "rose/rose_in_util.h"
+#include "util/alloc.h"
+#include "util/compare.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <map>
#include <unordered_map>
#include <unordered_set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static const size_t MAX_SOM_PLANS = 10;
-static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000;
-
-#define MAX_REV_NFA_PREFIX 80
-
-namespace {
-struct som_plan {
- som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i,
- u32 parent_in) : prefix(p), escapes(e), is_reset(i),
- no_implement(false), parent(parent_in) { }
- shared_ptr<NGHolder> prefix;
- CharReach escapes;
- bool is_reset;
- bool no_implement;
- u32 parent; // index of parent plan in the vector.
-
- // Reporters: a list of vertices in the graph that must be have their
- // reports updated at implementation time to report this plan's
- // som_loc_out.
- vector<NFAVertex> reporters;
-
- // Similar, but these report the som_loc_in.
- vector<NFAVertex> reporters_in;
-};
-}
-
-static
-bool regionCanEstablishSom(const NGHolder &g,
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static const size_t MAX_SOM_PLANS = 10;
+static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000;
+
+#define MAX_REV_NFA_PREFIX 80
+
+namespace {
+struct som_plan {
+ som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i,
+ u32 parent_in) : prefix(p), escapes(e), is_reset(i),
+ no_implement(false), parent(parent_in) { }
+ shared_ptr<NGHolder> prefix;
+ CharReach escapes;
+ bool is_reset;
+ bool no_implement;
+ u32 parent; // index of parent plan in the vector.
+
+ // Reporters: a list of vertices in the graph that must be have their
+ // reports updated at implementation time to report this plan's
+ // som_loc_out.
+ vector<NFAVertex> reporters;
+
+ // Similar, but these report the som_loc_in.
+ vector<NFAVertex> reporters_in;
+};
+}
+
+static
+bool regionCanEstablishSom(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const u32 region, const vector<NFAVertex> &r_exits,
- const vector<DepthMinMax> &depths) {
- if (region == regions.at(g.accept) ||
- region == regions.at(g.acceptEod)) {
- DEBUG_PRINTF("accept in region\n");
- return false;
- }
-
- DEBUG_PRINTF("region %u\n", region);
- for (UNUSED auto v : r_exits) {
+ const u32 region, const vector<NFAVertex> &r_exits,
+ const vector<DepthMinMax> &depths) {
+ if (region == regions.at(g.accept) ||
+ region == regions.at(g.acceptEod)) {
+ DEBUG_PRINTF("accept in region\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("region %u\n", region);
+ for (UNUSED auto v : r_exits) {
DEBUG_PRINTF(" exit %zu\n", g[v].index);
- }
-
- /* simple if each region exit is at fixed distance from SOM. Note SOM does
- not include virtual starts */
- for (auto v : r_exits) {
- assert(regions.at(v) == region);
- const DepthMinMax &d = depths.at(g[v].index);
- if (d.min != d.max) {
+ }
+
+ /* simple if each region exit is at fixed distance from SOM. Note SOM does
+ not include virtual starts */
+ for (auto v : r_exits) {
+ assert(regions.at(v) == region);
+ const DepthMinMax &d = depths.at(g[v].index);
+ if (d.min != d.max) {
DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index,
- d.min.str().c_str(), d.max.str().c_str());
- return false;
- }
- }
+ d.min.str().c_str(), d.max.str().c_str());
+ return false;
+ }
+ }
DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]),
- g[r_exits[0]].index);
-
- return true;
-}
-
-namespace {
-
-struct region_info {
- region_info() : optional(false), dag(false) {}
- vector<NFAVertex> enters;
- vector<NFAVertex> exits;
- vector<NFAVertex> full;
- bool optional; /* skip edges around region */
- bool dag; /* completely acyclic */
-};
-
-}
-
-static
-void buildRegionMapping(const NGHolder &g,
+ g[r_exits[0]].index);
+
+ return true;
+}
+
+namespace {
+
+struct region_info {
+ region_info() : optional(false), dag(false) {}
+ vector<NFAVertex> enters;
+ vector<NFAVertex> exits;
+ vector<NFAVertex> full;
+ bool optional; /* skip edges around region */
+ bool dag; /* completely acyclic */
+};
+
+}
+
+static
+void buildRegionMapping(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- map<u32, region_info> &info,
- bool include_region_0 = false) {
- for (auto v : vertices_range(g)) {
- u32 region = regions.at(v);
- if (!include_region_0 && (is_any_start(v, g) || region == 0)) {
- continue;
- }
- assert(!region || !is_any_start(v, g));
-
- if (is_any_accept(v, g)) {
- continue;
- }
-
- if (isRegionEntry(g, v, regions)) {
- info[region].enters.push_back(v);
- }
- if (isRegionExit(g, v, regions)) {
- info[region].exits.push_back(v);
- }
- info[region].full.push_back(v);
- }
-
- for (auto &m : info) {
- if (!m.second.enters.empty()
- && isOptionalRegion(g, m.second.enters.front(), regions)) {
- m.second.optional = true;
- }
- m.second.dag = true; /* will be cleared for cyclic regions later */
- }
-
- set<NFAEdge> be;
- BackEdges<set<NFAEdge> > backEdgeVisitor(be);
+ map<u32, region_info> &info,
+ bool include_region_0 = false) {
+ for (auto v : vertices_range(g)) {
+ u32 region = regions.at(v);
+ if (!include_region_0 && (is_any_start(v, g) || region == 0)) {
+ continue;
+ }
+ assert(!region || !is_any_start(v, g));
+
+ if (is_any_accept(v, g)) {
+ continue;
+ }
+
+ if (isRegionEntry(g, v, regions)) {
+ info[region].enters.push_back(v);
+ }
+ if (isRegionExit(g, v, regions)) {
+ info[region].exits.push_back(v);
+ }
+ info[region].full.push_back(v);
+ }
+
+ for (auto &m : info) {
+ if (!m.second.enters.empty()
+ && isOptionalRegion(g, m.second.enters.front(), regions)) {
+ m.second.optional = true;
+ }
+ m.second.dag = true; /* will be cleared for cyclic regions later */
+ }
+
+ set<NFAEdge> be;
+ BackEdges<set<NFAEdge> > backEdgeVisitor(be);
boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start));
-
- for (const auto &e : be) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
- if (is_special(u, g) || is_special(v, g)) {
- assert(is_special(u, g) && is_special(v, g));
- continue;
- }
- u32 r = regions.at(v);
- assert(regions.at(u) == r);
- info[r].dag = false;
- }
-
- if (include_region_0) {
- info[0].dag = false;
- }
-
- #ifdef DEBUG
- for (const auto &m : info) {
- u32 r = m.first;
- const region_info &r_i = m.second;
- DEBUG_PRINTF("region %u:%s%s\n", r,
- r_i.dag ? " (dag)" : "",
- r_i.optional ? " (optional)" : "");
- DEBUG_PRINTF(" enters:");
- for (u32 i = 0; i < r_i.enters.size(); i++) {
+
+ for (const auto &e : be) {
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+ if (is_special(u, g) || is_special(v, g)) {
+ assert(is_special(u, g) && is_special(v, g));
+ continue;
+ }
+ u32 r = regions.at(v);
+ assert(regions.at(u) == r);
+ info[r].dag = false;
+ }
+
+ if (include_region_0) {
+ info[0].dag = false;
+ }
+
+ #ifdef DEBUG
+ for (const auto &m : info) {
+ u32 r = m.first;
+ const region_info &r_i = m.second;
+ DEBUG_PRINTF("region %u:%s%s\n", r,
+ r_i.dag ? " (dag)" : "",
+ r_i.optional ? " (optional)" : "");
+ DEBUG_PRINTF(" enters:");
+ for (u32 i = 0; i < r_i.enters.size(); i++) {
printf(" %zu", g[r_i.enters[i]].index);
- }
- printf("\n");
- DEBUG_PRINTF(" exits:");
- for (u32 i = 0; i < r_i.exits.size(); i++) {
+ }
+ printf("\n");
+ DEBUG_PRINTF(" exits:");
+ for (u32 i = 0; i < r_i.exits.size(); i++) {
printf(" %zu", g[r_i.exits[i]].index);
- }
- printf("\n");
- DEBUG_PRINTF(" all:");
- for (u32 i = 0; i < r_i.full.size(); i++) {
+ }
+ printf("\n");
+ DEBUG_PRINTF(" all:");
+ for (u32 i = 0; i < r_i.full.size(); i++) {
printf(" %zu", g[r_i.full[i]].index);
- }
- printf("\n");
- }
- #endif
-}
-
-static
-bool validateXSL(const NGHolder &g,
+ }
+ printf("\n");
+ }
+ #endif
+}
+
+static
+bool validateXSL(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const u32 region, const CharReach &escapes, u32 *bad_region) {
- /* need to check that the escapes escape all of the graph past region */
- u32 first_bad_region = ~0U;
- for (auto v : vertices_range(g)) {
- u32 v_region = regions.at(v);
- if (!is_special(v, g) && v_region > region &&
- (escapes & g[v].char_reach).any()) {
+ const u32 region, const CharReach &escapes, u32 *bad_region) {
+ /* need to check that the escapes escape all of the graph past region */
+ u32 first_bad_region = ~0U;
+ for (auto v : vertices_range(g)) {
+ u32 v_region = regions.at(v);
+ if (!is_special(v, g) && v_region > region &&
+ (escapes & g[v].char_reach).any()) {
DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index);
- first_bad_region = MIN(first_bad_region, v_region);
- }
- }
-
- if (first_bad_region != ~0U) {
- *bad_region = first_bad_region;
- return false;
- }
-
- return true;
-}
-
-static
-bool validateEXSL(const NGHolder &g,
+ first_bad_region = MIN(first_bad_region, v_region);
+ }
+ }
+
+ if (first_bad_region != ~0U) {
+ *bad_region = first_bad_region;
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool validateEXSL(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const u32 region, const CharReach &escapes,
- const NGHolder &prefix, u32 *bad_region) {
- /* EXSL: To be a valid EXSL with escapes e, we require that all states
- * go dead after /[e][^e]*{subsequent prefix match}/.
- */
-
- /* TODO: this is overly conservative as it allow partial matches from the
- * prefix to be considered even when the tail has processed some [^e] */
-
- u32 first_bad_region = ~0U;
- const vector<CharReach> escapes_vec(1, escapes);
- const vector<CharReach> notescapes_vec(1, ~escapes);
-
+ const u32 region, const CharReach &escapes,
+ const NGHolder &prefix, u32 *bad_region) {
+ /* EXSL: To be a valid EXSL with escapes e, we require that all states
+ * go dead after /[e][^e]*{subsequent prefix match}/.
+ */
+
+ /* TODO: this is overly conservative as it allow partial matches from the
+ * prefix to be considered even when the tail has processed some [^e] */
+
+ u32 first_bad_region = ~0U;
+ const vector<CharReach> escapes_vec(1, escapes);
+ const vector<CharReach> notescapes_vec(1, ~escapes);
+
flat_set<NFAVertex> states;
- /* turn on all states past the prefix */
- DEBUG_PRINTF("region %u is cutover\n", region);
- for (auto v : vertices_range(g)) {
- if (!is_special(v, g) && regions.at(v) > region) {
- states.insert(v);
- }
- }
-
- /* process the escapes */
- states = execute_graph(g, escapes_vec, states);
-
- /* flood with any number of not escapes */
+ /* turn on all states past the prefix */
+ DEBUG_PRINTF("region %u is cutover\n", region);
+ for (auto v : vertices_range(g)) {
+ if (!is_special(v, g) && regions.at(v) > region) {
+ states.insert(v);
+ }
+ }
+
+ /* process the escapes */
+ states = execute_graph(g, escapes_vec, states);
+
+ /* flood with any number of not escapes */
flat_set<NFAVertex> prev_states;
- while (prev_states != states) {
- prev_states = states;
- states = execute_graph(g, notescapes_vec, states);
- insert(&states, prev_states);
- }
-
- /* find input starts to use for when we are running the prefix through as
- * when the escape character arrives we may be in matching the prefix
- * already */
+ while (prev_states != states) {
+ prev_states = states;
+ states = execute_graph(g, notescapes_vec, states);
+ insert(&states, prev_states);
+ }
+
+ /* find input starts to use for when we are running the prefix through as
+ * when the escape character arrives we may be in matching the prefix
+ * already */
flat_set<NFAVertex> prefix_start_states;
- for (auto v : vertices_range(prefix)) {
- if (v != prefix.accept && v != prefix.acceptEod
- /* and as we have already made it past the prefix once */
- && v != prefix.start) {
- prefix_start_states.insert(v);
- }
- }
-
- prefix_start_states =
- execute_graph(prefix, escapes_vec, prefix_start_states);
-
- assert(contains(prefix_start_states, prefix.startDs));
- /* see what happens after we feed it the prefix */
- states = execute_graph(g, prefix, prefix_start_states, states);
-
- for (auto v : states) {
- assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
- * on */
- DEBUG_PRINTF("state still active\n");
- first_bad_region = MIN(first_bad_region, regions.at(v));
- }
-
- if (first_bad_region != ~0U) {
- *bad_region = first_bad_region;
- return false;
- }
-
- return true;
-}
-
-static
-bool isPossibleLock(const NGHolder &g,
- map<u32, region_info>::const_iterator region,
- const map<u32, region_info> &info,
- CharReach *escapes_out) {
- /* TODO: we could also check for self-loops on curr region */
-
- /* TODO: some straw-walking logic. lowish priority has we know there can
- * only be optional regions between us and the cyclic */
-
- assert(region != info.end());
- map<u32, region_info>::const_iterator next_region = region;
- ++next_region;
- if (next_region == info.end()) {
- assert(0); /* odd */
- return false;
- }
-
- const region_info &next_info = next_region->second;
- if (next_info.enters.empty()) {
- assert(0); /* odd */
- return false;
- }
-
- if (next_info.full.size() == 1 && !next_info.dag) {
- *escapes_out = ~g[next_info.full.front()].char_reach;
- return true;
- }
-
- return false;
-}
-
-static
-unique_ptr<NGHolder>
+ for (auto v : vertices_range(prefix)) {
+ if (v != prefix.accept && v != prefix.acceptEod
+ /* and as we have already made it past the prefix once */
+ && v != prefix.start) {
+ prefix_start_states.insert(v);
+ }
+ }
+
+ prefix_start_states =
+ execute_graph(prefix, escapes_vec, prefix_start_states);
+
+ assert(contains(prefix_start_states, prefix.startDs));
+ /* see what happens after we feed it the prefix */
+ states = execute_graph(g, prefix, prefix_start_states, states);
+
+ for (auto v : states) {
+ assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
+ * on */
+ DEBUG_PRINTF("state still active\n");
+ first_bad_region = MIN(first_bad_region, regions.at(v));
+ }
+
+ if (first_bad_region != ~0U) {
+ *bad_region = first_bad_region;
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool isPossibleLock(const NGHolder &g,
+ map<u32, region_info>::const_iterator region,
+ const map<u32, region_info> &info,
+ CharReach *escapes_out) {
+ /* TODO: we could also check for self-loops on curr region */
+
+ /* TODO: some straw-walking logic. lowish priority has we know there can
+ * only be optional regions between us and the cyclic */
+
+ assert(region != info.end());
+ map<u32, region_info>::const_iterator next_region = region;
+ ++next_region;
+ if (next_region == info.end()) {
+ assert(0); /* odd */
+ return false;
+ }
+
+ const region_info &next_info = next_region->second;
+ if (next_info.enters.empty()) {
+ assert(0); /* odd */
+ return false;
+ }
+
+ if (next_info.full.size() == 1 && !next_info.dag) {
+ *escapes_out = ~g[next_info.full.front()].char_reach;
+ return true;
+ }
+
+ return false;
+}
+
+static
+unique_ptr<NGHolder>
makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> &regions,
- const region_info &curr, const region_info &next,
- bool renumber = true) {
- const vector<NFAVertex> &curr_exits = curr.exits;
- const vector<NFAVertex> &next_enters = next.enters;
-
- assert(!next_enters.empty());
- assert(!curr_exits.empty());
-
- unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>();
- NGHolder &prefix = *prefix_ptr;
-
- deque<NFAVertex> lhs_verts;
- insert(&lhs_verts, lhs_verts.end(), vertices(g));
-
+ const region_info &curr, const region_info &next,
+ bool renumber = true) {
+ const vector<NFAVertex> &curr_exits = curr.exits;
+ const vector<NFAVertex> &next_enters = next.enters;
+
+ assert(!next_enters.empty());
+ assert(!curr_exits.empty());
+
+ unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>();
+ NGHolder &prefix = *prefix_ptr;
+
+ deque<NFAVertex> lhs_verts;
+ insert(&lhs_verts, lhs_verts.end(), vertices(g));
+
unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix
- fillHolder(&prefix, g, lhs_verts, &lhs_map);
- prefix.kind = NFA_OUTFIX;
-
- // We need a reverse mapping to track regions.
+ fillHolder(&prefix, g, lhs_verts, &lhs_map);
+ prefix.kind = NFA_OUTFIX;
+
+ // We need a reverse mapping to track regions.
unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g
- for (const auto &e : lhs_map) {
- rev_map.emplace(e.second, e.first);
- }
-
- clear_in_edges(prefix.accept, prefix);
- clear_in_edges(prefix.acceptEod, prefix);
- add_edge(prefix.accept, prefix.acceptEod, prefix);
-
- assert(!next_enters.empty());
+ for (const auto &e : lhs_map) {
+ rev_map.emplace(e.second, e.first);
+ }
+
+ clear_in_edges(prefix.accept, prefix);
+ clear_in_edges(prefix.acceptEod, prefix);
+ add_edge(prefix.accept, prefix.acceptEod, prefix);
+
+ assert(!next_enters.empty());
assert(next_enters.front() != NGHolder::null_vertex());
- u32 dead_region = regions.at(next_enters.front());
- DEBUG_PRINTF("curr_region %u, dead_region %u\n",
- regions.at(curr_exits.front()), dead_region);
- for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) {
- if (regions.at(v) >= dead_region) {
- continue;
- }
- /* add edge to new accepts */
- NFAVertex p_v = lhs_map[v];
- add_edge(p_v, prefix.accept, prefix);
- }
-
- assert(in_degree(prefix.accept, prefix) != 0);
-
- /* prune everything past the picked region */
- vector<NFAVertex> to_clear;
- assert(contains(lhs_map, curr_exits.front()));
- NFAVertex p_u = lhs_map[curr_exits.front()];
+ u32 dead_region = regions.at(next_enters.front());
+ DEBUG_PRINTF("curr_region %u, dead_region %u\n",
+ regions.at(curr_exits.front()), dead_region);
+ for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) {
+ if (regions.at(v) >= dead_region) {
+ continue;
+ }
+ /* add edge to new accepts */
+ NFAVertex p_v = lhs_map[v];
+ add_edge(p_v, prefix.accept, prefix);
+ }
+
+ assert(in_degree(prefix.accept, prefix) != 0);
+
+ /* prune everything past the picked region */
+ vector<NFAVertex> to_clear;
+ assert(contains(lhs_map, curr_exits.front()));
+ NFAVertex p_u = lhs_map[curr_exits.front()];
DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index);
- for (auto p_v : adjacent_vertices_range(p_u, prefix)) {
- auto v = rev_map.at(p_v);
- if (p_v == prefix.accept || regions.at(v) < dead_region) {
- continue;
- }
- to_clear.push_back(p_v);
- }
-
- for (auto v : to_clear) {
+ for (auto p_v : adjacent_vertices_range(p_u, prefix)) {
+ auto v = rev_map.at(p_v);
+ if (p_v == prefix.accept || regions.at(v) < dead_region) {
+ continue;
+ }
+ to_clear.push_back(p_v);
+ }
+
+ for (auto v : to_clear) {
DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index);
- clear_in_edges(v, prefix);
- }
-
- pruneUseless(prefix, renumber /* sometimes we want no renumber to keep
- depth map valid */);
-
- assert(num_vertices(prefix) > N_SPECIALS);
- return prefix_ptr;
-}
-
-static
-void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) {
- const u32 temp_slot = UINT32_MAX;
- /* update the som slot on the prefix report */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- assert(reports.size() == 1);
- Report ir = rm.getReport(*reports.begin());
- if (ir.onmatch != temp_slot) {
- continue;
- }
- ir.onmatch = real_slot;
- ReportID rep = rm.getInternalId(ir);
-
- assert(reports.size() == 1);
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
+ clear_in_edges(v, prefix);
+ }
+
+ pruneUseless(prefix, renumber /* sometimes we want no renumber to keep
+ depth map valid */);
+
+ assert(num_vertices(prefix) > N_SPECIALS);
+ return prefix_ptr;
+}
+
+static
+void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) {
+ const u32 temp_slot = UINT32_MAX;
+ /* update the som slot on the prefix report */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ assert(reports.size() == 1);
+ Report ir = rm.getReport(*reports.begin());
+ if (ir.onmatch != temp_slot) {
+ continue;
+ }
+ ir.onmatch = real_slot;
+ ReportID rep = rm.getInternalId(ir);
+
+ assert(reports.size() == 1);
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type,
u32 som_loc, const vector<DepthMinMax> &depths,
bool prefix_by_rev) {
- Report ir = makeCallback(0U, 0);
- ir.type = ir_type;
- ir.onmatch = som_loc;
-
- /* add report for storing in som location on new accepts */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (prefix_by_rev) {
- ir.somDistance = MO_INVALID_IDX; /* will be populated properly
- * later */
- } else {
- const DepthMinMax &d = depths.at(g[v].index);
- assert(d.min == d.max);
- ir.somDistance = d.max;
- }
- ReportID rep = rm.getInternalId(ir);
-
- auto &reports = g[v].reports;
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
+ Report ir = makeCallback(0U, 0);
+ ir.type = ir_type;
+ ir.onmatch = som_loc;
+
+ /* add report for storing in som location on new accepts */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (prefix_by_rev) {
+ ir.somDistance = MO_INVALID_IDX; /* will be populated properly
+ * later */
+ } else {
+ const DepthMinMax &d = depths.at(g[v].index);
+ assert(d.min == d.max);
+ ir.somDistance = d.max;
+ }
+ ReportID rep = rm.getInternalId(ir);
+
+ auto &reports = g[v].reports;
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) {
- /* update the som action on the prefix report */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- assert(reports.size() == 1);
- Report ir = rm.getReport(*reports.begin());
- ir.type = ir_type;
- ReportID rep = rm.getInternalId(ir);
-
- assert(reports.size() == 1);
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
-void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g,
- u32 rev_comp_id) {
- /* update the action on the prefix report, to refer to a reverse nfa,
- * report type is also adjusted. */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- assert(reports.size() == 1);
- Report ir = rm.getReport(*reports.begin());
- switch (ir.type) {
- case INTERNAL_SOM_LOC_SET:
- ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA;
- break;
- case INTERNAL_SOM_LOC_SET_IF_UNSET:
- ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET;
- break;
- case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
- ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
- break;
- default:
- assert(0);
- break;
- }
-
- ir.revNfaIndex = rev_comp_id;
- ReportID rep = rm.getInternalId(ir);
-
- assert(reports.size() == 1);
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
-void setMidfixReports(ReportManager &rm, const som_plan &item,
- const u32 som_slot_in, const u32 som_slot_out) {
- assert(item.prefix);
- NGHolder &g = *item.prefix;
-
- Report ir = makeCallback(0U, 0);
- ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY
- : INTERNAL_SOM_LOC_COPY_IF_WRITABLE;
- ir.onmatch = som_slot_out;
- ir.somDistance = som_slot_in;
- ReportID rep = rm.getInternalId(ir);
-
- /* add report for storing in som location on new accepts */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
-bool finalRegion(const NGHolder &g,
+ /* update the som action on the prefix report */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ assert(reports.size() == 1);
+ Report ir = rm.getReport(*reports.begin());
+ ir.type = ir_type;
+ ReportID rep = rm.getInternalId(ir);
+
+ assert(reports.size() == 1);
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
+void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g,
+ u32 rev_comp_id) {
+ /* update the action on the prefix report, to refer to a reverse nfa,
+ * report type is also adjusted. */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ assert(reports.size() == 1);
+ Report ir = rm.getReport(*reports.begin());
+ switch (ir.type) {
+ case INTERNAL_SOM_LOC_SET:
+ ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA;
+ break;
+ case INTERNAL_SOM_LOC_SET_IF_UNSET:
+ ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET;
+ break;
+ case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+ ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ ir.revNfaIndex = rev_comp_id;
+ ReportID rep = rm.getInternalId(ir);
+
+ assert(reports.size() == 1);
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
+void setMidfixReports(ReportManager &rm, const som_plan &item,
+ const u32 som_slot_in, const u32 som_slot_out) {
+ assert(item.prefix);
+ NGHolder &g = *item.prefix;
+
+ Report ir = makeCallback(0U, 0);
+ ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY
+ : INTERNAL_SOM_LOC_COPY_IF_WRITABLE;
+ ir.onmatch = som_slot_out;
+ ir.somDistance = som_slot_in;
+ ReportID rep = rm.getInternalId(ir);
+
+ /* add report for storing in som location on new accepts */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
+bool finalRegion(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- NFAVertex v) {
- u32 region = regions.at(v);
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != g.accept && w != g.acceptEod && regions.at(w) != region) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g,
+ NFAVertex v) {
+ u32 region = regions.at(v);
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != g.accept && w != g.acceptEod && regions.at(w) != region) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g,
NFAVertex v, ReportType ir_type,
u64a param) {
- assert(!g[v].reports.empty());
-
- flat_set<ReportID> r_new;
-
- for (const ReportID &report_id : g[v].reports) {
- Report ir = rm.getReport(report_id);
-
- if (ir.type != EXTERNAL_CALLBACK) {
- /* we must have already done whatever magic we needed to do to this
- * report */
- r_new.insert(report_id);
- continue;
- }
-
- ir.type = ir_type;
- ir.somDistance = param;
- ReportID rep = rm.getInternalId(ir);
-
+ assert(!g[v].reports.empty());
+
+ flat_set<ReportID> r_new;
+
+ for (const ReportID &report_id : g[v].reports) {
+ Report ir = rm.getReport(report_id);
+
+ if (ir.type != EXTERNAL_CALLBACK) {
+ /* we must have already done whatever magic we needed to do to this
+ * report */
+ r_new.insert(report_id);
+ continue;
+ }
+
+ ir.type = ir_type;
+ ir.somDistance = param;
+ ReportID rep = rm.getInternalId(ir);
+
DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n",
- g[v].index, report_id, rep, ir_type);
- r_new.insert(rep);
- }
- g[v].reports = r_new;
-}
-
-/* updates the reports on all vertices leading to the sink */
-static
-void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
- const vector<DepthMinMax> &depths) {
- for (auto v : inv_adjacent_vertices_range(sink, g)) {
- if (v == g.accept) {
- continue;
- }
-
- const DepthMinMax &d = depths.at(g[v].index);
- assert(d.min == d.max);
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
- d.min);
- }
-}
-
-/* updates the reports on all the provided vertices */
-static
-void makeSomRelReports(ReportManager &rm, NGHolder &g,
- const vector<NFAVertex> &to_update,
- const vector<DepthMinMax> &depths) {
- for (auto v : to_update) {
- const DepthMinMax &d = depths.at(g[v].index);
- assert(d.min == d.max);
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
- d.min);
- }
-}
-
-static
-void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) {
- for (auto v : inv_adjacent_vertices_range(sink, g)) {
- if (v == g.accept) {
- continue;
- }
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS,
- 0);
- }
-}
-
-static
-void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
- som_loc);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
- som_loc);
- }
-}
-
-static
-void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g,
- const vector<NFAVertex> &to_update,
- u32 som_loc) {
- for (auto v : to_update) {
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
- som_loc);
- }
-}
-
-static
-bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes,
- u32 som_loc) {
- ReportManager &rm = ng.rm;
-
- /* escaper = /prefix[^escapes]*[escapes]/ */
- DEBUG_PRINTF("creating escaper for %u\n", som_loc);
- NGHolder h;
- cloneHolder(h, prefix);
- assert(h.kind == NFA_OUTFIX);
-
- NFAVertex u = add_vertex(h);
- h[u].char_reach = ~escapes;
-
- NFAVertex v = add_vertex(h);
- h[v].char_reach = escapes;
-
- for (auto w : inv_adjacent_vertices_range(h.accept, h)) {
- add_edge(w, u, h);
- add_edge(w, v, h);
- h[w].reports.clear();
- }
-
- clear_in_edges(h.accept, h);
-
- add_edge(u, v, h);
- add_edge(u, u, h);
- add_edge(v, h.accept, h);
-
- Report ir = makeCallback(0U, 0);
- ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE;
- ir.onmatch = som_loc;
- h[v].reports.insert(rm.getInternalId(ir));
- return ng.addHolder(h);
-}
-
-static
-void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked) {
- /* NOTE: This is appropriate for firstMatchIsFirst */
- DEBUG_PRINTF("prepping for lock check\n");
-
- NGHolder &midfix = *out;
-
- map<NFAVertex, NFAVertex> v_map;
- v_map[g.start] = midfix.start;
- v_map[g.startDs] = midfix.startDs;
-
- /* include the lock region */
+ g[v].index, report_id, rep, ir_type);
+ r_new.insert(rep);
+ }
+ g[v].reports = r_new;
+}
+
+/* updates the reports on all vertices leading to the sink */
+static
+void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
+ const vector<DepthMinMax> &depths) {
+ for (auto v : inv_adjacent_vertices_range(sink, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+
+ const DepthMinMax &d = depths.at(g[v].index);
+ assert(d.min == d.max);
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
+ d.min);
+ }
+}
+
+/* updates the reports on all the provided vertices */
+static
+void makeSomRelReports(ReportManager &rm, NGHolder &g,
+ const vector<NFAVertex> &to_update,
+ const vector<DepthMinMax> &depths) {
+ for (auto v : to_update) {
+ const DepthMinMax &d = depths.at(g[v].index);
+ assert(d.min == d.max);
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
+ d.min);
+ }
+}
+
+static
+void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) {
+ for (auto v : inv_adjacent_vertices_range(sink, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS,
+ 0);
+ }
+}
+
+static
+void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
+ som_loc);
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
+ som_loc);
+ }
+}
+
+static
+void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g,
+ const vector<NFAVertex> &to_update,
+ u32 som_loc) {
+ for (auto v : to_update) {
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
+ som_loc);
+ }
+}
+
+static
+bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes,
+ u32 som_loc) {
+ ReportManager &rm = ng.rm;
+
+ /* escaper = /prefix[^escapes]*[escapes]/ */
+ DEBUG_PRINTF("creating escaper for %u\n", som_loc);
+ NGHolder h;
+ cloneHolder(h, prefix);
+ assert(h.kind == NFA_OUTFIX);
+
+ NFAVertex u = add_vertex(h);
+ h[u].char_reach = ~escapes;
+
+ NFAVertex v = add_vertex(h);
+ h[v].char_reach = escapes;
+
+ for (auto w : inv_adjacent_vertices_range(h.accept, h)) {
+ add_edge(w, u, h);
+ add_edge(w, v, h);
+ h[w].reports.clear();
+ }
+
+ clear_in_edges(h.accept, h);
+
+ add_edge(u, v, h);
+ add_edge(u, u, h);
+ add_edge(v, h.accept, h);
+
+ Report ir = makeCallback(0U, 0);
+ ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE;
+ ir.onmatch = som_loc;
+ h[v].reports.insert(rm.getInternalId(ir));
+ return ng.addHolder(h);
+}
+
+static
+void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked) {
+ /* NOTE: This is appropriate for firstMatchIsFirst */
+ DEBUG_PRINTF("prepping for lock check\n");
+
+ NGHolder &midfix = *out;
+
+ map<NFAVertex, NFAVertex> v_map;
+ v_map[g.start] = midfix.start;
+ v_map[g.startDs] = midfix.startDs;
+
+ /* include the lock region */
assert(picked != info.end());
auto graph_last = next(picked);
-
+
assert(!graph_last->second.dag);
assert(graph_last->second.full.size() == 1);
for (auto jt = graph_last; ; --jt) {
- DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
-
- /* add all vertices in region, create mapping */
- for (auto v : jt->second.full) {
+ DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
+
+ /* add all vertices in region, create mapping */
+ for (auto v : jt->second.full) {
DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
- if (contains(v_map, v)) {
- continue;
- }
-
- /* treat all virtual starts as happening anywhere, so that the
- * virtual start is not counted as part of the SoM */
- if (is_virtual_start(v, g)) {
- v_map[v] = midfix.startDs;
- continue;
- }
-
- NFAVertex vnew = add_vertex(g[v], midfix);
- v_map[v] = vnew;
- }
-
- /* add edges leaving region verts based on mapping */
- for (auto v : jt->second.full) {
- NFAVertex u = v_map[v];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- continue;
- }
- if (!contains(v_map, w)) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- } else {
- add_edge_if_not_present(u, v_map[w], midfix);
- }
- }
- }
-
+ if (contains(v_map, v)) {
+ continue;
+ }
+
+ /* treat all virtual starts as happening anywhere, so that the
+ * virtual start is not counted as part of the SoM */
+ if (is_virtual_start(v, g)) {
+ v_map[v] = midfix.startDs;
+ continue;
+ }
+
+ NFAVertex vnew = add_vertex(g[v], midfix);
+ v_map[v] = vnew;
+ }
+
+ /* add edges leaving region verts based on mapping */
+ for (auto v : jt->second.full) {
+ NFAVertex u = v_map[v];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == g.accept || w == g.acceptEod) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ continue;
+ }
+ if (!contains(v_map, w)) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ } else {
+ add_edge_if_not_present(u, v_map[w], midfix);
+ }
+ }
+ }
+
if (jt == info.begin()) {
break;
}
@@ -750,1671 +750,1671 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
/* add edges from startds to the enters of all the initial optional
* regions and the first mandatory region. */
for (auto jt = info.begin(); ; ++jt) {
- for (auto enter : jt->second.enters) {
- assert(contains(v_map, enter));
- NFAVertex v = v_map[enter];
- add_edge_if_not_present(midfix.startDs, v, midfix);
- }
-
+ for (auto enter : jt->second.enters) {
+ assert(contains(v_map, enter));
+ NFAVertex v = v_map[enter];
+ add_edge_if_not_present(midfix.startDs, v, midfix);
+ }
+
if (!jt->second.optional) {
- break;
- }
+ break;
+ }
if (jt == graph_last) {
/* all regions are optional - add a direct edge to accept */
add_edge_if_not_present(midfix.startDs, midfix.accept, midfix);
break;
}
- }
-
- assert(in_degree(midfix.accept, midfix));
+ }
+
+ assert(in_degree(midfix.accept, midfix));
renumber_vertices(midfix);
-}
-
-static
-void fillRoughMidfix(NGHolder *out, const NGHolder &g,
+}
+
+static
+void fillRoughMidfix(NGHolder *out, const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked) {
- /* as we are not the first prefix, we are probably not acyclic. We need to
- * generate an acyclic holder to acts a fake prefix to sentClearsTail.
- * This will result in a more conservative estimate. */
- /* NOTE: This is not appropriate for firstMatchIsFirst */
- NGHolder &midfix = *out;
- add_edge(midfix.startDs, midfix.accept, midfix);
-
- map<NFAVertex, NFAVertex> v_map;
-
- map<u32, region_info>::const_iterator jt = picked;
- for (; jt->second.dag; --jt) {
- DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
- if (!jt->second.optional) {
- clear_out_edges(midfix.startDs, midfix);
- add_edge(midfix.startDs, midfix.startDs, midfix);
- }
-
- /* add all vertices in region, create mapping */
- for (auto v : jt->second.full) {
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked) {
+ /* as we are not the first prefix, we are probably not acyclic. We need to
+ * generate an acyclic holder to acts a fake prefix to sentClearsTail.
+ * This will result in a more conservative estimate. */
+ /* NOTE: This is not appropriate for firstMatchIsFirst */
+ NGHolder &midfix = *out;
+ add_edge(midfix.startDs, midfix.accept, midfix);
+
+ map<NFAVertex, NFAVertex> v_map;
+
+ map<u32, region_info>::const_iterator jt = picked;
+ for (; jt->second.dag; --jt) {
+ DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
+ if (!jt->second.optional) {
+ clear_out_edges(midfix.startDs, midfix);
+ add_edge(midfix.startDs, midfix.startDs, midfix);
+ }
+
+ /* add all vertices in region, create mapping */
+ for (auto v : jt->second.full) {
DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
- NFAVertex vnew = add_vertex(g[v], midfix);
- v_map[v] = vnew;
- }
-
- /* add edges leaving region verts based on mapping */
- for (auto v : jt->second.full) {
- NFAVertex u = v_map[v];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod) {
- continue;
- }
- if (!contains(v_map, w)) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- } else {
- add_edge_if_not_present(u, v_map[w], midfix);
- }
- }
- }
-
- /* add edges from startds to enters */
- for (auto enter : jt->second.enters) {
- assert(contains(v_map, enter));
- NFAVertex v = v_map[enter];
- add_edge(midfix.startDs, v, midfix);
- }
-
- if (jt == info.begin()) {
- break;
- }
- }
-
- /* we can include the exits of the regions leading in */
- if (!jt->second.dag) {
- u32 first_early_region = jt->first;
- clear_out_edges(midfix.startDs, midfix);
- add_edge(midfix.startDs, midfix.startDs, midfix);
-
- do {
- for (auto v : jt->second.exits) {
+ NFAVertex vnew = add_vertex(g[v], midfix);
+ v_map[v] = vnew;
+ }
+
+ /* add edges leaving region verts based on mapping */
+ for (auto v : jt->second.full) {
+ NFAVertex u = v_map[v];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == g.accept || w == g.acceptEod) {
+ continue;
+ }
+ if (!contains(v_map, w)) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ } else {
+ add_edge_if_not_present(u, v_map[w], midfix);
+ }
+ }
+ }
+
+ /* add edges from startds to enters */
+ for (auto enter : jt->second.enters) {
+ assert(contains(v_map, enter));
+ NFAVertex v = v_map[enter];
+ add_edge(midfix.startDs, v, midfix);
+ }
+
+ if (jt == info.begin()) {
+ break;
+ }
+ }
+
+ /* we can include the exits of the regions leading in */
+ if (!jt->second.dag) {
+ u32 first_early_region = jt->first;
+ clear_out_edges(midfix.startDs, midfix);
+ add_edge(midfix.startDs, midfix.startDs, midfix);
+
+ do {
+ for (auto v : jt->second.exits) {
DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
- NFAVertex vnew = add_vertex(g[v], midfix);
- v_map[v] = vnew;
-
- /* add edges from startds to new vertices */
- add_edge(midfix.startDs, vnew, midfix);
- }
-
- /* add edges leaving region verts based on mapping */
- for (auto v : jt->second.exits) {
- NFAVertex u = v_map[v];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod
- || regions.at(w) <= first_early_region) {
- continue;
- }
- if (!contains(v_map, w)) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- } else {
- add_edge_if_not_present(u, v_map[w], midfix);
- }
- }
- }
- } while (jt->second.optional && jt != info.begin() && (jt--)->first);
-
- if (jt->second.optional) {
- assert(!jt->second.exits.empty());
- NFAVertex v = v_map[jt->second.exits.front()];
- for (auto w : adjacent_vertices_range(v, midfix)) {
- add_edge(midfix.startDs, w, midfix);
- }
- }
- }
-}
-
-static
-bool beginsWithDotStar(const NGHolder &g) {
- bool hasDot = false;
-
- // We can ignore the successors of start, as matches that begin there will
- // necessarily have a SOM of 0.
-
- set<NFAVertex> succ;
- insert(&succ, adjacent_vertices(g.startDs, g));
- succ.erase(g.startDs);
-
- for (auto v : succ) {
- // We want 'dot' states that aren't virtual starts.
- if (g[v].char_reach.all() &&
- !g[v].assert_flags) {
- hasDot = true;
- set<NFAVertex> dotsucc;
- insert(&dotsucc, adjacent_vertices(v, g));
- if (dotsucc != succ) {
- DEBUG_PRINTF("failed dot-star succ check\n");
- return false;
- }
- }
- }
-
- if (hasDot) {
- DEBUG_PRINTF("begins with dot-star\n");
- }
- return hasDot;
-}
-
-static
-bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in,
- const u32 som_slot_out) {
- assert(item.prefix);
- assert(hasCorrectlyNumberedVertices(*item.prefix));
-
- /* setup escaper for second som_location if required */
- if (item.escapes.any()) {
- if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) {
- return false;
- }
- }
-
- /* ensure we copy som from prev loc */
- setMidfixReports(ng.rm, item, som_slot_in, som_slot_out);
-
- /* add second prefix/1st midfix */
- if (!ng.addHolder(*item.prefix)) {
- DEBUG_PRINTF("---addHolder failed---\n");
- return false;
- }
-
- return true;
-}
-
-static
-bool isMandRegionBetween(map<u32, region_info>::const_iterator a,
- map<u32, region_info>::const_iterator b) {
- while (b != a) {
- if (!b->second.optional) {
- return true;
- }
- --b;
- }
-
- return false;
-}
-
-// Attempts to advance the current plan. Returns true if we advance to the end
-// (woot!); updates picked, plan and bad_region.
-static
-bool advancePlan(const NGHolder &g,
+ NFAVertex vnew = add_vertex(g[v], midfix);
+ v_map[v] = vnew;
+
+ /* add edges from startds to new vertices */
+ add_edge(midfix.startDs, vnew, midfix);
+ }
+
+ /* add edges leaving region verts based on mapping */
+ for (auto v : jt->second.exits) {
+ NFAVertex u = v_map[v];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == g.accept || w == g.acceptEod
+ || regions.at(w) <= first_early_region) {
+ continue;
+ }
+ if (!contains(v_map, w)) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ } else {
+ add_edge_if_not_present(u, v_map[w], midfix);
+ }
+ }
+ }
+ } while (jt->second.optional && jt != info.begin() && (jt--)->first);
+
+ if (jt->second.optional) {
+ assert(!jt->second.exits.empty());
+ NFAVertex v = v_map[jt->second.exits.front()];
+ for (auto w : adjacent_vertices_range(v, midfix)) {
+ add_edge(midfix.startDs, w, midfix);
+ }
+ }
+ }
+}
+
+static
+bool beginsWithDotStar(const NGHolder &g) {
+ bool hasDot = false;
+
+ // We can ignore the successors of start, as matches that begin there will
+ // necessarily have a SOM of 0.
+
+ set<NFAVertex> succ;
+ insert(&succ, adjacent_vertices(g.startDs, g));
+ succ.erase(g.startDs);
+
+ for (auto v : succ) {
+ // We want 'dot' states that aren't virtual starts.
+ if (g[v].char_reach.all() &&
+ !g[v].assert_flags) {
+ hasDot = true;
+ set<NFAVertex> dotsucc;
+ insert(&dotsucc, adjacent_vertices(v, g));
+ if (dotsucc != succ) {
+ DEBUG_PRINTF("failed dot-star succ check\n");
+ return false;
+ }
+ }
+ }
+
+ if (hasDot) {
+ DEBUG_PRINTF("begins with dot-star\n");
+ }
+ return hasDot;
+}
+
+static
+bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in,
+ const u32 som_slot_out) {
+ assert(item.prefix);
+ assert(hasCorrectlyNumberedVertices(*item.prefix));
+
+ /* setup escaper for second som_location if required */
+ if (item.escapes.any()) {
+ if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) {
+ return false;
+ }
+ }
+
+ /* ensure we copy som from prev loc */
+ setMidfixReports(ng.rm, item, som_slot_in, som_slot_out);
+
+ /* add second prefix/1st midfix */
+ if (!ng.addHolder(*item.prefix)) {
+ DEBUG_PRINTF("---addHolder failed---\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool isMandRegionBetween(map<u32, region_info>::const_iterator a,
+ map<u32, region_info>::const_iterator b) {
+ while (b != a) {
+ if (!b->second.optional) {
+ return true;
+ }
+ --b;
+ }
+
+ return false;
+}
+
+// Attempts to advance the current plan. Returns true if we advance to the end
+// (woot!); updates picked, plan and bad_region.
+static
+bool advancePlan(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const NGHolder &prefix, bool stuck,
- map<u32, region_info>::const_iterator &picked,
- const map<u32, region_info>::const_iterator furthest,
- const map<u32, region_info>::const_iterator furthest_lock,
- const CharReach &next_escapes, som_plan &plan,
- u32 *bad_region) {
- u32 bad_region_r = 0;
- u32 bad_region_x = 0;
- u32 bad_region_e = 0;
- DEBUG_PRINTF("curr %u\n", picked->first);
-
- if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) {
- plan.is_reset = true;
- picked = furthest;
- DEBUG_PRINTF("Prefix clears tail, woot!\n");
- return true;
- } else {
- DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r);
- }
-
- if (stuck) {
- u32 to_region = furthest_lock->first;
- if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) {
- DEBUG_PRINTF("XSL\n");
- picked = furthest_lock;
- plan.escapes = next_escapes;
- return true;
- } else {
- DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x);
- }
-
- if (validateEXSL(g, regions, to_region, next_escapes, prefix,
- &bad_region_e)) {
- DEBUG_PRINTF("EXSL\n");
- picked = furthest_lock;
- plan.escapes = next_escapes;
- return true;
- } else {
- DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e);
- }
- } else {
- DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n");
- }
-
- assert(!plan.is_reset);
-
- *bad_region = max(bad_region_x, bad_region_e);
- if (bad_region_r >= *bad_region) {
- *bad_region = bad_region_r;
- plan.is_reset = true;
- plan.escapes.clear();
- picked = furthest;
- } else {
- picked = furthest_lock;
- plan.escapes = next_escapes;
- }
-
- DEBUG_PRINTF("first bad region now %u\n", *bad_region);
- return false;
-}
-
-static
-bool addPlan(vector<som_plan> &plan, u32 parent) {
- DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(),
- parent);
-
- if (plan.size() >= MAX_SOM_PLANS) {
- DEBUG_PRINTF("too many plans!\n");
- return false;
- }
-
- plan.emplace_back(nullptr, CharReach(), false, parent);
- return true;
-}
-
-// Fetches all preds of {accept, acceptEod} for this graph.
-static
-void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) {
+ const NGHolder &prefix, bool stuck,
+ map<u32, region_info>::const_iterator &picked,
+ const map<u32, region_info>::const_iterator furthest,
+ const map<u32, region_info>::const_iterator furthest_lock,
+ const CharReach &next_escapes, som_plan &plan,
+ u32 *bad_region) {
+ u32 bad_region_r = 0;
+ u32 bad_region_x = 0;
+ u32 bad_region_e = 0;
+ DEBUG_PRINTF("curr %u\n", picked->first);
+
+ if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) {
+ plan.is_reset = true;
+ picked = furthest;
+ DEBUG_PRINTF("Prefix clears tail, woot!\n");
+ return true;
+ } else {
+ DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r);
+ }
+
+ if (stuck) {
+ u32 to_region = furthest_lock->first;
+ if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) {
+ DEBUG_PRINTF("XSL\n");
+ picked = furthest_lock;
+ plan.escapes = next_escapes;
+ return true;
+ } else {
+ DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x);
+ }
+
+ if (validateEXSL(g, regions, to_region, next_escapes, prefix,
+ &bad_region_e)) {
+ DEBUG_PRINTF("EXSL\n");
+ picked = furthest_lock;
+ plan.escapes = next_escapes;
+ return true;
+ } else {
+ DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e);
+ }
+ } else {
+ DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n");
+ }
+
+ assert(!plan.is_reset);
+
+ *bad_region = max(bad_region_x, bad_region_e);
+ if (bad_region_r >= *bad_region) {
+ *bad_region = bad_region_r;
+ plan.is_reset = true;
+ plan.escapes.clear();
+ picked = furthest;
+ } else {
+ picked = furthest_lock;
+ plan.escapes = next_escapes;
+ }
+
+ DEBUG_PRINTF("first bad region now %u\n", *bad_region);
+ return false;
+}
+
+static
+bool addPlan(vector<som_plan> &plan, u32 parent) {
+ DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(),
+ parent);
+
+ if (plan.size() >= MAX_SOM_PLANS) {
+ DEBUG_PRINTF("too many plans!\n");
+ return false;
+ }
+
+ plan.emplace_back(nullptr, CharReach(), false, parent);
+ return true;
+}
+
+// Fetches all preds of {accept, acceptEod} for this graph.
+static
+void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) {
set<NFAVertex> tmp;
- insert(&tmp, inv_adjacent_vertices(g.accept, g));
- insert(&tmp, inv_adjacent_vertices(g.acceptEod, g));
- tmp.erase(g.accept);
-
-#ifdef DEBUG
- DEBUG_PRINTF("add reporters:");
- for (UNUSED auto v : tmp) {
+ insert(&tmp, inv_adjacent_vertices(g.accept, g));
+ insert(&tmp, inv_adjacent_vertices(g.acceptEod, g));
+ tmp.erase(g.accept);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("add reporters:");
+ for (UNUSED auto v : tmp) {
printf(" %zu", g[v].index);
- }
- printf("\n");
-#endif
-
- reporters.insert(reporters.end(), tmp.begin(), tmp.end());
-}
-
-// Fetches all preds of {accept, acceptEod} in this region.
-static
-void addReporterVertices(const region_info &r, const NGHolder &g,
- vector<NFAVertex> &reporters) {
- for (auto v : r.exits) {
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ }
+ printf("\n");
+#endif
+
+ reporters.insert(reporters.end(), tmp.begin(), tmp.end());
+}
+
+// Fetches all preds of {accept, acceptEod} in this region.
+static
+void addReporterVertices(const region_info &r, const NGHolder &g,
+ vector<NFAVertex> &reporters) {
+ for (auto v : r.exits) {
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
DEBUG_PRINTF("add reporter %zu\n", g[v].index);
- reporters.push_back(v);
- }
- }
-}
-
-// Fetches the mappings of all preds of {accept, acceptEod} in this region.
-static
-void addMappedReporterVertices(const region_info &r, const NGHolder &g,
+ reporters.push_back(v);
+ }
+ }
+}
+
+// Fetches the mappings of all preds of {accept, acceptEod} in this region.
+static
+void addMappedReporterVertices(const region_info &r, const NGHolder &g,
const unordered_map<NFAVertex, NFAVertex> &mapping,
- vector<NFAVertex> &reporters) {
- for (auto v : r.exits) {
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ vector<NFAVertex> &reporters) {
+ for (auto v : r.exits) {
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
DEBUG_PRINTF("adding v=%zu\n", g[v].index);
auto it = mapping.find(v);
- assert(it != mapping.end());
- reporters.push_back(it->second);
- }
- }
-}
-
-// Clone a version of the graph, but only including the in-edges of `enter'
-// from earlier regions.
-static
-void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g,
+ assert(it != mapping.end());
+ reporters.push_back(it->second);
+ }
+ }
+}
+
+// Clone a version of the graph, but only including the in-edges of `enter'
+// from earlier regions.
+static
+void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- NFAVertex entry, const vector<NFAVertex> &enters,
+ NFAVertex entry, const vector<NFAVertex> &enters,
unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
- orig_to_copy.clear();
- cloneHolder(out, g, &orig_to_copy);
-
- assert(contains(orig_to_copy, entry));
- const u32 region = regions.at(entry);
-
- for (auto v : enters) {
- if (v == entry) {
- continue;
- }
- assert(contains(orig_to_copy, v));
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (regions.at(u) < region) {
- assert(edge(orig_to_copy[u], orig_to_copy[v], out).second);
- remove_edge(orig_to_copy[u], orig_to_copy[v], out);
- }
- }
- }
-
- pruneUseless(out);
-}
-
-static
+ orig_to_copy.clear();
+ cloneHolder(out, g, &orig_to_copy);
+
+ assert(contains(orig_to_copy, entry));
+ const u32 region = regions.at(entry);
+
+ for (auto v : enters) {
+ if (v == entry) {
+ continue;
+ }
+ assert(contains(orig_to_copy, v));
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (regions.at(u) < region) {
+ assert(edge(orig_to_copy[u], orig_to_copy[v], out).second);
+ remove_edge(orig_to_copy[u], orig_to_copy[v], out);
+ }
+ }
+ }
+
+ pruneUseless(out);
+}
+
+static
void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> &regions,
- vector<NFAVertex> &enters) {
- assert(!enters.empty());
- const u32 split_region = regions.at(enters.front());
-
- vector<NFAVertex> new_enters;
-
- // Gather the list of vertices in the split region and subsequent regions.
- vector<NFAVertex> tail_vertices;
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || regions.at(v) < split_region) {
- continue;
- }
- tail_vertices.push_back(v);
- }
-
- for (auto enter : enters) {
+ vector<NFAVertex> &enters) {
+ assert(!enters.empty());
+ const u32 split_region = regions.at(enters.front());
+
+ vector<NFAVertex> new_enters;
+
+ // Gather the list of vertices in the split region and subsequent regions.
+ vector<NFAVertex> tail_vertices;
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || regions.at(v) < split_region) {
+ continue;
+ }
+ tail_vertices.push_back(v);
+ }
+
+ for (auto enter : enters) {
DEBUG_PRINTF("processing enter %zu\n", g[enter].index);
- map<NFAVertex, NFAVertex> orig_to_copy;
-
- // Make a copy of all of the tail vertices, storing region info along
- // the way.
- for (auto v : tail_vertices) {
- auto v2 = clone_vertex(g, v);
- orig_to_copy[v] = v2;
- regions[v2] = regions.at(v);
- }
-
- // Wire up the edges: edges from previous regions come from the
- // original vertices, while edges internal to and beyond the split
- // region go to the copies.
-
- for (const auto &m : orig_to_copy) {
- NFAVertex v = m.first, v2 = m.second;
-
- for (const auto &e : out_edges_range(v, g)) {
- NFAVertex t = target(e, g);
- u32 t_region = regions.at(t);
- if (t_region >= split_region && !is_special(t, g)) {
- assert(contains(orig_to_copy, t));
- t = orig_to_copy[t];
- }
- add_edge_if_not_present(v2, t, g[e], g);
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
- if (regions.at(u) >= split_region && !is_special(u, g)) {
- assert(contains(orig_to_copy, u));
- u = orig_to_copy[u];
- }
- add_edge_if_not_present(u, v2, g[e], g);
- }
-
- }
-
- // Clear the in-edges from earlier regions of the OTHER enters for this
- // copy of the split region.
- for (auto v : enters) {
- if (v == enter) {
- continue;
- }
-
- remove_in_edge_if(orig_to_copy[v],
- [&](const NFAEdge &e) {
- NFAVertex u = source(e, g);
- return regions.at(u) < split_region;
+ map<NFAVertex, NFAVertex> orig_to_copy;
+
+ // Make a copy of all of the tail vertices, storing region info along
+ // the way.
+ for (auto v : tail_vertices) {
+ auto v2 = clone_vertex(g, v);
+ orig_to_copy[v] = v2;
+ regions[v2] = regions.at(v);
+ }
+
+ // Wire up the edges: edges from previous regions come from the
+ // original vertices, while edges internal to and beyond the split
+ // region go to the copies.
+
+ for (const auto &m : orig_to_copy) {
+ NFAVertex v = m.first, v2 = m.second;
+
+ for (const auto &e : out_edges_range(v, g)) {
+ NFAVertex t = target(e, g);
+ u32 t_region = regions.at(t);
+ if (t_region >= split_region && !is_special(t, g)) {
+ assert(contains(orig_to_copy, t));
+ t = orig_to_copy[t];
+ }
+ add_edge_if_not_present(v2, t, g[e], g);
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ NFAVertex u = source(e, g);
+ if (regions.at(u) >= split_region && !is_special(u, g)) {
+ assert(contains(orig_to_copy, u));
+ u = orig_to_copy[u];
+ }
+ add_edge_if_not_present(u, v2, g[e], g);
+ }
+
+ }
+
+ // Clear the in-edges from earlier regions of the OTHER enters for this
+ // copy of the split region.
+ for (auto v : enters) {
+ if (v == enter) {
+ continue;
+ }
+
+ remove_in_edge_if(orig_to_copy[v],
+ [&](const NFAEdge &e) {
+ NFAVertex u = source(e, g);
+ return regions.at(u) < split_region;
}, g);
- }
-
- new_enters.push_back(orig_to_copy[enter]);
- }
-
- // Remove the original set of tail vertices.
- remove_vertices(tail_vertices, g);
- pruneUseless(g);
- regions = assignRegions(g);
-
- enters.swap(new_enters);
-}
-
-static
-bool doTreePlanningIntl(NGHolder &g,
+ }
+
+ new_enters.push_back(orig_to_copy[enter]);
+ }
+
+ // Remove the original set of tail vertices.
+ remove_vertices(tail_vertices, g);
+ pruneUseless(g);
+ regions = assignRegions(g);
+
+ enters.swap(new_enters);
+}
+
+static
+bool doTreePlanningIntl(NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked, u32 bad_region,
- u32 parent_plan,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked, u32 bad_region,
+ u32 parent_plan,
const unordered_map<NFAVertex, NFAVertex> &copy_to_orig,
- vector<som_plan> &plan, const Grey &grey) {
- assert(picked != info.end());
-
- DEBUG_PRINTF("picked=%u\n", picked->first);
- DEBUG_PRINTF("parent is %u\n", parent_plan);
-
- map<u32, region_info>::const_iterator furthest;
-
- bool to_end = false;
- while (!to_end) {
- DEBUG_PRINTF("picked is %u\n", picked->first);
- DEBUG_PRINTF("first bad region now %u\n", bad_region);
-
- furthest = info.find(bad_region); /* first bad */
- if (furthest == info.end()) {
- DEBUG_PRINTF("no partition\n");
- return false;
- }
- --furthest; /* last region we can establish som for */
-
- if (furthest->first <= picked->first) {
- DEBUG_PRINTF("failed to make any progress\n");
- return false;
- }
-
- map<u32, region_info>::const_iterator furthest_lock = furthest;
- CharReach next_escapes;
- bool lock_found;
- /* The last possible lock in the range that we examine should be the
- * best. If the previous plan is a lock, this follow as any early lock
- * must have a reach that is a subset of the last plan's lock. If the
- * last plan is a resetting plan ..., ?is this true? */
- do {
- lock_found = isPossibleLock(g, furthest_lock, info,
- &next_escapes);
- } while (!lock_found && (--furthest_lock)->first > picked->first);
- DEBUG_PRINTF("lock possible? %d\n", (int)lock_found);
-
- if (lock_found && !isMandRegionBetween(picked, furthest_lock)) {
- lock_found = false;
- }
-
- if (!isMandRegionBetween(picked, furthest)) {
- return false;
- }
-
- /* There is no certainty that the som at a reset location will always
- * go forward */
- if (plan[parent_plan].is_reset && lock_found) {
- NGHolder midfix;
- DEBUG_PRINTF("checking if midfix is suitable for lock\n");
- fillHolderForLockCheck(&midfix, g, info, furthest_lock);
-
- if (!firstMatchIsFirst(midfix)) {
- DEBUG_PRINTF("not stuck\n");
- lock_found = false;
- }
- }
-
- if (!addPlan(plan, parent_plan)) {
- return false;
- }
-
- to_end = false;
-
- if (lock_found && next_escapes.none()) {
- picked = furthest_lock;
- to_end = true;
- }
-
- if (!to_end) {
- NGHolder conservative_midfix; /* for use in reset, exsl analysis */
- fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
- dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey);
-
- u32 old_bad_region = bad_region;
- to_end = advancePlan(g, regions, conservative_midfix, lock_found,
- picked, furthest, furthest_lock, next_escapes,
- plan.back(), &bad_region);
- if (!to_end
- && bad_region <= old_bad_region) { /* we failed to progress */
- DEBUG_PRINTF("failed to make any progress\n");
- return false;
- }
- }
-
- /* handle direct edge to accepts from region */
- if (edge(furthest->second.exits.front(), g.accept, g).second
- || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
- map<u32, region_info>::const_iterator it = furthest;
- do {
- addMappedReporterVertices(it->second, g, copy_to_orig,
- plan.back().reporters_in);
- } while (it != info.begin() && it->second.optional && (it--)->first);
- }
-
- /* create second prefix */
- plan.back().prefix = makePrefix(g, regions, furthest->second,
- next(furthest)->second);
- parent_plan = plan.size() - 1;
- }
-
- // The last region contributes reporters. If it's optional, the regions
- // before it do as well.
- map<u32, region_info>::const_reverse_iterator it = info.rbegin();
- do {
- DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
- addMappedReporterVertices(it->second, g, copy_to_orig,
- plan.back().reporters);
- } while (it->second.optional && it != info.rend() &&
- (++it)->first > furthest->first);
-
- return true;
-}
-
-static
-bool doTreePlanning(NGHolder &g,
- map<u32, region_info>::const_iterator presplit,
- map<u32, region_info>::const_iterator picked,
- vector<som_plan> &plan, const Grey &grey) {
- DEBUG_PRINTF("picked is %u\n", picked->first);
- DEBUG_PRINTF("presplit is %u\n", presplit->first);
-
- map<u32, region_info>::const_iterator splitter = next(presplit);
- vector<NFAVertex> enters = splitter->second.enters; // mutable copy
- DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size());
-
- if (enters.size() <= 1) {
- // TODO: Splitting a region with one entry won't get us anywhere, but
- // it shouldn't create buggy analyses either. See UE-1892.
- DEBUG_PRINTF("nothing to split\n");
- return false;
- }
-
- if (plan.size() + enters.size() > MAX_SOM_PLANS) {
- DEBUG_PRINTF("splitting this tree would hit the plan limit.\n");
- return false;
- }
-
- assert(!plan.empty());
- const u32 parent_plan = plan.size() - 1;
-
- // Make a copy of the graph, with the subgraph under each enter vertex
- // duplicated without the edges into the other enter vertices.
- // NOTE WELL: this will invalidate 'info' from the split point, but it's
- // OK... we don't use it after this.
- auto g_regions = assignRegions(g);
- expandGraph(g, g_regions, enters);
- dumpHolder(g, g_regions, 14, "som_expandedtree", grey);
-
- for (auto v : enters) {
+ vector<som_plan> &plan, const Grey &grey) {
+ assert(picked != info.end());
+
+ DEBUG_PRINTF("picked=%u\n", picked->first);
+ DEBUG_PRINTF("parent is %u\n", parent_plan);
+
+ map<u32, region_info>::const_iterator furthest;
+
+ bool to_end = false;
+ while (!to_end) {
+ DEBUG_PRINTF("picked is %u\n", picked->first);
+ DEBUG_PRINTF("first bad region now %u\n", bad_region);
+
+ furthest = info.find(bad_region); /* first bad */
+ if (furthest == info.end()) {
+ DEBUG_PRINTF("no partition\n");
+ return false;
+ }
+ --furthest; /* last region we can establish som for */
+
+ if (furthest->first <= picked->first) {
+ DEBUG_PRINTF("failed to make any progress\n");
+ return false;
+ }
+
+ map<u32, region_info>::const_iterator furthest_lock = furthest;
+ CharReach next_escapes;
+ bool lock_found;
+ /* The last possible lock in the range that we examine should be the
+ * best. If the previous plan is a lock, this follow as any early lock
+ * must have a reach that is a subset of the last plan's lock. If the
+ * last plan is a resetting plan ..., ?is this true? */
+ do {
+ lock_found = isPossibleLock(g, furthest_lock, info,
+ &next_escapes);
+ } while (!lock_found && (--furthest_lock)->first > picked->first);
+ DEBUG_PRINTF("lock possible? %d\n", (int)lock_found);
+
+ if (lock_found && !isMandRegionBetween(picked, furthest_lock)) {
+ lock_found = false;
+ }
+
+ if (!isMandRegionBetween(picked, furthest)) {
+ return false;
+ }
+
+ /* There is no certainty that the som at a reset location will always
+ * go forward */
+ if (plan[parent_plan].is_reset && lock_found) {
+ NGHolder midfix;
+ DEBUG_PRINTF("checking if midfix is suitable for lock\n");
+ fillHolderForLockCheck(&midfix, g, info, furthest_lock);
+
+ if (!firstMatchIsFirst(midfix)) {
+ DEBUG_PRINTF("not stuck\n");
+ lock_found = false;
+ }
+ }
+
+ if (!addPlan(plan, parent_plan)) {
+ return false;
+ }
+
+ to_end = false;
+
+ if (lock_found && next_escapes.none()) {
+ picked = furthest_lock;
+ to_end = true;
+ }
+
+ if (!to_end) {
+ NGHolder conservative_midfix; /* for use in reset, exsl analysis */
+ fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
+ dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey);
+
+ u32 old_bad_region = bad_region;
+ to_end = advancePlan(g, regions, conservative_midfix, lock_found,
+ picked, furthest, furthest_lock, next_escapes,
+ plan.back(), &bad_region);
+ if (!to_end
+ && bad_region <= old_bad_region) { /* we failed to progress */
+ DEBUG_PRINTF("failed to make any progress\n");
+ return false;
+ }
+ }
+
+ /* handle direct edge to accepts from region */
+ if (edge(furthest->second.exits.front(), g.accept, g).second
+ || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
+ map<u32, region_info>::const_iterator it = furthest;
+ do {
+ addMappedReporterVertices(it->second, g, copy_to_orig,
+ plan.back().reporters_in);
+ } while (it != info.begin() && it->second.optional && (it--)->first);
+ }
+
+ /* create second prefix */
+ plan.back().prefix = makePrefix(g, regions, furthest->second,
+ next(furthest)->second);
+ parent_plan = plan.size() - 1;
+ }
+
+ // The last region contributes reporters. If it's optional, the regions
+ // before it do as well.
+ map<u32, region_info>::const_reverse_iterator it = info.rbegin();
+ do {
+ DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
+ addMappedReporterVertices(it->second, g, copy_to_orig,
+ plan.back().reporters);
+ } while (it->second.optional && it != info.rend() &&
+ (++it)->first > furthest->first);
+
+ return true;
+}
+
+static
+bool doTreePlanning(NGHolder &g,
+ map<u32, region_info>::const_iterator presplit,
+ map<u32, region_info>::const_iterator picked,
+ vector<som_plan> &plan, const Grey &grey) {
+ DEBUG_PRINTF("picked is %u\n", picked->first);
+ DEBUG_PRINTF("presplit is %u\n", presplit->first);
+
+ map<u32, region_info>::const_iterator splitter = next(presplit);
+ vector<NFAVertex> enters = splitter->second.enters; // mutable copy
+ DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size());
+
+ if (enters.size() <= 1) {
+ // TODO: Splitting a region with one entry won't get us anywhere, but
+ // it shouldn't create buggy analyses either. See UE-1892.
+ DEBUG_PRINTF("nothing to split\n");
+ return false;
+ }
+
+ if (plan.size() + enters.size() > MAX_SOM_PLANS) {
+ DEBUG_PRINTF("splitting this tree would hit the plan limit.\n");
+ return false;
+ }
+
+ assert(!plan.empty());
+ const u32 parent_plan = plan.size() - 1;
+
+ // Make a copy of the graph, with the subgraph under each enter vertex
+ // duplicated without the edges into the other enter vertices.
+ // NOTE WELL: this will invalidate 'info' from the split point, but it's
+ // OK... we don't use it after this.
+ auto g_regions = assignRegions(g);
+ expandGraph(g, g_regions, enters);
+ dumpHolder(g, g_regions, 14, "som_expandedtree", grey);
+
+ for (auto v : enters) {
DEBUG_PRINTF("enter %zu\n", g[v].index);
-
- // For this entry vertex, construct a version of the graph without the
- // other entries in this region (g_path), and calculate its depths and
- // regions.
-
- NGHolder g_path;
+
+ // For this entry vertex, construct a version of the graph without the
+ // other entries in this region (g_path), and calculate its depths and
+ // regions.
+
+ NGHolder g_path;
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy);
- auto regions = assignRegions(g_path);
- dumpHolder(g_path, regions, 14, "som_treepath", grey);
-
- map<u32, region_info> path_info;
- buildRegionMapping(g_path, regions, path_info);
-
- // Translate 'picked' to the corresponding region iterator over the
- // g_path graph. we can't trust the numbering, so we use a vertex
- // instead.
- NFAVertex picked_v = picked->second.enters.front();
- assert(contains(orig_to_copy, picked_v));
- u32 picked_region = regions.at(orig_to_copy[picked_v]);
- map<u32, region_info>::const_iterator path_pick =
- path_info.find(picked_region);
- if (path_pick == path_info.end()) {
- assert(0); // odd
- return false;
- }
-
- // Similarly, find our bad_region.
- assert(contains(orig_to_copy, v));
- u32 bad_region = regions.at(orig_to_copy[v]);
-
- // It's possible that the region may have grown to include its
- // successors, in which case we (currently) run screaming. Just
- // checking the size should be sufficient here.
- if (picked->second.full.size() != path_pick->second.full.size()) {
- DEBUG_PRINTF("picked region has grown, bailing\n");
- return false;
- }
-
- // Construct reverse mapping from vertices in g_path to g.
+ cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy);
+ auto regions = assignRegions(g_path);
+ dumpHolder(g_path, regions, 14, "som_treepath", grey);
+
+ map<u32, region_info> path_info;
+ buildRegionMapping(g_path, regions, path_info);
+
+ // Translate 'picked' to the corresponding region iterator over the
+ // g_path graph. we can't trust the numbering, so we use a vertex
+ // instead.
+ NFAVertex picked_v = picked->second.enters.front();
+ assert(contains(orig_to_copy, picked_v));
+ u32 picked_region = regions.at(orig_to_copy[picked_v]);
+ map<u32, region_info>::const_iterator path_pick =
+ path_info.find(picked_region);
+ if (path_pick == path_info.end()) {
+ assert(0); // odd
+ return false;
+ }
+
+ // Similarly, find our bad_region.
+ assert(contains(orig_to_copy, v));
+ u32 bad_region = regions.at(orig_to_copy[v]);
+
+ // It's possible that the region may have grown to include its
+ // successors, in which case we (currently) run screaming. Just
+ // checking the size should be sufficient here.
+ if (picked->second.full.size() != path_pick->second.full.size()) {
+ DEBUG_PRINTF("picked region has grown, bailing\n");
+ return false;
+ }
+
+ // Construct reverse mapping from vertices in g_path to g.
unordered_map<NFAVertex, NFAVertex> copy_to_orig;
- for (const auto &m : orig_to_copy) {
- copy_to_orig.insert(make_pair(m.second, m.first));
- }
-
- bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick,
- bad_region, parent_plan,
- copy_to_orig, plan, grey);
- if (!to_end) {
- return false;
- }
- }
-
- return true;
-}
-
-enum dsp_behaviour {
- ALLOW_MODIFY_HOLDER,
- DISALLOW_MODIFY_HOLDER /* say no to tree planning */
-};
-
-static
-bool doSomPlanning(NGHolder &g, bool stuck_in,
+ for (const auto &m : orig_to_copy) {
+ copy_to_orig.insert(make_pair(m.second, m.first));
+ }
+
+ bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick,
+ bad_region, parent_plan,
+ copy_to_orig, plan, grey);
+ if (!to_end) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+enum dsp_behaviour {
+ ALLOW_MODIFY_HOLDER,
+ DISALLOW_MODIFY_HOLDER /* say no to tree planning */
+};
+
+static
+bool doSomPlanning(NGHolder &g, bool stuck_in,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked,
- vector<som_plan> &plan,
- const Grey &grey,
- dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) {
- DEBUG_PRINTF("in picked is %u\n", picked->first);
-
- /* Need to verify how far the lock covers */
- u32 bad_region;
- NGHolder *ap_pref = plan.back().prefix.get();
- NGHolder ap_temp;
- if (hasBigCycles(*ap_pref)) {
- fillRoughMidfix(&ap_temp, g, regions, info, picked);
- ap_pref = &ap_temp;
- }
-
- bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked,
- picked, picked, plan.back().escapes,
- plan.back(), &bad_region);
-
- if (to_end) {
- DEBUG_PRINTF("advanced through the whole graph in one go!\n");
- addReporterVertices(g, plan.back().reporters);
- return true;
- }
-
- map<u32, region_info>::const_iterator prev_furthest = picked;
- map<u32, region_info>::const_iterator furthest;
-
- furthest = info.find(bad_region); /* first bad */
- if (furthest == info.begin() || furthest == info.end()) {
- DEBUG_PRINTF("no partition\n");
- return false;
- }
- --furthest; /* last region we can establish som for */
-
- if (furthest->first <= picked->first) {
- do_tree:
- /* unable to establish SoM past the last picked region */
- if (behaviour == DISALLOW_MODIFY_HOLDER) {
- /* tree planning mutates the graph */
- return false;
- }
-
- DEBUG_PRINTF("failed to make any progress\n");
- assert(!plan.empty());
- if (plan.size() == 1) {
- DEBUG_PRINTF("not handling initial alternations yet\n");
- return false;
- }
- plan.pop_back();
- return doTreePlanning(g, furthest, prev_furthest, plan, grey);
- }
-
- furthest = picked;
- while (!to_end) {
- prev_furthest = furthest;
-
- DEBUG_PRINTF("prev further is %u\n", prev_furthest->first);
- DEBUG_PRINTF("first bad region now %u\n", bad_region);
-
- furthest = info.find(bad_region); /* first bad */
- if (furthest == info.begin() || furthest == info.end()) {
- DEBUG_PRINTF("no partition\n");
- return false;
- }
- --furthest; /* last region we can establish som for */
-
- map<u32, region_info>::const_iterator furthest_lock = furthest;
- CharReach next_escapes;
- bool stuck;
- do {
- stuck = isPossibleLock(g, furthest_lock, info, &next_escapes);
- } while (!stuck && (--furthest_lock)->first > prev_furthest->first);
- DEBUG_PRINTF("lock possible? %d\n", (int)stuck);
- DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first);
-
- if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) {
- stuck = false;
- }
-
- if (!isMandRegionBetween(prev_furthest, furthest)) {
- DEBUG_PRINTF("no mand region between %u and %u\n",
- prev_furthest->first, furthest->first);
- return false;
- }
-
- /* There is no certainty that the som at a reset location will always
- * go forward */
- if (plan.back().is_reset && stuck) {
- NGHolder midfix;
- fillHolderForLockCheck(&midfix, g, info, furthest_lock);
-
- DEBUG_PRINTF("checking if midfix is suitable for lock\n");
- if (!firstMatchIsFirst(midfix)) {
- DEBUG_PRINTF("not stuck\n");
- stuck = false;
- }
- }
-
- assert(!plan.empty());
- if (!addPlan(plan, plan.size() - 1)) {
- return false;
- }
-
- to_end = false;
-
- if (stuck && next_escapes.none()) {
- picked = furthest_lock;
- to_end = true;
- }
-
- if (!to_end) {
- NGHolder conservative_midfix; /* for use in reset, exsl analysis */
- fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
-
- u32 old_bad_region = bad_region;
- to_end = advancePlan(g, regions, conservative_midfix, stuck, picked,
- furthest, furthest_lock, next_escapes,
- plan.back(), &bad_region);
-
- if (!to_end
- && bad_region <= old_bad_region) { /* we failed to progress */
- goto do_tree;
- }
- }
-
- /* handle direct edge to accepts from region */
- if (edge(furthest->second.exits.front(), g.accept, g).second
- || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
- map<u32, region_info>::const_iterator it = furthest;
- do {
- DEBUG_PRINTF("direct edge to accept from region %u\n",
- it->first);
- addReporterVertices(it->second, g, plan.back().reporters_in);
- } while (it != info.begin() && it->second.optional
- && (it--)->first);
- }
-
- /* create second prefix */
- plan.back().prefix = makePrefix(g, regions, furthest->second,
- next(furthest)->second);
- }
- DEBUG_PRINTF("(final) picked is %u\n", picked->first);
-
- // The last region contributes reporters. If it's optional, the regions
- // before it do as well.
- map<u32, region_info>::const_reverse_iterator it = info.rbegin();
- do {
- DEBUG_PRINTF("region %u contributes reporters to last plan\n",
- it->first);
- addReporterVertices(it->second, g, plan.back().reporters);
- } while (it->second.optional && it != info.rend() &&
- (++it)->first > furthest->first);
-
- DEBUG_PRINTF("done!\n");
- return true;
-}
-
-static
-void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
- UNUSED size_t num) {
-#if defined(DEBUG) || defined(DUMP_PLANS)
- DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "
- "parent=%u\n",
- num, p.prefix.get(),
- describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(),
- p.is_reset, p.parent);
- printf(" reporters:");
- for (auto v : p.reporters) {
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked,
+ vector<som_plan> &plan,
+ const Grey &grey,
+ dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) {
+ DEBUG_PRINTF("in picked is %u\n", picked->first);
+
+ /* Need to verify how far the lock covers */
+ u32 bad_region;
+ NGHolder *ap_pref = plan.back().prefix.get();
+ NGHolder ap_temp;
+ if (hasBigCycles(*ap_pref)) {
+ fillRoughMidfix(&ap_temp, g, regions, info, picked);
+ ap_pref = &ap_temp;
+ }
+
+ bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked,
+ picked, picked, plan.back().escapes,
+ plan.back(), &bad_region);
+
+ if (to_end) {
+ DEBUG_PRINTF("advanced through the whole graph in one go!\n");
+ addReporterVertices(g, plan.back().reporters);
+ return true;
+ }
+
+ map<u32, region_info>::const_iterator prev_furthest = picked;
+ map<u32, region_info>::const_iterator furthest;
+
+ furthest = info.find(bad_region); /* first bad */
+ if (furthest == info.begin() || furthest == info.end()) {
+ DEBUG_PRINTF("no partition\n");
+ return false;
+ }
+ --furthest; /* last region we can establish som for */
+
+ if (furthest->first <= picked->first) {
+ do_tree:
+ /* unable to establish SoM past the last picked region */
+ if (behaviour == DISALLOW_MODIFY_HOLDER) {
+ /* tree planning mutates the graph */
+ return false;
+ }
+
+ DEBUG_PRINTF("failed to make any progress\n");
+ assert(!plan.empty());
+ if (plan.size() == 1) {
+ DEBUG_PRINTF("not handling initial alternations yet\n");
+ return false;
+ }
+ plan.pop_back();
+ return doTreePlanning(g, furthest, prev_furthest, plan, grey);
+ }
+
+ furthest = picked;
+ while (!to_end) {
+ prev_furthest = furthest;
+
+ DEBUG_PRINTF("prev further is %u\n", prev_furthest->first);
+ DEBUG_PRINTF("first bad region now %u\n", bad_region);
+
+ furthest = info.find(bad_region); /* first bad */
+ if (furthest == info.begin() || furthest == info.end()) {
+ DEBUG_PRINTF("no partition\n");
+ return false;
+ }
+ --furthest; /* last region we can establish som for */
+
+ map<u32, region_info>::const_iterator furthest_lock = furthest;
+ CharReach next_escapes;
+ bool stuck;
+ do {
+ stuck = isPossibleLock(g, furthest_lock, info, &next_escapes);
+ } while (!stuck && (--furthest_lock)->first > prev_furthest->first);
+ DEBUG_PRINTF("lock possible? %d\n", (int)stuck);
+ DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first);
+
+ if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) {
+ stuck = false;
+ }
+
+ if (!isMandRegionBetween(prev_furthest, furthest)) {
+ DEBUG_PRINTF("no mand region between %u and %u\n",
+ prev_furthest->first, furthest->first);
+ return false;
+ }
+
+ /* There is no certainty that the som at a reset location will always
+ * go forward */
+ if (plan.back().is_reset && stuck) {
+ NGHolder midfix;
+ fillHolderForLockCheck(&midfix, g, info, furthest_lock);
+
+ DEBUG_PRINTF("checking if midfix is suitable for lock\n");
+ if (!firstMatchIsFirst(midfix)) {
+ DEBUG_PRINTF("not stuck\n");
+ stuck = false;
+ }
+ }
+
+ assert(!plan.empty());
+ if (!addPlan(plan, plan.size() - 1)) {
+ return false;
+ }
+
+ to_end = false;
+
+ if (stuck && next_escapes.none()) {
+ picked = furthest_lock;
+ to_end = true;
+ }
+
+ if (!to_end) {
+ NGHolder conservative_midfix; /* for use in reset, exsl analysis */
+ fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
+
+ u32 old_bad_region = bad_region;
+ to_end = advancePlan(g, regions, conservative_midfix, stuck, picked,
+ furthest, furthest_lock, next_escapes,
+ plan.back(), &bad_region);
+
+ if (!to_end
+ && bad_region <= old_bad_region) { /* we failed to progress */
+ goto do_tree;
+ }
+ }
+
+ /* handle direct edge to accepts from region */
+ if (edge(furthest->second.exits.front(), g.accept, g).second
+ || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
+ map<u32, region_info>::const_iterator it = furthest;
+ do {
+ DEBUG_PRINTF("direct edge to accept from region %u\n",
+ it->first);
+ addReporterVertices(it->second, g, plan.back().reporters_in);
+ } while (it != info.begin() && it->second.optional
+ && (it--)->first);
+ }
+
+ /* create second prefix */
+ plan.back().prefix = makePrefix(g, regions, furthest->second,
+ next(furthest)->second);
+ }
+ DEBUG_PRINTF("(final) picked is %u\n", picked->first);
+
+ // The last region contributes reporters. If it's optional, the regions
+ // before it do as well.
+ map<u32, region_info>::const_reverse_iterator it = info.rbegin();
+ do {
+ DEBUG_PRINTF("region %u contributes reporters to last plan\n",
+ it->first);
+ addReporterVertices(it->second, g, plan.back().reporters);
+ } while (it->second.optional && it != info.rend() &&
+ (++it)->first > furthest->first);
+
+ DEBUG_PRINTF("done!\n");
+ return true;
+}
+
+static
+void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
+ UNUSED size_t num) {
+#if defined(DEBUG) || defined(DUMP_PLANS)
+ DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "
+ "parent=%u\n",
+ num, p.prefix.get(),
+ describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(),
+ p.is_reset, p.parent);
+ printf(" reporters:");
+ for (auto v : p.reporters) {
printf(" %zu", g[v].index);
- }
- printf("\n");
- printf(" reporters_in:");
- for (auto v : p.reporters_in) {
+ }
+ printf("\n");
+ printf(" reporters_in:");
+ for (auto v : p.reporters_in) {
printf(" %zu", g[v].index);
- }
- printf("\n");
-#endif
-}
-
-/**
- * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too
- * large exception as (1) if previous ng modification have been applied (other
- * midfixes have been applied), ng will be an undefined state on return and (2)
- * if the head of a pattern cannot be implemented we are generally unable to
- * implement the full pattern.
- */
-static
+ }
+ printf("\n");
+#endif
+}
+
+/**
+ * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too
+ * large exception as (1) if previous ng modification have been applied (other
+ * midfixes have been applied), ng will be an undefined state on return and (2)
+ * if the head of a pattern cannot be implemented we are generally unable to
+ * implement the full pattern.
+ */
+static
void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
NGHolder &g, vector<som_plan> &plan,
const u32 first_som_slot) {
- ReportManager &rm = ng.rm;
- SomSlotManager &ssm = ng.ssm;
-
- DEBUG_PRINTF("%zu plans\n", plan.size());
- assert(plan.size() <= MAX_SOM_PLANS);
- assert(!plan.empty());
-
- vector<u32> som_slots(plan.size());
- som_slots[0] = first_som_slot;
-
- // Root plan, which already has a SOM slot assigned (first_som_slot).
- dumpSomPlan(g, plan.front(), 0);
+ ReportManager &rm = ng.rm;
+ SomSlotManager &ssm = ng.ssm;
+
+ DEBUG_PRINTF("%zu plans\n", plan.size());
+ assert(plan.size() <= MAX_SOM_PLANS);
+ assert(!plan.empty());
+
+ vector<u32> som_slots(plan.size());
+ som_slots[0] = first_som_slot;
+
+ // Root plan, which already has a SOM slot assigned (first_som_slot).
+ dumpSomPlan(g, plan.front(), 0);
dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
ng.cc.grey);
- assert(plan.front().prefix);
- if (plan.front().escapes.any() && !plan.front().is_reset) {
- /* setup escaper for first som location */
- if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
- first_som_slot)) {
+ assert(plan.front().prefix);
+ if (plan.front().escapes.any() && !plan.front().is_reset) {
+ /* setup escaper for first som location */
+ if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
+ first_som_slot)) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- }
-
- assert(plan.front().reporters_in.empty());
- updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot);
-
- // Tree of plans, encoded in a vector.
- vector<som_plan>::const_iterator it = plan.begin();
- for (++it; it != plan.end(); ++it) {
- const u32 plan_num = it - plan.begin();
- dumpSomPlan(g, *it, plan_num);
+ }
+ }
+
+ assert(plan.front().reporters_in.empty());
+ updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot);
+
+ // Tree of plans, encoded in a vector.
+ vector<som_plan>::const_iterator it = plan.begin();
+ for (++it; it != plan.end(); ++it) {
+ const u32 plan_num = it - plan.begin();
+ dumpSomPlan(g, *it, plan_num);
dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
- plan_num, ng.cc.grey);
-
- assert(it->parent < plan_num);
- u32 som_slot_in = som_slots[it->parent];
- u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes,
- it->is_reset, som_slot_in);
- som_slots[plan_num] = som_slot_out;
-
- assert(!it->no_implement);
- if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
+ plan_num, ng.cc.grey);
+
+ assert(it->parent < plan_num);
+ u32 som_slot_in = som_slots[it->parent];
+ u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes,
+ it->is_reset, som_slot_in);
+ som_slots[plan_num] = som_slot_out;
+
+ assert(!it->no_implement);
+ if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
- updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
- }
-
- /* create prefix to set the som_loc */
- if (!plan.front().no_implement) {
+ }
+ updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
+ updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
+ }
+
+ /* create prefix to set the som_loc */
+ if (!plan.front().no_implement) {
renumber_vertices(*plan.front().prefix);
- assert(plan.front().prefix->kind == NFA_OUTFIX);
- if (!ng.addHolder(*plan.front().prefix)) {
+ assert(plan.front().prefix->kind == NFA_OUTFIX);
+ if (!ng.addHolder(*plan.front().prefix)) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- }
-}
-
-static
-void anchorStarts(NGHolder &g) {
- vector<NFAEdge> dead;
- for (const auto &e : out_edges_range(g.startDs, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
- add_edge_if_not_present(g.start, v, g[e], g);
- dead.push_back(e);
- }
- remove_edges(dead, g);
-}
-
-static
-void setZeroReports(NGHolder &g) {
- set<NFAVertex> acceptors;
- insert(&acceptors, inv_adjacent_vertices(g.accept, g));
- insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g));
- acceptors.erase(g.accept);
-
- for (auto v : vertices_range(g)) {
- auto &reports = g[v].reports;
- reports.clear();
-
- if (!contains(acceptors, v)) {
- continue;
- }
-
- // We use the report ID to store the offset adjustment used for virtual
- // starts.
-
- if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
- reports.insert(1);
- } else {
- reports.insert(0);
- }
- }
-}
-
-/* updates the reports on all vertices leading to the sink */
-static
-void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
- const ReportID report, const u32 comp_id) {
- // Construct replacement report.
- Report ir = rm.getReport(report);
- ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA;
- ir.revNfaIndex = comp_id;
- ReportID new_report = rm.getInternalId(ir);
-
- for (auto v : inv_adjacent_vertices_range(sink, g)) {
- if (v == g.accept) {
- continue;
- }
-
- auto &r = g[v].reports;
- if (contains(r, report)) {
- r.erase(report);
- r.insert(new_report);
- }
- }
-}
-
-static
-void clearProperInEdges(NGHolder &g, const NFAVertex sink) {
- vector<NFAEdge> dead;
- for (const auto &e : in_edges_range(sink, g)) {
- if (source(e, g) == g.accept) {
- continue;
- }
- dead.push_back(e);
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUseless(g, false);
-}
-
-namespace {
-struct SomRevNfa {
+ }
+ }
+}
+
+static
+void anchorStarts(NGHolder &g) {
+ vector<NFAEdge> dead;
+ for (const auto &e : out_edges_range(g.startDs, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
+ add_edge_if_not_present(g.start, v, g[e], g);
+ dead.push_back(e);
+ }
+ remove_edges(dead, g);
+}
+
+static
+void setZeroReports(NGHolder &g) {
+ set<NFAVertex> acceptors;
+ insert(&acceptors, inv_adjacent_vertices(g.accept, g));
+ insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g));
+ acceptors.erase(g.accept);
+
+ for (auto v : vertices_range(g)) {
+ auto &reports = g[v].reports;
+ reports.clear();
+
+ if (!contains(acceptors, v)) {
+ continue;
+ }
+
+ // We use the report ID to store the offset adjustment used for virtual
+ // starts.
+
+ if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
+ reports.insert(1);
+ } else {
+ reports.insert(0);
+ }
+ }
+}
+
+/* updates the reports on all vertices leading to the sink */
+static
+void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
+ const ReportID report, const u32 comp_id) {
+ // Construct replacement report.
+ Report ir = rm.getReport(report);
+ ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA;
+ ir.revNfaIndex = comp_id;
+ ReportID new_report = rm.getInternalId(ir);
+
+ for (auto v : inv_adjacent_vertices_range(sink, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+
+ auto &r = g[v].reports;
+ if (contains(r, report)) {
+ r.erase(report);
+ r.insert(new_report);
+ }
+ }
+}
+
+static
+void clearProperInEdges(NGHolder &g, const NFAVertex sink) {
+ vector<NFAEdge> dead;
+ for (const auto &e : in_edges_range(sink, g)) {
+ if (source(e, g) == g.accept) {
+ continue;
+ }
+ dead.push_back(e);
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUseless(g, false);
+}
+
+namespace {
+struct SomRevNfa {
SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n)
- : sink(s), report(r), nfa(move(n)) {}
- NFAVertex sink;
- ReportID report;
+ : sink(s), report(r), nfa(move(n)) {}
+ NFAVertex sink;
+ ReportID report;
bytecode_ptr<NFA> nfa;
-};
-}
-
-static
+};
+}
+
+static
bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g,
const CompileContext &cc) {
- // Create a reversed anchored version of this NFA which fires a zero report
- // ID on accept.
- NGHolder g_rev;
- reverseHolder(g, g_rev);
- anchorStarts(g_rev);
- setZeroReports(g_rev);
-
- // Prep for actual construction.
+ // Create a reversed anchored version of this NFA which fires a zero report
+ // ID on accept.
+ NGHolder g_rev;
+ reverseHolder(g, g_rev);
+ anchorStarts(g_rev);
+ setZeroReports(g_rev);
+
+ // Prep for actual construction.
renumber_vertices(g_rev);
- g_rev.kind = NFA_REV_PREFIX;
- reduceGraphEquivalences(g_rev, cc);
- removeRedundancy(g_rev, SOM_NONE);
-
- DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev));
-
+ g_rev.kind = NFA_REV_PREFIX;
+ reduceGraphEquivalences(g_rev, cc);
+ removeRedundancy(g_rev, SOM_NONE);
+
+ DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev));
+
auto nfa = constructReversedNFA(g_rev, cc);
- if (!nfa) {
- return nfa;
- }
-
- // Set some useful properties.
- depth maxWidth = findMaxWidth(g);
- if (maxWidth.is_finite()) {
- nfa->maxWidth = (u32)maxWidth;
- } else {
- nfa->maxWidth = 0;
- }
- depth minWidth = findMinWidth(g);
- nfa->minWidth = (u32)minWidth;
-
- return nfa;
-}
-
-static
-bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g,
- const ReportID report, const NFAVertex sink,
- const CompileContext &cc) {
- // Clone the graph with ONLY the given report vertices on the given sink.
- NGHolder g2;
- cloneHolder(g2, g);
- clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept);
- pruneAllOtherReports(g2, report);
-
- if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) {
- DEBUG_PRINTF("no work to do for this sink\n");
- return true;
- }
-
+ if (!nfa) {
+ return nfa;
+ }
+
+ // Set some useful properties.
+ depth maxWidth = findMaxWidth(g);
+ if (maxWidth.is_finite()) {
+ nfa->maxWidth = (u32)maxWidth;
+ } else {
+ nfa->maxWidth = 0;
+ }
+ depth minWidth = findMinWidth(g);
+ nfa->minWidth = (u32)minWidth;
+
+ return nfa;
+}
+
+static
+bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g,
+ const ReportID report, const NFAVertex sink,
+ const CompileContext &cc) {
+ // Clone the graph with ONLY the given report vertices on the given sink.
+ NGHolder g2;
+ cloneHolder(g2, g);
+ clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept);
+ pruneAllOtherReports(g2, report);
+
+ if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) {
+ DEBUG_PRINTF("no work to do for this sink\n");
+ return true;
+ }
+
renumber_vertices(g2); // for findMinWidth, findMaxWidth.
-
+
auto nfa = makeBareSomRevNfa(g2, cc);
- if (!nfa) {
- DEBUG_PRINTF("couldn't build rev nfa\n");
- return false;
- }
-
- som_nfas.emplace_back(sink, report, move(nfa));
- return true;
-}
-
-static
-bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
- ReportManager &rm = ng.rm;
-
- // FIXME might want to work on a graph without extra redundancy?
- depth maxWidth = findMaxWidth(g);
- DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
-
- if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) {
- DEBUG_PRINTF("too wide\n");
- return false;
- }
-
- set<ReportID> reports = all_reports(g);
- DEBUG_PRINTF("%zu reports\n", reports.size());
-
- // We distinguish between reports and accept/acceptEod sinks in order to
- // correctly handle cases which do different things on eod/normal accepts.
- // Later, it might be more elegant to do this with a single NFA and
- // multi-tops.
-
- vector<SomRevNfa> som_nfas;
-
- for (auto report : reports) {
- if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) {
- return false;
- }
- if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) {
- return false;
- }
- }
-
- for (auto &som_nfa : som_nfas) {
- assert(som_nfa.nfa);
-
- // Transfer ownership of the NFA to the SOM slot manager.
- u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth);
-
- // Replace this report on 'g' with a SOM_REV_NFA report pointing at our
- // new component.
- makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id);
- }
-
- if (ng.cc.streaming) {
- assert(ng.ssm.somHistoryRequired() <=
- max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
- }
-
- return true;
-}
-
-static
+ if (!nfa) {
+ DEBUG_PRINTF("couldn't build rev nfa\n");
+ return false;
+ }
+
+ som_nfas.emplace_back(sink, report, move(nfa));
+ return true;
+}
+
+static
+bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
+ ReportManager &rm = ng.rm;
+
+ // FIXME might want to work on a graph without extra redundancy?
+ depth maxWidth = findMaxWidth(g);
+ DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
+
+ if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) {
+ DEBUG_PRINTF("too wide\n");
+ return false;
+ }
+
+ set<ReportID> reports = all_reports(g);
+ DEBUG_PRINTF("%zu reports\n", reports.size());
+
+ // We distinguish between reports and accept/acceptEod sinks in order to
+ // correctly handle cases which do different things on eod/normal accepts.
+ // Later, it might be more elegant to do this with a single NFA and
+ // multi-tops.
+
+ vector<SomRevNfa> som_nfas;
+
+ for (auto report : reports) {
+ if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) {
+ return false;
+ }
+ if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) {
+ return false;
+ }
+ }
+
+ for (auto &som_nfa : som_nfas) {
+ assert(som_nfa.nfa);
+
+ // Transfer ownership of the NFA to the SOM slot manager.
+ u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth);
+
+ // Replace this report on 'g' with a SOM_REV_NFA report pointing at our
+ // new component.
+ makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id);
+ }
+
+ if (ng.cc.streaming) {
+ assert(ng.ssm.somHistoryRequired() <=
+ max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
+ }
+
+ return true;
+}
+
+static
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
- const CompileContext &cc) {
- depth maxWidth = findMaxWidth(g);
-
- assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable));
- assert(all_reports(g).size() == 1);
-
- auto nfa = makeBareSomRevNfa(g, cc);
- if (!nfa) {
+ const CompileContext &cc) {
+ depth maxWidth = findMaxWidth(g);
+
+ assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable));
+ assert(all_reports(g).size() == 1);
+
+ auto nfa = makeBareSomRevNfa(g, cc);
+ if (!nfa) {
throw CompileError(expr.index, "Pattern is too large.");
- }
-
- if (ng.cc.streaming) {
- assert(ng.ssm.somHistoryRequired() <=
- max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
- }
-
- return ng.ssm.addRevNfa(move(nfa), maxWidth);
-}
-
-static
-bool is_literable(const NGHolder &g, NFAVertex v) {
- const CharReach &cr = g[v].char_reach;
- return cr.count() == 1 || cr.isCaselessChar();
-}
-
-static
-void append(ue2_literal &s, const CharReach &cr) {
- assert(cr.count() == 1 || cr.isCaselessChar());
- s.push_back(cr.find_first(), cr.isCaselessChar());
-}
-
-static
-map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator lower_bound,
- ue2_literal &s_out, const Grey &grey) {
-#define MIN_LITERAL_LENGTH 3
- s_out.clear();
- bool past_lower = false;
- ue2_literal s;
- map<u32, region_info>::const_iterator it;
- for (it = info.begin(); it != info.end(); ++it) {
- if (it == lower_bound) {
- past_lower = true;
- }
- if (!it->second.optional && it->second.dag
- && it->second.full.size() == 1
- && is_literable(g, it->second.full.front())) {
- append(s, g[it->second.full.front()].char_reach);
-
- if (s.length() >= grey.maxHistoryAvailable && past_lower) {
- goto exit;
- }
- } else {
- if (past_lower && it != lower_bound
- && s.length() >= MIN_LITERAL_LENGTH) {
- --it;
- goto exit;
- }
- s.clear();
- }
- }
-
- if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) {
- --it;
- s_out = s;
- return it;
- }
- exit:
- if (s.length() > grey.maxHistoryAvailable) {
- ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable;
- for (; jt != s.end(); ++jt) {
- s_out.push_back(*jt);
- }
- } else {
- s_out = s;
- }
- return it;
-}
-
-static
-bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g,
+ }
+
+ if (ng.cc.streaming) {
+ assert(ng.ssm.somHistoryRequired() <=
+ max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
+ }
+
+ return ng.ssm.addRevNfa(move(nfa), maxWidth);
+}
+
+static
+bool is_literable(const NGHolder &g, NFAVertex v) {
+ const CharReach &cr = g[v].char_reach;
+ return cr.count() == 1 || cr.isCaselessChar();
+}
+
+static
+void append(ue2_literal &s, const CharReach &cr) {
+ assert(cr.count() == 1 || cr.isCaselessChar());
+ s.push_back(cr.find_first(), cr.isCaselessChar());
+}
+
+static
+map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator lower_bound,
+ ue2_literal &s_out, const Grey &grey) {
+#define MIN_LITERAL_LENGTH 3
+ s_out.clear();
+ bool past_lower = false;
+ ue2_literal s;
+ map<u32, region_info>::const_iterator it;
+ for (it = info.begin(); it != info.end(); ++it) {
+ if (it == lower_bound) {
+ past_lower = true;
+ }
+ if (!it->second.optional && it->second.dag
+ && it->second.full.size() == 1
+ && is_literable(g, it->second.full.front())) {
+ append(s, g[it->second.full.front()].char_reach);
+
+ if (s.length() >= grey.maxHistoryAvailable && past_lower) {
+ goto exit;
+ }
+ } else {
+ if (past_lower && it != lower_bound
+ && s.length() >= MIN_LITERAL_LENGTH) {
+ --it;
+ goto exit;
+ }
+ s.clear();
+ }
+ }
+
+ if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) {
+ --it;
+ s_out = s;
+ return it;
+ }
+ exit:
+ if (s.length() > grey.maxHistoryAvailable) {
+ ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable;
+ for (; jt != s.end(); ++jt) {
+ s_out.push_back(*jt);
+ }
+ } else {
+ s_out = s;
+ }
+ return it;
+}
+
+static
+bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked,
- const Grey &grey,
- vector<som_plan> *plan) {
- DEBUG_PRINTF("trying to chain from %u\n", picked->first);
- const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
-
- shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
- next(picked)->second);
-
- // Quick check to stop us from trying this on huge graphs, which causes us
- // to spend forever in ng_execute looking at cases that will most like
- // fail. See UE-2078.
- size_t prefix_size = num_vertices(*prefix);
- size_t total_size = num_vertices(g);
- assert(total_size >= prefix_size);
- if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) {
- DEBUG_PRINTF("suffix has %zu vertices, fail\n",
- total_size - prefix_size);
- return false;
- }
-
- clearReports(*prefix);
- for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) {
- (*prefix)[u].reports.insert(0);
- }
-
- dumpHolder(*prefix, 0, "full_haiglit_prefix", grey);
-
- CharReach escapes;
- bool stuck = isPossibleLock(g, picked, info, &escapes);
- if (stuck) {
- NGHolder gg;
- fillHolderForLockCheck(&gg, g, info, picked);
-
- stuck = firstMatchIsFirst(gg);
- }
-
- DEBUG_PRINTF("stuck = %d\n", (int)stuck);
-
- // Note: no-one should ever pay attention to the root plan's som_loc_in.
- plan->emplace_back(prefix, escapes, false, 0);
- plan->back().no_implement = true;
-
- dumpHolder(*plan->back().prefix, 22, "som_prefix", grey);
-
- /* don't allow tree planning to mutate the graph */
- if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey,
- DISALLOW_MODIFY_HOLDER)) {
- // Rollback SOM locations.
- ssm.rollbackSomTo(numSomLocsBefore);
-
- DEBUG_PRINTF("fail to chain\n");
- return false;
- }
-
- return true;
-}
-
-static
-void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
- ReportID haig_report_id = rose.getNewNfaReport();
- DEBUG_PRINTF("setting report id of %u\n", haig_report_id);
-
- clearReports(h);
- for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
- h[u].reports.clear();
- h[u].reports.insert(haig_report_id);
- }
-}
-
-static
-bool tryHaig(RoseBuild &rose, NGHolder &g,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked,
+ const Grey &grey,
+ vector<som_plan> *plan) {
+ DEBUG_PRINTF("trying to chain from %u\n", picked->first);
+ const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
+
+ shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
+ next(picked)->second);
+
+ // Quick check to stop us from trying this on huge graphs, which causes us
+ // to spend forever in ng_execute looking at cases that will most like
+ // fail. See UE-2078.
+ size_t prefix_size = num_vertices(*prefix);
+ size_t total_size = num_vertices(g);
+ assert(total_size >= prefix_size);
+ if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) {
+ DEBUG_PRINTF("suffix has %zu vertices, fail\n",
+ total_size - prefix_size);
+ return false;
+ }
+
+ clearReports(*prefix);
+ for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) {
+ (*prefix)[u].reports.insert(0);
+ }
+
+ dumpHolder(*prefix, 0, "full_haiglit_prefix", grey);
+
+ CharReach escapes;
+ bool stuck = isPossibleLock(g, picked, info, &escapes);
+ if (stuck) {
+ NGHolder gg;
+ fillHolderForLockCheck(&gg, g, info, picked);
+
+ stuck = firstMatchIsFirst(gg);
+ }
+
+ DEBUG_PRINTF("stuck = %d\n", (int)stuck);
+
+ // Note: no-one should ever pay attention to the root plan's som_loc_in.
+ plan->emplace_back(prefix, escapes, false, 0);
+ plan->back().no_implement = true;
+
+ dumpHolder(*plan->back().prefix, 22, "som_prefix", grey);
+
+ /* don't allow tree planning to mutate the graph */
+ if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey,
+ DISALLOW_MODIFY_HOLDER)) {
+ // Rollback SOM locations.
+ ssm.rollbackSomTo(numSomLocsBefore);
+
+ DEBUG_PRINTF("fail to chain\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
+ ReportID haig_report_id = rose.getNewNfaReport();
+ DEBUG_PRINTF("setting report id of %u\n", haig_report_id);
+
+ clearReports(h);
+ for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
+ h[u].reports.clear();
+ h[u].reports.insert(haig_report_id);
+ }
+}
+
+static
+bool tryHaig(RoseBuild &rose, NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- som_type som, u32 somPrecision,
- map<u32, region_info>::const_iterator picked,
- shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix,
- const Grey &grey) {
- DEBUG_PRINTF("trying to build a haig\n");
- shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
- next(picked)->second);
- prefix->kind = NFA_PREFIX;
- setReportOnHaigPrefix(rose, *prefix);
- dumpHolder(*prefix, 0, "haig_prefix", grey);
- vector<vector<CharReach> > triggers; /* empty for prefix */
- *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey);
- if (!*haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- *haig_prefix = prefix;
- return true;
-}
-
-static
-void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
- const shared_ptr<raw_som_dfa> &haig,
- const ue2_literal &lit, const set<ReportID> &reports) {
- assert(prefix && haig);
-
- DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str());
-
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig);
-
- assert(!reports.empty());
- RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
-
- calcVertexOffsets(ig);
-
- UNUSED bool rv = tb.addSombeRose(ig);
- assert(rv); // TODO: recover from addRose failure
-}
-
-static
+ som_type som, u32 somPrecision,
+ map<u32, region_info>::const_iterator picked,
+ shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix,
+ const Grey &grey) {
+ DEBUG_PRINTF("trying to build a haig\n");
+ shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
+ next(picked)->second);
+ prefix->kind = NFA_PREFIX;
+ setReportOnHaigPrefix(rose, *prefix);
+ dumpHolder(*prefix, 0, "haig_prefix", grey);
+ vector<vector<CharReach> > triggers; /* empty for prefix */
+ *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey);
+ if (!*haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ *haig_prefix = prefix;
+ return true;
+}
+
+static
+void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
+ const shared_ptr<raw_som_dfa> &haig,
+ const ue2_literal &lit, const set<ReportID> &reports) {
+ assert(prefix && haig);
+
+ DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str());
+
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig);
+
+ assert(!reports.empty());
+ RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
+
+ calcVertexOffsets(ig);
+
+ UNUSED bool rv = tb.addSombeRose(ig);
+ assert(rv); // TODO: recover from addRose failure
+}
+
+static
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
u32 comp_id, som_type som,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator lower_bound) {
- DEBUG_PRINTF("entry\n");
- assert(g.kind == NFA_OUTFIX);
- const CompileContext &cc = ng.cc;
- ReportManager &rm = ng.rm;
- SomSlotManager &ssm = ng.ssm;
-
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator lower_bound) {
+ DEBUG_PRINTF("entry\n");
+ assert(g.kind == NFA_OUTFIX);
+ const CompileContext &cc = ng.cc;
+ ReportManager &rm = ng.rm;
+ SomSlotManager &ssm = ng.ssm;
+
if (!cc.grey.allowHaigLit) {
- return SOMBE_FAIL;
- }
-
- const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
- u32 som_loc = ssm.getPrivateSomSlot();
-
+ return SOMBE_FAIL;
+ }
+
+ const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
+ u32 som_loc = ssm.getPrivateSomSlot();
+
if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) {
- // This is an optimisation: if we can't build a Haig from a portion of
- // the graph, then we won't be able to manage it as an outfix either
- // when we fall back.
+ // This is an optimisation: if we can't build a Haig from a portion of
+ // the graph, then we won't be able to manage it as an outfix either
+ // when we fall back.
throw CompileError(expr.index, "Pattern is too large.");
- }
-
- while (1) {
- DEBUG_PRINTF("lower bound is %u\n", lower_bound->first);
- ue2_literal s;
- map<u32, region_info>::const_iterator lit
- = findLaterLiteral(g, info, lower_bound, s, cc.grey);
- if (lit == info.end()) {
- DEBUG_PRINTF("failed to find literal\n");
- ssm.rollbackSomTo(numSomLocsBefore);
- return SOMBE_FAIL;
- }
- DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
- lit->first);
-
- if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
- DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
- lower_bound = lit;
- ++lower_bound;
- continue;
- }
-
- shared_ptr<raw_som_dfa> haig;
- shared_ptr<NGHolder> haig_prefix;
- map<u32, region_info>::const_iterator haig_reg = lit;
-
- if (edge(lit->second.exits.front(), g.acceptEod, g).second) {
- /* TODO: handle */
- ssm.rollbackSomTo(numSomLocsBefore);
- return SOMBE_FAIL;
- }
-
- advance(haig_reg, -(s32)s.length());
-
- if (!haig_reg->first && haig_reg->second.full.size() == 2) {
- /* just starts */
-
- /* TODO: make below assertion true, reset checks could be stronger
- * (12356)
- */
- /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey,
- &plan)); */
-
- lower_bound = lit;
- ++lower_bound;
- continue; /* somebody else should have been able to chain */
- }
-
- bool ok = true;
- set<ReportID> rep;
- if (next(lit) != info.end()) {
- /* non terminal literal */
-
- /* TODO: handle edges to accept ? */
- vector<som_plan> plan;
- if (edge(lit->second.exits.front(), g.accept, g).second) {
- insert(&rep, g[lit->second.exits.front()].reports);
- remove_edge(lit->second.exits.front(), g.accept, g);
- g[lit->second.exits.front()].reports.clear();
-
- /* Note: we can mess with the graph as this is the last literal
- * we will find and on failure the graph will be thrown away */
- }
-
- ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit,
- cc.grey, &plan);
- ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(),
- haig_reg, &haig, &haig_prefix, cc.grey);
-
- if (!ok) {
- DEBUG_PRINTF(":( going to next attempt\n");
- goto next_try;
- }
-
+ }
+
+ while (1) {
+ DEBUG_PRINTF("lower bound is %u\n", lower_bound->first);
+ ue2_literal s;
+ map<u32, region_info>::const_iterator lit
+ = findLaterLiteral(g, info, lower_bound, s, cc.grey);
+ if (lit == info.end()) {
+ DEBUG_PRINTF("failed to find literal\n");
+ ssm.rollbackSomTo(numSomLocsBefore);
+ return SOMBE_FAIL;
+ }
+ DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
+ lit->first);
+
+ if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
+ DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
+ lower_bound = lit;
+ ++lower_bound;
+ continue;
+ }
+
+ shared_ptr<raw_som_dfa> haig;
+ shared_ptr<NGHolder> haig_prefix;
+ map<u32, region_info>::const_iterator haig_reg = lit;
+
+ if (edge(lit->second.exits.front(), g.acceptEod, g).second) {
+ /* TODO: handle */
+ ssm.rollbackSomTo(numSomLocsBefore);
+ return SOMBE_FAIL;
+ }
+
+ advance(haig_reg, -(s32)s.length());
+
+ if (!haig_reg->first && haig_reg->second.full.size() == 2) {
+ /* just starts */
+
+ /* TODO: make below assertion true, reset checks could be stronger
+ * (12356)
+ */
+ /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey,
+ &plan)); */
+
+ lower_bound = lit;
+ ++lower_bound;
+ continue; /* somebody else should have been able to chain */
+ }
+
+ bool ok = true;
+ set<ReportID> rep;
+ if (next(lit) != info.end()) {
+ /* non terminal literal */
+
+ /* TODO: handle edges to accept ? */
+ vector<som_plan> plan;
+ if (edge(lit->second.exits.front(), g.accept, g).second) {
+ insert(&rep, g[lit->second.exits.front()].reports);
+ remove_edge(lit->second.exits.front(), g.accept, g);
+ g[lit->second.exits.front()].reports.clear();
+
+ /* Note: we can mess with the graph as this is the last literal
+ * we will find and on failure the graph will be thrown away */
+ }
+
+ ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit,
+ cc.grey, &plan);
+ ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(),
+ haig_reg, &haig, &haig_prefix, cc.grey);
+
+ if (!ok) {
+ DEBUG_PRINTF(":( going to next attempt\n");
+ goto next_try;
+ }
+
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
-
- Report ir = makeCallback(0U, 0);
- assert(!plan.empty());
- if (plan.front().is_reset) {
- ir.type = INTERNAL_SOM_LOC_SET_FROM;
- } else {
- ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE;
- }
- ir.onmatch = som_loc;
- rep.insert(rm.getInternalId(ir));
- } else {
- /* terminal literal */
- ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg,
- &haig, &haig_prefix, cc.grey);
-
- /* find report */
- insert(&rep, g[lit->second.exits.front()].reports);
-
- /* TODO: som_loc is unused */
- }
-
- if (ok) {
- roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep);
- if (next(lit) != info.end()) {
- return SOMBE_HANDLED_INTERNAL;
- } else {
- ssm.rollbackSomTo(numSomLocsBefore);
- return SOMBE_HANDLED_ALL;
- }
- }
-next_try:
- lower_bound = lit;
- ++lower_bound;
- }
- assert(0);
- return SOMBE_FAIL;
-}
-
-static
-bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits,
- set<NFAVertex> *terminals) {
- /* TODO: smarter (topo) */
-#define MAX_LEADING_LITERALS 20
- set<NFAVertex> s_succ;
- insert(&s_succ, adjacent_vertices(g.start, g));
-
- set<NFAVertex> sds_succ;
- insert(&sds_succ, adjacent_vertices(g.startDs, g));
-
- if (!is_subset_of(s_succ, sds_succ)) {
- DEBUG_PRINTF("not floating\n");
- return false;
- }
-
- sds_succ.erase(g.startDs);
-
- map<NFAVertex, vector<ue2_literal> > curr;
- curr[g.startDs].push_back(ue2_literal());
-
- map<NFAVertex, set<NFAVertex> > seen;
- map<NFAVertex, vector<ue2_literal> > next;
-
- bool did_expansion = true;
- while (did_expansion) {
- did_expansion = false;
- u32 count = 0;
- assert(!curr.empty());
- for (const auto &m : curr) {
- const NFAVertex u = m.first;
- const vector<ue2_literal> &base = m.second;
+
+ Report ir = makeCallback(0U, 0);
+ assert(!plan.empty());
+ if (plan.front().is_reset) {
+ ir.type = INTERNAL_SOM_LOC_SET_FROM;
+ } else {
+ ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE;
+ }
+ ir.onmatch = som_loc;
+ rep.insert(rm.getInternalId(ir));
+ } else {
+ /* terminal literal */
+ ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg,
+ &haig, &haig_prefix, cc.grey);
+
+ /* find report */
+ insert(&rep, g[lit->second.exits.front()].reports);
+
+ /* TODO: som_loc is unused */
+ }
+
+ if (ok) {
+ roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep);
+ if (next(lit) != info.end()) {
+ return SOMBE_HANDLED_INTERNAL;
+ } else {
+ ssm.rollbackSomTo(numSomLocsBefore);
+ return SOMBE_HANDLED_ALL;
+ }
+ }
+next_try:
+ lower_bound = lit;
+ ++lower_bound;
+ }
+ assert(0);
+ return SOMBE_FAIL;
+}
+
+static
+bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits,
+ set<NFAVertex> *terminals) {
+ /* TODO: smarter (topo) */
+#define MAX_LEADING_LITERALS 20
+ set<NFAVertex> s_succ;
+ insert(&s_succ, adjacent_vertices(g.start, g));
+
+ set<NFAVertex> sds_succ;
+ insert(&sds_succ, adjacent_vertices(g.startDs, g));
+
+ if (!is_subset_of(s_succ, sds_succ)) {
+ DEBUG_PRINTF("not floating\n");
+ return false;
+ }
+
+ sds_succ.erase(g.startDs);
+
+ map<NFAVertex, vector<ue2_literal> > curr;
+ curr[g.startDs].push_back(ue2_literal());
+
+ map<NFAVertex, set<NFAVertex> > seen;
+ map<NFAVertex, vector<ue2_literal> > next;
+
+ bool did_expansion = true;
+ while (did_expansion) {
+ did_expansion = false;
+ u32 count = 0;
+ assert(!curr.empty());
+ for (const auto &m : curr) {
+ const NFAVertex u = m.first;
+ const vector<ue2_literal> &base = m.second;
DEBUG_PRINTF("expanding from %zu\n", g[u].index);
- for (auto v : adjacent_vertices_range(u, g)) {
- if (v == g.startDs) {
- continue;
- }
- if (contains(seen[u], v)) {
- DEBUG_PRINTF("loop\n");
- goto skip_to_next_terminal;
- }
- if (is_any_accept(v, g) || is_match_vertex(v, g)) {
- DEBUG_PRINTF("match\n");
- goto skip_to_next_terminal;
- }
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ if (contains(seen[u], v)) {
+ DEBUG_PRINTF("loop\n");
+ goto skip_to_next_terminal;
+ }
+ if (is_any_accept(v, g) || is_match_vertex(v, g)) {
+ DEBUG_PRINTF("match\n");
+ goto skip_to_next_terminal;
+ }
if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) {
- DEBUG_PRINTF("wide\n");
- goto skip_to_next_terminal;
- }
- }
-
- for (auto v : adjacent_vertices_range(u, g)) {
- assert(!contains(seen[u], v));
- if (v == g.startDs) {
- continue;
- }
- insert(&seen[v], seen[u]);
- seen[v].insert(v);
- CharReach cr = g[v].char_reach;
- vector<ue2_literal> &out = next[v];
-
+ DEBUG_PRINTF("wide\n");
+ goto skip_to_next_terminal;
+ }
+ }
+
+ for (auto v : adjacent_vertices_range(u, g)) {
+ assert(!contains(seen[u], v));
+ if (v == g.startDs) {
+ continue;
+ }
+ insert(&seen[v], seen[u]);
+ seen[v].insert(v);
+ CharReach cr = g[v].char_reach;
+ vector<ue2_literal> &out = next[v];
+
DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index,
cr.count());
- for (size_t c = cr.find_first(); c != CharReach::npos;
- c = cr.find_next(c)) {
- bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
- && cr.test(mytolower(c));
-
- if (nocase && (char)c == mytolower(c)) {
- continue; /* uppercase already handled us */
- }
-
- for (const auto &lit : base) {
- if (count >= MAX_LEADING_LITERALS) {
- DEBUG_PRINTF("count %u\n", count);
- goto exit;
- }
- did_expansion = true;
- out.push_back(lit);
- out.back().push_back(c, nocase);
- count++;
- if (out.back().length() > MAX_MASK2_WIDTH
- && mixed_sensitivity(out.back())) {
- goto exit;
- }
-
- }
- }
- }
- if (0) {
- skip_to_next_terminal:
- insert(&next[u], next[u].end(), base);
- count += base.size();
- if (count > MAX_LEADING_LITERALS) {
- DEBUG_PRINTF("count %u\n", count);
- goto exit;
- }
- }
- }
-
- curr.swap(next);
- next.clear();
- };
- exit:;
- for (const auto &m : curr) {
- NFAVertex t = m.first;
- if (t == g.startDs) {
- assert(curr.size() == 1);
- return false;
- }
- assert(!is_special(t, g));
- terminals->insert(t);
- insert(lits, m.second);
- }
- assert(lits->size() <= MAX_LEADING_LITERALS);
- return !lits->empty();
-}
-
-static
-bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out,
- NGHolder *rhs) {
- DEBUG_PRINTF("looking for a leading literals\n");
-
- set<NFAVertex> terms;
- if (!leadingLiterals(g, lit_out, &terms)) {
- return false;
- }
-
- for (UNUSED const auto &lit : *lit_out) {
- DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(),
- lit.length());
- }
-
- /* need to validate that it is a clean split */
- assert(!terms.empty());
- set<NFAVertex> adj_term1;
- insert(&adj_term1, adjacent_vertices(*terms.begin(), g));
- for (auto v : terms) {
+ for (size_t c = cr.find_first(); c != CharReach::npos;
+ c = cr.find_next(c)) {
+ bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
+ && cr.test(mytolower(c));
+
+ if (nocase && (char)c == mytolower(c)) {
+ continue; /* uppercase already handled us */
+ }
+
+ for (const auto &lit : base) {
+ if (count >= MAX_LEADING_LITERALS) {
+ DEBUG_PRINTF("count %u\n", count);
+ goto exit;
+ }
+ did_expansion = true;
+ out.push_back(lit);
+ out.back().push_back(c, nocase);
+ count++;
+ if (out.back().length() > MAX_MASK2_WIDTH
+ && mixed_sensitivity(out.back())) {
+ goto exit;
+ }
+
+ }
+ }
+ }
+ if (0) {
+ skip_to_next_terminal:
+ insert(&next[u], next[u].end(), base);
+ count += base.size();
+ if (count > MAX_LEADING_LITERALS) {
+ DEBUG_PRINTF("count %u\n", count);
+ goto exit;
+ }
+ }
+ }
+
+ curr.swap(next);
+ next.clear();
+ };
+ exit:;
+ for (const auto &m : curr) {
+ NFAVertex t = m.first;
+ if (t == g.startDs) {
+ assert(curr.size() == 1);
+ return false;
+ }
+ assert(!is_special(t, g));
+ terminals->insert(t);
+ insert(lits, m.second);
+ }
+ assert(lits->size() <= MAX_LEADING_LITERALS);
+ return !lits->empty();
+}
+
+static
+bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out,
+ NGHolder *rhs) {
+ DEBUG_PRINTF("looking for a leading literals\n");
+
+ set<NFAVertex> terms;
+ if (!leadingLiterals(g, lit_out, &terms)) {
+ return false;
+ }
+
+ for (UNUSED const auto &lit : *lit_out) {
+ DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(),
+ lit.length());
+ }
+
+ /* need to validate that it is a clean split */
+ assert(!terms.empty());
+ set<NFAVertex> adj_term1;
+ insert(&adj_term1, adjacent_vertices(*terms.begin(), g));
+ for (auto v : terms) {
DEBUG_PRINTF("term %zu\n", g[v].index);
- set<NFAVertex> temp;
- insert(&temp, adjacent_vertices(v, g));
- if (temp != adj_term1) {
- DEBUG_PRINTF("bad split\n");
- return false;
- }
- }
-
+ set<NFAVertex> temp;
+ insert(&temp, adjacent_vertices(v, g));
+ if (temp != adj_term1) {
+ DEBUG_PRINTF("bad split\n");
+ return false;
+ }
+ }
+
unordered_map<NFAVertex, NFAVertex> rhs_map;
- vector<NFAVertex> pivots;
- insert(&pivots, pivots.end(), adj_term1);
- splitRHS(g, pivots, rhs, &rhs_map);
-
- assert(is_triggered(*rhs));
- return true;
-}
-
-static
-void findBestLiteral(const NGHolder &g,
+ vector<NFAVertex> pivots;
+ insert(&pivots, pivots.end(), adj_term1);
+ splitRHS(g, pivots, rhs, &rhs_map);
+
+ assert(is_triggered(*rhs));
+ return true;
+}
+
+static
+void findBestLiteral(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- ue2_literal *lit_out, NFAVertex *v,
- const CompileContext &cc) {
- map<u32, region_info> info;
- buildRegionMapping(g, regions, info, false);
-
- ue2_literal best;
+ ue2_literal *lit_out, NFAVertex *v,
+ const CompileContext &cc) {
+ map<u32, region_info> info;
+ buildRegionMapping(g, regions, info, false);
+
+ ue2_literal best;
NFAVertex best_v = NGHolder::null_vertex();
-
- map<u32, region_info>::const_iterator lit = info.begin();
- while (1) {
- ue2_literal s;
- lit = findLaterLiteral(g, info, lit, s, cc.grey);
- if (lit == info.end()) {
- break;
- }
- DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
- lit->first);
-
- if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
- DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
- ++lit;
- continue;
- }
-
- if (s.length() > best.length()) {
- best = s;
- assert(!lit->second.exits.empty());
- best_v = lit->second.exits[0];
- }
-
- ++lit;
- }
-
- lit_out->swap(best);
- *v = best_v;
-}
-
-static
-bool splitOffBestLiteral(const NGHolder &g,
+
+ map<u32, region_info>::const_iterator lit = info.begin();
+ while (1) {
+ ue2_literal s;
+ lit = findLaterLiteral(g, info, lit, s, cc.grey);
+ if (lit == info.end()) {
+ break;
+ }
+ DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
+ lit->first);
+
+ if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
+ DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
+ ++lit;
+ continue;
+ }
+
+ if (s.length() > best.length()) {
+ best = s;
+ assert(!lit->second.exits.empty());
+ best_v = lit->second.exits[0];
+ }
+
+ ++lit;
+ }
+
+ lit_out->swap(best);
+ *v = best_v;
+}
+
+static
+bool splitOffBestLiteral(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs,
- const CompileContext &cc) {
+ ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs,
+ const CompileContext &cc) {
NFAVertex v = NGHolder::null_vertex();
-
- findBestLiteral(g, regions, lit_out, &v, cc);
- if (lit_out->empty()) {
- return false;
- }
-
- DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str());
-
+
+ findBestLiteral(g, regions, lit_out, &v, cc);
+ if (lit_out->empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str());
+
unordered_map<NFAVertex, NFAVertex> lhs_map;
unordered_map<NFAVertex, NFAVertex> rhs_map;
-
- splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map);
-
+
+ splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map);
+
DEBUG_PRINTF("v = %zu\n", g[v].index);
-
- return true;
-}
-
+
+ return true;
+}
+
/**
* Replace the given graph's EXTERNAL_CALLBACK reports with
* EXTERNAL_CALLBACK_SOM_PASS reports.
@@ -2442,706 +2442,706 @@ void makeReportsSomPass(ReportManager &rm, NGHolder &g) {
}
}
-static
-bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
- ue2_literal lit;
- shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
- if (!ng.cc.grey.allowLitHaig) {
- return false;
- }
-
- dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
-
- if (!splitOffLeadingLiteral(g, &lit, &*rhs)) {
- DEBUG_PRINTF("no literal\n");
- return false;
- }
-
- if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
- DEBUG_PRINTF("lit too short\n");
- return false;
- }
-
- assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
-
+static
+bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
+ ue2_literal lit;
+ shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
+ if (!ng.cc.grey.allowLitHaig) {
+ return false;
+ }
+
+ dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
+
+ if (!splitOffLeadingLiteral(g, &lit, &*rhs)) {
+ DEBUG_PRINTF("no literal\n");
+ return false;
+ }
+
+ if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
+ DEBUG_PRINTF("lit too short\n");
+ return false;
+ }
+
+ assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
+
makeReportsSomPass(ng.rm, *rhs);
- dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
-
- vector<vector<CharReach> > triggers;
- triggers.push_back(as_cr_seq(lit));
-
- assert(rhs->kind == NFA_SUFFIX);
- shared_ptr<raw_som_dfa> haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
- ng.cc.grey, false /* lit implies adv som */);
- if (!haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- DEBUG_PRINTF("haig %p\n", haig.get());
-
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
- add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
-
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- add_edge(v, a, RoseInEdgeProps(haig), ig);
-
- calcVertexOffsets(ig);
-
- return ng.rose->addSombeRose(ig);
-}
-
-static
-bool doHaigLitHaigSom(NG &ng, NGHolder &g,
+ dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
+
+ vector<vector<CharReach> > triggers;
+ triggers.push_back(as_cr_seq(lit));
+
+ assert(rhs->kind == NFA_SUFFIX);
+ shared_ptr<raw_som_dfa> haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
+ ng.cc.grey, false /* lit implies adv som */);
+ if (!haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ DEBUG_PRINTF("haig %p\n", haig.get());
+
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+ add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
+
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ add_edge(v, a, RoseInEdgeProps(haig), ig);
+
+ calcVertexOffsets(ig);
+
+ return ng.rose->addSombeRose(ig);
+}
+
+static
+bool doHaigLitHaigSom(NG &ng, NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- som_type som) {
- if (!ng.cc.grey.allowLitHaig) {
- return false;
- }
-
- // In streaming mode, we can only delay up to our max available history.
- const u32 max_delay =
- ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX;
-
- ue2_literal lit;
- shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
- shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
- if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
- return false;
- }
-
- DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(),
- lit.length());
-
- if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
- DEBUG_PRINTF("lit too short\n");
- return false;
- }
-
- assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
-
- if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
- return false; /* TODO: handle */
- }
-
+ som_type som) {
+ if (!ng.cc.grey.allowLitHaig) {
+ return false;
+ }
+
+ // In streaming mode, we can only delay up to our max available history.
+ const u32 max_delay =
+ ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX;
+
+ ue2_literal lit;
+ shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
+ shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
+ if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(),
+ lit.length());
+
+ if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
+ DEBUG_PRINTF("lit too short\n");
+ return false;
+ }
+
+ assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
+
+ if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
+ return false; /* TODO: handle */
+ }
+
makeReportsSomPass(ng.rm, *rhs);
- dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey);
- dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey);
-
- u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay);
-
- RoseInGraph ig;
- RoseInVertex s
- = add_vertex(RoseInVertexProps::makeStart(false), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- bool lhs_all_vac = true;
+ dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey);
+ dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey);
+
+ u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay);
+
+ RoseInGraph ig;
+ RoseInVertex s
+ = add_vertex(RoseInVertexProps::makeStart(false), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ bool lhs_all_vac = true;
NGHolder::adjacency_iterator ai, ae;
- for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs);
- ai != ae && lhs_all_vac; ++ai) {
- if (!is_special(*ai, *lhs)) {
- lhs_all_vac = false;
- }
- }
- for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs);
- ai != ae && lhs_all_vac; ++ai) {
- if (!is_special(*ai, *lhs)) {
- lhs_all_vac = false;
- }
- }
-
- if (lhs_all_vac) {
- /* lhs is completely vacuous --> no prefix needed */
- add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
- } else {
- assert(delay == lit.length());
- setReportOnHaigPrefix(*ng.rose, *lhs);
- vector<vector<CharReach> > prefix_triggers; /* empty for prefix */
- assert(lhs->kind == NFA_PREFIX);
- shared_ptr<raw_som_dfa> l_haig
- = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(),
- prefix_triggers, ng.cc.grey);
- if (!l_haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- DEBUG_PRINTF("lhs haig %p\n", l_haig.get());
-
- add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig);
- }
-
- if (!edge(rhs->start, rhs->accept, *rhs).second) {
- assert(rhs->kind == NFA_SUFFIX);
-
- vector<vector<CharReach> > triggers;
- triggers.push_back(as_cr_seq(lit));
-
- ue2_literal lit2;
- if (getTrailingLiteral(g, &lit2)
- && lit2.length() >= ng.cc.grey.minRoseLiteralLength
- && minStringPeriod(lit2) >= 2) {
-
- /* TODO: handle delay */
- size_t overlap = maxOverlap(lit, lit2, 0);
- u32 delay2 = min((size_t)max_delay, lit2.length() - overlap);
- delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2);
- rhs->kind = NFA_INFIX;
- assert(delay2 <= lit2.length());
- setReportOnHaigPrefix(*ng.rose, *rhs);
-
- shared_ptr<raw_som_dfa> m_haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
- triggers, ng.cc.grey, true);
- DEBUG_PRINTF("mhs haig %p\n", m_haig.get());
- if (!m_haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
-
- RoseInVertex w
- = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
- add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig);
-
- NFAVertex reporter = getSoleSourceVertex(g, g.accept);
- assert(reporter);
- const auto &reports = g[reporter].reports;
- RoseInVertex a =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(w, a, RoseInEdgeProps(0U, 0U), ig);
- } else {
- /* TODO: analysis to see if som is in fact always increasing */
- shared_ptr<raw_som_dfa> r_haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
- triggers, ng.cc.grey, true);
- DEBUG_PRINTF("rhs haig %p\n", r_haig.get());
- if (!r_haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()),
- ig);
- add_edge(v, a, RoseInEdgeProps(r_haig), ig);
- }
- } else {
- DEBUG_PRINTF("has start->accept edge\n");
+ for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs);
+ ai != ae && lhs_all_vac; ++ai) {
+ if (!is_special(*ai, *lhs)) {
+ lhs_all_vac = false;
+ }
+ }
+ for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs);
+ ai != ae && lhs_all_vac; ++ai) {
+ if (!is_special(*ai, *lhs)) {
+ lhs_all_vac = false;
+ }
+ }
+
+ if (lhs_all_vac) {
+ /* lhs is completely vacuous --> no prefix needed */
+ add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
+ } else {
+ assert(delay == lit.length());
+ setReportOnHaigPrefix(*ng.rose, *lhs);
+ vector<vector<CharReach> > prefix_triggers; /* empty for prefix */
+ assert(lhs->kind == NFA_PREFIX);
+ shared_ptr<raw_som_dfa> l_haig
+ = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(),
+ prefix_triggers, ng.cc.grey);
+ if (!l_haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ DEBUG_PRINTF("lhs haig %p\n", l_haig.get());
+
+ add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig);
+ }
+
+ if (!edge(rhs->start, rhs->accept, *rhs).second) {
+ assert(rhs->kind == NFA_SUFFIX);
+
+ vector<vector<CharReach> > triggers;
+ triggers.push_back(as_cr_seq(lit));
+
+ ue2_literal lit2;
+ if (getTrailingLiteral(g, &lit2)
+ && lit2.length() >= ng.cc.grey.minRoseLiteralLength
+ && minStringPeriod(lit2) >= 2) {
+
+ /* TODO: handle delay */
+ size_t overlap = maxOverlap(lit, lit2, 0);
+ u32 delay2 = min((size_t)max_delay, lit2.length() - overlap);
+ delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2);
+ rhs->kind = NFA_INFIX;
+ assert(delay2 <= lit2.length());
+ setReportOnHaigPrefix(*ng.rose, *rhs);
+
+ shared_ptr<raw_som_dfa> m_haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
+ triggers, ng.cc.grey, true);
+ DEBUG_PRINTF("mhs haig %p\n", m_haig.get());
+ if (!m_haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+
+ RoseInVertex w
+ = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
+ add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig);
+
+ NFAVertex reporter = getSoleSourceVertex(g, g.accept);
+ assert(reporter);
+ const auto &reports = g[reporter].reports;
+ RoseInVertex a =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(w, a, RoseInEdgeProps(0U, 0U), ig);
+ } else {
+ /* TODO: analysis to see if som is in fact always increasing */
+ shared_ptr<raw_som_dfa> r_haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
+ triggers, ng.cc.grey, true);
+ DEBUG_PRINTF("rhs haig %p\n", r_haig.get());
+ if (!r_haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()),
+ ig);
+ add_edge(v, a, RoseInEdgeProps(r_haig), ig);
+ }
+ } else {
+ DEBUG_PRINTF("has start->accept edge\n");
if (in_degree(g.acceptEod, g) > 1) {
- DEBUG_PRINTF("also has a path to EOD\n");
- return false;
- }
- NFAVertex reporter = getSoleSourceVertex(g, g.accept);
- if (!reporter) {
- return false; /* TODO: later */
- }
- const auto &reports = g[reporter].reports;
- assert(!reports.empty());
- RoseInVertex a =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
- }
-
- calcVertexOffsets(ig);
-
- return ng.rose->addSombeRose(ig);
-}
-
-static
-bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) {
- set<ue2_literal> lits;
- shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
- if (!ng.cc.grey.allowLitHaig) {
- return false;
- }
-
- dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
-
- if (!splitOffLeadingLiterals(g, &lits, &*rhs)) {
- DEBUG_PRINTF("no literal\n");
- return false;
- }
-
+ DEBUG_PRINTF("also has a path to EOD\n");
+ return false;
+ }
+ NFAVertex reporter = getSoleSourceVertex(g, g.accept);
+ if (!reporter) {
+ return false; /* TODO: later */
+ }
+ const auto &reports = g[reporter].reports;
+ assert(!reports.empty());
+ RoseInVertex a =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
+ }
+
+ calcVertexOffsets(ig);
+
+ return ng.rose->addSombeRose(ig);
+}
+
+static
+bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) {
+ set<ue2_literal> lits;
+ shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
+ if (!ng.cc.grey.allowLitHaig) {
+ return false;
+ }
+
+ dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
+
+ if (!splitOffLeadingLiterals(g, &lits, &*rhs)) {
+ DEBUG_PRINTF("no literal\n");
+ return false;
+ }
+
makeReportsSomPass(ng.rm, *rhs);
- dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
-
- vector<vector<CharReach>> triggers;
- for (const auto &lit : lits) {
- if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
- DEBUG_PRINTF("lit too short\n");
- return false;
- }
-
- assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
- triggers.push_back(as_cr_seq(lit));
- }
-
- bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that
- * we can promise ordering */
-
- assert(rhs->kind == NFA_SUFFIX);
- shared_ptr<raw_som_dfa> haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
- ng.cc.grey, unordered_som_triggers);
- if (!haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- DEBUG_PRINTF("haig %p\n", haig.get());
-
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
-
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
-
- for (const auto &lit : lits) {
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
- add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
- add_edge(v, a, RoseInEdgeProps(haig), ig);
- }
-
- calcVertexOffsets(ig);
-
- return ng.rose->addSombeRose(ig);
-}
-
-static
-bool trySombe(NG &ng, NGHolder &g, som_type som) {
- if (doLitHaigSom(ng, g, som)) {
- return true;
- }
-
- auto regions = assignRegions(g);
-
- if (doHaigLitHaigSom(ng, g, regions, som)) {
- return true;
- }
-
- if (doMultiLitHaigSom(ng, g, som)) {
- return true;
- }
-
- return false;
-}
-
-static
-map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g,
+ dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
+
+ vector<vector<CharReach>> triggers;
+ for (const auto &lit : lits) {
+ if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
+ DEBUG_PRINTF("lit too short\n");
+ return false;
+ }
+
+ assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
+ triggers.push_back(as_cr_seq(lit));
+ }
+
+ bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that
+ * we can promise ordering */
+
+ assert(rhs->kind == NFA_SUFFIX);
+ shared_ptr<raw_som_dfa> haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
+ ng.cc.grey, unordered_som_triggers);
+ if (!haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ DEBUG_PRINTF("haig %p\n", haig.get());
+
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
+
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+
+ for (const auto &lit : lits) {
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+ add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
+ add_edge(v, a, RoseInEdgeProps(haig), ig);
+ }
+
+ calcVertexOffsets(ig);
+
+ return ng.rose->addSombeRose(ig);
+}
+
+static
+bool trySombe(NG &ng, NGHolder &g, som_type som) {
+ if (doLitHaigSom(ng, g, som)) {
+ return true;
+ }
+
+ auto regions = assignRegions(g);
+
+ if (doHaigLitHaigSom(ng, g, regions, som)) {
+ return true;
+ }
+
+ if (doMultiLitHaigSom(ng, g, som)) {
+ return true;
+ }
+
+ return false;
+}
+
+static
+map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- const vector<DepthMinMax> &depths) {
- map<u32, region_info>::const_iterator picked = info.end();
- for (map<u32, region_info>::const_iterator it = info.begin();
- it != info.end(); ++it) {
- if (it->second.exits.empty()) {
- assert(it == info.begin());
- continue;
- }
-
- if (!regionCanEstablishSom(g, regions, it->first, it->second.exits,
- depths)) {
- /* last region is as far as we can go */
- DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first);
- break;
- }
- picked = it;
- }
-
- return picked;
-}
-
-static
-map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g,
+ const map<u32, region_info> &info,
+ const vector<DepthMinMax> &depths) {
+ map<u32, region_info>::const_iterator picked = info.end();
+ for (map<u32, region_info>::const_iterator it = info.begin();
+ it != info.end(); ++it) {
+ if (it->second.exits.empty()) {
+ assert(it == info.begin());
+ continue;
+ }
+
+ if (!regionCanEstablishSom(g, regions, it->first, it->second.exits,
+ depths)) {
+ /* last region is as far as we can go */
+ DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first);
+ break;
+ }
+ picked = it;
+ }
+
+ return picked;
+}
+
+static
+map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- const vector<DepthMinMax> &depths,
- const map<u32, region_info>::const_iterator &orig,
- const CompileContext &cc) {
- DEBUG_PRINTF("trying for later rev nfa cut\n");
- assert(orig != info.end());
-
- vector<map<u32, region_info>::const_iterator> cands;
-
- map<u32, region_info>::const_iterator it = orig;
- ++it;
- for (; it != info.end(); ++it) {
- /* for simplicity */
- if (it->second.exits.size() != 1 || it->second.optional) {
- continue;
- }
- NFAVertex v = *it->second.exits.begin();
-
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
- continue; /* for simplicity would require external som nfa reports
- * as well. */
- }
-
- const depth &max_depth = depths[g[v].index].max;
- if (max_depth >
- depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */
- continue;
- }
-
- if (max_depth > depth(MAX_REV_NFA_PREFIX)) {
- /* probably not a good idea, anyway */
- continue;
- }
-
- cands.push_back(it);
- }
-
- while (!cands.empty()) {
- map<u32, region_info>::const_iterator rv = cands.back();
- cands.pop_back();
-
- NFAVertex v = *rv->second.exits.begin();
-
- set<ue2_literal> lits = getLiteralSet(g, v);
- compressAndScore(lits);
- if (lits.empty()) {
- next_region:
- continue;
- }
- for (const auto &lit : lits) {
- if (lit.length() <= 3 || minStringPeriod(lit) < 2) {
- goto next_region;
- }
- }
-
- if (rv->second.enters.empty()
- || find(rv->second.full.begin(), rv->second.full.end(), g.startDs)
- != rv->second.full.end()) {
- continue;
- }
-
- if (!isMandRegionBetween(info.begin(), rv)
- && info.begin()->second.optional) {
- continue;
- }
-
- /* check to see if it is a reasonable size */
- auto prefix =
- makePrefix(g, regions, rv->second, next(rv)->second, false);
-
- NGHolder g_rev;
- reverseHolder(*prefix, g_rev);
- anchorStarts(g_rev);
-
+ const map<u32, region_info> &info,
+ const vector<DepthMinMax> &depths,
+ const map<u32, region_info>::const_iterator &orig,
+ const CompileContext &cc) {
+ DEBUG_PRINTF("trying for later rev nfa cut\n");
+ assert(orig != info.end());
+
+ vector<map<u32, region_info>::const_iterator> cands;
+
+ map<u32, region_info>::const_iterator it = orig;
+ ++it;
+ for (; it != info.end(); ++it) {
+ /* for simplicity */
+ if (it->second.exits.size() != 1 || it->second.optional) {
+ continue;
+ }
+ NFAVertex v = *it->second.exits.begin();
+
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ continue; /* for simplicity would require external som nfa reports
+ * as well. */
+ }
+
+ const depth &max_depth = depths[g[v].index].max;
+ if (max_depth >
+ depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */
+ continue;
+ }
+
+ if (max_depth > depth(MAX_REV_NFA_PREFIX)) {
+ /* probably not a good idea, anyway */
+ continue;
+ }
+
+ cands.push_back(it);
+ }
+
+ while (!cands.empty()) {
+ map<u32, region_info>::const_iterator rv = cands.back();
+ cands.pop_back();
+
+ NFAVertex v = *rv->second.exits.begin();
+
+ set<ue2_literal> lits = getLiteralSet(g, v);
+ compressAndScore(lits);
+ if (lits.empty()) {
+ next_region:
+ continue;
+ }
+ for (const auto &lit : lits) {
+ if (lit.length() <= 3 || minStringPeriod(lit) < 2) {
+ goto next_region;
+ }
+ }
+
+ if (rv->second.enters.empty()
+ || find(rv->second.full.begin(), rv->second.full.end(), g.startDs)
+ != rv->second.full.end()) {
+ continue;
+ }
+
+ if (!isMandRegionBetween(info.begin(), rv)
+ && info.begin()->second.optional) {
+ continue;
+ }
+
+ /* check to see if it is a reasonable size */
+ auto prefix =
+ makePrefix(g, regions, rv->second, next(rv)->second, false);
+
+ NGHolder g_rev;
+ reverseHolder(*prefix, g_rev);
+ anchorStarts(g_rev);
+
renumber_vertices(g_rev);
- g_rev.kind = NFA_REV_PREFIX;
- reduceGraphEquivalences(g_rev, cc);
- removeRedundancy(g_rev, SOM_NONE);
-
- if (num_vertices(g_rev) > 128) { /* too big */
- continue;
- }
-
- return rv;
- }
-
- return info.end();
-}
-
-static
-unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
+ g_rev.kind = NFA_REV_PREFIX;
+ reduceGraphEquivalences(g_rev, cc);
+ removeRedundancy(g_rev, SOM_NONE);
+
+ if (num_vertices(g_rev) > 128) { /* too big */
+ continue;
+ }
+
+ return rv;
+ }
+
+ return info.end();
+}
+
+static
+unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- const map<u32, region_info>::const_iterator &picked,
- vector<DepthMinMax> *depths, bool prefix_by_rev,
- ReportManager &rm) {
- DEBUG_PRINTF("making prefix for chain attempt\n");
- auto prefix =
- makePrefix(g, regions, picked->second, next(picked)->second, false);
-
- /* For the root SOM plan, we use a temporary SOM slot to start with so that
- * we don't have to do any complicated rollback operations if the call to
- * doSomPlanning() below fails. The temporary SOM slot is replaced with a
- * real one afterwards. */
- const u32 temp_som_loc = UINT32_MAX;
- setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE,
- temp_som_loc, *depths, prefix_by_rev);
-
- /* handle direct edge to accepts from region */
- if (edge(picked->second.exits.front(), g.accept, g).second
- || edge(picked->second.exits.front(), g.acceptEod, g).second) {
- map<u32, region_info>::const_iterator it = picked;
- do {
- makeSomRelReports(rm, g, it->second.exits, *depths);
- } while (it != info.begin() && it->second.optional && (it--)->first);
- }
-
- depths->clear(); /* renumbering invalidates depths */
+ const map<u32, region_info> &info,
+ const map<u32, region_info>::const_iterator &picked,
+ vector<DepthMinMax> *depths, bool prefix_by_rev,
+ ReportManager &rm) {
+ DEBUG_PRINTF("making prefix for chain attempt\n");
+ auto prefix =
+ makePrefix(g, regions, picked->second, next(picked)->second, false);
+
+ /* For the root SOM plan, we use a temporary SOM slot to start with so that
+ * we don't have to do any complicated rollback operations if the call to
+ * doSomPlanning() below fails. The temporary SOM slot is replaced with a
+ * real one afterwards. */
+ const u32 temp_som_loc = UINT32_MAX;
+ setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE,
+ temp_som_loc, *depths, prefix_by_rev);
+
+ /* handle direct edge to accepts from region */
+ if (edge(picked->second.exits.front(), g.accept, g).second
+ || edge(picked->second.exits.front(), g.acceptEod, g).second) {
+ map<u32, region_info>::const_iterator it = picked;
+ do {
+ makeSomRelReports(rm, g, it->second.exits, *depths);
+ } while (it != info.begin() && it->second.optional && (it--)->first);
+ }
+
+ depths->clear(); /* renumbering invalidates depths */
renumber_vertices(*prefix);
-
- DEBUG_PRINTF("done\n");
- return prefix;
-}
-
+
+ DEBUG_PRINTF("done\n");
+ return prefix;
+}
+
sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
- som_type som) {
- assert(som);
- DEBUG_PRINTF("som hello\n");
- ReportManager &rm = ng.rm;
- SomSlotManager &ssm = ng.ssm;
- const CompileContext &cc = ng.cc;
-
- // Special case: if g is completely anchored or begins with a dot-star, we
- // know that we have an absolute SOM of zero all the time.
+ som_type som) {
+ assert(som);
+ DEBUG_PRINTF("som hello\n");
+ ReportManager &rm = ng.rm;
+ SomSlotManager &ssm = ng.ssm;
+ const CompileContext &cc = ng.cc;
+
+ // Special case: if g is completely anchored or begins with a dot-star, we
+ // know that we have an absolute SOM of zero all the time.
if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) {
- makeSomAbsReports(rm, g, g.accept);
- makeSomAbsReports(rm, g, g.acceptEod);
- return SOMBE_HANDLED_INTERNAL;
- }
-
- if (!cc.grey.allowSomChain) {
- return SOMBE_FAIL;
- }
-
- // A pristine copy of the input graph, which must be restored to in paths
- // that return false. Also used as the forward graph for som rev nfa
- // construction.
- NGHolder g_pristine;
- cloneHolder(g_pristine, g);
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- // try a redundancy pass.
- if (addSomRedundancy(g, depths)) {
- depths = getDistancesFromSOM(g); // recalc
- }
-
- auto regions = assignRegions(g);
-
- dumpHolder(g, regions, 11, "som_explode", cc.grey);
-
- map<u32, region_info> info;
- buildRegionMapping(g, regions, info);
-
- map<u32, region_info>::const_iterator picked
- = pickInitialSomCut(g, regions, info, depths);
- DEBUG_PRINTF("picked %u\n", picked->first);
- if (picked == info.end() || picked->second.exits.empty()) {
- DEBUG_PRINTF("no regions/no progress possible\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- if (doSomRevNfa(ng, g, cc)) {
- return SOMBE_HANDLED_INTERNAL;
- } else {
- return SOMBE_FAIL;
- }
- }
-
- if (finalRegion(g, regions, picked->second.exits[0])) {
- makeSomRelReports(rm, g, g.accept, depths);
- makeSomRelReports(rm, g, g.acceptEod, depths);
- return SOMBE_HANDLED_INTERNAL;
- }
-
- if (doSomRevNfa(ng, g_pristine, cc)) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_HANDLED_INTERNAL;
- }
-
- bool prefix_by_rev = false;
- map<u32, region_info>::const_iterator picked_old = picked;
- map<u32, region_info>::const_iterator rev_pick
- = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc);
- if (rev_pick != info.end()) {
- DEBUG_PRINTF("found later rev prefix cut point\n");
- assert(rev_pick != picked);
- picked = rev_pick;
- prefix_by_rev = true;
- } else {
- /* sanity checks for picked region, these checks have already been done
- * if we are using a prefix reverse nfa. */
- if (picked->second.enters.empty()
- || find(picked->second.full.begin(), picked->second.full.end(),
- g.startDs) != picked->second.full.end()) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
-
- if (!isMandRegionBetween(info.begin(), picked)
- && info.begin()->second.optional) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
- }
-
- DEBUG_PRINTF("region %u is the final\n", picked->first);
-
- shared_ptr<NGHolder> prefix = makePrefixForChain(
- g, regions, info, picked, &depths, prefix_by_rev, rm);
- /* note depths cleared as we have renumbered */
-
- CharReach escapes;
- bool stuck = isPossibleLock(g, picked, info, &escapes);
- if (stuck) {
- DEBUG_PRINTF("investigating potential lock\n");
-
- NGHolder gg;
- fillHolderForLockCheck(&gg, g, info, picked);
-
- stuck = firstMatchIsFirst(gg);
- }
-
- if (stuck && escapes.none()) {
- /* leads directly to .* --> woot */
- DEBUG_PRINTF("initial slot is full lock\n");
- u32 som_loc = ssm.getSomSlot(*prefix, escapes, false,
- SomSlotManager::NO_PARENT);
- replaceTempSomSlot(rm, *prefix, som_loc);
-
- /* update all reports on g to report the som_loc's som */
- updateReportToUseRecordedSom(rm, g, som_loc);
-
- /* create prefix to set the som_loc */
- updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
- if (prefix_by_rev) {
+ makeSomAbsReports(rm, g, g.accept);
+ makeSomAbsReports(rm, g, g.acceptEod);
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ if (!cc.grey.allowSomChain) {
+ return SOMBE_FAIL;
+ }
+
+ // A pristine copy of the input graph, which g must be restored to on paths
+ // that return SOMBE_FAIL. Also used as the forward graph for som rev nfa
+ // construction.
+ NGHolder g_pristine;
+ cloneHolder(g_pristine, g);
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ // try a redundancy pass.
+ if (addSomRedundancy(g, depths)) {
+ depths = getDistancesFromSOM(g); // recalc
+ }
+
+ auto regions = assignRegions(g);
+
+ dumpHolder(g, regions, 11, "som_explode", cc.grey);
+
+ map<u32, region_info> info;
+ buildRegionMapping(g, regions, info);
+
+ map<u32, region_info>::const_iterator picked
+ = pickInitialSomCut(g, regions, info, depths);
+ DEBUG_PRINTF("picked %u\n", picked->first);
+ if (picked == info.end() || picked->second.exits.empty()) {
+ DEBUG_PRINTF("no regions/no progress possible\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ if (doSomRevNfa(ng, g, cc)) {
+ return SOMBE_HANDLED_INTERNAL;
+ } else {
+ return SOMBE_FAIL;
+ }
+ }
+
+ if (finalRegion(g, regions, picked->second.exits[0])) {
+ makeSomRelReports(rm, g, g.accept, depths);
+ makeSomRelReports(rm, g, g.acceptEod, depths);
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ if (doSomRevNfa(ng, g_pristine, cc)) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ bool prefix_by_rev = false;
+ map<u32, region_info>::const_iterator picked_old = picked;
+ map<u32, region_info>::const_iterator rev_pick
+ = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc);
+ if (rev_pick != info.end()) {
+ DEBUG_PRINTF("found later rev prefix cut point\n");
+ assert(rev_pick != picked);
+ picked = rev_pick;
+ prefix_by_rev = true;
+ } else {
+ /* sanity checks for picked region, these checks have already been done
+ * if we are using a prefix reverse nfa. */
+ if (picked->second.enters.empty()
+ || find(picked->second.full.begin(), picked->second.full.end(),
+ g.startDs) != picked->second.full.end()) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+
+ if (!isMandRegionBetween(info.begin(), picked)
+ && info.begin()->second.optional) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+ }
+
+ DEBUG_PRINTF("region %u is the final\n", picked->first);
+
+ shared_ptr<NGHolder> prefix = makePrefixForChain(
+ g, regions, info, picked, &depths, prefix_by_rev, rm);
+ /* note depths cleared as we have renumbered */
+
+ CharReach escapes;
+ bool stuck = isPossibleLock(g, picked, info, &escapes);
+ if (stuck) {
+ DEBUG_PRINTF("investigating potential lock\n");
+
+ NGHolder gg;
+ fillHolderForLockCheck(&gg, g, info, picked);
+
+ stuck = firstMatchIsFirst(gg);
+ }
+
+ if (stuck && escapes.none()) {
+ /* leads directly to .* --> woot */
+ DEBUG_PRINTF("initial slot is full lock\n");
+ u32 som_loc = ssm.getSomSlot(*prefix, escapes, false,
+ SomSlotManager::NO_PARENT);
+ replaceTempSomSlot(rm, *prefix, som_loc);
+
+ /* update all reports on g to report the som_loc's som */
+ updateReportToUseRecordedSom(rm, g, som_loc);
+
+ /* create prefix to set the som_loc */
+ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
+ if (prefix_by_rev) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
- updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
- }
+ updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
+ }
renumber_vertices(*prefix);
- if (!ng.addHolder(*prefix)) {
- DEBUG_PRINTF("failed to add holder\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
-
- DEBUG_PRINTF("ok found initial lock\n");
- return SOMBE_HANDLED_INTERNAL;
- }
-
- vector<som_plan> plan;
- retry:
- // Note: no-one should ever pay attention to the root plan's parent.
- plan.push_back(som_plan(prefix, escapes, false, 0));
- dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey);
- if (!prefix_by_rev) {
- if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) {
- DEBUG_PRINTF("failed\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
- } else {
- DEBUG_PRINTF("trying for som plan\n");
- if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey,
- DISALLOW_MODIFY_HOLDER)) {
- /* Note: the larger prefixes generated by reverse nfas may not
- * advance as fair as the original prefix - so we should retry
- * with a smaller prefix. */
-
- prefix_by_rev = false;
- stuck = false; /* if we reached a lock, then prefix_by_rev would not
- * have advanced. */
- picked = picked_old;
- plan.clear();
- depths = getDistancesFromSOM(g); /* due to renumbering, need to
- * regenerate */
- prefix = makePrefixForChain(g, regions, info, picked, &depths,
- prefix_by_rev, rm);
- escapes.clear();
- DEBUG_PRINTF("retrying\n");
- goto retry;
- }
- }
- DEBUG_PRINTF("som planning ok\n");
-
- /* if the initial prefix is weak is if sombe approaches are better */
- if (findMinWidth(*prefix) <= depth(2)) {
- DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n");
- NGHolder g2;
- cloneHolder(g2, g_pristine);
- if (trySombe(ng, g2, som)) {
- return SOMBE_HANDLED_ALL;
- }
- }
-
- /* From this point we know that we are going to succeed or die horribly with
- * a pattern too large. Anything done past this point can be considered
- * committed to the compile. */
-
- regions = assignRegions(g); // Update as g may have changed.
-
- DEBUG_PRINTF("-- get slot for initial plan\n");
- u32 som_loc;
- if (plan[0].is_reset) {
- som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions,
- picked->first, &plan[0].no_implement);
- } else {
- som_loc = ssm.getSomSlot(*prefix, escapes, false,
- SomSlotManager::NO_PARENT);
- }
-
- replaceTempSomSlot(rm, *prefix, som_loc);
-
- if (plan.front().is_reset) {
- updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
- }
- if (prefix_by_rev && !plan.front().no_implement) {
+ if (!ng.addHolder(*prefix)) {
+ DEBUG_PRINTF("failed to add holder\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+
+ DEBUG_PRINTF("ok found initial lock\n");
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ vector<som_plan> plan;
+ retry:
+ // Note: no-one should ever pay attention to the root plan's parent.
+ plan.push_back(som_plan(prefix, escapes, false, 0));
+ dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey);
+ if (!prefix_by_rev) {
+ if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) {
+ DEBUG_PRINTF("failed\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+ } else {
+ DEBUG_PRINTF("trying for som plan\n");
+ if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey,
+ DISALLOW_MODIFY_HOLDER)) {
+ /* Note: the larger prefixes generated by reverse nfas may not
+ * advance as far as the original prefix - so we should retry
+ * with a smaller prefix. */
+
+ prefix_by_rev = false;
+ stuck = false; /* if we reached a lock, then prefix_by_rev would not
+ * have advanced. */
+ picked = picked_old;
+ plan.clear();
+ depths = getDistancesFromSOM(g); /* due to renumbering, need to
+ * regenerate */
+ prefix = makePrefixForChain(g, regions, info, picked, &depths,
+ prefix_by_rev, rm);
+ escapes.clear();
+ DEBUG_PRINTF("retrying\n");
+ goto retry;
+ }
+ }
+ DEBUG_PRINTF("som planning ok\n");
+
+ /* if the initial prefix is weak, see if sombe approaches are better */
+ if (findMinWidth(*prefix) <= depth(2)) {
+ DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n");
+ NGHolder g2;
+ cloneHolder(g2, g_pristine);
+ if (trySombe(ng, g2, som)) {
+ return SOMBE_HANDLED_ALL;
+ }
+ }
+
+ /* From this point we know that we are going to succeed or die horribly with
+ * a pattern too large. Anything done past this point can be considered
+ * committed to the compile. */
+
+ regions = assignRegions(g); // Update as g may have changed.
+
+ DEBUG_PRINTF("-- get slot for initial plan\n");
+ u32 som_loc;
+ if (plan[0].is_reset) {
+ som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions,
+ picked->first, &plan[0].no_implement);
+ } else {
+ som_loc = ssm.getSomSlot(*prefix, escapes, false,
+ SomSlotManager::NO_PARENT);
+ }
+
+ replaceTempSomSlot(rm, *prefix, som_loc);
+
+ if (plan.front().is_reset) {
+ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
+ }
+ if (prefix_by_rev && !plan.front().no_implement) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
- updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
- }
-
+ updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
+ }
+
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
-
- DEBUG_PRINTF("success\n");
- return SOMBE_HANDLED_INTERNAL;
-}
-
+
+ DEBUG_PRINTF("success\n");
+ return SOMBE_HANDLED_INTERNAL;
+}
+
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
u32 comp_id, som_type som) {
- assert(som);
-
- DEBUG_PRINTF("som+haig hello\n");
-
- // A pristine copy of the input graph, which must be restored to in paths
- // that return false. Also used as the forward graph for som rev nfa
- // construction.
- NGHolder g_pristine;
- cloneHolder(g_pristine, g);
-
- if (trySombe(ng, g, som)) {
- return SOMBE_HANDLED_ALL;
- }
-
- if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) {
- return SOMBE_FAIL;
- }
-
- // know that we have an absolute SOM of zero all the time.
- assert(edge(g.startDs, g.startDs, g).second);
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- // try a redundancy pass.
- if (addSomRedundancy(g, depths)) {
- depths = getDistancesFromSOM(g);
- }
-
- auto regions = assignRegions(g);
-
- dumpHolder(g, regions, 21, "som_explode", ng.cc.grey);
-
- map<u32, region_info> info;
- buildRegionMapping(g, regions, info, true);
-
- sombe_rv rv =
+ assert(som);
+
+ DEBUG_PRINTF("som+haig hello\n");
+
+ // A pristine copy of the input graph, which g must be restored to on paths
+ // that return SOMBE_FAIL. Also used as the forward graph for som rev nfa
+ // construction.
+ NGHolder g_pristine;
+ cloneHolder(g_pristine, g);
+
+ if (trySombe(ng, g, som)) {
+ return SOMBE_HANDLED_ALL;
+ }
+
+ if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) {
+ return SOMBE_FAIL;
+ }
+
+ // know that we have an absolute SOM of zero all the time.
+ assert(edge(g.startDs, g.startDs, g).second);
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ // try a redundancy pass.
+ if (addSomRedundancy(g, depths)) {
+ depths = getDistancesFromSOM(g);
+ }
+
+ auto regions = assignRegions(g);
+
+ dumpHolder(g, regions, 21, "som_explode", ng.cc.grey);
+
+ map<u32, region_info> info;
+ buildRegionMapping(g, regions, info, true);
+
+ sombe_rv rv =
doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
- if (rv == SOMBE_FAIL) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- }
- return rv;
-}
-
-} // namespace ue2
+ if (rv == SOMBE_FAIL) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ }
+ return rv;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.h b/contrib/libs/hyperscan/src/nfagraph/ng_som.h
index ecae4c67fb..b39c239ba2 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.h
@@ -1,81 +1,81 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief SOM ("Start of Match") analysis.
- */
-
-#ifndef NG_SOM_H
-#define NG_SOM_H
-
-#include "som/som.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SOM ("Start of Match") analysis.
+ */
+
+#ifndef NG_SOM_H
+#define NG_SOM_H
+
+#include "som/som.h"
#include "ue2common.h"
-
-namespace ue2 {
-
+
+namespace ue2 {
+
class ExpressionInfo;
-class NG;
-class NGHolder;
+class NG;
+class NGHolder;
class ReportManager;
-struct Grey;
-
-enum sombe_rv {
- SOMBE_FAIL,
- SOMBE_HANDLED_INTERNAL,
- SOMBE_HANDLED_ALL
-};
-
-/** \brief Perform SOM analysis on the given graph.
- *
- * This function will replace report IDs and mutate the graph, then return
- * SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still
- * needs to be handled (rose, etc).
- *
- * Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has
- * been handled in all its glory.
- *
- * Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be
- * established.
- *
- * May throw a "Pattern too large" exception if prefixes of the
- * pattern are too large to compile.
- */
+struct Grey;
+
+enum sombe_rv {
+ SOMBE_FAIL,
+ SOMBE_HANDLED_INTERNAL,
+ SOMBE_HANDLED_ALL
+};
+
+/** \brief Perform SOM analysis on the given graph.
+ *
+ * This function will replace report IDs and mutate the graph, then return
+ * SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still
+ * needs to be handled (rose, etc).
+ *
+ * Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has
+ * been handled in all its glory.
+ *
+ * Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be
+ * established.
+ *
+ * May throw a "Pattern too large" exception if prefixes of the
+ * pattern are too large to compile.
+ */
sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id,
- som_type som);
-
-/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
- * May also throw pattern too large if prefixes of the pattern are too large to
- * compile. */
+ som_type som);
+
+/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
+ * May also throw pattern too large if prefixes of the pattern are too large to
+ * compile. */
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr,
u32 comp_id, som_type som);
-
+
void makeReportsSomPass(ReportManager &rm, NGHolder &g);
-} // namespace ue2
-
-#endif // NG_SOM_H
+} // namespace ue2
+
+#endif // NG_SOM_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp
index 33544ec173..776d54f4f1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp
@@ -1,198 +1,198 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Add redundancy to graph to assist in SOM analysis.
- *
- * Currently patterns of the form:
- *
- * /(GET|POST).*foo/
- *
- * baffle our SOM analysis as the T's get merged into one by our graph
- * reductions and they lose the fixed depth property. One way to solve this is
- * to tell the T vertex to go fork itself before we do the main SOM pass.
- *
- * Overall plan:
- *
- * 1. build a topo ordering
- * 2. walk vertices in topo order
- * 3. fix up vertices where possible
- * 4. go home
- *
- * Vertex fix up plan:
- *
- * 1. consider depth of vertex
- * - if vertex is at fixed depth continue to next vertex
- * - if vertex can be at an unbounded depth continue to next vertex
- * - if vertex has a pred which is not a fixed depth continue to next vertex
- * 2. group preds by their depth
- * 3. for each group:
- * - create a clone of the vertex (vertex props and out edges)
- * - create edges from each vertex in the group to the clone
- * - work out the depth for the clone
- * 4. blow away original vertex
- *
- * Originally in UE-1862.
- */
-#include "ng_som_add_redundancy.h"
-
-#include "ng_dump.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Hard limit on the maximum number of new vertices to create. */
-static const size_t MAX_NEW_VERTICES = 32;
-
-static
-const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g,
- const vector<DepthMinMax> &depths) {
- return depths.at(g[v].index);
-}
-
-static
-bool hasFloatingPred(NFAVertex v, const NGHolder &g,
- const vector<DepthMinMax> &depths) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- const DepthMinMax &d = getDepth(u, g, depths);
- if (d.min != d.max) {
- return true;
- }
- }
- return false;
-}
-
-static
-bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
- set<NFAVertex> &dead, size_t *numNewVertices) {
- map<depth, vector<NFAEdge>> predGroups;
- for (const auto &e : in_edges_range(v, g)) {
- const DepthMinMax &d = getDepth(source(e, g), g, depths);
- assert(d.min == d.max);
- predGroups[d.min].push_back(e);
- }
-
- DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size());
-
- if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) {
- return false;
- }
- *numNewVertices += predGroups.size();
-
- for (auto &group : predGroups) {
- const depth &predDepth = group.first;
- const vector<NFAEdge> &preds = group.second;
-
- // Clone v for this depth with all its associated out-edges.
- u32 clone_idx = depths.size(); // next index to be used
- NFAVertex clone = add_vertex(g[v], g);
- depth clone_depth = predDepth + 1;
- g[clone].index = clone_idx;
- depths.push_back(DepthMinMax(clone_depth, clone_depth));
- DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx,
- clone_depth.str().c_str());
-
- // Add copies of the out-edges from v.
- for (const auto &e : out_edges_range(v, g)) {
- add_edge(clone, target(e, g), g[e], g);
- }
-
- // Add in-edges from preds in this group.
- for (const auto &e : preds) {
- add_edge(source(e, g), clone, g[e], g);
- }
- }
-
- clear_vertex(v, g);
- dead.insert(v);
- return true;
-}
-
-bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) {
- DEBUG_PRINTF("entry\n");
-
- const vector<NFAVertex> ordering = getTopoOrdering(g);
-
- set<NFAVertex> dead;
- size_t numNewVertices = 0;
-
- for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
- NFAVertex v = *it;
-
- if (is_special(v, g)) {
- continue;
- }
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Add redundancy to graph to assist in SOM analysis.
+ *
+ * Currently patterns of the form:
+ *
+ * /(GET|POST).*foo/
+ *
+ * baffle our SOM analysis as the T's get merged into one by our graph
+ * reductions and they lose the fixed depth property. One way to solve this is
+ * to tell the T vertex to go fork itself before we do the main SOM pass.
+ *
+ * Overall plan:
+ *
+ * 1. build a topo ordering
+ * 2. walk vertices in topo order
+ * 3. fix up vertices where possible
+ * 4. go home
+ *
+ * Vertex fix up plan:
+ *
+ * 1. consider depth of vertex
+ * - if vertex is at fixed depth continue to next vertex
+ * - if vertex can be at an unbounded depth continue to next vertex
+ * - if vertex has a pred which is not a fixed depth continue to next vertex
+ * 2. group preds by their depth
+ * 3. for each group:
+ * - create a clone of the vertex (vertex props and out edges)
+ * - create edges from each vertex in the group to the clone
+ * - work out the depth for the clone
+ * 4. blow away original vertex
+ *
+ * Originally in UE-1862.
+ */
+#include "ng_som_add_redundancy.h"
+
+#include "ng_dump.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Hard limit on the maximum number of new vertices to create. */
+static const size_t MAX_NEW_VERTICES = 32;
+
+static
+const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g,
+ const vector<DepthMinMax> &depths) {
+ return depths.at(g[v].index);
+}
+
+static
+bool hasFloatingPred(NFAVertex v, const NGHolder &g,
+ const vector<DepthMinMax> &depths) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ const DepthMinMax &d = getDepth(u, g, depths);
+ if (d.min != d.max) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
+ set<NFAVertex> &dead, size_t *numNewVertices) {
+ map<depth, vector<NFAEdge>> predGroups;
+ for (const auto &e : in_edges_range(v, g)) {
+ const DepthMinMax &d = getDepth(source(e, g), g, depths);
+ assert(d.min == d.max);
+ predGroups[d.min].push_back(e);
+ }
+
+ DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size());
+
+ if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) {
+ return false;
+ }
+ *numNewVertices += predGroups.size();
+
+ for (auto &group : predGroups) {
+ const depth &predDepth = group.first;
+ const vector<NFAEdge> &preds = group.second;
+
+ // Clone v for this depth with all its associated out-edges.
+ u32 clone_idx = depths.size(); // next index to be used
+ NFAVertex clone = add_vertex(g[v], g);
+ depth clone_depth = predDepth + 1;
+ g[clone].index = clone_idx;
+ depths.push_back(DepthMinMax(clone_depth, clone_depth));
+ DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx,
+ clone_depth.str().c_str());
+
+ // Add copies of the out-edges from v.
+ for (const auto &e : out_edges_range(v, g)) {
+ add_edge(clone, target(e, g), g[e], g);
+ }
+
+ // Add in-edges from preds in this group.
+ for (const auto &e : preds) {
+ add_edge(source(e, g), clone, g[e], g);
+ }
+ }
+
+ clear_vertex(v, g);
+ dead.insert(v);
+ return true;
+}
+
+bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) {
+ DEBUG_PRINTF("entry\n");
+
+ const vector<NFAVertex> ordering = getTopoOrdering(g);
+
+ set<NFAVertex> dead;
+ size_t numNewVertices = 0;
+
+ for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
+ NFAVertex v = *it;
+
+ if (is_special(v, g)) {
+ continue;
+ }
if (!in_degree(v, g)) {
- continue; // unreachable, probably killed
- }
-
- const DepthMinMax &d = getDepth(v, g, depths);
-
+ continue; // unreachable, probably killed
+ }
+
+ const DepthMinMax &d = getDepth(v, g, depths);
+
DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index,
- d.str().c_str());
-
- if (d.min == d.max) {
- DEBUG_PRINTF("fixed depth\n");
- continue;
- }
-
- if (d.max.is_unreachable()) {
- DEBUG_PRINTF("unbounded depth\n");
- continue;
- }
-
- if (hasFloatingPred(v, g, depths)) {
- DEBUG_PRINTF("has floating pred\n");
- continue;
- }
-
- if (!forkVertex(v, g, depths, dead, &numNewVertices)) {
- DEBUG_PRINTF("new vertex limit reached\n");
- break;
- }
- }
-
- assert(numNewVertices <= MAX_NEW_VERTICES);
-
- if (dead.empty()) {
- return false; // no changes made to the graph
- }
-
- remove_vertices(dead, g);
- return true;
-}
-
-} // namespace ue2
+ d.str().c_str());
+
+ if (d.min == d.max) {
+ DEBUG_PRINTF("fixed depth\n");
+ continue;
+ }
+
+ if (d.max.is_unreachable()) {
+ DEBUG_PRINTF("unbounded depth\n");
+ continue;
+ }
+
+ if (hasFloatingPred(v, g, depths)) {
+ DEBUG_PRINTF("has floating pred\n");
+ continue;
+ }
+
+ if (!forkVertex(v, g, depths, dead, &numNewVertices)) {
+ DEBUG_PRINTF("new vertex limit reached\n");
+ break;
+ }
+ }
+
+ assert(numNewVertices <= MAX_NEW_VERTICES);
+
+ if (dead.empty()) {
+ return false; // no changes made to the graph
+ }
+
+ remove_vertices(dead, g);
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h
index 890dc9c942..bec63ccd18 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h
@@ -1,47 +1,47 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Add redundancy to graph to assist in SOM analysis.
- */
-
-#ifndef NG_SOM_ADD_REDUNDANCY_H
-#define NG_SOM_ADD_REDUNDANCY_H
-
-#include "util/depth.h"
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Add redundancy to graph to assist in SOM analysis.
+ */
+
+#ifndef NG_SOM_ADD_REDUNDANCY_H
+#define NG_SOM_ADD_REDUNDANCY_H
+
+#include "util/depth.h"
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp
index 1e7a41bb0c..3d49bd15db 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp
@@ -1,357 +1,357 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions related to SOM ("Start of Match").
- */
-#include "ng_som_util.h"
-
-#include "ng_depth.h"
-#include "ng_execute.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
-void wireSuccessorsToStart(NGHolder &g, NFAVertex u) {
- for (auto v : adjacent_vertices_range(u, g)) {
- add_edge_if_not_present(g.start, v, g);
- }
-}
-
-vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
- // We operate on a temporary copy of the original graph here, so we don't
- // have to mutate the original.
- NGHolder g;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions related to SOM ("Start of Match").
+ */
+#include "ng_som_util.h"
+
+#include "ng_depth.h"
+#include "ng_execute.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+void wireSuccessorsToStart(NGHolder &g, NFAVertex u) {
+ for (auto v : adjacent_vertices_range(u, g)) {
+ add_edge_if_not_present(g.start, v, g);
+ }
+}
+
+vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
+ // We operate on a temporary copy of the original graph here, so we don't
+ // have to mutate the original.
+ NGHolder g;
unordered_map<NFAVertex, NFAVertex> vmap; // vertex in g_orig to vertex in g
- cloneHolder(g, g_orig, &vmap);
-
- vector<NFAVertex> vstarts;
- for (auto v : vertices_range(g)) {
- if (is_virtual_start(v, g)) {
- vstarts.push_back(v);
- }
- }
- vstarts.push_back(g.startDs);
-
- // wire the successors of every virtual start or startDs to g.start.
- for (auto v : vstarts) {
- wireSuccessorsToStart(g, v);
- }
-
- // drop the in-edges of every virtual start so that they don't participate
- // in the depth calculation.
- for (auto v : vstarts) {
- clear_in_edges(v, g);
- }
-
+ cloneHolder(g, g_orig, &vmap);
+
+ vector<NFAVertex> vstarts;
+ for (auto v : vertices_range(g)) {
+ if (is_virtual_start(v, g)) {
+ vstarts.push_back(v);
+ }
+ }
+ vstarts.push_back(g.startDs);
+
+ // wire the successors of every virtual start or startDs to g.start.
+ for (auto v : vstarts) {
+ wireSuccessorsToStart(g, v);
+ }
+
+ // drop the in-edges of every virtual start so that they don't participate
+ // in the depth calculation.
+ for (auto v : vstarts) {
+ clear_in_edges(v, g);
+ }
+
//dumpGraph("som_depth.dot", g);
-
+
// Find depths, indexed by vertex index in g
auto temp_depths = calcDepthsFrom(g, g.start);
-
- // Transfer depths, indexed by vertex index in g_orig.
- vector<DepthMinMax> depths(num_vertices(g_orig));
-
- for (auto v_orig : vertices_range(g_orig)) {
- assert(contains(vmap, v_orig));
- NFAVertex v_new = vmap[v_orig];
-
- u32 orig_idx = g_orig[v_orig].index;
-
- DepthMinMax &d = depths.at(orig_idx);
-
- if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) {
- // StartDs and virtual starts always have zero depth.
+
+ // Transfer depths, indexed by vertex index in g_orig.
+ vector<DepthMinMax> depths(num_vertices(g_orig));
+
+ for (auto v_orig : vertices_range(g_orig)) {
+ assert(contains(vmap, v_orig));
+ NFAVertex v_new = vmap[v_orig];
+
+ u32 orig_idx = g_orig[v_orig].index;
+
+ DepthMinMax &d = depths.at(orig_idx);
+
+ if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) {
+ // StartDs and virtual starts always have zero depth.
d = DepthMinMax(depth(0), depth(0));
- } else {
- u32 new_idx = g[v_new].index;
- d = temp_depths.at(new_idx);
- }
- }
-
- return depths;
-}
-
-bool firstMatchIsFirst(const NGHolder &p) {
- /* If the first match (by end offset) is not the first match (by start
- * offset) then we can't create a lock after it.
- *
- * Consider: 4009:/(foobar|ob).*bugger/s
- *
- * We don't care about races on the last byte as they can be resolved easily
- * at runtime /(foobar|obar).*hi/
- *
- * It should be obvious we don't care about one match being a prefix
- * of another as they share the same start offset.
- *
- * Therefore, the case were we cannot establish that the som does not
- * regress is when there exists s1 and s2 in the language of p and s2 is a
- * proper infix of s1.
- *
- * It is tempting to add the further restriction that there does not exist a
- * prefix of s1 that is in the language of p (as in which case we would
- * presume, the lock has already been set). However, we have no way of
- * knowing if the lock can be cleared by some characters, and if so, if it
- * is still set. TODO: if we knew the lock's escapes where we could verify
- * that the rest of s1 does not clear the lock. (1)
- */
-
- DEBUG_PRINTF("entry\n");
-
- /* If there are any big cycles throw up our hands in despair */
- if (hasBigCycles(p)) {
- DEBUG_PRINTF("fail, big cycles\n");
- return false;
- }
-
+ } else {
+ u32 new_idx = g[v_new].index;
+ d = temp_depths.at(new_idx);
+ }
+ }
+
+ return depths;
+}
+
+bool firstMatchIsFirst(const NGHolder &p) {
+ /* If the first match (by end offset) is not the first match (by start
+ * offset) then we can't create a lock after it.
+ *
+ * Consider: 4009:/(foobar|ob).*bugger/s
+ *
+ * We don't care about races on the last byte as they can be resolved easily
+ * at runtime /(foobar|obar).*hi/
+ *
+ * It should be obvious we don't care about one match being a prefix
+ * of another as they share the same start offset.
+ *
+ * Therefore, the case were we cannot establish that the som does not
+ * regress is when there exists s1 and s2 in the language of p and s2 is a
+ * proper infix of s1.
+ *
+ * It is tempting to add the further restriction that there does not exist a
+ * prefix of s1 that is in the language of p (as in which case we would
+ * presume, the lock has already been set). However, we have no way of
+ * knowing if the lock can be cleared by some characters, and if so, if it
+ * is still set. TODO: if we knew the lock's escapes where we could verify
+ * that the rest of s1 does not clear the lock. (1)
+ */
+
+ DEBUG_PRINTF("entry\n");
+
+ /* If there are any big cycles throw up our hands in despair */
+ if (hasBigCycles(p)) {
+ DEBUG_PRINTF("fail, big cycles\n");
+ return false;
+ }
+
flat_set<NFAVertex> states;
- /* turn on all states (except starts - avoid suffix matches) */
- /* If we were doing (1) we would also except states leading to accepts -
- avoid prefix matches */
- for (auto v : vertices_range(p)) {
- assert(!is_virtual_start(v, p));
- if (!is_special(v, p)) {
+ /* turn on all states (except starts - avoid suffix matches) */
+ /* If we were doing (1) we would also except states leading to accepts -
+ avoid prefix matches */
+ for (auto v : vertices_range(p)) {
+ assert(!is_virtual_start(v, p));
+ if (!is_special(v, p)) {
DEBUG_PRINTF("turning on %zu\n", p[v].index);
- states.insert(v);
- }
- }
-
- /* run the prefix the main graph */
- states = execute_graph(p, p, states);
-
- for (auto v : states) {
- /* need to check if this vertex may represent an infix match - ie
- * it does not have an edge to accept. */
+ states.insert(v);
+ }
+ }
+
+ /* run the prefix the main graph */
+ states = execute_graph(p, p, states);
+
+ for (auto v : states) {
+ /* need to check if this vertex may represent an infix match - ie
+ * it does not have an edge to accept. */
DEBUG_PRINTF("check %zu\n", p[v].index);
- if (!edge(v, p.accept, p).second) {
+ if (!edge(v, p.accept, p).second) {
DEBUG_PRINTF("fail %zu\n", p[v].index);
- return false;
- }
- }
-
- DEBUG_PRINTF("done first is first check\n");
- return true;
-}
-
-bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("done first is first check\n");
+ return true;
+}
+
+bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache) {
- /* Need to ensure all matches of the graph g up to u contain no infixes
- * which are also matches of the graph to u.
- *
- * This is basically the same as firstMatchIsFirst except we g is not
- * always a dag. As we haven't gotten around to writing an execute_graph
- * that operates on general graphs, we take some (hopefully) conservative
- * short cuts.
- *
- * Note: if the u can be jumped we will take jump edges
- * into account as a possibility of som going backwards
- *
- * TODO: write a generalised ng_execute_graph/make this less hacky
- */
- assert(&g == &cache.g);
- if (contains(cache.smgb, u)) {
- return cache.smgb[u];
- }
-
+ smgb_cache &cache) {
+ /* Need to ensure all matches of the graph g up to u contain no infixes
+ * which are also matches of the graph to u.
+ *
+ * This is basically the same as firstMatchIsFirst except we g is not
+ * always a dag. As we haven't gotten around to writing an execute_graph
+ * that operates on general graphs, we take some (hopefully) conservative
+ * short cuts.
+ *
+ * Note: if the u can be jumped we will take jump edges
+ * into account as a possibility of som going backwards
+ *
+ * TODO: write a generalised ng_execute_graph/make this less hacky
+ */
+ assert(&g == &cache.g);
+ if (contains(cache.smgb, u)) {
+ return cache.smgb[u];
+ }
+
DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index);
-
- set<NFAEdge> be;
- BackEdges<set<NFAEdge>> backEdgeVisitor(be);
+
+ set<NFAEdge> be;
+ BackEdges<set<NFAEdge>> backEdgeVisitor(be);
boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start));
-
- bool rv;
- if (0) {
- exit:
- DEBUG_PRINTF("using cached result\n");
- cache.smgb[u] = rv;
- return rv;
- }
-
- assert(contains(region_map, u));
- const u32 u_region = region_map.at(u);
-
- for (const auto &e : be) {
- NFAVertex s = source(e, g);
- NFAVertex t = target(e, g);
- /* only need to worry about big cycles including/before u */
+
+ bool rv;
+ if (0) {
+ exit:
+ DEBUG_PRINTF("using cached result\n");
+ cache.smgb[u] = rv;
+ return rv;
+ }
+
+ assert(contains(region_map, u));
+ const u32 u_region = region_map.at(u);
+
+ for (const auto &e : be) {
+ NFAVertex s = source(e, g);
+ NFAVertex t = target(e, g);
+ /* only need to worry about big cycles including/before u */
DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index);
- if (s != t && region_map.at(s) <= u_region) {
- DEBUG_PRINTF("eek big cycle\n");
- rv = true; /* big cycle -> eek */
- goto exit;
- }
- }
-
+ if (s != t && region_map.at(s) <= u_region) {
+ DEBUG_PRINTF("eek big cycle\n");
+ rv = true; /* big cycle -> eek */
+ goto exit;
+ }
+ }
+
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- NGHolder c_g;
- cloneHolder(c_g, g, &orig_to_copy);
-
+ NGHolder c_g;
+ cloneHolder(c_g, g, &orig_to_copy);
+
/* treat virtual starts as unconditional - wire to startDs instead */
- for (NFAVertex v : vertices_range(g)) {
- if (!is_virtual_start(v, g)) {
- continue;
- }
- NFAVertex c_v = orig_to_copy[v];
- orig_to_copy[v] = c_g.startDs;
- for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
- add_edge_if_not_present(c_g.startDs, c_w, c_g);
- }
- clear_vertex(c_v, c_g);
- }
-
+ for (NFAVertex v : vertices_range(g)) {
+ if (!is_virtual_start(v, g)) {
+ continue;
+ }
+ NFAVertex c_v = orig_to_copy[v];
+ orig_to_copy[v] = c_g.startDs;
+ for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
+ add_edge_if_not_present(c_g.startDs, c_w, c_g);
+ }
+ clear_vertex(c_v, c_g);
+ }
+
/* treat u as the only accept state */
- NFAVertex c_u = orig_to_copy[u];
- clear_in_edges(c_g.acceptEod, c_g);
- add_edge(c_g.accept, c_g.acceptEod, c_g);
- clear_in_edges(c_g.accept, c_g);
- clear_out_edges(c_u, c_g);
- if (hasSelfLoop(u, g)) {
- add_edge(c_u, c_u, c_g);
- }
- add_edge(c_u, c_g.accept, c_g);
-
- set<NFAVertex> u_succ;
- insert(&u_succ, adjacent_vertices(u, g));
- u_succ.erase(u);
-
- for (auto t : inv_adjacent_vertices_range(u, g)) {
- if (t == u) {
- continue;
- }
- for (auto v : adjacent_vertices_range(t, g)) {
- if (contains(u_succ, v)) {
+ NFAVertex c_u = orig_to_copy[u];
+ clear_in_edges(c_g.acceptEod, c_g);
+ add_edge(c_g.accept, c_g.acceptEod, c_g);
+ clear_in_edges(c_g.accept, c_g);
+ clear_out_edges(c_u, c_g);
+ if (hasSelfLoop(u, g)) {
+ add_edge(c_u, c_u, c_g);
+ }
+ add_edge(c_u, c_g.accept, c_g);
+
+ set<NFAVertex> u_succ;
+ insert(&u_succ, adjacent_vertices(u, g));
+ u_succ.erase(u);
+
+ for (auto t : inv_adjacent_vertices_range(u, g)) {
+ if (t == u) {
+ continue;
+ }
+ for (auto v : adjacent_vertices_range(t, g)) {
+ if (contains(u_succ, v)) {
/* due to virtual starts being aliased with normal starts in the
* copy of the graph, we may have already added the edges. */
add_edge_if_not_present(orig_to_copy[t], c_g.accept, c_g);
- break;
- }
- }
- }
-
- pruneUseless(c_g);
-
- be.clear();
+ break;
+ }
+ }
+ }
+
+ pruneUseless(c_g);
+
+ be.clear();
boost::depth_first_search(c_g, visitor(backEdgeVisitor)
.root_vertex(c_g.start));
-
- for (const auto &e : be) {
- NFAVertex s = source(e, c_g);
- NFAVertex t = target(e, c_g);
+
+ for (const auto &e : be) {
+ NFAVertex s = source(e, c_g);
+ NFAVertex t = target(e, c_g);
DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index);
- if (s != t) {
- assert(0);
- DEBUG_PRINTF("eek big cycle\n");
- rv = true; /* big cycle -> eek */
- goto exit;
- }
- }
-
- DEBUG_PRINTF("checking acyclic+selfloop graph\n");
-
- rv = !firstMatchIsFirst(c_g);
- DEBUG_PRINTF("som may regress? %d\n", (int)rv);
- goto exit;
-}
-
-bool sentClearsTail(const NGHolder &g,
+ if (s != t) {
+ assert(0);
+ DEBUG_PRINTF("eek big cycle\n");
+ rv = true; /* big cycle -> eek */
+ goto exit;
+ }
+ }
+
+ DEBUG_PRINTF("checking acyclic+selfloop graph\n");
+
+ rv = !firstMatchIsFirst(c_g);
+ DEBUG_PRINTF("som may regress? %d\n", (int)rv);
+ goto exit;
+}
+
+bool sentClearsTail(const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- const NGHolder &sent, u32 last_head_region,
- u32 *bad_region) {
- /* if a subsequent match from the prefix clears the rest of the pattern
- * we can just keep track of the last match of the prefix.
- * To see if this property holds, we could:
- *
- * 1A: turn on all states in the tail and run all strings that may
- * match the prefix past the tail, if we are still in any states then
- * this property does not hold.
- *
- * 1B: we turn on the initial states of the tail and run any strings which
- * may finish any partial matches in the prefix and see if we end up with
- * anything which would also imply that this property does not hold.
- *
- * OR
- *
- * 2: we just turn everything and run the prefix inputs past it and see what
- * we are left with. I think that is equivalent to scheme 1 and is easier to
- * implement. TODO: ponder
- *
- * Anyway, we are going with scheme 2 until further notice.
- */
-
- u32 first_bad_region = ~0U;
+ const NGHolder &sent, u32 last_head_region,
+ u32 *bad_region) {
+ /* if a subsequent match from the prefix clears the rest of the pattern
+ * we can just keep track of the last match of the prefix.
+ * To see if this property holds, we could:
+ *
+ * 1A: turn on all states in the tail and run all strings that may
+ * match the prefix past the tail, if we are still in any states then
+ * this property does not hold.
+ *
+ * 1B: we turn on the initial states of the tail and run any strings which
+ * may finish any partial matches in the prefix and see if we end up with
+ * anything which would also imply that this property does not hold.
+ *
+ * OR
+ *
+ * 2: we just turn everything and run the prefix inputs past it and see what
+ * we are left with. I think that is equivalent to scheme 1 and is easier to
+ * implement. TODO: ponder
+ *
+ * Anyway, we are going with scheme 2 until further notice.
+ */
+
+ u32 first_bad_region = ~0U;
flat_set<NFAVertex> states;
- /* turn on all states */
- DEBUG_PRINTF("region %u is cutover\n", last_head_region);
- for (auto v : vertices_range(g)) {
- if (v != g.accept && v != g.acceptEod) {
- states.insert(v);
- }
- }
-
- for (UNUSED auto v : states) {
+ /* turn on all states */
+ DEBUG_PRINTF("region %u is cutover\n", last_head_region);
+ for (auto v : vertices_range(g)) {
+ if (v != g.accept && v != g.acceptEod) {
+ states.insert(v);
+ }
+ }
+
+ for (UNUSED auto v : states) {
DEBUG_PRINTF("start state: %zu\n", g[v].index);
- }
-
- /* run the prefix the main graph */
- states = execute_graph(g, sent, states);
-
- /* .. and check if we are left with anything in the tail region */
- for (auto v : states) {
- if (v == g.start || v == g.startDs) {
- continue; /* not in tail */
- }
-
+ }
+
+ /* run the prefix the main graph */
+ states = execute_graph(g, sent, states);
+
+ /* .. and check if we are left with anything in the tail region */
+ for (auto v : states) {
+ if (v == g.start || v == g.startDs) {
+ continue; /* not in tail */
+ }
+
DEBUG_PRINTF("v %zu is still on\n", g[v].index);
- assert(v != g.accept && v != g.acceptEod); /* no cr */
-
- assert(contains(region_map, v));
- const u32 v_region = region_map.at(v);
- if (v_region > last_head_region) {
- DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
- first_bad_region = min(first_bad_region, v_region);
- }
- }
-
- if (first_bad_region != ~0U) {
- DEBUG_PRINTF("first bad region is %u\n", first_bad_region);
- *bad_region = first_bad_region;
- return false;
- }
-
- return true;
-}
-
-} // namespace ue2
+ assert(v != g.accept && v != g.acceptEod); /* no cr */
+
+ assert(contains(region_map, v));
+ const u32 v_region = region_map.at(v);
+ if (v_region > last_head_region) {
+ DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
+ first_bad_region = min(first_bad_region, v_region);
+ }
+ }
+
+ if (first_bad_region != ~0U) {
+ DEBUG_PRINTF("first bad region is %u\n", first_bad_region);
+ *bad_region = first_bad_region;
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h
index e2d38642c4..3f4fcb5b3a 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h
@@ -1,84 +1,84 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions related to SOM ("Start of Match").
- */
-
-#ifndef NG_SOM_UTIL_H
-#define NG_SOM_UTIL_H
-
-#include "ng_util.h"
-#include "util/depth.h"
-
-#include <map>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions related to SOM ("Start of Match").
+ */
+
+#ifndef NG_SOM_UTIL_H
+#define NG_SOM_UTIL_H
+
+#include "ng_util.h"
+#include "util/depth.h"
+
+#include <map>
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-/**
- * Returns min/max distance from start of match, index by vertex_id.
- */
-std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g);
-
-/**
- * Returns true if the first match by end-offset must always be the first match
- * by start-offset.
- */
-bool firstMatchIsFirst(const NGHolder &p);
-
-struct smgb_cache : public mbsb_cache {
- explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {}
- std::map<NFAVertex, bool> smgb;
-};
-
-bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+/**
+ * Returns min/max distance from start of match, index by vertex_id.
+ */
+std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g);
+
+/**
+ * Returns true if the first match by end-offset must always be the first match
+ * by start-offset.
+ */
+bool firstMatchIsFirst(const NGHolder &p);
+
+struct smgb_cache : public mbsb_cache {
+ explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {}
+ std::map<NFAVertex, bool> smgb;
+};
+
+bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
const std::unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache);
-
-/**
- * Returns true if matching 'sent' causes all tail states in the main graph \a
- * g to go dead. A tail state is any state with a region greater than
- * \a last_head_region.
- *
- * - The graph \a sent must be a "kinda-DAG", where the only back-edges present
- * are self-loops.
- * - If the result is false, \a bad_region will be updated with the smallest
- * region ID associated with a tail state that is still on.
- */
-bool sentClearsTail(const NGHolder &g,
+ smgb_cache &cache);
+
+/**
+ * Returns true if matching 'sent' causes all tail states in the main graph \a
+ * g to go dead. A tail state is any state with a region greater than
+ * \a last_head_region.
+ *
+ * - The graph \a sent must be a "kinda-DAG", where the only back-edges present
+ * are self-loops.
+ * - If the result is false, \a bad_region will be updated with the smallest
+ * region ID associated with a tail state that is still on.
+ */
+bool sentClearsTail(const NGHolder &g,
const std::unordered_map<NFAVertex, u32> &region_map,
- const NGHolder &sent, u32 last_head_region,
- u32 *bad_region);
-
-} // namespace ue2
-
-#endif // NG_SOM_UTIL_H
+ const NGHolder &sent, u32 last_head_region,
+ u32 *bad_region);
+
+} // namespace ue2
+
+#endif // NG_SOM_UTIL_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp
index 91a099fc38..73170a9104 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp
@@ -1,244 +1,244 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for splitting NFAGraphs into LHS and RHS.
- */
-#include "ng_split.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-void clearAccepts(NGHolder &g) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- g[v].reports.clear();
- }
-
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- g[v].reports.clear();
- }
-
- clear_in_edges(g.accept, g);
- clear_in_edges(g.acceptEod, g);
- add_edge(g.accept, g.acceptEod, g);
-}
-
-static
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for splitting NFAGraphs into LHS and RHS.
+ */
+#include "ng_split.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+void clearAccepts(NGHolder &g) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ g[v].reports.clear();
+ }
+
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ g[v].reports.clear();
+ }
+
+ clear_in_edges(g.accept, g);
+ clear_in_edges(g.acceptEod, g);
+ add_edge(g.accept, g.acceptEod, g);
+}
+
+static
void filterSplitMap(const NGHolder &g,
unordered_map<NFAVertex, NFAVertex> *out_map) {
unordered_set<NFAVertex> verts;
- insert(&verts, vertices(g));
+ insert(&verts, vertices(g));
auto it = out_map->begin();
- while (it != out_map->end()) {
+ while (it != out_map->end()) {
auto jt = it;
- ++it;
- if (!contains(verts, jt->second)) {
- out_map->erase(jt);
- }
- }
-}
-
-static
-void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
+ ++it;
+ if (!contains(verts, jt->second)) {
+ out_map->erase(jt);
+ }
+ }
+}
+
+static
+void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
const vector<NFAVertex> &rhs_pivots, NGHolder *lhs,
unordered_map<NFAVertex, NFAVertex> *lhs_map) {
- assert(lhs && lhs_map);
-
- cloneHolder(*lhs, base, lhs_map);
-
- clearAccepts(*lhs);
-
- for (auto pivot : pivots) {
+ assert(lhs && lhs_map);
+
+ cloneHolder(*lhs, base, lhs_map);
+
+ clearAccepts(*lhs);
+
+ for (auto pivot : pivots) {
DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index,
- num_vertices(*lhs), lhs_map->size());
- assert(contains(*lhs_map, pivot));
-
- for (auto v : rhs_pivots) {
- assert(contains(*lhs_map, v));
- remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs);
- }
-
- (*lhs)[(*lhs_map)[pivot]].reports.insert(0);
- add_edge((*lhs_map)[pivot], lhs->accept, *lhs);
- }
-
+ num_vertices(*lhs), lhs_map->size());
+ assert(contains(*lhs_map, pivot));
+
+ for (auto v : rhs_pivots) {
+ assert(contains(*lhs_map, v));
+ remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs);
+ }
+
+ (*lhs)[(*lhs_map)[pivot]].reports.insert(0);
+ add_edge((*lhs_map)[pivot], lhs->accept, *lhs);
+ }
+
/* should do the renumbering unconditionally as we know edges are already
* misnumbered */
pruneUseless(*lhs, false);
renumber_edges(*lhs);
renumber_vertices(*lhs);
- filterSplitMap(*lhs, lhs_map);
-
- switch (base.kind) {
- case NFA_PREFIX:
- case NFA_OUTFIX:
- lhs->kind = NFA_PREFIX;
- break;
- case NFA_INFIX:
- case NFA_SUFFIX:
- lhs->kind = NFA_INFIX;
- break;
+ filterSplitMap(*lhs, lhs_map);
+
+ switch (base.kind) {
+ case NFA_PREFIX:
+ case NFA_OUTFIX:
+ lhs->kind = NFA_PREFIX;
+ break;
+ case NFA_INFIX:
+ case NFA_SUFFIX:
+ lhs->kind = NFA_INFIX;
+ break;
case NFA_EAGER_PREFIX:
/* Current code should not be assigning eager until well after all the
* splitting is done. */
assert(0);
lhs->kind = NFA_EAGER_PREFIX;
break;
- case NFA_REV_PREFIX:
+ case NFA_REV_PREFIX:
case NFA_OUTFIX_RAW:
- assert(0);
- break;
- }
-}
-
-void splitLHS(const NGHolder &base, NFAVertex pivot,
+ assert(0);
+ break;
+ }
+}
+
+void splitLHS(const NGHolder &base, NFAVertex pivot,
NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map) {
- vector<NFAVertex> pivots(1, pivot);
- vector<NFAVertex> rhs_pivots;
- insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base));
- splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
-}
-
-void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
+ vector<NFAVertex> pivots(1, pivot);
+ vector<NFAVertex> rhs_pivots;
+ insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base));
+ splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
+}
+
+void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) {
- assert(rhs && rhs_map);
-
- cloneHolder(*rhs, base, rhs_map);
-
- clear_out_edges(rhs->start, *rhs);
- clear_out_edges(rhs->startDs, *rhs);
- add_edge(rhs->start, rhs->startDs, *rhs);
- add_edge(rhs->startDs, rhs->startDs, *rhs);
-
- for (auto pivot : pivots) {
- assert(contains(*rhs_map, pivot));
+ assert(rhs && rhs_map);
+
+ cloneHolder(*rhs, base, rhs_map);
+
+ clear_out_edges(rhs->start, *rhs);
+ clear_out_edges(rhs->startDs, *rhs);
+ add_edge(rhs->start, rhs->startDs, *rhs);
+ add_edge(rhs->startDs, rhs->startDs, *rhs);
+
+ for (auto pivot : pivots) {
+ assert(contains(*rhs_map, pivot));
NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
(*rhs)[e].tops.insert(DEFAULT_TOP);
- }
+ }
/* should do the renumbering unconditionally as we know edges are already
* misnumbered */
pruneUseless(*rhs, false);
renumber_edges(*rhs);
renumber_vertices(*rhs);
- filterSplitMap(*rhs, rhs_map);
-
- switch (base.kind) {
- case NFA_PREFIX:
- case NFA_INFIX:
- rhs->kind = NFA_INFIX;
- break;
- case NFA_SUFFIX:
- case NFA_OUTFIX:
- rhs->kind = NFA_SUFFIX;
- break;
+ filterSplitMap(*rhs, rhs_map);
+
+ switch (base.kind) {
+ case NFA_PREFIX:
+ case NFA_INFIX:
+ rhs->kind = NFA_INFIX;
+ break;
+ case NFA_SUFFIX:
+ case NFA_OUTFIX:
+ rhs->kind = NFA_SUFFIX;
+ break;
case NFA_EAGER_PREFIX:
/* Current code should not be assigning eager until well after all the
* splitting is done. */
assert(0);
rhs->kind = NFA_INFIX;
break;
- case NFA_REV_PREFIX:
+ case NFA_REV_PREFIX:
case NFA_OUTFIX_RAW:
- assert(0);
- break;
- }
-}
-
-/** \brief Fills \a succ with the common successors of the vertices in \a
- * pivots. */
-static
-void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots,
- vector<NFAVertex> &succ) {
- assert(!pivots.empty());
-
+ assert(0);
+ break;
+ }
+}
+
+/** \brief Fills \a succ with the common successors of the vertices in \a
+ * pivots. */
+static
+void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots,
+ vector<NFAVertex> &succ) {
+ assert(!pivots.empty());
+
set<NFAVertex> adj;
set<NFAVertex> adj_temp;
-
- insert(&adj, adjacent_vertices(pivots.at(0), g));
-
- for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) {
- NFAVertex pivot = *it;
- adj_temp.clear();
- for (auto v : adjacent_vertices_range(pivot, g)) {
- if (contains(adj, v)) {
- adj_temp.insert(v);
- }
- }
- adj.swap(adj_temp);
- }
-
- succ.insert(succ.end(), adj.begin(), adj.end());
-}
-
-void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots,
+
+ insert(&adj, adjacent_vertices(pivots.at(0), g));
+
+ for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) {
+ NFAVertex pivot = *it;
+ adj_temp.clear();
+ for (auto v : adjacent_vertices_range(pivot, g)) {
+ if (contains(adj, v)) {
+ adj_temp.insert(v);
+ }
+ }
+ adj.swap(adj_temp);
+ }
+
+ succ.insert(succ.end(), adj.begin(), adj.end());
+}
+
+void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots,
NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map,
NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) {
- DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size());
-
- assert(!has_parallel_edge(base));
+ DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size());
+
+ assert(!has_parallel_edge(base));
assert(isCorrectlyTopped(base));
-
- /* RHS pivots are built from the common set of successors of pivots. */
- vector<NFAVertex> rhs_pivots;
- findCommonSuccessors(base, pivots, rhs_pivots);
-
- /* generate lhs */
- splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
-
- /* generate the rhs */
- splitRHS(base, rhs_pivots, rhs, rhs_map);
-
- assert(!has_parallel_edge(*lhs));
- assert(!has_parallel_edge(*rhs));
+
+ /* RHS pivots are built from the common set of successors of pivots. */
+ vector<NFAVertex> rhs_pivots;
+ findCommonSuccessors(base, pivots, rhs_pivots);
+
+ /* generate lhs */
+ splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
+
+ /* generate the rhs */
+ splitRHS(base, rhs_pivots, rhs, rhs_map);
+
+ assert(!has_parallel_edge(*lhs));
+ assert(!has_parallel_edge(*rhs));
assert(isCorrectlyTopped(*lhs));
assert(isCorrectlyTopped(*rhs));
-}
-
-void splitGraph(const NGHolder &base, NFAVertex pivot,
+}
+
+void splitGraph(const NGHolder &base, NFAVertex pivot,
NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map,
NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) {
- vector<NFAVertex> pivots(1, pivot);
- splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map);
-}
-
-} // namespace ue2
+ vector<NFAVertex> pivots(1, pivot);
+ splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.h b/contrib/libs/hyperscan/src/nfagraph/ng_split.h
index 9ddc033257..3867cb76f6 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_split.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.h
@@ -1,76 +1,76 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for splitting NFAGraphs into LHS and RHS.
- */
-
-#ifndef NG_SPLIT_H
-#define NG_SPLIT_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for splitting NFAGraphs into LHS and RHS.
+ */
+
+#ifndef NG_SPLIT_H
+#define NG_SPLIT_H
+
#include "ng_holder.h"
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-/** Note: pivot should be a vertex that dominates acceptEod. Treating 'in'
- * allocated to rhs if they are reachable from the pivot. Conversely, a vertex
- * is in the lhs if it is reachable from start without going through the
- * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS.
- *
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+/** Note: pivot should be a vertex that dominates acceptEod. Treating 'in'
+ * allocated to rhs if they are reachable from the pivot. Conversely, a vertex
+ * is in the lhs if it is reachable from start without going through the
+ * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS.
+ *
* Note: The RHS is setup to be triggered by TOP 0
*
- * When multiple split vertices are provided:
- * - RHS contains all vertices reachable from every pivot
- * - LHS contains all vertices which are reachable from start ignoring any
- * vertices which have an edge to every pivot
- */
-void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
+ * When multiple split vertices are provided:
+ * - RHS contains all vertices reachable from every pivot
+ * - LHS contains all vertices which are reachable from start ignoring any
+ * vertices which have an edge to every pivot
+ */
+void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
std::unordered_map<NFAVertex, NFAVertex> *lhs_map,
- NGHolder *rhs,
+ NGHolder *rhs,
std::unordered_map<NFAVertex, NFAVertex> *rhs_map);
-
-void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots,
- NGHolder *lhs,
+
+void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots,
+ NGHolder *lhs,
std::unordered_map<NFAVertex, NFAVertex> *lhs_map,
- NGHolder *rhs,
+ NGHolder *rhs,
std::unordered_map<NFAVertex, NFAVertex> *rhs_map);
-
-void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
+
+void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
std::unordered_map<NFAVertex, NFAVertex> *lhs_map);
-
-void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots,
+
+void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots,
NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map);
-
-} // namespace ue2
-
-#endif // NG_SPLIT_H
+
+} // namespace ue2
+
+#endif // NG_SPLIT_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp
index 03495d1441..ac788157b0 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp
@@ -1,324 +1,324 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph state squashing analysis.
- *
- * The basic idea behind the state squashing is that when we are in a cyclic
- * state v there are certain other states which are completely irrelevant. This
- * is used primarily by the determinisation process to produce smaller DFAs by
- * not tracking irrelevant states. It's also used by the LimEx NFA model.
- *
- * Working out which states we can ignore mainly uses the post-dominator
- * analysis.
- *
- * ### Dot Squash Masks:
- *
- * The following vertices are added to the squash mask:
- * - (1) Any vertex post-dominated by the cyclic dot state
- * - (2) Any other vertex post-dominated by the cyclic dot state's successors
- * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state -
- * provided the predecessor's successors are a subset of the cyclic state's
- * successors [For (3), the term successor also includes report information]
- *
- * (2) and (3) allow us to get squash masks from .* as well as .+
- *
- * The squash masks are not optimal especially in the case where there
- * alternations on both sides - for example in:
- *
- * /foo(bar|baz).*(abc|xyz)/s
- *
- * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators
- * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to
- * do a more complete analysis based on cutting the graph and seeing which
- * vertices are unreachable but the current approach is quick and probably
- * adequate.
- *
- *
- * ### Non-Dot Squash Masks:
- *
- * As for dot states. However, if anything in a pdom tree falls outside the
- * character range of the cyclic state the whole pdom tree is ignored. Also when
- * considering the predecessor's pdom tree it is necessary to verify that the
- * predecessor's character reachability falls within that of the cyclic state.
- *
- * We could do better in this case by not throwing away the whole pdom tree -
- * however the bits which we can keep are not clear from the pdom tree of the
- * cyclic state - it probably can be based on the dom or pdom tree of the bad
- * vertex.
- *
- * An example of us doing badly is:
- *
- * /HTTP.*Referer[^\n]*google/s
- *
- * as '[\\n]*' doesn't get a squash mask at all due to .* but we should be able
- * to squash 'Referer'.
- *
- * ### Extension:
- *
- * If a state leads solely to a squashable state (or its immediate successors)
- * with the same reachability we can make this state a squash state of any of
- * the original states squashees which we postdominate. Could probably tighten
- * this up but it would require thought. May not need to keep the original
- * squasher around but that would also require thought.
- *
- * ### SOM Notes:
- *
- * If (left) start of match is required, it is illegal to squash any state which
- * may result in an early start of match reaching the squashing state.
- */
-
-#include "config.h"
-
-#include "ng_squash.h"
-
-#include "ng_dominators.h"
-#include "ng_dump.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_region.h"
-#include "ng_som_util.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-#include "ue2common.h"
-
-#include <deque>
-#include <map>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph state squashing analysis.
+ *
+ * The basic idea behind the state squashing is that when we are in a cyclic
+ * state v there are certain other states which are completely irrelevant. This
+ * is used primarily by the determinisation process to produce smaller DFAs by
+ * not tracking irrelevant states. It's also used by the LimEx NFA model.
+ *
+ * Working out which states we can ignore mainly uses the post-dominator
+ * analysis.
+ *
+ * ### Dot Squash Masks:
+ *
+ * The following vertices are added to the squash mask:
+ * - (1) Any vertex post-dominated by the cyclic dot state
+ * - (2) Any other vertex post-dominated by the cyclic dot state's successors
+ * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state -
+ * provided the predecessor's successors are a subset of the cyclic state's
+ * successors [For (3), the term successor also includes report information]
+ *
+ * (2) and (3) allow us to get squash masks from .* as well as .+
+ *
+ * The squash masks are not optimal especially in the case where there
+ * alternations on both sides - for example in:
+ *
+ * /foo(bar|baz).*(abc|xyz)/s
+ *
+ * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators
+ * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to
+ * do a more complete analysis based on cutting the graph and seeing which
+ * vertices are unreachable but the current approach is quick and probably
+ * adequate.
+ *
+ *
+ * ### Non-Dot Squash Masks:
+ *
+ * As for dot states. However, if anything in a pdom tree falls outside the
+ * character range of the cyclic state the whole pdom tree is ignored. Also when
+ * considering the predecessor's pdom tree it is necessary to verify that the
+ * predecessor's character reachability falls within that of the cyclic state.
+ *
+ * We could do better in this case by not throwing away the whole pdom tree -
+ * however the bits which we can keep are not clear from the pdom tree of the
+ * cyclic state - it probably can be based on the dom or pdom tree of the bad
+ * vertex.
+ *
+ * An example of us doing badly is:
+ *
+ * /HTTP.*Referer[^\n]*google/s
+ *
+ * as '[\\n]*' doesn't get a squash mask at all due to .* but we should be able
+ * to squash 'Referer'.
+ *
+ * ### Extension:
+ *
+ * If a state leads solely to a squashable state (or its immediate successors)
+ * with the same reachability we can make this state a squash state of any of
+ * the original states squashees which we postdominate. Could probably tighten
+ * this up but it would require thought. May not need to keep the original
+ * squasher around but that would also require thought.
+ *
+ * ### SOM Notes:
+ *
+ * If (left) start of match is required, it is illegal to squash any state which
+ * may result in an early start of match reaching the squashing state.
+ */
+
+#include "config.h"
+
+#include "ng_squash.h"
+
+#include "ng_dominators.h"
+#include "ng_dump.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_region.h"
+#include "ng_som_util.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+#include "ue2common.h"
+
+#include <deque>
+#include <map>
#include <unordered_map>
#include <unordered_set>
-
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>;
-
-static
+
+static
PostDomTree buildPDomTree(const NGHolder &g) {
PostDomTree tree;
tree.reserve(num_vertices(g));
-
+
auto postdominators = findPostDominators(g);
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- NFAVertex pdom = postdominators[v];
- if (pdom) {
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ NFAVertex pdom = postdominators[v];
+ if (pdom) {
DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index);
- tree[pdom].insert(v);
- }
- }
+ tree[pdom].insert(v);
+ }
+ }
return tree;
-}
-
-/**
- * Builds a squash mask based on the pdom tree of v and the given char reach.
- * The built squash mask is a bit conservative for non-dot cases and could
- * be improved with a bit of thought.
- */
-static
-void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
- const CharReach &cr, const NFAStateSet &init,
- const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
- som_type som, const vector<DepthMinMax> &som_depths,
+}
+
+/**
+ * Builds a squash mask based on the pdom tree of v and the given char reach.
+ * The built squash mask is a bit conservative for non-dot cases and could
+ * be improved with a bit of thought.
+ */
+static
+void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
+ const CharReach &cr, const NFAStateSet &init,
+ const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
+ som_type som, const vector<DepthMinMax> &som_depths,
const unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache) {
+ smgb_cache &cache) {
DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index);
-
- vector<NFAVertex> q;
-
+
+ vector<NFAVertex> q;
+
auto it = tree.find(v);
- if (it != tree.end()) {
- q.insert(q.end(), it->second.begin(), it->second.end());
- }
-
- const u32 v_index = g[v].index;
-
- while (!q.empty()) {
- NFAVertex u = q.back();
- q.pop_back();
- const CharReach &cru = g[u].char_reach;
-
- if ((cru & ~cr).any()) {
- /* bail: bad cr on vertex u */
- /* TODO: this could be better
- *
- * we still need to ensure that we record any paths leading to u.
- * Hence all vertices R which can reach u must be excluded from the
- * squash mask. Note: R != pdom(u) and there may exist an x in (R -
- * pdom(u)) which is in pdom(y) where y is in q. Clear ?
- */
- mask.set();
- return;
- }
-
- const u32 u_index = g[u].index;
-
- if (som) {
- /* We cannot add a state u to the squash mask of v if it may have an
- * earlier start of match offset. ie for us to add a state u to v
- * maxSomDist(u) <= minSomDist(v)
- */
- const depth &max_som_dist_u = som_depths[u_index].max;
- const depth &min_som_dist_v = som_depths[v_index].min;
-
- if (max_som_dist_u.is_infinite()) {
- /* it is hard to tell due to the INF if u can actually store an
- * earlier SOM than w (state we are building the squash mask
- * for) - need to think more deeply
- */
-
- if (mustBeSetBefore(u, v, g, cache)
- && !somMayGoBackwards(u, g, region_map, cache)) {
- DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
- goto squash_ok;
- }
- }
-
- if (max_som_dist_u > min_som_dist_v) {
- /* u can't be squashed as it may be storing an earlier SOM */
- goto add_children_to_queue;
- }
-
- }
-
- squash_ok:
- mask.set(u_index);
- DEBUG_PRINTF("pdom'ed %u\n", u_index);
- add_children_to_queue:
- it = tree.find(u);
- if (it != tree.end()) {
- q.insert(q.end(), it->second.begin(), it->second.end());
- }
- }
-
- if (cr.all()) {
- /* the init states aren't in the pdom tree. If all their succ states
- * are set (or v), we can consider them post dominated */
-
- /* Note: init states will always result in a later som */
- for (size_t i = init.find_first(); i != init.npos;
- i = init.find_next(i)) {
- /* Yes vacuous patterns do exist */
- NFAVertex iv = vByIndex[i];
- for (auto w : adjacent_vertices_range(iv, g)) {
- if (w == g.accept || w == g.acceptEod) {
- DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
- goto next_init_state;
- }
-
- u32 vert_id = g[w].index;
- if (w != iv && w != v && !mask.test(vert_id)) {
- DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
- goto next_init_state;
- }
- }
- DEBUG_PRINTF("pdom'ed %zu\n", i);
- mask.set(i);
- next_init_state:;
- }
- }
-
- mask.flip();
-}
-
-static
-void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_special(w, g)) {
- succ.set(g[w].index);
- }
- }
-}
-
-static
-void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_special(u, g)) {
- pred.set(g[u].index);
- }
- }
-}
-
-static
-void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
- const PostDomTree &pdom_tree, const NFAStateSet &init,
+ if (it != tree.end()) {
+ q.insert(q.end(), it->second.begin(), it->second.end());
+ }
+
+ const u32 v_index = g[v].index;
+
+ while (!q.empty()) {
+ NFAVertex u = q.back();
+ q.pop_back();
+ const CharReach &cru = g[u].char_reach;
+
+ if ((cru & ~cr).any()) {
+ /* bail: bad cr on vertex u */
+ /* TODO: this could be better
+ *
+ * we still need to ensure that we record any paths leading to u.
+ * Hence all vertices R which can reach u must be excluded from the
+ * squash mask. Note: R != pdom(u) and there may exist an x in (R -
+ * pdom(u)) which is in pdom(y) where y is in q. Clear ?
+ */
+ mask.set();
+ return;
+ }
+
+ const u32 u_index = g[u].index;
+
+ if (som) {
+ /* We cannot add a state u to the squash mask of v if it may have an
+ * earlier start of match offset. ie for us to add a state u to v
+ * maxSomDist(u) <= minSomDist(v)
+ */
+ const depth &max_som_dist_u = som_depths[u_index].max;
+ const depth &min_som_dist_v = som_depths[v_index].min;
+
+ if (max_som_dist_u.is_infinite()) {
+ /* it is hard to tell due to the INF if u can actually store an
+ * earlier SOM than w (state we are building the squash mask
+ * for) - need to think more deeply
+ */
+
+ if (mustBeSetBefore(u, v, g, cache)
+ && !somMayGoBackwards(u, g, region_map, cache)) {
+ DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
+ goto squash_ok;
+ }
+ }
+
+ if (max_som_dist_u > min_som_dist_v) {
+ /* u can't be squashed as it may be storing an earlier SOM */
+ goto add_children_to_queue;
+ }
+
+ }
+
+ squash_ok:
+ mask.set(u_index);
+ DEBUG_PRINTF("pdom'ed %u\n", u_index);
+ add_children_to_queue:
+ it = tree.find(u);
+ if (it != tree.end()) {
+ q.insert(q.end(), it->second.begin(), it->second.end());
+ }
+ }
+
+ if (cr.all()) {
+ /* the init states aren't in the pdom tree. If all their succ states
+ * are set (or v), we can consider them post dominated */
+
+ /* Note: init states will always result in a later som */
+ for (size_t i = init.find_first(); i != init.npos;
+ i = init.find_next(i)) {
+ /* Yes vacuous patterns do exist */
+ NFAVertex iv = vByIndex[i];
+ for (auto w : adjacent_vertices_range(iv, g)) {
+ if (w == g.accept || w == g.acceptEod) {
+ DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
+ goto next_init_state;
+ }
+
+ u32 vert_id = g[w].index;
+ if (w != iv && w != v && !mask.test(vert_id)) {
+ DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
+ goto next_init_state;
+ }
+ }
+ DEBUG_PRINTF("pdom'ed %zu\n", i);
+ mask.set(i);
+ next_init_state:;
+ }
+ }
+
+ mask.flip();
+}
+
+static
+void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_special(w, g)) {
+ succ.set(g[w].index);
+ }
+ }
+}
+
+static
+void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_special(u, g)) {
+ pred.set(g[u].index);
+ }
+ }
+}
+
+static
+void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
+ const PostDomTree &pdom_tree, const NFAStateSet &init,
unordered_map<NFAVertex, NFAStateSet> *squash,
som_type som, const vector<DepthMinMax> &som_depths,
const unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache) {
- deque<NFAVertex> remaining;
- for (const auto &m : *squash) {
- remaining.push_back(m.first);
- }
-
- while (!remaining.empty()) {
- NFAVertex v = remaining.back();
- remaining.pop_back();
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (is_special(u, g)) {
- continue;
- }
-
- if (g[v].char_reach != g[u].char_reach) {
- continue;
- }
-
- if (out_degree(u, g) != 1) {
- continue;
- }
-
- NFAStateSet u_squash(init.size());
+ smgb_cache &cache) {
+ deque<NFAVertex> remaining;
+ for (const auto &m : *squash) {
+ remaining.push_back(m.first);
+ }
+
+ while (!remaining.empty()) {
+ NFAVertex v = remaining.back();
+ remaining.pop_back();
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
+ if (g[v].char_reach != g[u].char_reach) {
+ continue;
+ }
+
+ if (out_degree(u, g) != 1) {
+ continue;
+ }
+
+ NFAStateSet u_squash(init.size());
size_t u_index = g[u].index;
-
- buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex,
- pdom_tree, som, som_depths, region_map, cache);
-
- u_squash.set(u_index); /* never clear ourselves */
-
- if ((~u_squash).any()) { // i.e. some bits unset in mask
+
+ buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex,
+ pdom_tree, som, som_depths, region_map, cache);
+
+ u_squash.set(u_index); /* never clear ourselves */
+
+ if ((~u_squash).any()) { // i.e. some bits unset in mask
DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index,
- g[v].index);
- (*squash)[u] = u_squash;
- remaining.push_back(u);
- }
- }
- }
-}
-
+ g[v].index);
+ (*squash)[u] = u_squash;
+ remaining.push_back(u);
+ }
+ }
+ }
+}
+
/* If there are redundant states in the graph, it may be possible for two
* sibling .* states to try to squash each other -- which should be prevented.
*
@@ -330,7 +330,7 @@ void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
for (auto it = squash.begin(); it != squash.end();) {
NFAVertex a = it->first;
u32 a_index = g[a].index;
-
+
NFAStateSet a_squash = ~it->second; /* default is mask of survivors */
for (auto b_index = a_squash.find_first(); b_index != a_squash.npos;
b_index = a_squash.find_next(b_index)) {
@@ -365,336 +365,336 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
som_type som) {
unordered_map<NFAVertex, NFAStateSet> squash;
- // Number of bits to use for all our masks. If we're a triggered graph,
- // tops have already been assigned, so we don't have to account for them.
- const u32 numStates = num_vertices(g);
-
- // Build post-dominator tree.
+ // Number of bits to use for all our masks. If we're a triggered graph,
+ // tops have already been assigned, so we don't have to account for them.
+ const u32 numStates = num_vertices(g);
+
+ // Build post-dominator tree.
auto pdom_tree = buildPDomTree(g);
-
- // Build list of vertices by state ID and a set of init states.
+
+ // Build list of vertices by state ID and a set of init states.
vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex());
- NFAStateSet initStates(numStates);
- smgb_cache cache(g);
-
- // Mappings used for SOM mode calculations, otherwise left empty.
- unordered_map<NFAVertex, u32> region_map;
- vector<DepthMinMax> som_depths;
- if (som) {
- region_map = assignRegions(g);
- som_depths = getDistancesFromSOM(g);
- }
-
- for (auto v : vertices_range(g)) {
- const u32 vert_id = g[v].index;
- DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
- assert(vert_id < numStates);
- vByIndex[vert_id] = v;
-
- if (is_any_start(v, g) || !in_degree(v, g)) {
- initStates.set(vert_id);
- }
- }
-
- for (u32 i = 0; i < numStates; i++) {
- NFAVertex v = vByIndex[i];
+ NFAStateSet initStates(numStates);
+ smgb_cache cache(g);
+
+ // Mappings used for SOM mode calculations, otherwise left empty.
+ unordered_map<NFAVertex, u32> region_map;
+ vector<DepthMinMax> som_depths;
+ if (som) {
+ region_map = assignRegions(g);
+ som_depths = getDistancesFromSOM(g);
+ }
+
+ for (auto v : vertices_range(g)) {
+ const u32 vert_id = g[v].index;
+ DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
+ assert(vert_id < numStates);
+ vByIndex[vert_id] = v;
+
+ if (is_any_start(v, g) || !in_degree(v, g)) {
+ initStates.set(vert_id);
+ }
+ }
+
+ for (u32 i = 0; i < numStates; i++) {
+ NFAVertex v = vByIndex[i];
assert(v != NGHolder::null_vertex());
- const CharReach &cr = g[v].char_reach;
-
- /* only non-init cyclics can be squashers */
- if (!hasSelfLoop(v, g) || initStates.test(i)) {
- continue;
- }
-
- DEBUG_PRINTF("state %u is cyclic\n", i);
-
- NFAStateSet mask(numStates), succ(numStates), pred(numStates);
- buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
- som_depths, region_map, cache);
- buildSucc(succ, g, v);
- buildPred(pred, g, v);
- const auto &reports = g[v].reports;
-
- for (size_t j = succ.find_first(); j != succ.npos;
- j = succ.find_next(j)) {
- NFAVertex vj = vByIndex[j];
- NFAStateSet pred2(numStates);
- buildPred(pred2, g, vj);
- if (pred2 == pred) {
- DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
- NFAStateSet tmp(numStates);
- buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
- som, som_depths, region_map, cache);
- mask &= tmp;
- }
- }
-
- for (size_t j = pred.find_first(); j != pred.npos;
- j = pred.find_next(j)) {
- NFAVertex vj = vByIndex[j];
- NFAStateSet succ2(numStates);
- buildSucc(succ2, g, vj);
- /* we can use j as a basis for squashing if its succs are a subset
- * of ours */
- if ((succ2 & ~succ).any()) {
- continue;
- }
-
- if (som) {
- /* We cannot use j to add to the squash mask of v if it may
- * have an earlier start of match offset. ie for us j as a
- * basis for the squash mask of v we require:
- * maxSomDist(j) <= minSomDist(v)
- */
-
- /* ** TODO ** */
-
- const depth &max_som_dist_j =
- som_depths[g[vj].index].max;
- const depth &min_som_dist_v =
- som_depths[g[v].index].min;
- if (max_som_dist_j > min_som_dist_v ||
- max_som_dist_j.is_infinite()) {
- /* j can't be used as it may be storing an earlier SOM */
- continue;
- }
- }
-
- const CharReach &crv = g[vj].char_reach;
-
- /* we also require that j's report information be a subset of ours
- */
- bool seen_special = false;
- for (auto w : adjacent_vertices_range(vj, g)) {
- if (is_special(w, g)) {
- if (!edge(v, w, g).second) {
- goto next_j;
- }
- seen_special = true;
- }
- }
-
- // FIXME: should be subset check?
- if (seen_special && g[vj].reports != reports) {
- continue;
- }
-
- /* ok we can use j */
- if ((crv & ~cr).none()) {
- NFAStateSet tmp(numStates);
- buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
- som, som_depths, region_map, cache);
- mask &= tmp;
- mask.reset(j);
- }
-
- next_j:;
- }
-
- mask.set(i); /* never clear ourselves */
-
- if ((~mask).any()) { // i.e. some bits unset in mask
- DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
- squash.emplace(v, mask);
- }
- }
-
- findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
- som_depths, region_map, cache);
-
+ const CharReach &cr = g[v].char_reach;
+
+ /* only non-init cyclics can be squashers */
+ if (!hasSelfLoop(v, g) || initStates.test(i)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("state %u is cyclic\n", i);
+
+ NFAStateSet mask(numStates), succ(numStates), pred(numStates);
+ buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
+ som_depths, region_map, cache);
+ buildSucc(succ, g, v);
+ buildPred(pred, g, v);
+ const auto &reports = g[v].reports;
+
+ for (size_t j = succ.find_first(); j != succ.npos;
+ j = succ.find_next(j)) {
+ NFAVertex vj = vByIndex[j];
+ NFAStateSet pred2(numStates);
+ buildPred(pred2, g, vj);
+ if (pred2 == pred) {
+ DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
+ NFAStateSet tmp(numStates);
+ buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
+ som, som_depths, region_map, cache);
+ mask &= tmp;
+ }
+ }
+
+ for (size_t j = pred.find_first(); j != pred.npos;
+ j = pred.find_next(j)) {
+ NFAVertex vj = vByIndex[j];
+ NFAStateSet succ2(numStates);
+ buildSucc(succ2, g, vj);
+ /* we can use j as a basis for squashing if its succs are a subset
+ * of ours */
+ if ((succ2 & ~succ).any()) {
+ continue;
+ }
+
+ if (som) {
+ /* We cannot use j to add to the squash mask of v if it may
+ * have an earlier start of match offset. ie for us j as a
+ * basis for the squash mask of v we require:
+ * maxSomDist(j) <= minSomDist(v)
+ */
+
+ /* ** TODO ** */
+
+ const depth &max_som_dist_j =
+ som_depths[g[vj].index].max;
+ const depth &min_som_dist_v =
+ som_depths[g[v].index].min;
+ if (max_som_dist_j > min_som_dist_v ||
+ max_som_dist_j.is_infinite()) {
+ /* j can't be used as it may be storing an earlier SOM */
+ continue;
+ }
+ }
+
+ const CharReach &crv = g[vj].char_reach;
+
+ /* we also require that j's report information be a subset of ours
+ */
+ bool seen_special = false;
+ for (auto w : adjacent_vertices_range(vj, g)) {
+ if (is_special(w, g)) {
+ if (!edge(v, w, g).second) {
+ goto next_j;
+ }
+ seen_special = true;
+ }
+ }
+
+ // FIXME: should be subset check?
+ if (seen_special && g[vj].reports != reports) {
+ continue;
+ }
+
+ /* ok we can use j */
+ if ((crv & ~cr).none()) {
+ NFAStateSet tmp(numStates);
+ buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
+ som, som_depths, region_map, cache);
+ mask &= tmp;
+ mask.reset(j);
+ }
+
+ next_j:;
+ }
+
+ mask.set(i); /* never clear ourselves */
+
+ if ((~mask).any()) { // i.e. some bits unset in mask
+ DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
+ squash.emplace(v, mask);
+ }
+ }
+
+ findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
+ som_depths, region_map, cache);
+
clearMutualSquashers(g, vByIndex, squash);
- return squash;
-}
-
-#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */
-
-/** Some squash states are clearly not advantageous in the NFA, as they do
- * incur the cost of an exception:
- * -# acyclic states
- * -# squash only a few acyclic states
- */
-void filterSquashers(const NGHolder &g,
+ return squash;
+}
+
+#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */
+
+/** Some squash states are clearly not advantageous in the NFA, as they do
+ * incur the cost of an exception:
+ * -# acyclic states
+ * -# squash only a few acyclic states
+ */
+void filterSquashers(const NGHolder &g,
unordered_map<NFAVertex, NFAStateSet> &squash) {
assert(hasCorrectlyNumberedVertices(g));
- DEBUG_PRINTF("filtering\n");
+ DEBUG_PRINTF("filtering\n");
vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */
- for (auto v : vertices_range(g)) {
- rev[g[v].index] = v;
- }
-
- for (auto v : vertices_range(g)) {
- if (!contains(squash, v)) {
- continue;
- }
+ for (auto v : vertices_range(g)) {
+ rev[g[v].index] = v;
+ }
+
+ for (auto v : vertices_range(g)) {
+ if (!contains(squash, v)) {
+ continue;
+ }
DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index);
-
- if (!hasSelfLoop(v, g)) {
- DEBUG_PRINTF("acyclic\n");
- squash.erase(v);
- continue;
- }
-
- NFAStateSet squashed = squash[v];
- squashed.flip(); /* default sense for mask of survivors */
+
+ if (!hasSelfLoop(v, g)) {
+ DEBUG_PRINTF("acyclic\n");
+ squash.erase(v);
+ continue;
+ }
+
+ NFAStateSet squashed = squash[v];
+ squashed.flip(); /* default sense for mask of survivors */
for (auto sq = squashed.find_first(); sq != squashed.npos;
sq = squashed.find_next(sq)) {
- NFAVertex u = rev[sq];
- if (hasSelfLoop(u, g)) {
- DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
- goto next_vertex;
- }
- }
-
- if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
- DEBUG_PRINTF("squash set too small\n");
- squash.erase(v);
- continue;
- }
-
- next_vertex:;
- DEBUG_PRINTF("squash set ok\n");
- }
-}
-
-static
-void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
- const ReportManager &rm,
- set<NFAVertex> &verts) {
- for (auto v : inv_adjacent_vertices_range(accept, g)) {
- if (v == g.accept) {
- continue;
- }
-
- const auto &reports = g[v].reports;
- if (reports.empty()) {
- assert(0);
- continue;
- }
-
- // Must be _all_ highlander callback reports.
- for (auto report : reports) {
- const Report &ir = rm.getReport(report);
- if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) {
- goto next_vertex;
- }
-
- // If there's any bounds, these are handled outside the NFA and
- // probably shouldn't be pre-empted.
- if (ir.hasBounds()) {
- goto next_vertex;
- }
- }
-
- verts.insert(v);
- next_vertex:
- continue;
- }
-}
-
-static
-void removeEdgesToAccept(NGHolder &g, NFAVertex v) {
- const auto &reports = g[v].reports;
- assert(!reports.empty());
-
- // We remove any accept edge with a non-empty subset of the reports of v.
-
- set<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(g.accept, g)) {
- NFAVertex u = source(e, g);
- const auto &r = g[u].reports;
- if (!r.empty() && is_subset_of(r, reports)) {
+ NFAVertex u = rev[sq];
+ if (hasSelfLoop(u, g)) {
+ DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
+ goto next_vertex;
+ }
+ }
+
+ if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
+ DEBUG_PRINTF("squash set too small\n");
+ squash.erase(v);
+ continue;
+ }
+
+ next_vertex:;
+ DEBUG_PRINTF("squash set ok\n");
+ }
+}
+
+static
+void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
+ const ReportManager &rm,
+ set<NFAVertex> &verts) {
+ for (auto v : inv_adjacent_vertices_range(accept, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+
+ const auto &reports = g[v].reports;
+ if (reports.empty()) {
+ assert(0);
+ continue;
+ }
+
+ // Must be _all_ highlander callback reports.
+ for (auto report : reports) {
+ const Report &ir = rm.getReport(report);
+ if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) {
+ goto next_vertex;
+ }
+
+ // If there's any bounds, these are handled outside the NFA and
+ // probably shouldn't be pre-empted.
+ if (ir.hasBounds()) {
+ goto next_vertex;
+ }
+ }
+
+ verts.insert(v);
+ next_vertex:
+ continue;
+ }
+}
+
+static
+void removeEdgesToAccept(NGHolder &g, NFAVertex v) {
+ const auto &reports = g[v].reports;
+ assert(!reports.empty());
+
+ // We remove any accept edge with a non-empty subset of the reports of v.
+
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ const auto &r = g[u].reports;
+ if (!r.empty() && is_subset_of(r, reports)) {
DEBUG_PRINTF("vertex %zu\n", g[u].index);
- dead.insert(e);
- }
- }
-
- for (const auto &e : in_edges_range(g.acceptEod, g)) {
- NFAVertex u = source(e, g);
- const auto &r = g[u].reports;
- if (!r.empty() && is_subset_of(r, reports)) {
+ dead.insert(e);
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ const auto &r = g[u].reports;
+ if (!r.empty() && is_subset_of(r, reports)) {
DEBUG_PRINTF("vertex %zu\n", g[u].index);
- dead.insert(e);
- }
- }
-
- assert(!dead.empty());
- remove_edges(dead, g);
-}
-
-static
-vector<NFAVertex> findUnreachable(const NGHolder &g) {
+ dead.insert(e);
+ }
+ }
+
+ assert(!dead.empty());
+ remove_edges(dead, g);
+}
+
+static
+vector<NFAVertex> findUnreachable(const NGHolder &g) {
const boost::reverse_graph<NGHolder, const NGHolder &> revg(g);
-
+
unordered_map<NFAVertex, boost::default_color_type> colours;
- colours.reserve(num_vertices(g));
-
- depth_first_visit(revg, g.acceptEod,
- make_dfs_visitor(boost::null_visitor()),
- make_assoc_property_map(colours));
-
- // Unreachable vertices are not in the colour map.
- vector<NFAVertex> unreach;
- for (auto v : vertices_range(revg)) {
- if (!contains(colours, v)) {
+ colours.reserve(num_vertices(g));
+
+ depth_first_visit(revg, g.acceptEod,
+ make_dfs_visitor(boost::null_visitor()),
+ make_assoc_property_map(colours));
+
+ // Unreachable vertices are not in the colour map.
+ vector<NFAVertex> unreach;
+ for (auto v : vertices_range(revg)) {
+ if (!contains(colours, v)) {
unreach.push_back(NFAVertex(v));
- }
- }
- return unreach;
-}
-
-/** Populates squash masks for states that can be switched off by highlander
- * (single match) reporters. */
+ }
+ }
+ return unreach;
+}
+
+/** Populates squash masks for states that can be switched off by highlander
+ * (single match) reporters. */
unordered_map<NFAVertex, NFAStateSet>
-findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
+findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
unordered_map<NFAVertex, NFAStateSet> squash;
-
- set<NFAVertex> verts;
- getHighlanderReporters(g, g.accept, rm, verts);
- getHighlanderReporters(g, g.acceptEod, rm, verts);
- if (verts.empty()) {
- DEBUG_PRINTF("no highlander reports\n");
- return squash;
- }
-
- const u32 numStates = num_vertices(g);
-
- for (auto v : verts) {
+
+ set<NFAVertex> verts;
+ getHighlanderReporters(g, g.accept, rm, verts);
+ getHighlanderReporters(g, g.acceptEod, rm, verts);
+ if (verts.empty()) {
+ DEBUG_PRINTF("no highlander reports\n");
+ return squash;
+ }
+
+ const u32 numStates = num_vertices(g);
+
+ for (auto v : verts) {
DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index,
- g[v].reports.size());
-
- // Find the set of vertices that lead to v or any other reporter with a
- // subset of v's reports. We do this by creating a copy of the graph,
- // cutting the appropriate out-edges to accept and seeing which
- // vertices become unreachable.
-
+ g[v].reports.size());
+
+ // Find the set of vertices that lead to v or any other reporter with a
+ // subset of v's reports. We do this by creating a copy of the graph,
+ // cutting the appropriate out-edges to accept and seeing which
+ // vertices become unreachable.
+
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- NGHolder h;
- cloneHolder(h, g, &orig_to_copy);
- removeEdgesToAccept(h, orig_to_copy[v]);
-
- vector<NFAVertex> unreach = findUnreachable(h);
- DEBUG_PRINTF("can squash %zu vertices\n", unreach.size());
- if (unreach.empty()) {
- continue;
- }
-
- if (!contains(squash, v)) {
- squash[v] = NFAStateSet(numStates);
- squash[v].set();
- }
-
- NFAStateSet &mask = squash[v];
-
- for (auto uv : unreach) {
+ NGHolder h;
+ cloneHolder(h, g, &orig_to_copy);
+ removeEdgesToAccept(h, orig_to_copy[v]);
+
+ vector<NFAVertex> unreach = findUnreachable(h);
+ DEBUG_PRINTF("can squash %zu vertices\n", unreach.size());
+ if (unreach.empty()) {
+ continue;
+ }
+
+ if (!contains(squash, v)) {
+ squash[v] = NFAStateSet(numStates);
+ squash[v].set();
+ }
+
+ NFAStateSet &mask = squash[v];
+
+ for (auto uv : unreach) {
DEBUG_PRINTF("squashes index %zu\n", h[uv].index);
- mask.reset(h[uv].index);
- }
- }
-
- return squash;
-}
-
-} // namespace ue2
+ mask.reset(h[uv].index);
+ }
+ }
+
+ return squash;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h
index 489f541e84..16510ddd3a 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h
@@ -1,72 +1,72 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph state squashing analysis.
- */
-#ifndef NG_SQUASH_H
-#define NG_SQUASH_H
-
-#include "ng_holder.h"
-#include "som/som.h"
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph state squashing analysis.
+ */
+#ifndef NG_SQUASH_H
+#define NG_SQUASH_H
+
+#include "ng_holder.h"
+#include "som/som.h"
+#include "ue2common.h"
+
#include <unordered_map>
-#include <boost/dynamic_bitset.hpp>
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-
+#include <boost/dynamic_bitset.hpp>
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+
/**
* Dynamically-sized bitset, as an NFA can have an arbitrary number of states.
*/
using NFAStateSet = boost::dynamic_bitset<>;
-
-/**
- * Populates the squash mask for each vertex (i.e. the set of states to be left
- * on during squashing).
- *
- * The NFAStateSet in the output map is indexed by vertex_index.
- */
+
+/**
+ * Populates the squash mask for each vertex (i.e. the set of states to be left
+ * on during squashing).
+ *
+ * The NFAStateSet in the output map is indexed by vertex_index.
+ */
std::unordered_map<NFAVertex, NFAStateSet>
findSquashers(const NGHolder &g, som_type som = SOM_NONE);
-
-/** Filters out squash states intended only for use in DFA construction. */
-void filterSquashers(const NGHolder &g,
+
+/** Filters out squash states intended only for use in DFA construction. */
+void filterSquashers(const NGHolder &g,
std::unordered_map<NFAVertex, NFAStateSet> &squash);
-
-/** Populates squash masks for states that can be switched off by highlander
- * (single match) reporters. */
+
+/** Populates squash masks for states that can be switched off by highlander
+ * (single match) reporters. */
std::unordered_map<NFAVertex, NFAStateSet>
-findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
-
-} // namespace ue2
-
-#endif // NG_SQUASH_H
+findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
+
+} // namespace ue2
+
+#endif // NG_SQUASH_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp
index 5e627bb593..446c2ba317 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp
@@ -1,193 +1,193 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Stop Alphabet calculation.
- */
-#include "ng_stop.h"
-
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "nfa/castlecompile.h"
-#include "som/som.h"
-#include "util/charreach.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/verify_types.h"
-
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Stop alphabet depth threshold. */
-static const u32 MAX_STOP_DEPTH = 8;
-
-namespace {
-
-/** Depths from start, startDs for this graph. */
-struct InitDepths {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Stop Alphabet calculation.
+ */
+#include "ng_stop.h"
+
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "som/som.h"
+#include "util/charreach.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/verify_types.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Stop alphabet depth threshold. */
+static const u32 MAX_STOP_DEPTH = 8;
+
+namespace {
+
+/** Depths from start, startDs for this graph. */
+struct InitDepths {
explicit InitDepths(const NGHolder &g)
: start(calcDepthsFrom(g, g.start)),
startDs(calcDepthsFrom(g, g.startDs)) {}
-
- depth maxDist(const NGHolder &g, NFAVertex v) const {
- u32 idx = g[v].index;
- assert(idx < start.size() && idx < startDs.size());
- const depth &d_start = start.at(idx).max;
- const depth &d_startDs = startDs.at(idx).max;
- if (d_start.is_unreachable()) {
- return d_startDs;
- } else if (d_startDs.is_unreachable()) {
- return d_start;
- }
- return max(d_start, d_startDs);
- }
-
-private:
- vector<DepthMinMax> start;
- vector<DepthMinMax> startDs;
-};
-
-} // namespace
-
-/** Find the set of characters that are not present in the reachability of
- * graph \p g after a certain depth (currently 8). If a character in this set
- * is encountered, it means that the NFA is either dead or has not progressed
+
+ depth maxDist(const NGHolder &g, NFAVertex v) const {
+ u32 idx = g[v].index;
+ assert(idx < start.size() && idx < startDs.size());
+ const depth &d_start = start.at(idx).max;
+ const depth &d_startDs = startDs.at(idx).max;
+ if (d_start.is_unreachable()) {
+ return d_startDs;
+ } else if (d_startDs.is_unreachable()) {
+ return d_start;
+ }
+ return max(d_start, d_startDs);
+ }
+
+private:
+ vector<DepthMinMax> start;
+ vector<DepthMinMax> startDs;
+};
+
+} // namespace
+
+/** Find the set of characters that are not present in the reachability of
+ * graph \p g after a certain depth (currently 8). If a character in this set
+ * is encountered, it means that the NFA is either dead or has not progressed
* more than 8 characters from its start states.
*
* This is only used to guide merging heuristics, use
* findLeftOffsetStopAlphabet for real uses.
*/
-CharReach findStopAlphabet(const NGHolder &g, som_type som) {
- const depth max_depth(MAX_STOP_DEPTH);
- const InitDepths depths(g);
- const map<NFAVertex, BoundedRepeatSummary> no_vertices;
-
- CharReach stopcr;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- if (depths.maxDist(g, v) >= max_depth) {
- if (som == SOM_NONE) {
- stopcr |= reduced_cr(v, g, no_vertices);
- } else {
- stopcr |= g[v].char_reach;
- }
- }
- }
-
- // Turn alphabet into stops.
- stopcr.flip();
-
- return stopcr;
-}
-
-/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
- * build an eight-bit mask per character C, with each bit representing the
- * depth before the location of character C (if encountered) that the NFA would
- * be in a predictable start state. */
-vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
- const depth max_depth(MAX_STOP_DEPTH);
- const InitDepths depths(g);
- const map<NFAVertex, BoundedRepeatSummary> no_vertices;
-
- vector<CharReach> reach(MAX_STOP_DEPTH);
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- CharReach v_cr;
- if (som == SOM_NONE) {
- v_cr = reduced_cr(v, g, no_vertices);
- } else {
- v_cr = g[v].char_reach;
- }
-
- u32 d = min(max_depth, depths.maxDist(g, v));
- for (u32 i = 0; i < d; i++) {
- reach[i] |= v_cr;
- }
- }
-
-#ifdef DEBUG
- for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
- DEBUG_PRINTF("depth %u, stop chars: ", i);
- describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
- printf("\n");
- }
-#endif
-
- vector<u8> stop(N_CHARS, 0);
-
- for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
- CharReach cr = ~reach[i]; // invert reach for stop chars.
- const u8 mask = 1U << i;
- for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
- stop[c] |= mask;
- }
- }
-
- return stop;
-}
-
-vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
- UNUSED som_type som) {
- const depth max_width = findMaxWidth(castle);
- DEBUG_PRINTF("castle has reach %s and max width %s\n",
- describeClass(castle.reach()).c_str(),
- max_width.str().c_str());
-
- const CharReach escape = ~castle.reach(); // invert reach for stop chars.
-
- u32 d = min(max_width, depth(MAX_STOP_DEPTH));
- const u8 mask = verify_u8((1U << d) - 1);
-
- vector<u8> stop(N_CHARS, 0);
-
- for (size_t c = escape.find_first(); c != escape.npos;
- c = escape.find_next(c)) {
- stop[c] |= mask;
- }
-
- return stop;
-}
-
-} // namespace ue2
+CharReach findStopAlphabet(const NGHolder &g, som_type som) {
+ const depth max_depth(MAX_STOP_DEPTH);
+ const InitDepths depths(g);
+ const map<NFAVertex, BoundedRepeatSummary> no_vertices;
+
+ CharReach stopcr;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (depths.maxDist(g, v) >= max_depth) {
+ if (som == SOM_NONE) {
+ stopcr |= reduced_cr(v, g, no_vertices);
+ } else {
+ stopcr |= g[v].char_reach;
+ }
+ }
+ }
+
+ // Turn alphabet into stops.
+ stopcr.flip();
+
+ return stopcr;
+}
+
+/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
+ * build an eight-bit mask per character C, with each bit representing the
+ * depth before the location of character C (if encountered) that the NFA would
+ * be in a predictable start state. */
+vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
+ const depth max_depth(MAX_STOP_DEPTH);
+ const InitDepths depths(g);
+ const map<NFAVertex, BoundedRepeatSummary> no_vertices;
+
+ vector<CharReach> reach(MAX_STOP_DEPTH);
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ CharReach v_cr;
+ if (som == SOM_NONE) {
+ v_cr = reduced_cr(v, g, no_vertices);
+ } else {
+ v_cr = g[v].char_reach;
+ }
+
+ u32 d = min(max_depth, depths.maxDist(g, v));
+ for (u32 i = 0; i < d; i++) {
+ reach[i] |= v_cr;
+ }
+ }
+
+#ifdef DEBUG
+ for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
+ DEBUG_PRINTF("depth %u, stop chars: ", i);
+ describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
+ printf("\n");
+ }
+#endif
+
+ vector<u8> stop(N_CHARS, 0);
+
+ for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
+ CharReach cr = ~reach[i]; // invert reach for stop chars.
+ const u8 mask = 1U << i;
+ for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
+ stop[c] |= mask;
+ }
+ }
+
+ return stop;
+}
+
+vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
+ UNUSED som_type som) {
+ const depth max_width = findMaxWidth(castle);
+ DEBUG_PRINTF("castle has reach %s and max width %s\n",
+ describeClass(castle.reach()).c_str(),
+ max_width.str().c_str());
+
+ const CharReach escape = ~castle.reach(); // invert reach for stop chars.
+
+ u32 d = min(max_width, depth(MAX_STOP_DEPTH));
+ const u8 mask = verify_u8((1U << d) - 1);
+
+ vector<u8> stop(N_CHARS, 0);
+
+ for (size_t c = escape.find_first(); c != escape.npos;
+ c = escape.find_next(c)) {
+ stop[c] |= mask;
+ }
+
+ return stop;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.h b/contrib/libs/hyperscan/src/nfagraph/ng_stop.h
index 4a889dca09..8399047f7b 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.h
@@ -1,66 +1,66 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Stop Alphabet calculation.
- */
-
-#ifndef NG_STOP_H
-#define NG_STOP_H
-
-#include "ue2common.h"
-#include "som/som.h"
-
-#include <vector>
-
-namespace ue2 {
-
-struct CastleProto;
-class CharReach;
-class NGHolder;
-
-/** Find the set of characters that are not present in the reachability of
- * graph \p g after a certain depth (currently 8). If a character in this set
- * is encountered, it means that the NFA is either dead or has not progressed
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Stop Alphabet calculation.
+ */
+
+#ifndef NG_STOP_H
+#define NG_STOP_H
+
+#include "ue2common.h"
+#include "som/som.h"
+
+#include <vector>
+
+namespace ue2 {
+
+struct CastleProto;
+class CharReach;
+class NGHolder;
+
+/** Find the set of characters that are not present in the reachability of
+ * graph \p g after a certain depth (currently 8). If a character in this set
+ * is encountered, it means that the NFA is either dead or has not progressed
* more than 8 characters from its start states.
*
* This is only used to guide merging heuristics, use
* findLeftOffsetStopAlphabet for real uses.
*/
-CharReach findStopAlphabet(const NGHolder &g, som_type som);
-
-/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
- * build an eight-bit mask per character C, with each bit representing the
- * depth before the location of character C (if encountered) that the NFA would
- * be in a predictable start state. */
-std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som);
-std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som);
-
-} // namespace ue2
-
-#endif
+CharReach findStopAlphabet(const NGHolder &g, som_type som);
+
+/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
+ * build an eight-bit mask per character C, with each bit representing the
+ * depth before the location of character C (if encountered) that the NFA would
+ * be in a predictable start state. */
+std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som);
+std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp
index 4ad5ff7875..6c7259f717 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph merging ("uncalc")
- *
- * The file contains our collection of NFA graph merging strategies.
- *
- * NFAGraph merging is generally guided by the length of the common prefix
- * between NFAGraph pairs.
- */
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_limex.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_uncalc_components.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/ue2string.h"
-
-#include <algorithm>
-#include <deque>
-#include <map>
-#include <queue>
-#include <set>
-#include <vector>
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph merging ("uncalc")
+ *
+ * The file contains our collection of NFA graph merging strategies.
+ *
+ * NFAGraph merging is generally guided by the length of the common prefix
+ * between NFAGraph pairs.
+ */
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_limex.h"
+#include "ng_redundancy.h"
+#include "ng_region.h"
+#include "ng_uncalc_components.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/ue2string.h"
+
+#include <algorithm>
+#include <deque>
+#include <map>
+#include <queue>
+#include <set>
+#include <vector>
+
#include <boost/range/adaptor/map.hpp>
-using namespace std;
+using namespace std;
using boost::adaptors::map_values;
-
-namespace ue2 {
-
-static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */
-
-/** Sentinel value meaning no component has yet been selected. */
+
+namespace ue2 {
+
+static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */
+
+/** Sentinel value meaning no component has yet been selected. */
static const u32 NO_COMPONENT = ~0U;
-
+
static const u32 UNUSED_STATE = ~0U;
-
+
namespace {
struct ranking_info {
explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) {
@@ -77,15 +77,15 @@ struct ranking_info {
for (NFAVertex v : to_vertex) {
to_rank[v] = rank++;
- }
+ }
for (NFAVertex v : vertices_range(h)) {
if (!contains(to_rank, v)) {
to_rank[v] = UNUSED_STATE;
}
}
- }
-
+ }
+
NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); }
u32 get(NFAVertex v) const { return to_rank.at(v); }
u32 size() const { return (u32)to_vertex.size(); }
@@ -94,279 +94,279 @@ struct ranking_info {
to_rank[v] = rank;
to_vertex.push_back(v);
return rank;
- }
-
+ }
+
private:
vector<NFAVertex> to_vertex;
unordered_map<NFAVertex, u32> to_rank;
};
-}
-
-static never_inline
-bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
- const NGHolder &gb, NFAVertex vb) {
- // Must have the same reachability.
- if (ga[va].char_reach != gb[vb].char_reach) {
- return false;
- }
-
- // If they're start vertices, they must be the same one.
- if (is_any_start(va, ga) || is_any_start(vb, gb)) {
- if (ga[va].index != gb[vb].index) {
- return false;
- }
- }
-
- bool va_accept = edge(va, ga.accept, ga).second;
- bool vb_accept = edge(vb, gb.accept, gb).second;
- bool va_acceptEod = edge(va, ga.acceptEod, ga).second;
- bool vb_acceptEod = edge(vb, gb.acceptEod, gb).second;
-
- // Must have the same accept/acceptEod edges.
- if (va_accept != vb_accept || va_acceptEod != vb_acceptEod) {
- return false;
- }
-
- return true;
-}
-
-static never_inline
+}
+
+static never_inline
+bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
+ const NGHolder &gb, NFAVertex vb) {
+ // Must have the same reachability.
+ if (ga[va].char_reach != gb[vb].char_reach) {
+ return false;
+ }
+
+ // If they're start vertices, they must be the same one.
+ if (is_any_start(va, ga) || is_any_start(vb, gb)) {
+ if (ga[va].index != gb[vb].index) {
+ return false;
+ }
+ }
+
+ bool va_accept = edge(va, ga.accept, ga).second;
+ bool vb_accept = edge(vb, gb.accept, gb).second;
+ bool va_acceptEod = edge(va, ga.acceptEod, ga).second;
+ bool vb_acceptEod = edge(vb, gb.acceptEod, gb).second;
+
+ // Must have the same accept/acceptEod edges.
+ if (va_accept != vb_accept || va_acceptEod != vb_acceptEod) {
+ return false;
+ }
+
+ return true;
+}
+
+static never_inline
u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking,
const NGHolder &gb, const ranking_info &b_ranking) {
u32 ml = min(a_ranking.size(), b_ranking.size());
- if (ml > 65535) {
- ml = 65535;
- }
-
- // Count the number of common vertices which share reachability, report and
- // "startedness" properties.
- u32 max = 0;
- for (; max < ml; max++) {
+ if (ml > 65535) {
+ ml = 65535;
+ }
+
+ // Count the number of common vertices which share reachability, report and
+ // "startedness" properties.
+ u32 max = 0;
+ for (; max < ml; max++) {
if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) {
- break;
- }
- }
-
- return max;
-}
-
+ break;
+ }
+ }
+
+ return max;
+}
+
static
u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking,
const NGHolder &gb, const ranking_info &b_ranking) {
- /* upper bound on the common region based on local properties */
+ /* upper bound on the common region based on local properties */
u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking);
- DEBUG_PRINTF("cpl upper bound %u\n", max);
-
- while (max > 0) {
- /* shrink max region based on in-edges from outside the region */
- for (size_t j = max; j > 0; j--) {
+ DEBUG_PRINTF("cpl upper bound %u\n", max);
+
+ while (max > 0) {
+ /* shrink max region based on in-edges from outside the region */
+ for (size_t j = max; j > 0; j--) {
NFAVertex a_v = a_ranking.at(j - 1);
NFAVertex b_v = b_ranking.at(j - 1);
for (auto u : inv_adjacent_vertices_range(a_v, ga)) {
u32 state_id = a_ranking.get(u);
if (state_id != UNUSED_STATE && state_id >= max) {
- max = j - 1;
- DEBUG_PRINTF("lowering max to %u\n", max);
- goto next_vertex;
- }
- }
-
+ max = j - 1;
+ DEBUG_PRINTF("lowering max to %u\n", max);
+ goto next_vertex;
+ }
+ }
+
for (auto u : inv_adjacent_vertices_range(b_v, gb)) {
u32 state_id = b_ranking.get(u);
if (state_id != UNUSED_STATE && state_id >= max) {
- max = j - 1;
- DEBUG_PRINTF("lowering max to %u\n", max);
- goto next_vertex;
- }
- }
-
- next_vertex:;
- }
-
- /* Ensure that every pair of vertices has same out-edges to vertices in
- the region. */
+ max = j - 1;
+ DEBUG_PRINTF("lowering max to %u\n", max);
+ goto next_vertex;
+ }
+ }
+
+ next_vertex:;
+ }
+
+ /* Ensure that every pair of vertices has same out-edges to vertices in
+ the region. */
for (size_t i = 0; i < max; i++) {
- size_t a_count = 0;
- size_t b_count = 0;
-
+ size_t a_count = 0;
+ size_t b_count = 0;
+
for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) {
u32 sid = a_ranking.get(target(a_edge, ga));
if (sid == UNUSED_STATE || sid >= max) {
- continue;
- }
-
- a_count++;
-
+ continue;
+ }
+
+ a_count++;
+
NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb);
-
+
if (!b_edge) {
- max = i;
- DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
- max, i, sid);
+ max = i;
+ DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
+ max, i, sid);
goto try_smaller;
- }
-
+ }
+
if (ga[a_edge].tops != gb[b_edge].tops) {
- max = i;
+ max = i;
DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid);
goto try_smaller;
- }
- }
-
+ }
+ }
+
for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) {
u32 sid = b_ranking.get(b_v);
if (sid == UNUSED_STATE || sid >= max) {
- continue;
- }
-
- b_count++;
- }
-
- if (a_count != b_count) {
- max = i;
+ continue;
+ }
+
+ b_count++;
+ }
+
+ if (a_count != b_count) {
+ max = i;
DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu,"
" b_count=%zu)\n", max, a_count, b_count);
goto try_smaller;
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("survived checks, returning cpl %u\n", max);
return max;
try_smaller:;
- }
-
- DEBUG_PRINTF("failed to find any common region\n");
- return 0;
-}
-
+ }
+
+ DEBUG_PRINTF("failed to find any common region\n");
+ return 0;
+}
+
u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) {
return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb));
}
-static never_inline
+static never_inline
void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) {
assert(&dest != &vic);
auto dest_info = ranking_info(dest);
auto vic_info = ranking_info(vic);
- map<NFAVertex, NFAVertex> vmap; // vic -> dest
-
- vmap[vic.start] = dest.start;
- vmap[vic.startDs] = dest.startDs;
- vmap[vic.accept] = dest.accept;
- vmap[vic.acceptEod] = dest.acceptEod;
+ map<NFAVertex, NFAVertex> vmap; // vic -> dest
+
+ vmap[vic.start] = dest.start;
+ vmap[vic.startDs] = dest.startDs;
+ vmap[vic.accept] = dest.accept;
+ vmap[vic.acceptEod] = dest.acceptEod;
vmap[NGHolder::null_vertex()] = NGHolder::null_vertex();
-
- // For vertices in the common len, add to vmap and merge in the reports, if
- // any.
- for (u32 i = 0; i < common_len; i++) {
+
+ // For vertices in the common len, add to vmap and merge in the reports, if
+ // any.
+ for (u32 i = 0; i < common_len; i++) {
NFAVertex v_old = vic_info.at(i);
NFAVertex v = dest_info.at(i);
- vmap[v_old] = v;
-
- const auto &reports = vic[v_old].reports;
- dest[v].reports.insert(reports.begin(), reports.end());
- }
-
+ vmap[v_old] = v;
+
+ const auto &reports = vic[v_old].reports;
+ dest[v].reports.insert(reports.begin(), reports.end());
+ }
+
// Add in vertices beyond the common len
for (u32 i = common_len; i < vic_info.size(); i++) {
NFAVertex v_old = vic_info.at(i);
-
- if (is_special(v_old, vic)) {
- // Dest already has start vertices, just merge the reports.
- u32 idx = vic[v_old].index;
- NFAVertex v = dest.getSpecialVertex(idx);
- const auto &reports = vic[v_old].reports;
- dest[v].reports.insert(reports.begin(), reports.end());
- continue;
- }
-
- NFAVertex v = add_vertex(vic[v_old], dest);
+
+ if (is_special(v_old, vic)) {
+ // Dest already has start vertices, just merge the reports.
+ u32 idx = vic[v_old].index;
+ NFAVertex v = dest.getSpecialVertex(idx);
+ const auto &reports = vic[v_old].reports;
+ dest[v].reports.insert(reports.begin(), reports.end());
+ continue;
+ }
+
+ NFAVertex v = add_vertex(vic[v_old], dest);
dest_info.add_to_tail(v);
- vmap[v_old] = v;
- }
-
- /* add edges */
- DEBUG_PRINTF("common_len=%zu\n", common_len);
- for (const auto &e : edges_range(vic)) {
+ vmap[v_old] = v;
+ }
+
+ /* add edges */
+ DEBUG_PRINTF("common_len=%zu\n", common_len);
+ for (const auto &e : edges_range(vic)) {
NFAVertex u_old = source(e, vic);
NFAVertex v_old = target(e, vic);
NFAVertex u = vmap[u_old];
NFAVertex v = vmap[v_old];
- bool uspecial = is_special(u, dest);
- bool vspecial = is_special(v, dest);
-
- // Skip stylised edges that are already present.
- if (uspecial && vspecial && edge(u, v, dest).second) {
- continue;
- }
-
- // We're in the common region if v's state ID is low enough, unless v
- // is a special (an accept), in which case we use u's state ID.
+ bool uspecial = is_special(u, dest);
+ bool vspecial = is_special(v, dest);
+
+ // Skip stylised edges that are already present.
+ if (uspecial && vspecial && edge(u, v, dest).second) {
+ continue;
+ }
+
+ // We're in the common region if v's state ID is low enough, unless v
+ // is a special (an accept), in which case we use u's state ID.
bool in_common_region = dest_info.get(v) < common_len;
if (vspecial && dest_info.get(u) < common_len) {
- in_common_region = true;
- }
-
+ in_common_region = true;
+ }
+
DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n",
dest[u].index, dest_info.get(u),
dest[v].index, dest_info.get(v),
- in_common_region ? " [common]" : "");
-
- if (in_common_region) {
- if (!is_special(v, dest)) {
- DEBUG_PRINTF("skipping common edge\n");
- assert(edge(u, v, dest).second);
- // Should never merge edges with different top values.
+ in_common_region ? " [common]" : "");
+
+ if (in_common_region) {
+ if (!is_special(v, dest)) {
+ DEBUG_PRINTF("skipping common edge\n");
+ assert(edge(u, v, dest).second);
+ // Should never merge edges with different top values.
assert(vic[e].tops == dest[edge(u, v, dest)].tops);
- continue;
- } else {
- assert(is_any_accept(v, dest));
- // If the edge exists in both graphs, skip it.
- if (edge(u, v, dest).second) {
- DEBUG_PRINTF("skipping common edge to accept\n");
- continue;
- }
- }
- }
-
- assert(!edge(u, v, dest).second);
- add_edge(u, v, vic[e], dest);
- }
-
+ continue;
+ } else {
+ assert(is_any_accept(v, dest));
+ // If the edge exists in both graphs, skip it.
+ if (edge(u, v, dest).second) {
+ DEBUG_PRINTF("skipping common edge to accept\n");
+ continue;
+ }
+ }
+ }
+
+ assert(!edge(u, v, dest).second);
+ add_edge(u, v, vic[e], dest);
+ }
+
renumber_edges(dest);
renumber_vertices(dest);
-}
-
-namespace {
-struct NfaMergeCandidateH {
- NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in,
- u32 tb_in)
- : cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {}
-
- size_t cpl; //!< common prefix length
- NGHolder *first; //!< first component to merge
- NGHolder *second; //!< second component to merge
- u32 tie_breaker; //!< for determinism
-
- bool operator<(const NfaMergeCandidateH &other) const {
- if (cpl != other.cpl) {
- return cpl < other.cpl;
- } else {
- return tie_breaker < other.tie_breaker;
- }
- }
-};
-
-} // end namespace
-
-/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */
-static
+}
+
+namespace {
+struct NfaMergeCandidateH {
+ NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in,
+ u32 tb_in)
+ : cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {}
+
+ size_t cpl; //!< common prefix length
+ NGHolder *first; //!< first component to merge
+ NGHolder *second; //!< second component to merge
+ u32 tie_breaker; //!< for determinism
+
+ bool operator<(const NfaMergeCandidateH &other) const {
+ if (cpl != other.cpl) {
+ return cpl < other.cpl;
+ } else {
+ return tie_breaker < other.tie_breaker;
+ }
+ }
+};
+
+} // end namespace
+
+/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */
+static
bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl,
const ReportManager *rm, const CompileContext &cc) {
size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl;
-
+
combinedStateCount -= 2 * 2; /* discount accepts from both */
if (is_triggered(ha)) {
@@ -377,130 +377,130 @@ bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl,
combinedStateCount += tops.size();
}
- if (combinedStateCount > FAST_STATE_LIMIT) {
- // More complex implementability check.
- NGHolder h_temp;
- cloneHolder(h_temp, ha);
- assert(h_temp.kind == hb.kind);
- mergeNfaComponent(h_temp, hb, cpl);
- reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
- u32 numStates = isImplementableNFA(h_temp, rm, cc);
- DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
- if (!numStates) {
- DEBUG_PRINTF("not implementable\n");
- return false;
- } else if (numStates > FAST_STATE_LIMIT) {
- DEBUG_PRINTF("too many states to merge\n");
- return false;
- }
- }
-
- return true;
-}
-
-/** Returns true if the graph has start vertices that are compatible for
- * merging. Rose may generate all sorts of wacky vacuous cases, and the merge
- * code isn't currently up to handling them. */
-static
-bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) {
- // Start and startDs must have the same self-loops.
- return (edge(ga.startDs, ga.startDs, ga).second ==
- edge(gb.startDs, gb.startDs, gb).second) &&
- (edge(ga.start, ga.start, ga).second ==
- edge(gb.start, gb.start, gb).second);
-}
-
-static never_inline
-void buildNfaMergeQueue(const vector<NGHolder *> &cluster,
- priority_queue<NfaMergeCandidateH> *pq) {
- const size_t cs = cluster.size();
- assert(cs < NO_COMPONENT);
-
- // First, make sure all holders have numbered states and collect their
- // counts.
+ if (combinedStateCount > FAST_STATE_LIMIT) {
+ // More complex implementability check.
+ NGHolder h_temp;
+ cloneHolder(h_temp, ha);
+ assert(h_temp.kind == hb.kind);
+ mergeNfaComponent(h_temp, hb, cpl);
+ reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
+ u32 numStates = isImplementableNFA(h_temp, rm, cc);
+ DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
+ if (!numStates) {
+ DEBUG_PRINTF("not implementable\n");
+ return false;
+ } else if (numStates > FAST_STATE_LIMIT) {
+ DEBUG_PRINTF("too many states to merge\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** Returns true if the graph has start vertices that are compatible for
+ * merging. Rose may generate all sorts of wacky vacuous cases, and the merge
+ * code isn't currently up to handling them. */
+static
+bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) {
+ // Start and startDs must have the same self-loops.
+ return (edge(ga.startDs, ga.startDs, ga).second ==
+ edge(gb.startDs, gb.startDs, gb).second) &&
+ (edge(ga.start, ga.start, ga).second ==
+ edge(gb.start, gb.start, gb).second);
+}
+
+static never_inline
+void buildNfaMergeQueue(const vector<NGHolder *> &cluster,
+ priority_queue<NfaMergeCandidateH> *pq) {
+ const size_t cs = cluster.size();
+ assert(cs < NO_COMPONENT);
+
+ // First, make sure all holders have numbered states and collect their
+ // counts.
vector<ranking_info> states_map;
states_map.reserve(cs);
- for (size_t i = 0; i < cs; i++) {
- assert(cluster[i]);
+ for (size_t i = 0; i < cs; i++) {
+ assert(cluster[i]);
assert(states_map.size() == i);
const NGHolder &g = *(cluster[i]);
states_map.emplace_back(g);
- }
-
- vector<u16> seen_cpl(cs * cs, 0);
- vector<u32> best_comp(cs, NO_COMPONENT);
-
- /* TODO: understand, explain */
- for (u32 ci = 0; ci < cs; ci++) {
- for (u32 cj = ci + 1; cj < cs; cj++) {
- u16 cpl = 0;
- bool calc = false;
-
- if (best_comp[ci] != NO_COMPONENT) {
- u32 bc = best_comp[ci];
- if (seen_cpl[bc + cs * cj] < seen_cpl[bc + cs * ci]) {
- cpl = seen_cpl[bc + cs * cj];
- DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
- calc = true;
- }
- }
-
- if (!calc && best_comp[cj] != NO_COMPONENT) {
- u32 bc = best_comp[cj];
- if (seen_cpl[bc + cs * ci] < seen_cpl[bc + cs * cj]) {
- cpl = seen_cpl[bc + cs * ci];
- DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
- calc = true;
- }
- }
-
- NGHolder &g_i = *(cluster[ci]);
- NGHolder &g_j = *(cluster[cj]);
-
- if (!compatibleStarts(g_i, g_j)) {
- continue;
- }
-
- if (!calc) {
- cpl = commonPrefixLength(g_i, states_map[ci],
- g_j, states_map[cj]);
- }
-
- seen_cpl[ci + cs * cj] = cpl;
- seen_cpl[cj + cs * ci] = cpl;
-
- if (best_comp[cj] == NO_COMPONENT
- || seen_cpl[best_comp[cj] + cs * cj] < cpl) {
- best_comp[cj] = ci;
- }
-
- DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl);
-
- pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj],
- ci * cs + cj));
- }
- }
-}
-
+ }
+
+ vector<u16> seen_cpl(cs * cs, 0);
+ vector<u32> best_comp(cs, NO_COMPONENT);
+
+ /* TODO: understand, explain */
+ for (u32 ci = 0; ci < cs; ci++) {
+ for (u32 cj = ci + 1; cj < cs; cj++) {
+ u16 cpl = 0;
+ bool calc = false;
+
+ if (best_comp[ci] != NO_COMPONENT) {
+ u32 bc = best_comp[ci];
+ if (seen_cpl[bc + cs * cj] < seen_cpl[bc + cs * ci]) {
+ cpl = seen_cpl[bc + cs * cj];
+ DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
+ calc = true;
+ }
+ }
+
+ if (!calc && best_comp[cj] != NO_COMPONENT) {
+ u32 bc = best_comp[cj];
+ if (seen_cpl[bc + cs * ci] < seen_cpl[bc + cs * cj]) {
+ cpl = seen_cpl[bc + cs * ci];
+ DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
+ calc = true;
+ }
+ }
+
+ NGHolder &g_i = *(cluster[ci]);
+ NGHolder &g_j = *(cluster[cj]);
+
+ if (!compatibleStarts(g_i, g_j)) {
+ continue;
+ }
+
+ if (!calc) {
+ cpl = commonPrefixLength(g_i, states_map[ci],
+ g_j, states_map[cj]);
+ }
+
+ seen_cpl[ci + cs * cj] = cpl;
+ seen_cpl[cj + cs * ci] = cpl;
+
+ if (best_comp[cj] == NO_COMPONENT
+ || seen_cpl[best_comp[cj] + cs * cj] < cpl) {
+ best_comp[cj] = ci;
+ }
+
+ DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl);
+
+ pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj],
+ ci * cs + cj));
+ }
+ }
+}
+
/**
* True if the graphs have mergeable starts.
*
* Nowadays, this means that any vacuous edges must have the same tops. In
* addition, mixed-accept cases need to have matching reports.
*/
-static
-bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
+static
+bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
if (!isVacuous(h1) || !isVacuous(h2)) {
return true;
}
-
+
// Vacuous edges from startDs should not occur: we have better ways to
// implement true dot-star relationships. Just in case they do, ban them
// from being merged unless they have identical reports.
if (is_match_vertex(h1.startDs, h1) || is_match_vertex(h2.startDs, h2)) {
assert(0);
return false;
- }
+ }
/* TODO: relax top checks if reports match */
@@ -509,88 +509,88 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
NFAEdge e2_accept = edge(h2.start, h2.accept, h2);
if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) {
return false;
- }
-
+ }
+
// If both graphs have edge (start, acceptEod), the tops must match.
NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1);
NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2);
if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) {
return false;
}
-
+
// If one graph has an edge to accept and the other has an edge to
// acceptEod, the reports must match for the merge to be safe.
if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) {
if (h1[h1.start].reports != h2[h2.start].reports) {
- return false;
- }
- }
-
- return true;
-}
-
-/** Merge graph \p ga into graph \p gb. Returns false on failure. */
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** Merge graph \p ga into graph \p gb. Returns false on failure. */
bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm,
- const CompileContext &cc) {
- assert(ga.kind == gb.kind);
-
+ const CompileContext &cc) {
+ assert(ga.kind == gb.kind);
+
// Vacuous NFAs require special checks on their starts to ensure that tops
// match, and that reports match for mixed-accept cases.
- if (!mergeableStarts(ga, gb)) {
- DEBUG_PRINTF("starts aren't mergeable\n");
- return false;
- }
-
+ if (!mergeableStarts(ga, gb)) {
+ DEBUG_PRINTF("starts aren't mergeable\n");
+ return false;
+ }
+
u32 cpl = commonPrefixLength(ga, gb);
if (!shouldMerge(gb, ga, cpl, rm, cc)) {
- return false;
- }
-
- mergeNfaComponent(gb, ga, cpl);
- reduceImplementableGraph(gb, SOM_NONE, rm, cc);
- return true;
-}
-
+ return false;
+ }
+
+ mergeNfaComponent(gb, ga, cpl);
+ reduceImplementableGraph(gb, SOM_NONE, rm, cc);
+ return true;
+}
+
map<NGHolder *, NGHolder *> mergeNfaCluster(const vector<NGHolder *> &cluster,
const ReportManager *rm,
const CompileContext &cc) {
map<NGHolder *, NGHolder *> merged;
- if (cluster.size() < 2) {
+ if (cluster.size() < 2) {
return merged;
- }
-
- DEBUG_PRINTF("new cluster, size %zu\n", cluster.size());
-
- priority_queue<NfaMergeCandidateH> pq;
- buildNfaMergeQueue(cluster, &pq);
-
- while (!pq.empty()) {
- NGHolder &pholder = *pq.top().first;
- NGHolder &vholder = *pq.top().second;
- pq.pop();
-
- if (contains(merged, &pholder) || contains(merged, &vholder)) {
- DEBUG_PRINTF("dead\n");
- continue;
- }
-
- if (!mergeNfaPair(vholder, pholder, rm, cc)) {
- DEBUG_PRINTF("merge failed\n");
- continue;
- }
-
- merged.emplace(&vholder, &pholder);
-
- // Seek closure.
- for (auto &m : merged) {
- if (m.second == &vholder) {
- m.second = &pholder;
- }
- }
- }
+ }
+
+ DEBUG_PRINTF("new cluster, size %zu\n", cluster.size());
+
+ priority_queue<NfaMergeCandidateH> pq;
+ buildNfaMergeQueue(cluster, &pq);
+
+ while (!pq.empty()) {
+ NGHolder &pholder = *pq.top().first;
+ NGHolder &vholder = *pq.top().second;
+ pq.pop();
+
+ if (contains(merged, &pholder) || contains(merged, &vholder)) {
+ DEBUG_PRINTF("dead\n");
+ continue;
+ }
+
+ if (!mergeNfaPair(vholder, pholder, rm, cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ continue;
+ }
+
+ merged.emplace(&vholder, &pholder);
+
+ // Seek closure.
+ for (auto &m : merged) {
+ if (m.second == &vholder) {
+ m.second = &pholder;
+ }
+ }
+ }
return merged;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h
index b0f42670a3..57bb242289 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h
@@ -1,74 +1,74 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph merging ("uncalc")
- */
-
-#ifndef NG_UNCALC_COMPONENTS_H
-#define NG_UNCALC_COMPONENTS_H
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-struct CompileContext;
-struct Grey;
-class NGHolder;
-class ReportManager;
-
-/**
- * \brief Returns the common prefix length for a pair of graphs.
- *
- * The CPL is calculated based the topological ordering given by the state
- * indices for each graph.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph merging ("uncalc")
+ */
+
+#ifndef NG_UNCALC_COMPONENTS_H
+#define NG_UNCALC_COMPONENTS_H
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+struct CompileContext;
+struct Grey;
+class NGHolder;
+class ReportManager;
+
+/**
+ * \brief Returns the common prefix length for a pair of graphs.
+ *
+ * The CPL is calculated based the topological ordering given by the state
+ * indices for each graph.
+ */
u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb);
-
-/**
- * \brief Merge the group of graphs in \p cluster where possible.
- *
+
+/**
+ * \brief Merge the group of graphs in \p cluster where possible.
+ *
* The (from, to) mapping of merged graphs is returned.
- */
+ */
std::map<NGHolder *, NGHolder *>
mergeNfaCluster(const std::vector<NGHolder *> &cluster, const ReportManager *rm,
const CompileContext &cc);
-
-/**
- * \brief Merge graph \p ga into graph \p gb.
- *
- * Returns false on failure. On success, \p gb is reduced via \ref
- * reduceImplementableGraph and renumbered.
- */
+
+/**
+ * \brief Merge graph \p ga into graph \p gb.
+ *
+ * Returns false on failure. On success, \p gb is reduced via \ref
+ * reduceImplementableGraph and renumbered.
+ */
bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp
index 89500fe39e..a9afaa304d 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp
@@ -1,303 +1,303 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief UTF-8 transforms and operations.
- */
-#include "ng_utf8.h"
-
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief UTF-8 transforms and operations.
+ */
+#include "ng_utf8.h"
+
+#include "ng.h"
+#include "ng_prune.h"
+#include "ng_util.h"
#include "compiler/compiler.h"
-#include "util/graph_range.h"
-#include "util/unicode_def.h"
-
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
+#include "util/graph_range.h"
+#include "util/unicode_def.h"
+
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("unexpected pred\n");
- assert(0); /* should be true due to the early stage of this analysis */
- return;
- }
-
+ DEBUG_PRINTF("unexpected pred\n");
+ assert(0); /* should be true due to the early stage of this analysis */
+ return;
+ }
+
CharReach &cr = g[v].char_reach;
- if (pred_char == 0xe0) {
- assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
- if (cr == CharReach(0xa0, 0xbf)) {
- cr |= CharReach(0x80, 0x9f);
- }
- } else if (pred_char == 0xf0) {
- assert(cr.isSubsetOf(CharReach(0x90, 0xbf)));
- if (cr == CharReach(0x90, 0xbf)) {
- cr |= CharReach(0x80, 0x8f);
- }
- } else if (pred_char == 0xf4) {
- assert(cr.isSubsetOf(CharReach(0x80, 0x8f)));
- if (cr == CharReach(0x80, 0x8f)) {
- cr |= CharReach(0x90, 0xbf);
- }
- } else {
- assert(0); /* unexpected pred */
- }
-}
-
-/** \brief Relax forbidden UTF-8 sequences.
- *
- * Some byte sequences can not appear in valid UTF-8 as they encode code points
- * above \\x{10ffff} or they represent overlong encodings. As we require valid
- * UTF-8 input, we have no defined behaviour in these cases, as a result we can
- * accept them if it simplifies the graph. */
+ if (pred_char == 0xe0) {
+ assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
+ if (cr == CharReach(0xa0, 0xbf)) {
+ cr |= CharReach(0x80, 0x9f);
+ }
+ } else if (pred_char == 0xf0) {
+ assert(cr.isSubsetOf(CharReach(0x90, 0xbf)));
+ if (cr == CharReach(0x90, 0xbf)) {
+ cr |= CharReach(0x80, 0x8f);
+ }
+ } else if (pred_char == 0xf4) {
+ assert(cr.isSubsetOf(CharReach(0x80, 0x8f)));
+ if (cr == CharReach(0x80, 0x8f)) {
+ cr |= CharReach(0x90, 0xbf);
+ }
+ } else {
+ assert(0); /* unexpected pred */
+ }
+}
+
+/** \brief Relax forbidden UTF-8 sequences.
+ *
+ * Some byte sequences can not appear in valid UTF-8 as they encode code points
+ * above \\x{10ffff} or they represent overlong encodings. As we require valid
+ * UTF-8 input, we have no defined behaviour in these cases, as a result we can
+ * accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
if (!expr.utf8) {
- return;
- }
-
- const CharReach e0(0xe0);
- const CharReach f0(0xf0);
- const CharReach f4(0xf4);
-
+ return;
+ }
+
+ const CharReach e0(0xe0);
+ const CharReach f0(0xf0);
+ const CharReach f4(0xf4);
+
for (auto v : vertices_range(g)) {
const CharReach &cr = g[v].char_reach;
- if (cr == e0 || cr == f0 || cr == f4) {
- u8 pred_char = cr.find_first();
+ if (cr == e0 || cr == f0 || cr == f4) {
+ u8 pred_char = cr.find_first();
for (auto t : adjacent_vertices_range(v, g)) {
allowIllegal(g, t, pred_char);
- }
- }
- }
-}
-
-static
-bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(s, u)) {
- return true;
- }
- }
- return false;
-}
-
-static
-bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (contains(s, w)) {
- return true;
- }
- }
- return false;
-}
-
-static
-void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) {
- set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
- const CharReach &cr = h[v].char_reach;
- if (!isutf8ascii(cr) && !isutf8start(cr)) {
- bad.insert(v);
- }
- }
-
- for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
- const CharReach &cr = h[v].char_reach;
- if (!isutf8ascii(cr) && !isutf8start(cr)) {
- bad.insert(v);
- }
- }
-
- // we want to be careful with asserts connected to starts
- // as well as they may not finish a code point
- for (auto v : vertices_range(h)) {
- if (is_virtual_start(v, h)) {
- bad.insert(v);
- insert(&bad, adjacent_vertices(v, h));
- }
- }
-
- /* we cannot handle vertices connected to accept as would report matches in
- * the middle of codepoints. acceptEod is not a problem as the input must
- * end at a codepoint boundary */
- bad.insert(h.accept);
-
- // If we're in SOM mode, we don't want to mess with vertices that have a
- // direct edge from startDs.
- if (som) {
- insert(&bad, adjacent_vertices(h.startDs, h));
- }
-
- set<NFAVertex> already_seeds; /* already marked as seeds */
- for (auto v : vertices_range(h)) {
- const CharReach &cr = h[v].char_reach;
-
- if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) {
- continue;
- }
-
- if (hasSuccInSet(h, v, bad)) {
- continue;
- }
-
- // Skip vertices that are directly connected to other vertices already
- // in the seeds list: we can't collapse two of these directly next to
- // each other.
- if (hasPredInSet(h, v, already_seeds) ||
- hasSuccInSet(h, v, already_seeds)) {
- continue;
- }
-
+ }
+ }
+ }
+}
+
+static
+bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(s, u)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (contains(s, w)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) {
+ set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+ const CharReach &cr = h[v].char_reach;
+ if (!isutf8ascii(cr) && !isutf8start(cr)) {
+ bad.insert(v);
+ }
+ }
+
+ for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
+ const CharReach &cr = h[v].char_reach;
+ if (!isutf8ascii(cr) && !isutf8start(cr)) {
+ bad.insert(v);
+ }
+ }
+
+ // we want to be careful with asserts connected to starts
+ // as well as they may not finish a code point
+ for (auto v : vertices_range(h)) {
+ if (is_virtual_start(v, h)) {
+ bad.insert(v);
+ insert(&bad, adjacent_vertices(v, h));
+ }
+ }
+
+ /* we cannot handle vertices connected to accept as would report matches in
+ * the middle of codepoints. acceptEod is not a problem as the input must
+ * end at a codepoint boundary */
+ bad.insert(h.accept);
+
+ // If we're in SOM mode, we don't want to mess with vertices that have a
+ // direct edge from startDs.
+ if (som) {
+ insert(&bad, adjacent_vertices(h.startDs, h));
+ }
+
+ set<NFAVertex> already_seeds; /* already marked as seeds */
+ for (auto v : vertices_range(h)) {
+ const CharReach &cr = h[v].char_reach;
+
+ if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) {
+ continue;
+ }
+
+ if (hasSuccInSet(h, v, bad)) {
+ continue;
+ }
+
+ // Skip vertices that are directly connected to other vertices already
+ // in the seeds list: we can't collapse two of these directly next to
+ // each other.
+ if (hasPredInSet(h, v, already_seeds) ||
+ hasSuccInSet(h, v, already_seeds)) {
+ continue;
+ }
+
DEBUG_PRINTF("%zu is a seed\n", h[v].index);
- seeds->push_back(v);
- already_seeds.insert(v);
- }
-}
-
-static
-bool expandCyclic(NGHolder &h, NFAVertex v) {
+ seeds->push_back(v);
+ already_seeds.insert(v);
+ }
+}
+
+static
+bool expandCyclic(NGHolder &h, NFAVertex v) {
DEBUG_PRINTF("inspecting %zu\n", h[v].index);
- bool changes = false;
-
+ bool changes = false;
+
auto v_preds = preds(v, h);
auto v_succs = succs(v, h);
- set<NFAVertex> start_siblings;
- set<NFAVertex> end_siblings;
-
- CharReach &v_cr = h[v].char_reach;
-
- /* We need to find start vertices which have all of our preds.
- * As we have a self loop, it must be one of our succs. */
- for (auto a : adjacent_vertices_range(v, h)) {
+ set<NFAVertex> start_siblings;
+ set<NFAVertex> end_siblings;
+
+ CharReach &v_cr = h[v].char_reach;
+
+ /* We need to find start vertices which have all of our preds.
+ * As we have a self loop, it must be one of our succs. */
+ for (auto a : adjacent_vertices_range(v, h)) {
auto a_preds = preds(a, h);
-
- if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
+
+ if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
DEBUG_PRINTF("%zu is a start v\n", h[a].index);
- start_siblings.insert(a);
- }
- }
-
- /* We also need to find full cont vertices which have all our own succs;
- * As we have a self loop, it must be one of our preds. */
- for (auto a : inv_adjacent_vertices_range(v, h)) {
+ start_siblings.insert(a);
+ }
+ }
+
+ /* We also need to find full cont vertices which have all our own succs;
+ * As we have a self loop, it must be one of our preds. */
+ for (auto a : inv_adjacent_vertices_range(v, h)) {
auto a_succs = succs(a, h);
-
- if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
+
+ if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index);
- end_siblings.insert(a);
- }
- }
-
- for (auto s : start_siblings) {
- if (out_degree(s, h) != 1) {
- continue;
- }
-
- const CharReach &cr = h[s].char_reach;
- if (cr.isSubsetOf(UTF_TWO_START_CR)) {
- if (end_siblings.find(*adjacent_vertices(s, h).first)
- == end_siblings.end()) {
+ end_siblings.insert(a);
+ }
+ }
+
+ for (auto s : start_siblings) {
+ if (out_degree(s, h) != 1) {
+ continue;
+ }
+
+ const CharReach &cr = h[s].char_reach;
+ if (cr.isSubsetOf(UTF_TWO_START_CR)) {
+ if (end_siblings.find(*adjacent_vertices(s, h).first)
+ == end_siblings.end()) {
DEBUG_PRINTF("%zu is odd\n", h[s].index);
- continue;
- }
- } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
- NFAVertex m = *adjacent_vertices(s, h).first;
-
- if (h[m].char_reach != UTF_CONT_CR
- || out_degree(m, h) != 1) {
- continue;
- }
- if (end_siblings.find(*adjacent_vertices(m, h).first)
- == end_siblings.end()) {
+ continue;
+ }
+ } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
+ NFAVertex m = *adjacent_vertices(s, h).first;
+
+ if (h[m].char_reach != UTF_CONT_CR
+ || out_degree(m, h) != 1) {
+ continue;
+ }
+ if (end_siblings.find(*adjacent_vertices(m, h).first)
+ == end_siblings.end()) {
DEBUG_PRINTF("%zu is odd\n", h[s].index);
- continue;
- }
- } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
- NFAVertex m1 = *adjacent_vertices(s, h).first;
-
- if (h[m1].char_reach != UTF_CONT_CR
- || out_degree(m1, h) != 1) {
- continue;
- }
-
- NFAVertex m2 = *adjacent_vertices(m1, h).first;
-
- if (h[m2].char_reach != UTF_CONT_CR
- || out_degree(m2, h) != 1) {
- continue;
- }
-
- if (end_siblings.find(*adjacent_vertices(m2, h).first)
- == end_siblings.end()) {
+ continue;
+ }
+ } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
+ NFAVertex m1 = *adjacent_vertices(s, h).first;
+
+ if (h[m1].char_reach != UTF_CONT_CR
+ || out_degree(m1, h) != 1) {
+ continue;
+ }
+
+ NFAVertex m2 = *adjacent_vertices(m1, h).first;
+
+ if (h[m2].char_reach != UTF_CONT_CR
+ || out_degree(m2, h) != 1) {
+ continue;
+ }
+
+ if (end_siblings.find(*adjacent_vertices(m2, h).first)
+ == end_siblings.end()) {
DEBUG_PRINTF("%zu is odd\n", h[s].index);
- continue;
- }
- } else {
+ continue;
+ }
+ } else {
DEBUG_PRINTF("%zu is bad\n", h[s].index);
- continue;
- }
-
- v_cr |= cr;
- clear_vertex(s, h);
- changes = true;
- }
-
- if (changes) {
- v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
- v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
- * valid unicode data */
- v_cr.set(0xc1);
- v_cr |= CharReach(0xf5, 0xff);
- }
-
- return changes;
-}
-
-/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
- * where possible, based on the assumption that we will always be matching
- * against well-formed input. */
-void utf8DotRestoration(NGHolder &h, bool som) {
- vector<NFAVertex> seeds; /* cyclic ascii vertices */
- findSeeds(h, som, &seeds);
-
- bool changes = false;
- for (auto v : seeds) {
- changes |= expandCyclic(h, v);
- }
-
- if (changes) {
- pruneUseless(h);
- }
-}
-
-} // namespace ue2
+ continue;
+ }
+
+ v_cr |= cr;
+ clear_vertex(s, h);
+ changes = true;
+ }
+
+ if (changes) {
+ v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
+ v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
+ * valid unicode data */
+ v_cr.set(0xc1);
+ v_cr |= CharReach(0xf5, 0xff);
+ }
+
+ return changes;
+}
+
+/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
+ * where possible, based on the assumption that we will always be matching
+ * against well-formed input. */
+void utf8DotRestoration(NGHolder &h, bool som) {
+ vector<NFAVertex> seeds; /* cyclic ascii vertices */
+ findSeeds(h, som, &seeds);
+
+ bool changes = false;
+ for (auto v : seeds) {
+ changes |= expandCyclic(h, v);
+ }
+
+ if (changes) {
+ pruneUseless(h);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h
index 7c4288336f..0300088039 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h
@@ -1,57 +1,57 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief UTF-8 transforms and operations.
- */
-
-#ifndef NG_UTF8_H
-#define NG_UTF8_H
-
-namespace ue2 {
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief UTF-8 transforms and operations.
+ */
+
+#ifndef NG_UTF8_H
+#define NG_UTF8_H
+
+namespace ue2 {
+
class ExpressionInfo;
-class NGHolder;
-
-/** \brief Relax forbidden UTF-8 sequences.
- *
- * Some byte sequences can not appear in valid UTF-8 as they encode code points
- * above \\x{10ffff} or they represent overlong encodings. As we require valid
- * UTF-8 input, we have no defined behaviour in these cases, as a result we can
- * accept them if it simplifies the graph. */
+class NGHolder;
+
+/** \brief Relax forbidden UTF-8 sequences.
+ *
+ * Some byte sequences can not appear in valid UTF-8 as they encode code points
+ * above \\x{10ffff} or they represent overlong encodings. As we require valid
+ * UTF-8 input, we have no defined behaviour in these cases, as a result we can
+ * accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr);
-
-/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
- * where possible, based on the assumption that we will always be matching
- * against well-formed input.
- */
-void utf8DotRestoration(NGHolder &h, bool som);
-
-} // namespace ue2
-
-#endif
+
+/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
+ * where possible, based on the assumption that we will always be matching
+ * against well-formed input.
+ */
+void utf8DotRestoration(NGHolder &h, bool som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp
index cb2b710358..630193b19b 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp
@@ -1,191 +1,191 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous NFA graph utilities.
- */
-#include "ng_util.h"
-
-#include "grey.h"
-#include "ng_dump.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous NFA graph utilities.
+ */
+#include "ng_util.h"
+
+#include "grey.h"
+#include "ng_dump.h"
#include "ng_prune.h"
-#include "ue2common.h"
-#include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS.
-#include "parser/position.h"
-#include "util/graph_range.h"
+#include "ue2common.h"
+#include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS.
+#include "parser/position.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
-#include "util/report_manager.h"
-
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
+#include "util/report_manager.h"
+
#include <limits>
-#include <map>
-#include <set>
+#include <map>
+#include <set>
#include <unordered_map>
#include <unordered_set>
-#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
using boost::make_filtered_graph;
-using boost::make_assoc_property_map;
-
-namespace ue2 {
-
-NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) {
+using boost::make_assoc_property_map;
+
+namespace ue2 {
+
+NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) {
assert(a != NGHolder::null_vertex());
-
+
NGHolder::out_edge_iterator ii, iie;
- tie(ii, iie) = out_edges(a, g);
- if (ii == iie) {
+ tie(ii, iie) = out_edges(a, g);
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
- NFAVertex b = target(*ii, g);
- if (a == b) {
- ++ii;
- if (ii == iie) {
+ }
+ NFAVertex b = target(*ii, g);
+ if (a == b) {
+ ++ii;
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
-
- b = target(*ii, g);
- if (++ii != iie) {
+ }
+
+ b = target(*ii, g);
+ if (++ii != iie) {
return NGHolder::null_vertex();
- }
- } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) {
+ }
+ } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) {
return NGHolder::null_vertex();
- }
-
- assert(a != b);
- return b;
-}
-
-NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) {
+ }
+
+ assert(a != b);
+ return b;
+}
+
+NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) {
assert(a != NGHolder::null_vertex());
-
- u32 idegree = in_degree(a, g);
- if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) {
+
+ u32 idegree = in_degree(a, g);
+ if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) {
return NGHolder::null_vertex();
- }
-
+ }
+
NGHolder::in_edge_iterator ii, iie;
- tie(ii, iie) = in_edges(a, g);
- if (ii == iie) {
+ tie(ii, iie) = in_edges(a, g);
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
- NFAVertex b = source(*ii, g);
- if (a == b) {
- ++ii;
- if (ii == iie) {
+ }
+ NFAVertex b = source(*ii, g);
+ if (a == b) {
+ ++ii;
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
-
- b = source(*ii, g);
- }
-
- assert(a != b);
- return b;
-}
-
-NFAVertex clone_vertex(NGHolder &g, NFAVertex v) {
- NFAVertex clone = add_vertex(g);
- u32 idx = g[clone].index;
- g[clone] = g[v];
- g[clone].index = idx;
-
- return clone;
-}
-
-void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) {
- for (const auto &e : out_edges_range(source, g)) {
- NFAVertex t = target(e, g);
- if (edge(dest, t, g).second) {
- continue;
- }
+ }
+
+ b = source(*ii, g);
+ }
+
+ assert(a != b);
+ return b;
+}
+
+NFAVertex clone_vertex(NGHolder &g, NFAVertex v) {
+ NFAVertex clone = add_vertex(g);
+ u32 idx = g[clone].index;
+ g[clone] = g[v];
+ g[clone].index = idx;
+
+ return clone;
+}
+
+void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) {
+ for (const auto &e : out_edges_range(source, g)) {
+ NFAVertex t = target(e, g);
+ if (edge(dest, t, g).second) {
+ continue;
+ }
NFAEdge clone = add_edge(dest, t, g);
- u32 idx = g[clone].index;
- g[clone] = g[e];
- g[clone].index = idx;
- }
-}
-
-void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) {
- for (const auto &e : in_edges_range(s, g)) {
- NFAVertex ss = source(e, g);
- assert(!edge(ss, dest, g).second);
+ u32 idx = g[clone].index;
+ g[clone] = g[e];
+ g[clone].index = idx;
+ }
+}
+
+void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) {
+ for (const auto &e : in_edges_range(s, g)) {
+ NFAVertex ss = source(e, g);
+ assert(!edge(ss, dest, g).second);
NFAEdge clone = add_edge(ss, dest, g);
- u32 idx = g[clone].index;
- g[clone] = g[e];
- g[clone].index = idx;
- }
-}
-
-bool onlyOneTop(const NGHolder &g) {
+ u32 idx = g[clone].index;
+ g[clone] = g[e];
+ g[clone].index = idx;
+ }
+}
+
+bool onlyOneTop(const NGHolder &g) {
return getTops(g).size() == 1;
-}
-
-namespace {
-struct CycleFound {};
-struct DetectCycles : public boost::default_dfs_visitor {
- explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {}
+}
+
+namespace {
+struct CycleFound {};
+struct DetectCycles : public boost::default_dfs_visitor {
+ explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {}
void back_edge(const NFAEdge &e, const NGHolder &g) const {
- NFAVertex u = source(e, g), v = target(e, g);
- // We ignore the startDs self-loop.
- if (u == startDs && v == startDs) {
- return;
- }
- // Any other back-edge indicates a cycle.
+ NFAVertex u = source(e, g), v = target(e, g);
+ // We ignore the startDs self-loop.
+ if (u == startDs && v == startDs) {
+ return;
+ }
+ // Any other back-edge indicates a cycle.
DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index);
- throw CycleFound();
- }
-private:
- const NFAVertex startDs;
-};
-} // namespace
-
-bool isVacuous(const NGHolder &h) {
- return edge(h.start, h.accept, h).second
- || edge(h.start, h.acceptEod, h).second
- || edge(h.startDs, h.accept, h).second
- || edge(h.startDs, h.acceptEod, h).second;
-}
-
-bool isAnchored(const NGHolder &g) {
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (v != g.startDs) {
- return false;
- }
- }
- return true;
-}
-
+ throw CycleFound();
+ }
+private:
+ const NFAVertex startDs;
+};
+} // namespace
+
+bool isVacuous(const NGHolder &h) {
+ return edge(h.start, h.accept, h).second
+ || edge(h.start, h.acceptEod, h).second
+ || edge(h.startDs, h.accept, h).second
+ || edge(h.startDs, h.acceptEod, h).second;
+}
+
+bool isAnchored(const NGHolder &g) {
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (v != g.startDs) {
+ return false;
+ }
+ }
+ return true;
+}
+
bool isFloating(const NGHolder &g) {
for (auto v : adjacent_vertices_range(g.start, g)) {
if (v != g.startDs && !edge(g.startDs, v, g).second) {
@@ -195,99 +195,99 @@ bool isFloating(const NGHolder &g) {
return true;
}
-bool isAcyclic(const NGHolder &g) {
- try {
+bool isAcyclic(const NGHolder &g) {
+ try {
boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g),
g.start);
- } catch (const CycleFound &) {
- return false;
- }
-
- return true;
-}
-
-/** True if the graph has a cycle reachable from the given source vertex. */
-bool hasReachableCycle(const NGHolder &g, NFAVertex src) {
- assert(hasCorrectlyNumberedVertices(g));
-
- try {
- // Use depth_first_visit, rather than depth_first_search, so that we
- // only search from src.
+ } catch (const CycleFound &) {
+ return false;
+ }
+
+ return true;
+}
+
+/** True if the graph has a cycle reachable from the given source vertex. */
+bool hasReachableCycle(const NGHolder &g, NFAVertex src) {
+ assert(hasCorrectlyNumberedVertices(g));
+
+ try {
+ // Use depth_first_visit, rather than depth_first_search, so that we
+ // only search from src.
boost::depth_first_visit(g, src, DetectCycles(g),
make_small_color_map(g));
} catch (const CycleFound &) {
- return true;
- }
-
- return false;
-}
-
-bool hasBigCycles(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- set<NFAEdge> dead;
- BackEdges<set<NFAEdge>> backEdgeVisitor(dead);
+ return true;
+ }
+
+ return false;
+}
+
+bool hasBigCycles(const NGHolder &g) {
+ assert(hasCorrectlyNumberedVertices(g));
+ set<NFAEdge> dead;
+ BackEdges<set<NFAEdge>> backEdgeVisitor(dead);
boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g),
g.start);
-
- for (const auto &e : dead) {
- if (source(e, g) != target(e, g)) {
- return true;
- }
- }
-
- return false;
-}
-
+
+ for (const auto &e : dead) {
+ if (source(e, g) != target(e, g)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) {
return any_of_in(vertices_range(g), [&](NFAVertex v) {
return !is_special(v, g) && g[v].char_reach.count() < max_reach_count;
});
-}
-
-bool can_never_match(const NGHolder &g) {
- assert(edge(g.accept, g.acceptEod, g).second);
+}
+
+bool can_never_match(const NGHolder &g) {
+ assert(edge(g.accept, g.acceptEod, g).second);
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
- DEBUG_PRINTF("no paths into accept\n");
- return true;
- }
-
- return false;
-}
-
-bool can_match_at_eod(const NGHolder &h) {
+ DEBUG_PRINTF("no paths into accept\n");
+ return true;
+ }
+
+ return false;
+}
+
+bool can_match_at_eod(const NGHolder &h) {
if (in_degree(h.acceptEod, h) > 1) {
- DEBUG_PRINTF("more than one edge to acceptEod\n");
- return true;
- }
-
- for (auto e : in_edges_range(h.accept, h)) {
- if (h[e].assert_flags) {
- DEBUG_PRINTF("edge to accept has assert flags %d\n",
- h[e].assert_flags);
- return true;
- }
- }
-
- return false;
-}
-
-bool can_only_match_at_eod(const NGHolder &g) {
+ DEBUG_PRINTF("more than one edge to acceptEod\n");
+ return true;
+ }
+
+ for (auto e : in_edges_range(h.accept, h)) {
+ if (h[e].assert_flags) {
+ DEBUG_PRINTF("edge to accept has assert flags %d\n",
+ h[e].assert_flags);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool can_only_match_at_eod(const NGHolder &g) {
NGHolder::in_edge_iterator ie, ee;
- tie(ie, ee) = in_edges(g.accept, g);
-
- return ie == ee;
-}
-
-bool matches_everywhere(const NGHolder &h) {
+ tie(ie, ee) = in_edges(g.accept, g);
+
+ return ie == ee;
+}
+
+bool matches_everywhere(const NGHolder &h) {
NFAEdge e = edge(h.startDs, h.accept, h);
-
+
return e && !h[e].assert_flags;
-}
-
-bool is_virtual_start(NFAVertex v, const NGHolder &g) {
- return g[v].assert_flags & POS_FLAG_VIRTUAL_START;
-}
-
+}
+
+bool is_virtual_start(NFAVertex v, const NGHolder &g) {
+ return g[v].assert_flags & POS_FLAG_VIRTUAL_START;
+}
+
static
void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) {
// Start is last element of reverse topo ordering.
@@ -329,110 +329,110 @@ void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) {
}
}
-vector<NFAVertex> getTopoOrdering(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
-
- // Use the same colour map for both DFS and topological_sort below: avoids
- // having to reallocate it, etc.
+vector<NFAVertex> getTopoOrdering(const NGHolder &g) {
+ assert(hasCorrectlyNumberedVertices(g));
+
+ // Use the same colour map for both DFS and topological_sort below: avoids
+ // having to reallocate it, etc.
auto colors = make_small_color_map(g);
-
+
using EdgeSet = unordered_set<NFAEdge>;
- EdgeSet backEdges;
- BackEdges<EdgeSet> be(backEdges);
-
+ EdgeSet backEdges;
+ BackEdges<EdgeSet> be(backEdges);
+
depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors));
-
+
auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges));
-
- vector<NFAVertex> ordering;
+
+ vector<NFAVertex> ordering;
ordering.reserve(num_vertices(g));
topological_sort(acyclic_g, back_inserter(ordering), color_map(colors));
-
+
reorderSpecials(g, ordering);
- return ordering;
-}
-
-static
-void mustBeSetBefore_int(NFAVertex u, const NGHolder &g,
+ return ordering;
+}
+
+static
+void mustBeSetBefore_int(NFAVertex u, const NGHolder &g,
decltype(make_small_color_map(NGHolder())) &colors) {
- set<NFAVertex> s;
- insert(&s, adjacent_vertices(u, g));
-
- set<NFAEdge> dead; // Edges leading to u or u's successors.
-
- for (auto v : inv_adjacent_vertices_range(u, g)) {
- for (const auto &e : out_edges_range(v, g)) {
- NFAVertex t = target(e, g);
- if (t == u || contains(s, t)) {
- dead.insert(e);
- }
- }
- }
-
+ set<NFAVertex> s;
+ insert(&s, adjacent_vertices(u, g));
+
+ set<NFAEdge> dead; // Edges leading to u or u's successors.
+
+ for (auto v : inv_adjacent_vertices_range(u, g)) {
+ for (const auto &e : out_edges_range(v, g)) {
+ NFAVertex t = target(e, g);
+ if (t == u || contains(s, t)) {
+ dead.insert(e);
+ }
+ }
+ }
+
auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead));
-
+
depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()),
colors);
-}
-
-bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
- mbsb_cache &cache) {
- assert(&cache.g == &g);
- auto key = make_pair(g[u].index, g[v].index);
- DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size());
- if (contains(cache.cache, key)) {
- DEBUG_PRINTF("cache hit\n");
- return cache.cache[key];
- }
-
+}
+
+bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
+ mbsb_cache &cache) {
+ assert(&cache.g == &g);
+ auto key = make_pair(g[u].index, g[v].index);
+ DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size());
+ if (contains(cache.cache, key)) {
+ DEBUG_PRINTF("cache hit\n");
+ return cache.cache[key];
+ }
+
auto colors = make_small_color_map(g);
mustBeSetBefore_int(u, g, colors);
-
- for (auto vi : vertices_range(g)) {
+
+ for (auto vi : vertices_range(g)) {
auto key2 = make_pair(g[u].index, g[vi].index);
DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second);
- assert(!contains(cache.cache, key2));
+ assert(!contains(cache.cache, key2));
bool value = get(colors, vi) == small_color::white;
- cache.cache[key2] = value;
- assert(contains(cache.cache, key2));
- }
+ cache.cache[key2] = value;
+ assert(contains(cache.cache, key2));
+ }
DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second,
- cache.cache.size());
- return cache.cache[key];
-}
-
-void appendLiteral(NGHolder &h, const ue2_literal &s) {
- DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str());
- vector<NFAVertex> tail;
- assert(in_degree(h.acceptEod, h) == 1);
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
- tail.push_back(v);
- }
- assert(!tail.empty());
-
- for (auto v : tail) {
- remove_edge(v, h.accept, h);
- }
-
- for (const auto &c : s) {
- NFAVertex v = add_vertex(h);
- h[v].char_reach = c;
- for (auto u : tail) {
- add_edge(u, v, h);
- }
- tail.clear();
- tail.push_back(v);
- }
-
- for (auto v : tail) {
- add_edge(v, h.accept, h);
- }
-}
-
+ cache.cache.size());
+ return cache.cache[key];
+}
+
+void appendLiteral(NGHolder &h, const ue2_literal &s) {
+ DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str());
+ vector<NFAVertex> tail;
+ assert(in_degree(h.acceptEod, h) == 1);
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+ tail.push_back(v);
+ }
+ assert(!tail.empty());
+
+ for (auto v : tail) {
+ remove_edge(v, h.accept, h);
+ }
+
+ for (const auto &c : s) {
+ NFAVertex v = add_vertex(h);
+ h[v].char_reach = c;
+ for (auto u : tail) {
+ add_edge(u, v, h);
+ }
+ tail.clear();
+ tail.push_back(v);
+ }
+
+ for (auto v : tail) {
+ add_edge(v, h.accept, h);
+ }
+}
+
flat_set<u32> getTops(const NGHolder &h) {
flat_set<u32> tops;
- for (const auto &e : out_edges_range(h.start, h)) {
+ for (const auto &e : out_edges_range(h.start, h)) {
insert(&tops, h[e].tops);
}
return tops;
@@ -442,165 +442,165 @@ void setTops(NGHolder &h, u32 top) {
for (const auto &e : out_edges_range(h.start, h)) {
assert(h[e].tops.empty());
if (target(e, h) == h.startDs) {
- continue;
- }
+ continue;
+ }
h[e].tops.insert(top);
- }
-}
-
-void clearReports(NGHolder &g) {
- DEBUG_PRINTF("clearing reports without an accept edge\n");
+ }
+}
+
+void clearReports(NGHolder &g) {
+ DEBUG_PRINTF("clearing reports without an accept edge\n");
unordered_set<NFAVertex> allow;
- insert(&allow, inv_adjacent_vertices(g.accept, g));
- insert(&allow, inv_adjacent_vertices(g.acceptEod, g));
- allow.erase(g.accept); // due to stylised edge.
-
- for (auto v : vertices_range(g)) {
- if (contains(allow, v)) {
- continue;
- }
- g[v].reports.clear();
- }
-}
-
-void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) {
- for (auto v : vertices_range(g)) {
- auto &reports = g[v].reports;
- if (contains(reports, r_old)) {
- reports.insert(r_new);
- }
- }
-}
-
-static
-void fillHolderOutEdges(NGHolder &out, const NGHolder &in,
+ insert(&allow, inv_adjacent_vertices(g.accept, g));
+ insert(&allow, inv_adjacent_vertices(g.acceptEod, g));
+ allow.erase(g.accept); // due to stylised edge.
+
+ for (auto v : vertices_range(g)) {
+ if (contains(allow, v)) {
+ continue;
+ }
+ g[v].reports.clear();
+ }
+}
+
+void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) {
+ for (auto v : vertices_range(g)) {
+ auto &reports = g[v].reports;
+ if (contains(reports, r_old)) {
+ reports.insert(r_new);
+ }
+ }
+}
+
+static
+void fillHolderOutEdges(NGHolder &out, const NGHolder &in,
const unordered_map<NFAVertex, NFAVertex> &v_map,
- NFAVertex u) {
- NFAVertex u_new = v_map.at(u);
-
- for (auto e : out_edges_range(u, in)) {
- NFAVertex v = target(e, in);
-
- if (is_special(u, in) && is_special(v, in)) {
- continue;
- }
-
- auto it = v_map.find(v);
- if (it == v_map.end()) {
- continue;
- }
- NFAVertex v_new = it->second;
- assert(!edge(u_new, v_new, out).second);
- add_edge(u_new, v_new, in[e], out);
- }
-}
-
-void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv,
+ NFAVertex u) {
+ NFAVertex u_new = v_map.at(u);
+
+ for (auto e : out_edges_range(u, in)) {
+ NFAVertex v = target(e, in);
+
+ if (is_special(u, in) && is_special(v, in)) {
+ continue;
+ }
+
+ auto it = v_map.find(v);
+ if (it == v_map.end()) {
+ continue;
+ }
+ NFAVertex v_new = it->second;
+ assert(!edge(u_new, v_new, out).second);
+ add_edge(u_new, v_new, in[e], out);
+ }
+}
+
+void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv,
unordered_map<NFAVertex, NFAVertex> *v_map_out) {
- NGHolder &out = *outp;
+ NGHolder &out = *outp;
unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out;
-
- out.kind = in.kind;
-
- for (auto v : vv) {
- if (is_special(v, in)) {
- continue;
- }
- v_map[v] = add_vertex(in[v], out);
- }
-
- for (u32 i = 0; i < N_SPECIALS; i++) {
- v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i);
- }
-
- DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size());
-
- fillHolderOutEdges(out, in, v_map, in.start);
- fillHolderOutEdges(out, in, v_map, in.startDs);
-
- for (auto u : vv) {
- if (is_special(u, in)) {
- continue;
- }
- fillHolderOutEdges(out, in, v_map, u);
- }
-
+
+ out.kind = in.kind;
+
+ for (auto v : vv) {
+ if (is_special(v, in)) {
+ continue;
+ }
+ v_map[v] = add_vertex(in[v], out);
+ }
+
+ for (u32 i = 0; i < N_SPECIALS; i++) {
+ v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i);
+ }
+
+ DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size());
+
+ fillHolderOutEdges(out, in, v_map, in.start);
+ fillHolderOutEdges(out, in, v_map, in.startDs);
+
+ for (auto u : vv) {
+ if (is_special(u, in)) {
+ continue;
+ }
+ fillHolderOutEdges(out, in, v_map, u);
+ }
+
renumber_edges(out);
renumber_vertices(out);
-}
-
-void cloneHolder(NGHolder &out, const NGHolder &in) {
- assert(hasCorrectlyNumberedVertices(in));
+}
+
+void cloneHolder(NGHolder &out, const NGHolder &in) {
+ assert(hasCorrectlyNumberedVertices(in));
assert(hasCorrectlyNumberedVertices(out));
- out.kind = in.kind;
-
- // Note: depending on the state of the input graph, some stylized edges
- // (e.g. start->startDs) may not exist. This must be propagated to the
- // output graph as well.
-
- /* remove the existing special edges */
- clear_vertex(out.startDs, out);
- clear_vertex(out.accept, out);
+ out.kind = in.kind;
+
+ // Note: depending on the state of the input graph, some stylized edges
+ // (e.g. start->startDs) may not exist. This must be propagated to the
+ // output graph as well.
+
+ /* remove the existing special edges */
+ clear_vertex(out.startDs, out);
+ clear_vertex(out.accept, out);
renumber_edges(out);
-
- vector<NFAVertex> out_mapping(num_vertices(in));
- out_mapping[NODE_START] = out.start;
- out_mapping[NODE_START_DOTSTAR] = out.startDs;
- out_mapping[NODE_ACCEPT] = out.accept;
- out_mapping[NODE_ACCEPT_EOD] = out.acceptEod;
-
- for (auto v : vertices_range(in)) {
- u32 i = in[v].index;
-
- /* special vertices are already in the out graph */
- if (i >= N_SPECIALS) {
- assert(!out_mapping[i]);
- out_mapping[i] = add_vertex(in[v], out);
- }
-
- out[out_mapping[i]] = in[v];
- }
-
- for (auto e : edges_range(in)) {
- u32 si = in[source(e, in)].index;
- u32 ti = in[target(e, in)].index;
-
- DEBUG_PRINTF("adding edge %u->%u\n", si, ti);
-
- NFAVertex s = out_mapping[si];
- NFAVertex t = out_mapping[ti];
+
+ vector<NFAVertex> out_mapping(num_vertices(in));
+ out_mapping[NODE_START] = out.start;
+ out_mapping[NODE_START_DOTSTAR] = out.startDs;
+ out_mapping[NODE_ACCEPT] = out.accept;
+ out_mapping[NODE_ACCEPT_EOD] = out.acceptEod;
+
+ for (auto v : vertices_range(in)) {
+ u32 i = in[v].index;
+
+ /* special vertices are already in the out graph */
+ if (i >= N_SPECIALS) {
+ assert(!out_mapping[i]);
+ out_mapping[i] = add_vertex(in[v], out);
+ }
+
+ out[out_mapping[i]] = in[v];
+ }
+
+ for (auto e : edges_range(in)) {
+ u32 si = in[source(e, in)].index;
+ u32 ti = in[target(e, in)].index;
+
+ DEBUG_PRINTF("adding edge %u->%u\n", si, ti);
+
+ NFAVertex s = out_mapping[si];
+ NFAVertex t = out_mapping[ti];
NFAEdge e2 = add_edge(s, t, out);
- out[e2] = in[e];
- }
-
- // Safety checks.
+ out[e2] = in[e];
+ }
+
+ // Safety checks.
assert(num_vertices(in) == num_vertices(out));
assert(num_edges(in) == num_edges(out));
- assert(hasCorrectlyNumberedVertices(out));
-}
-
-void cloneHolder(NGHolder &out, const NGHolder &in,
+ assert(hasCorrectlyNumberedVertices(out));
+}
+
+void cloneHolder(NGHolder &out, const NGHolder &in,
unordered_map<NFAVertex, NFAVertex> *mapping) {
- cloneHolder(out, in);
- vector<NFAVertex> out_verts(num_vertices(in));
- for (auto v : vertices_range(out)) {
- out_verts[out[v].index] = v;
- }
-
- mapping->clear();
-
- for (auto v : vertices_range(in)) {
- (*mapping)[v] = out_verts[in[v].index];
- assert((*mapping)[v]);
- }
-}
-
-unique_ptr<NGHolder> cloneHolder(const NGHolder &in) {
- unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>();
- cloneHolder(*h, in);
- return h;
-}
-
+ cloneHolder(out, in);
+ vector<NFAVertex> out_verts(num_vertices(in));
+ for (auto v : vertices_range(out)) {
+ out_verts[out[v].index] = v;
+ }
+
+ mapping->clear();
+
+ for (auto v : vertices_range(in)) {
+ (*mapping)[v] = out_verts[in[v].index];
+ assert((*mapping)[v]);
+ }
+}
+
+unique_ptr<NGHolder> cloneHolder(const NGHolder &in) {
+ unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>();
+ cloneHolder(*h, in);
+ return h;
+}
+
void reverseHolder(const NGHolder &g_in, NGHolder &g) {
// Make the BGL do the grunt work.
unordered_map<NFAVertex, NFAVertex> vertexMap;
@@ -734,58 +734,58 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
return delay;
}
-#ifndef NDEBUG
+#ifndef NDEBUG
-bool allMatchStatesHaveReports(const NGHolder &g) {
+bool allMatchStatesHaveReports(const NGHolder &g) {
unordered_set<NFAVertex> reporters;
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (g[v].reports.empty()) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
- return false;
- }
+ return false;
+ }
reporters.insert(v);
- }
+ }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue; // stylised edge
- }
- if (g[v].reports.empty()) {
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue; // stylised edge
+ }
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
- return false;
- }
+ return false;
+ }
reporters.insert(v);
- }
-
- for (auto v : vertices_range(g)) {
+ }
+
+ for (auto v : vertices_range(g)) {
if (!contains(reporters, v) && !g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n",
g[v].index);
return false;
- }
- }
+ }
+ }
return true;
-}
-
+}
+
bool isCorrectlyTopped(const NGHolder &g) {
if (is_triggered(g)) {
for (const auto &e : out_edges_range(g.start, g)) {
if (g[e].tops.empty() != (target(e, g) == g.startDs)) {
return false;
}
- }
+ }
} else {
for (const auto &e : out_edges_range(g.start, g)) {
if (!g[e].tops.empty()) {
return false;
}
}
- }
+ }
return true;
-}
-
-#endif // NDEBUG
+}
-} // namespace ue2
+#endif // NDEBUG
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_util.h
index a2d0d9b7d6..0f89b64dc9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_util.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.h
@@ -1,44 +1,44 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous NFA graph utilities.
- */
-#ifndef NG_UTIL_H
-#define NG_UTIL_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous NFA graph utilities.
+ */
+#ifndef NG_UTIL_H
+#define NG_UTIL_H
+
#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ue2common.h"
+#include "ng_holder.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
+#include "util/graph.h"
+#include "util/graph_range.h"
+
#include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor
#include <algorithm>
@@ -46,12 +46,12 @@
#include <unordered_map>
#include <vector>
-namespace ue2 {
-
-struct Grey;
-struct ue2_literal;
-class ReportManager;
-
+namespace ue2 {
+
+struct Grey;
+struct ue2_literal;
+class ReportManager;
+
template<class VertexDepth>
depth maxDistFromInit(const VertexDepth &vd) {
if (vd.fromStart.max.is_unreachable()) {
@@ -62,7 +62,7 @@ depth maxDistFromInit(const VertexDepth &vd) {
return std::max(vd.fromStartDotStar.max, vd.fromStart.max);
}
}
-
+
template<class VertexDepth>
depth maxDistFromStartOfData(const VertexDepth &vd) {
if (vd.fromStartDotStar.max.is_reachable()) {
@@ -73,21 +73,21 @@ depth maxDistFromStartOfData(const VertexDepth &vd) {
}
}
-/** True if the given vertex is a dot (reachable on any character). */
-template<class GraphT>
-static really_inline
-bool is_dot(NFAVertex v, const GraphT &g) {
- return g[v].char_reach.all();
-}
-
-/** adds successors of v to s */
-template<class U>
-static really_inline
-void succ(const NGHolder &g, NFAVertex v, U *s) {
+/** True if the given vertex is a dot (reachable on any character). */
+template<class GraphT>
+static really_inline
+bool is_dot(NFAVertex v, const GraphT &g) {
+ return g[v].char_reach.all();
+}
+
+/** adds successors of v to s */
+template<class U>
+static really_inline
+void succ(const NGHolder &g, NFAVertex v, U *s) {
auto rv = adjacent_vertices(v, g);
s->insert(rv.first, rv.second);
-}
-
+}
+
template<class ContTemp = flat_set<NFAVertex>>
ContTemp succs(NFAVertex u, const NGHolder &g) {
ContTemp rv;
@@ -95,14 +95,14 @@ ContTemp succs(NFAVertex u, const NGHolder &g) {
return rv;
}
-/** adds predecessors of v to s */
-template<class U>
-static really_inline
-void pred(const NGHolder &g, NFAVertex v, U *p) {
+/** adds predecessors of v to s */
+template<class U>
+static really_inline
+void pred(const NGHolder &g, NFAVertex v, U *p) {
auto rv = inv_adjacent_vertices(v, g);
p->insert(rv.first, rv.second);
-}
-
+}
+
template<class ContTemp = flat_set<NFAVertex>>
ContTemp preds(NFAVertex u, const NGHolder &g) {
ContTemp rv;
@@ -110,15 +110,15 @@ ContTemp preds(NFAVertex u, const NGHolder &g) {
return rv;
}
-/** returns a vertex with an out edge from v and is not v.
- * v must have exactly one out-edge excluding self-loops.
+/** returns a vertex with an out edge from v and is not v.
+ * v must have exactly one out-edge excluding self-loops.
* will return NGHolder::null_vertex() if the preconditions don't hold.
- */
-NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
-
-/** Like getSoleDestVertex but for in-edges */
-NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
-
+ */
+NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
+
+/** Like getSoleDestVertex but for in-edges */
+NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
+
/** \brief edge filtered graph.
*
* This will give you a view over the graph that has none of the edges from
@@ -159,159 +159,159 @@ bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) {
return bad_vertex_filter<VertexSet>(v);
}
-/** Visitor that records back edges */
-template <typename BackEdgeSet>
-class BackEdges : public boost::default_dfs_visitor {
-public:
- explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {}
- template <class EdgeT, class GraphT>
- void back_edge(const EdgeT &e, const GraphT &) {
- backEdges.insert(e); // Remove this back edge only
- }
- BackEdgeSet &backEdges;
-};
-
-/** Returns true if the vertex is either of the real starts (NODE_START,
- * NODE_START_DOTSTAR). */
-template <typename GraphT>
-static really_inline
+/** Visitor that records back edges */
+template <typename BackEdgeSet>
+class BackEdges : public boost::default_dfs_visitor {
+public:
+ explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {}
+ template <class EdgeT, class GraphT>
+ void back_edge(const EdgeT &e, const GraphT &) {
+ backEdges.insert(e); // Remove this back edge only
+ }
+ BackEdgeSet &backEdges;
+};
+
+/** Returns true if the vertex is either of the real starts (NODE_START,
+ * NODE_START_DOTSTAR). */
+template <typename GraphT>
+static really_inline
bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) {
- u32 i = g[v].index;
- return i == NODE_START || i == NODE_START_DOTSTAR;
-}
-
-bool is_virtual_start(NFAVertex v, const NGHolder &g);
-
-template <typename GraphT>
+ u32 i = g[v].index;
+ return i == NODE_START || i == NODE_START_DOTSTAR;
+}
+
+bool is_virtual_start(NFAVertex v, const NGHolder &g);
+
+template <typename GraphT>
bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) {
- u32 i = g[v].index;
- return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
-}
-
-/** returns true iff v has an edge to accept or acceptEod */
-template <typename GraphT>
+ u32 i = g[v].index;
+ return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
+}
+
+/** returns true iff v has an edge to accept or acceptEod */
+template <typename GraphT>
bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) {
- return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
-}
-
-/** Generate a reverse topological ordering for a back-edge filtered version of
+ return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
+}
+
+/** Generate a reverse topological ordering for a back-edge filtered version of
* our graph (as it must be a DAG and correctly numbered).
*
* Note: we ensure that we produce a topo ordering that begins with acceptEod
* and accept (if present) and ends with startDs followed by start.
*/
-std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
-
-bool onlyOneTop(const NGHolder &g);
-
+std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
+
+bool onlyOneTop(const NGHolder &g);
+
/** Return the set of the tops on the given graph. */
-flat_set<u32> getTops(const NGHolder &h);
-
+flat_set<u32> getTops(const NGHolder &h);
+
/** Initialise the tops on h to the provide top. Assumes that h is triggered and
* no tops have been set on h. */
void setTops(NGHolder &h, u32 top = DEFAULT_TOP);
-/** adds a vertex to g with all the same vertex properties as \p v (aside from
- * index) */
-NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
-
-/**
- * \brief Copies all out-edges from source to target.
- *
- * Edge properties (aside from index) are preserved and duplicate edges are
- * skipped.
- */
-void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
-
-/**
- * \brief Copies all in-edges from source to target.
- *
- * Edge properties (aside from index) are preserved.
- */
-void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
-
-/** \brief True if the graph contains an edge from one of {start, startDs} to
- * one of {accept, acceptEod}. */
-bool isVacuous(const NGHolder &h);
-
-/** \brief True if the graph contains no floating vertices (startDs has no
- * proper successors). */
-bool isAnchored(const NGHolder &h);
-
+/** adds a vertex to g with all the same vertex properties as \p v (aside from
+ * index) */
+NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
+
+/**
+ * \brief Copies all out-edges from source to target.
+ *
+ * Edge properties (aside from index) are preserved and duplicate edges are
+ * skipped.
+ */
+void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
+
+/**
+ * \brief Copies all in-edges from source to target.
+ *
+ * Edge properties (aside from index) are preserved.
+ */
+void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
+
+/** \brief True if the graph contains an edge from one of {start, startDs} to
+ * one of {accept, acceptEod}. */
+bool isVacuous(const NGHolder &h);
+
+/** \brief True if the graph contains no floating vertices (startDs has no
+ * proper successors). */
+bool isAnchored(const NGHolder &h);
+
/** \brief True if the graph contains no anchored vertices (start has no
* successors aside from startDs or vertices connected to startDs). */
bool isFloating(const NGHolder &h);
-/** True if the graph contains no back-edges at all, other than the
- * startDs self-loop. */
-bool isAcyclic(const NGHolder &g);
-
-/** True if the graph has a cycle reachable from the given source vertex. */
-bool hasReachableCycle(const NGHolder &g, NFAVertex src);
-
-/** True if g has any cycles which are not self-loops. */
-bool hasBigCycles(const NGHolder &g);
-
+/** True if the graph contains no back-edges at all, other than the
+ * startDs self-loop. */
+bool isAcyclic(const NGHolder &g);
+
+/** True if the graph has a cycle reachable from the given source vertex. */
+bool hasReachableCycle(const NGHolder &g, NFAVertex src);
+
+/** True if g has any cycles which are not self-loops. */
+bool hasBigCycles(const NGHolder &g);
+
/**
* \brief True if g has at least one non-special vertex with reach smaller than
* max_reach_count. The default of 200 is pretty conservative.
*/
bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200);
-/** Returns the set of all vertices that appear in any of the graph's cycles. */
-std::set<NFAVertex> findVerticesInCycles(const NGHolder &g);
-
-bool can_never_match(const NGHolder &g);
-
-/* \brief Does the graph have any edges leading into acceptEod (aside from
- * accept) or will it have after resolving asserts? */
-bool can_match_at_eod(const NGHolder &h);
-
-bool can_only_match_at_eod(const NGHolder &g);
-
-/** \brief Does this graph become a "firehose", matching between every
- * byte? */
-bool matches_everywhere(const NGHolder &h);
-
-
-struct mbsb_cache {
- explicit mbsb_cache(const NGHolder &gg) : g(gg) {}
- std::map<std::pair<u32, u32>, bool> cache;
- const NGHolder &g;
-};
-
-/* weaker than straight domination as allows jump edges */
-bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
- mbsb_cache &cache);
-
-/* adds the literal 's' to the end of the graph before h.accept */
-void appendLiteral(NGHolder &h, const ue2_literal &s);
-
-/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in
- * \a in). A vertex mapping is returned in \a v_map_out. */
-void fillHolder(NGHolder *outp, const NGHolder &in,
- const std::deque<NFAVertex> &vv,
+/** Returns the set of all vertices that appear in any of the graph's cycles. */
+std::set<NFAVertex> findVerticesInCycles(const NGHolder &g);
+
+bool can_never_match(const NGHolder &g);
+
+/* \brief Does the graph have any edges leading into acceptEod (aside from
+ * accept) or will it have after resolving asserts? */
+bool can_match_at_eod(const NGHolder &h);
+
+bool can_only_match_at_eod(const NGHolder &g);
+
+/** \brief Does this graph become a "firehose", matching between every
+ * byte? */
+bool matches_everywhere(const NGHolder &h);
+
+
+struct mbsb_cache {
+ explicit mbsb_cache(const NGHolder &gg) : g(gg) {}
+ std::map<std::pair<u32, u32>, bool> cache;
+ const NGHolder &g;
+};
+
+/* weaker than straight domination as allows jump edges */
+bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
+ mbsb_cache &cache);
+
+/* adds the literal 's' to the end of the graph before h.accept */
+void appendLiteral(NGHolder &h, const ue2_literal &s);
+
+/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in
+ * \a in). A vertex mapping is returned in \a v_map_out. */
+void fillHolder(NGHolder *outp, const NGHolder &in,
+ const std::deque<NFAVertex> &vv,
std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
-
-/** \brief Clone the graph in \a in into graph \a out, returning a vertex
- * mapping in \a v_map_out. */
-void cloneHolder(NGHolder &out, const NGHolder &in,
+
+/** \brief Clone the graph in \a in into graph \a out, returning a vertex
+ * mapping in \a v_map_out. */
+void cloneHolder(NGHolder &out, const NGHolder &in,
std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
-
-/** \brief Clone the graph in \a in into graph \a out. */
-void cloneHolder(NGHolder &out, const NGHolder &in);
-
-/** \brief Build a clone of graph \a in and return a pointer to it. */
-std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in);
-
-/** \brief Clear all reports on vertices that do not have an edge to accept or
- * acceptEod. */
-void clearReports(NGHolder &g);
-
-/** \brief Add report \a r_new to every vertex that already has report \a
- * r_old. */
-void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
-
+
+/** \brief Clone the graph in \a in into graph \a out. */
+void cloneHolder(NGHolder &out, const NGHolder &in);
+
+/** \brief Build a clone of graph \a in and return a pointer to it. */
+std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in);
+
+/** \brief Clear all reports on vertices that do not have an edge to accept or
+ * acceptEod. */
+void clearReports(NGHolder &g);
+
+/** \brief Add report \a r_new to every vertex that already has report \a
+ * r_old. */
+void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
+
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
* accepts. */
void reverseHolder(const NGHolder &g, NGHolder &out);
@@ -321,8 +321,8 @@ void reverseHolder(const NGHolder &g, NGHolder &out);
u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 max_delay, bool overhang_ok = true);
-#ifndef NDEBUG
-
+#ifndef NDEBUG
+
// Assertions: only available in internal builds.
/**
@@ -330,8 +330,8 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
* with edges to accept or acceptEod have at least one report ID. Additionally,
* checks that ONLY vertices with edges to accept or acceptEod has reports.
*/
-bool allMatchStatesHaveReports(const NGHolder &g);
-
+bool allMatchStatesHaveReports(const NGHolder &g);
+
/**
* Assertion: returns true if the graph is triggered and all edges out of start
* have tops OR if the graph is not-triggered and all edges out of start have no
@@ -339,7 +339,7 @@ bool allMatchStatesHaveReports(const NGHolder &g);
*/
bool isCorrectlyTopped(const NGHolder &g);
#endif // NDEBUG
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp
index d1123dff49..71ec2e4bab 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp
@@ -1,143 +1,143 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for vacuous graphs.
- */
-#include "ng_vacuous.h"
-
-#include "grey.h"
-#include "ng.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for vacuous graphs.
+ */
+#include "ng_vacuous.h"
+
+#include "grey.h"
+#include "ng.h"
+#include "ng_util.h"
#include "compiler/compiler.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
+
+using namespace std;
+
+namespace ue2 {
+
+static
ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) {
Report ir = rm.getBasicInternalReport(expr);
-
- // Apply any extended params.
+
+ // Apply any extended params.
if (expr.min_offset || expr.max_offset != MAX_OFFSET) {
ir.minOffset = expr.min_offset;
ir.maxOffset = expr.max_offset;
- }
-
+ }
+
assert(!expr.min_length); // should be handled elsewhere.
-
- return rm.getInternalId(ir);
-}
-
-static
+
+ return rm.getInternalId(ir);
+}
+
+static
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
const ReportID r = getInternalId(rm, expr);
-
- boundary.report_at_0_eod.insert(r);
- boundary.report_at_0.insert(r);
-
- // Replace the graph with a '.+'.
-
- clear_graph(g);
- clearReports(g);
- remove_edge(g.start, g.accept, g);
- remove_edge(g.start, g.acceptEod, g);
- remove_edge(g.startDs, g.accept, g);
- remove_edge(g.startDs, g.acceptEod, g);
-
- NFAVertex v = add_vertex(g);
- g[v].char_reach.setall();
- g[v].reports.insert(r);
- add_edge(v, v, g);
- add_edge(g.start, v, g);
- add_edge(g.startDs, v, g);
- add_edge(v, g.accept, g);
-}
-
-static
-void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
+
+ boundary.report_at_0_eod.insert(r);
+ boundary.report_at_0.insert(r);
+
+ // Replace the graph with a '.+'.
+
+ clear_graph(g);
+ clearReports(g);
+ remove_edge(g.start, g.accept, g);
+ remove_edge(g.start, g.acceptEod, g);
+ remove_edge(g.startDs, g.accept, g);
+ remove_edge(g.startDs, g.acceptEod, g);
+
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach.setall();
+ g[v].reports.insert(r);
+ add_edge(v, v, g);
+ add_edge(g.start, v, g);
+ add_edge(g.startDs, v, g);
+ add_edge(v, g.accept, g);
+}
+
+static
+void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0.insert(getInternalId(rm, expr));
- remove_edge(g.start, g.accept, g);
- remove_edge(g.start, g.acceptEod, g);
- g[g.start].reports.clear();
-}
-
-static
-void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
+ remove_edge(g.start, g.accept, g);
+ remove_edge(g.start, g.acceptEod, g);
+ g[g.start].reports.clear();
+}
+
+static
+void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_eod.insert(getInternalId(rm, expr));
- remove_edge(g.startDs, g.acceptEod, g);
- remove_edge(g.start, g.acceptEod, g);
- g[g.start].reports.clear();
- g[g.startDs].reports.clear();
-}
-
-static
-void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
+ remove_edge(g.startDs, g.acceptEod, g);
+ remove_edge(g.start, g.acceptEod, g);
+ g[g.start].reports.clear();
+ g[g.startDs].reports.clear();
+}
+
+static
+void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0_eod.insert(getInternalId(rm, expr));
- remove_edge(g.start, g.acceptEod, g);
- g[g.start].reports.clear();
-}
-
-bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
+ remove_edge(g.start, g.acceptEod, g);
+ g[g.start].reports.clear();
+}
+
+bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
- if (edge(g.startDs, g.accept, g).second) {
- // e.g. '.*'; match "between" every byte
- DEBUG_PRINTF("graph is firehose\n");
+ if (edge(g.startDs, g.accept, g).second) {
+ // e.g. '.*'; match "between" every byte
+ DEBUG_PRINTF("graph is firehose\n");
makeFirehose(boundary, rm, g, expr);
- return true;
- }
-
- bool work_done = false;
-
- if (edge(g.start, g.accept, g).second) {
- DEBUG_PRINTF("creating anchored acceptor\n");
+ return true;
+ }
+
+ bool work_done = false;
+
+ if (edge(g.start, g.accept, g).second) {
+ DEBUG_PRINTF("creating anchored acceptor\n");
makeAnchoredAcceptor(boundary, rm, g, expr);
- work_done = true;
- }
-
- if (edge(g.startDs, g.acceptEod, g).second) {
- DEBUG_PRINTF("creating end-anchored acceptor\n");
+ work_done = true;
+ }
+
+ if (edge(g.startDs, g.acceptEod, g).second) {
+ DEBUG_PRINTF("creating end-anchored acceptor\n");
makeEndAnchoredAcceptor(boundary, rm, g, expr);
- work_done = true;
- }
-
- if (edge(g.start, g.acceptEod, g).second) {
- DEBUG_PRINTF("creating nothing acceptor\n");
+ work_done = true;
+ }
+
+ if (edge(g.start, g.acceptEod, g).second) {
+ DEBUG_PRINTF("creating nothing acceptor\n");
makeNothingAcceptor(boundary, rm, g, expr);
- work_done = true;
- }
-
- return work_done;
-}
-
-} // namespace ue2
+ work_done = true;
+ }
+
+ return work_done;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h
index c33cb312de..12ad62d812 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h
@@ -1,49 +1,49 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for vacuous graphs.
- */
-
-#ifndef NG_VACUOUS_H
-#define NG_VACUOUS_H
-
-namespace ue2 {
-
-struct BoundaryReports;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for vacuous graphs.
+ */
+
+#ifndef NG_VACUOUS_H
+#define NG_VACUOUS_H
+
+namespace ue2 {
+
+struct BoundaryReports;
class ExpressionInfo;
class NGHolder;
-class ReportManager;
-
-// Returns true if a "vacuous" reporter was created.
-bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
+class ReportManager;
+
+// Returns true if a "vacuous" reporter was created.
+bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr);
-
-} // namespace ue2
-
-#endif // NG_VACUOUS_H
+
+} // namespace ue2
+
+#endif // NG_VACUOUS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp
index 219241ca55..f2d4fb73e4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp
@@ -1,237 +1,237 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for finding the min/max width of the input required to
- * match a pattern.
- */
-#include "ng_width.h"
-
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/depth.h"
-#include "util/graph.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for finding the min/max width of the input required to
+ * match a pattern.
+ */
+#include "ng_width.h"
+
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/depth.h"
+#include "util/graph.h"
#include "util/graph_small_color_map.h"
-
-#include <deque>
-#include <vector>
-
-#include <boost/graph/breadth_first_search.hpp>
-#include <boost/graph/dag_shortest_paths.hpp>
-#include <boost/graph/filtered_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/**
- * Filter out special edges, or in the top-specific variant, start edges that
- * don't have the right top set.
- */
-struct SpecialEdgeFilter {
- SpecialEdgeFilter() {}
- explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {}
+
+#include <deque>
+#include <vector>
+
+#include <boost/graph/breadth_first_search.hpp>
+#include <boost/graph/dag_shortest_paths.hpp>
+#include <boost/graph/filtered_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/**
+ * Filter out special edges, or in the top-specific variant, start edges that
+ * don't have the right top set.
+ */
+struct SpecialEdgeFilter {
+ SpecialEdgeFilter() {}
+ explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {}
SpecialEdgeFilter(const NGHolder &h_in, u32 top_in)
- : h(&h_in), single_top(true), top(top_in) {}
-
- bool operator()(const NFAEdge &e) const {
+ : h(&h_in), single_top(true), top(top_in) {}
+
+ bool operator()(const NFAEdge &e) const {
NFAVertex u = source(e, *h);
NFAVertex v = target(e, *h);
if ((is_any_start(u, *h) && is_any_start(v, *h)) ||
(is_any_accept(u, *h) && is_any_accept(v, *h))) {
- return false;
- }
- if (single_top) {
+ return false;
+ }
+ if (single_top) {
if (u == h->start && !contains((*h)[e].tops, top)) {
- return false;
- }
- if (u == h->startDs) {
- return false;
- }
- }
- return true;
-
- }
-private:
- const NGHolder *h = nullptr;
- bool single_top = false;
- u32 top = 0;
-};
-
-} // namespace
-
-static
-depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
- NFAVertex src) {
- if (isLeafNode(src, h)) {
- return depth::unreachable();
- }
-
+ return false;
+ }
+ if (u == h->startDs) {
+ return false;
+ }
+ }
+ return true;
+
+ }
+private:
+ const NGHolder *h = nullptr;
+ bool single_top = false;
+ u32 top = 0;
+};
+
+} // namespace
+
+static
+depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
+ NFAVertex src) {
+ if (isLeafNode(src, h)) {
+ return depth::unreachable();
+ }
+
boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter);
-
- assert(hasCorrectlyNumberedVertices(h));
- const size_t num = num_vertices(h);
- vector<depth> distance(num, depth::unreachable());
- distance.at(g[src].index) = depth(0);
-
- auto index_map = get(&NFAGraphVertexProps::index, g);
-
- // Since we are interested in the single-source shortest paths on a graph
- // with the same weight on every edge, using BFS will be faster than
- // Dijkstra here.
+
+ assert(hasCorrectlyNumberedVertices(h));
+ const size_t num = num_vertices(h);
+ vector<depth> distance(num, depth::unreachable());
+ distance.at(g[src].index) = depth(0);
+
+ auto index_map = get(&NFAGraphVertexProps::index, g);
+
+ // Since we are interested in the single-source shortest paths on a graph
+ // with the same weight on every edge, using BFS will be faster than
+ // Dijkstra here.
breadth_first_search(g, src,
- visitor(make_bfs_visitor(record_distances(
- make_iterator_property_map(distance.begin(), index_map),
+ visitor(make_bfs_visitor(record_distances(
+ make_iterator_property_map(distance.begin(), index_map),
boost::on_tree_edge()))));
-
- DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n",
- distance.at(NODE_ACCEPT).str().c_str(),
- distance.at(NODE_ACCEPT_EOD).str().c_str());
-
- depth d = min(distance.at(NODE_ACCEPT), distance.at(NODE_ACCEPT_EOD));
-
- if (d.is_unreachable()) {
- return d;
- }
-
- assert(d.is_finite());
- assert(d > depth(0));
- return d - depth(1);
-}
-
-static
-depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
- NFAVertex src) {
+
+ DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n",
+ distance.at(NODE_ACCEPT).str().c_str(),
+ distance.at(NODE_ACCEPT_EOD).str().c_str());
+
+ depth d = min(distance.at(NODE_ACCEPT), distance.at(NODE_ACCEPT_EOD));
+
+ if (d.is_unreachable()) {
+ return d;
+ }
+
+ assert(d.is_finite());
+ assert(d > depth(0));
+ return d - depth(1);
+}
+
+static
+depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
+ NFAVertex src) {
if (isLeafNode(src, h)) {
- return depth::unreachable();
- }
-
- if (hasReachableCycle(h, src)) {
- // There's a cycle reachable from this src, so we have inf width.
- return depth::infinity();
- }
-
+ return depth::unreachable();
+ }
+
+ if (hasReachableCycle(h, src)) {
+ // There's a cycle reachable from this src, so we have inf width.
+ return depth::infinity();
+ }
+
boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter);
-
- assert(hasCorrectlyNumberedVertices(h));
- const size_t num = num_vertices(h);
- vector<int> distance(num);
+
+ assert(hasCorrectlyNumberedVertices(h));
+ const size_t num = num_vertices(h);
+ vector<int> distance(num);
auto colors = make_small_color_map(h);
-
- auto index_map = get(&NFAGraphVertexProps::index, g);
-
- // DAG shortest paths with negative edge weights.
+
+ auto index_map = get(&NFAGraphVertexProps::index, g);
+
+ // DAG shortest paths with negative edge weights.
dag_shortest_paths(g, src,
- distance_map(make_iterator_property_map(distance.begin(), index_map))
- .weight_map(boost::make_constant_property<NFAEdge>(-1))
+ distance_map(make_iterator_property_map(distance.begin(), index_map))
+ .weight_map(boost::make_constant_property<NFAEdge>(-1))
.color_map(colors));
-
- depth acceptDepth, acceptEodDepth;
+
+ depth acceptDepth, acceptEodDepth;
if (get(colors, h.accept) == small_color::white) {
- acceptDepth = depth::unreachable();
- } else {
+ acceptDepth = depth::unreachable();
+ } else {
acceptDepth = depth(-1 * distance.at(NODE_ACCEPT));
- }
+ }
if (get(colors, h.acceptEod) == small_color::white) {
- acceptEodDepth = depth::unreachable();
- } else {
+ acceptEodDepth = depth::unreachable();
+ } else {
acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD));
- }
-
- depth d;
- if (acceptDepth.is_unreachable()) {
- d = acceptEodDepth;
- } else if (acceptEodDepth.is_unreachable()) {
- d = acceptDepth;
- } else {
- d = max(acceptDepth, acceptEodDepth);
- }
-
- if (d.is_unreachable()) {
+ }
+
+ depth d;
+ if (acceptDepth.is_unreachable()) {
+ d = acceptEodDepth;
+ } else if (acceptEodDepth.is_unreachable()) {
+ d = acceptDepth;
+ } else {
+ d = max(acceptDepth, acceptEodDepth);
+ }
+
+ if (d.is_unreachable()) {
assert(findMinWidth(h, filter, src).is_unreachable());
- return d;
- }
-
- // Invert sign and subtract one for start transition.
- assert(d.is_finite() && d > depth(0));
- return d - depth(1);
-}
-
-static
-depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
- depth startDepth = findMinWidth(h, filter, h.start);
- depth dotstarDepth = findMinWidth(h, filter, h.startDs);
- DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
- dotstarDepth.str().c_str());
- if (startDepth.is_unreachable()) {
- assert(dotstarDepth.is_finite());
- return dotstarDepth;
- } else if (dotstarDepth.is_unreachable()) {
- assert(startDepth.is_finite());
- return startDepth;
- } else {
- assert(min(startDepth, dotstarDepth).is_finite());
- return min(startDepth, dotstarDepth);
- }
-}
-
-depth findMinWidth(const NGHolder &h) {
- return findMinWidth(h, SpecialEdgeFilter(h));
-}
-
-depth findMinWidth(const NGHolder &h, u32 top) {
- return findMinWidth(h, SpecialEdgeFilter(h, top));
-}
-
-static
-depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
- depth startDepth = findMaxWidth(h, filter, h.start);
- depth dotstarDepth = findMaxWidth(h, filter, h.startDs);
- DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
- dotstarDepth.str().c_str());
- if (startDepth.is_unreachable()) {
- return dotstarDepth;
- } else if (dotstarDepth.is_unreachable()) {
- return startDepth;
- } else {
- return max(startDepth, dotstarDepth);
- }
-}
-
-depth findMaxWidth(const NGHolder &h) {
- return findMaxWidth(h, SpecialEdgeFilter(h));
-}
-
-depth findMaxWidth(const NGHolder &h, u32 top) {
- return findMaxWidth(h, SpecialEdgeFilter(h, top));
-}
-
-} // namespace ue2
+ return d;
+ }
+
+ // Invert sign and subtract one for start transition.
+ assert(d.is_finite() && d > depth(0));
+ return d - depth(1);
+}
+
+static
+depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
+ depth startDepth = findMinWidth(h, filter, h.start);
+ depth dotstarDepth = findMinWidth(h, filter, h.startDs);
+ DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
+ dotstarDepth.str().c_str());
+ if (startDepth.is_unreachable()) {
+ assert(dotstarDepth.is_finite());
+ return dotstarDepth;
+ } else if (dotstarDepth.is_unreachable()) {
+ assert(startDepth.is_finite());
+ return startDepth;
+ } else {
+ assert(min(startDepth, dotstarDepth).is_finite());
+ return min(startDepth, dotstarDepth);
+ }
+}
+
+depth findMinWidth(const NGHolder &h) {
+ return findMinWidth(h, SpecialEdgeFilter(h));
+}
+
+depth findMinWidth(const NGHolder &h, u32 top) {
+ return findMinWidth(h, SpecialEdgeFilter(h, top));
+}
+
+static
+depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
+ depth startDepth = findMaxWidth(h, filter, h.start);
+ depth dotstarDepth = findMaxWidth(h, filter, h.startDs);
+ DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
+ dotstarDepth.str().c_str());
+ if (startDepth.is_unreachable()) {
+ return dotstarDepth;
+ } else if (dotstarDepth.is_unreachable()) {
+ return startDepth;
+ } else {
+ return max(startDepth, dotstarDepth);
+ }
+}
+
+depth findMaxWidth(const NGHolder &h) {
+ return findMaxWidth(h, SpecialEdgeFilter(h));
+}
+
+depth findMaxWidth(const NGHolder &h, u32 top) {
+ return findMaxWidth(h, SpecialEdgeFilter(h, top));
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.h b/contrib/libs/hyperscan/src/nfagraph/ng_width.h
index 871e8a9343..ecc3c100ae 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_width.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.h
@@ -1,74 +1,74 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for finding the min/max width of the input required to
- * match a pattern.
- */
-
-#ifndef NG_WIDTH_H
-#define NG_WIDTH_H
-
-#include "ue2common.h"
-#include "util/depth.h"
-
-namespace ue2 {
-
-class NGHolder;
-
-/**
- * \brief Compute the minimum width in bytes of an input that will match the
- * given graph.
- */
-depth findMinWidth(const NGHolder &h);
-
-/**
- * \brief Compute the minimum width in bytes of an input that will match the
- * given graph, considering only paths activated by the given top.
- */
-depth findMinWidth(const NGHolder &h, u32 top);
-
-/**
- * \brief Compute the maximum width in bytes of an input that will match the
- * given graph.
- *
- * If there is no bound on the maximum width, returns infinity.
- */
-depth findMaxWidth(const NGHolder &h);
-
-/**
- * \brief Compute the maximum width in bytes of an input that will match the
- * given graph, considering only paths activated by the given top.
- *
- * If there is no bound on the maximum width, returns infinity.
- */
-depth findMaxWidth(const NGHolder &h, u32 top);
-
-} // namespace ue2
-
-#endif // NG_WIDTH_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for finding the min/max width of the input required to
+ * match a pattern.
+ */
+
+#ifndef NG_WIDTH_H
+#define NG_WIDTH_H
+
+#include "ue2common.h"
+#include "util/depth.h"
+
+namespace ue2 {
+
+class NGHolder;
+
+/**
+ * \brief Compute the minimum width in bytes of an input that will match the
+ * given graph.
+ */
+depth findMinWidth(const NGHolder &h);
+
+/**
+ * \brief Compute the minimum width in bytes of an input that will match the
+ * given graph, considering only paths activated by the given top.
+ */
+depth findMinWidth(const NGHolder &h, u32 top);
+
+/**
+ * \brief Compute the maximum width in bytes of an input that will match the
+ * given graph.
+ *
+ * If there is no bound on the maximum width, returns infinity.
+ */
+depth findMaxWidth(const NGHolder &h);
+
+/**
+ * \brief Compute the maximum width in bytes of an input that will match the
+ * given graph, considering only paths activated by the given top.
+ *
+ * If there is no bound on the maximum width, returns infinity.
+ */
+depth findMaxWidth(const NGHolder &h, u32 top);
+
+} // namespace ue2
+
+#endif // NG_WIDTH_H