diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch) | |
tree | d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/nfagraph | |
parent | 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff) | |
download | ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph')
102 files changed, 25466 insertions, 25466 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.cpp b/contrib/libs/hyperscan/src/nfagraph/ng.cpp index 8dccf9863d..6545d55ac0 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng.cpp @@ -1,229 +1,229 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file * \brief NG and graph handling. 
- */ + */ #include "ng.h" -#include "grey.h" -#include "ng_anchored_acyclic.h" -#include "ng_anchored_dots.h" -#include "ng_asserts.h" -#include "ng_calc_components.h" -#include "ng_cyclic_redundancy.h" -#include "ng_dump.h" -#include "ng_edge_redundancy.h" -#include "ng_equivalence.h" -#include "ng_extparam.h" -#include "ng_fixed_width.h" +#include "grey.h" +#include "ng_anchored_acyclic.h" +#include "ng_anchored_dots.h" +#include "ng_asserts.h" +#include "ng_calc_components.h" +#include "ng_cyclic_redundancy.h" +#include "ng_dump.h" +#include "ng_edge_redundancy.h" +#include "ng_equivalence.h" +#include "ng_extparam.h" +#include "ng_fixed_width.h" #include "ng_fuzzy.h" -#include "ng_haig.h" -#include "ng_literal_component.h" -#include "ng_literal_decorated.h" -#include "ng_misc_opt.h" -#include "ng_puff.h" -#include "ng_prefilter.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_region_redundancy.h" -#include "ng_reports.h" -#include "ng_sep.h" -#include "ng_small_literal_set.h" -#include "ng_som.h" -#include "ng_vacuous.h" +#include "ng_haig.h" +#include "ng_literal_component.h" +#include "ng_literal_decorated.h" +#include "ng_misc_opt.h" +#include "ng_puff.h" +#include "ng_prefilter.h" +#include "ng_prune.h" +#include "ng_redundancy.h" +#include "ng_region.h" +#include "ng_region_redundancy.h" +#include "ng_reports.h" +#include "ng_sep.h" +#include "ng_small_literal_set.h" +#include "ng_som.h" +#include "ng_vacuous.h" #include "ng_violet.h" -#include "ng_utf8.h" -#include "ng_util.h" -#include "ng_width.h" -#include "ue2common.h" +#include "ng_utf8.h" +#include "ng_util.h" +#include "ng_width.h" +#include "ue2common.h" #include "compiler/compiler.h" -#include "nfa/goughcompile.h" +#include "nfa/goughcompile.h" #include "rose/rose_build.h" -#include "smallwrite/smallwrite_build.h" -#include "util/compile_error.h" -#include "util/container.h" -#include "util/depth.h" -#include "util/graph_range.h" -#include 
"util/make_unique.h" -#include "util/ue2string.h" - -using namespace std; - -namespace ue2 { - +#include "smallwrite/smallwrite_build.h" +#include "util/compile_error.h" +#include "util/container.h" +#include "util/depth.h" +#include "util/graph_range.h" +#include "util/make_unique.h" +#include "util/ue2string.h" + +using namespace std; + +namespace ue2 { + NG::NG(const CompileContext &in_cc, size_t num_patterns, unsigned in_somPrecision) - : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength), - minWidth(depth::infinity()), - rm(in_cc.grey), - ssm(in_somPrecision), - cc(in_cc), + : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength), + minWidth(depth::infinity()), + rm(in_cc.grey), + ssm(in_somPrecision), + cc(in_cc), smwr(makeSmallWriteBuilder(num_patterns, rm, cc)), rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) { -} - -NG::~NG() { - // empty -} - -/** \brief SOM handling code, called by \ref addComponent. - * - * \return true if the component was handled completely by something (e.g. a - * Haig outfix), false if SOM could be established but implementation via an - * engine will be required. - * - * \throw CompileError if SOM cannot be supported for the component. - */ -static +} + +NG::~NG() { + // empty +} + +/** \brief SOM handling code, called by \ref addComponent. + * + * \return true if the component was handled completely by something (e.g. a + * Haig outfix), false if SOM could be established but implementation via an + * engine will be required. + * + * \throw CompileError if SOM cannot be supported for the component. 
+ */ +static bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, - const som_type som, const u32 comp_id) { - DEBUG_PRINTF("doing som\n"); + const som_type som, const u32 comp_id) { + DEBUG_PRINTF("doing som\n"); dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey); - assert(hasCorrectlyNumberedVertices(g)); + assert(hasCorrectlyNumberedVertices(g)); assert(allMatchStatesHaveReports(g)); - - // First, we try the "SOM chain" support in ng_som.cpp. - + + // First, we try the "SOM chain" support in ng_som.cpp. + sombe_rv rv = doSom(ng, g, expr, comp_id, som); - if (rv == SOMBE_HANDLED_INTERNAL) { - return false; - } else if (rv == SOMBE_HANDLED_ALL) { - return true; - } - assert(rv == SOMBE_FAIL); - - /* Next, Sombe style approaches */ + if (rv == SOMBE_HANDLED_INTERNAL) { + return false; + } else if (rv == SOMBE_HANDLED_ALL) { + return true; + } + assert(rv == SOMBE_FAIL); + + /* Next, Sombe style approaches */ rv = doSomWithHaig(ng, g, expr, comp_id, som); - if (rv == SOMBE_HANDLED_INTERNAL) { - return false; - } else if (rv == SOMBE_HANDLED_ALL) { - return true; - } - assert(rv == SOMBE_FAIL); - - // If the previous approach could not support this pattern, we try treating - // it monolithically, as a Haig outfix. - - vector<vector<CharReach> > triggers; /* empty for outfix */ - - assert(g.kind == NFA_OUTFIX); + if (rv == SOMBE_HANDLED_INTERNAL) { + return false; + } else if (rv == SOMBE_HANDLED_ALL) { + return true; + } + assert(rv == SOMBE_FAIL); + + // If the previous approach could not support this pattern, we try treating + // it monolithically, as a Haig outfix. 
+ + vector<vector<CharReach> > triggers; /* empty for outfix */ + + assert(g.kind == NFA_OUTFIX); dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey); makeReportsSomPass(ng.rm, g); - auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, - ng.cc.grey); - if (haig) { - DEBUG_PRINTF("built haig outfix\n"); - ng.rose->addOutfix(g, *haig); - return true; - } - - /* Our various strategies for supporting SOM for this pattern have failed. - * Provide a generic pattern not supported/too large return value as it is - * unclear what the meaning of a specific SOM error would be */ + auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, + ng.cc.grey); + if (haig) { + DEBUG_PRINTF("built haig outfix\n"); + ng.rose->addOutfix(g, *haig); + return true; + } + + /* Our various strategies for supporting SOM for this pattern have failed. + * Provide a generic pattern not supported/too large return value as it is + * unclear what the meaning of a specific SOM error would be */ throw CompileError(expr.index, "Pattern is too large."); - - assert(0); // unreachable - return false; -} - -void reduceGraph(NGHolder &g, som_type som, bool utf8, - const CompileContext &cc) { - if (!cc.grey.performGraphSimplification) { - return; - } - - // We run reduction passes until either the graph stops changing or we hit - // a (small) limit. 
- - if (!som) { - mergeCyclicDotStars(g); - } - - const unsigned MAX_PASSES = 3; - for (unsigned pass = 1; pass <= MAX_PASSES; pass++) { - bool changed = false; - DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES); - changed |= removeEdgeRedundancy(g, som, cc); - changed |= reduceGraphEquivalences(g, cc); - changed |= removeRedundancy(g, som); + + assert(0); // unreachable + return false; +} + +void reduceGraph(NGHolder &g, som_type som, bool utf8, + const CompileContext &cc) { + if (!cc.grey.performGraphSimplification) { + return; + } + + // We run reduction passes until either the graph stops changing or we hit + // a (small) limit. + + if (!som) { + mergeCyclicDotStars(g); + } + + const unsigned MAX_PASSES = 3; + for (unsigned pass = 1; pass <= MAX_PASSES; pass++) { + bool changed = false; + DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES); + changed |= removeEdgeRedundancy(g, som, cc); + changed |= reduceGraphEquivalences(g, cc); + changed |= removeRedundancy(g, som); changed |= removeCyclicPathRedundancy(g); - if (!changed) { - DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass); - break; - } - } - - if (utf8) { - utf8DotRestoration(g, som); - } - - /* Minor non-redundancy improvements */ - if (improveGraph(g, som)) { - /* may be some more edges to remove */ - removeEdgeRedundancy(g, som, cc); - } - - removeCyclicDominated(g, som); - - if (!som) { - mergeCyclicDotStars(g); - } - - if (!som) { - removeSiblingsOfStartDotStar(g); - } -} - -static + if (!changed) { + DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass); + break; + } + } + + if (utf8) { + utf8DotRestoration(g, som); + } + + /* Minor non-redundancy improvements */ + if (improveGraph(g, som)) { + /* may be some more edges to remove */ + removeEdgeRedundancy(g, som, cc); + } + + removeCyclicDominated(g, som); + + if (!som) { + mergeCyclicDotStars(g); + } + + if (!som) { + removeSiblingsOfStartDotStar(g); + } +} + +static bool addComponent(NG &ng, NGHolder &g, const 
ExpressionInfo &expr, const som_type som, const u32 comp_id) { - const CompileContext &cc = ng.cc; + const CompileContext &cc = ng.cc; assert(hasCorrectlyNumberedVertices(g)); - - DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", + + DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", expr.index, comp_id, num_vertices(g), num_edges(g)); - + dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - + assert(allMatchStatesHaveReports(g)); - + reduceExtendedParams(g, ng.rm, som); reduceGraph(g, som, expr.utf8, cc); - + dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); - // There may be redundant regions that we can remove - if (cc.grey.performGraphSimplification) { - removeRegionRedundancy(g, som); - } - + // There may be redundant regions that we can remove + if (cc.grey.performGraphSimplification) { + removeRegionRedundancy(g, som); + } + // We might be done at this point: if we've run out of vertices, we can // stop processing. if (num_vertices(g) == N_SPECIALS) { @@ -231,125 +231,125 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, return true; } - // "Short Exhaustible Passthrough" patterns always become outfixes. - if (!som && isSEP(g, ng.rm, cc.grey)) { - DEBUG_PRINTF("graph is SEP\n"); - if (ng.rose->addOutfix(g)) { - return true; - } - } - - // Start Of Match handling. - if (som) { + // "Short Exhaustible Passthrough" patterns always become outfixes. + if (!som && isSEP(g, ng.rm, cc.grey)) { + DEBUG_PRINTF("graph is SEP\n"); + if (ng.rose->addOutfix(g)) { + return true; + } + } + + // Start Of Match handling. 
+ if (som) { if (addComponentSom(ng, g, expr, som, comp_id)) { - return true; - } - } - + return true; + } + } + assert(allMatchStatesHaveReports(g)); - if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { - return true; - } - - if (handleSmallLiteralSets(*ng.rose, g, cc) - || handleFixedWidth(*ng.rose, g, cc.grey)) { - return true; - } - - if (handleDecoratedLiterals(*ng.rose, g, cc)) { - return true; - } - + if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { + return true; + } + + if (handleSmallLiteralSets(*ng.rose, g, cc) + || handleFixedWidth(*ng.rose, g, cc.grey)) { + return true; + } + + if (handleDecoratedLiterals(*ng.rose, g, cc)) { + return true; + } + if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { - return true; - } - + return true; + } + if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { - return true; - } - - if (handleSmallLiteralSets(*ng.rose, g, cc) - || handleFixedWidth(*ng.rose, g, cc.grey)) { - return true; - } - - if (handleDecoratedLiterals(*ng.rose, g, cc)) { - return true; - } - + return true; + } + + if (handleSmallLiteralSets(*ng.rose, g, cc) + || handleFixedWidth(*ng.rose, g, cc.grey)) { + return true; + } + + if (handleDecoratedLiterals(*ng.rose, g, cc)) { + return true; + } + if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { - return true; - } - - DEBUG_PRINTF("testing for outfix\n"); - assert(allMatchStatesHaveReports(g)); - if (ng.rose->addOutfix(g)) { - return true; - } - - return false; -} - -// Returns true if all components have been added. -static + return true; + } + + DEBUG_PRINTF("testing for outfix\n"); + assert(allMatchStatesHaveReports(g)); + if (ng.rose->addOutfix(g)) { + return true; + } + + return false; +} + +// Returns true if all components have been added. 
+static bool processComponents(NG &ng, ExpressionInfo &expr, - deque<unique_ptr<NGHolder>> &g_comp, - const som_type som) { - const u32 num_components = g_comp.size(); - - u32 failed = 0; - for (u32 i = 0; i < num_components; i++) { - if (!g_comp[i]) { - continue; - } + deque<unique_ptr<NGHolder>> &g_comp, + const som_type som) { + const u32 num_components = g_comp.size(); + + u32 failed = 0; + for (u32 i = 0; i < num_components; i++) { + if (!g_comp[i]) { + continue; + } if (addComponent(ng, *g_comp[i], expr, som, i)) { - g_comp[i].reset(); - continue; - } - - if (som) { /* bail immediately */ - return false; - } - failed++; - } - - if (!failed) { - DEBUG_PRINTF("all components claimed\n"); - return true; - } - - DEBUG_PRINTF("%u components still remain\n", failed); - return false; -} - + g_comp[i].reset(); + continue; + } + + if (som) { /* bail immediately */ + return false; + } + failed++; + } + + if (!failed) { + DEBUG_PRINTF("all components claimed\n"); + return true; + } + + DEBUG_PRINTF("%u components still remain\n", failed); + return false; +} + bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { assert(g_ptr); NGHolder &g = *g_ptr; - // remove reports that aren't on vertices connected to accept. + // remove reports that aren't on vertices connected to accept. clearReports(g); - + som_type som = expr.som; if (som && isVacuous(g)) { throw CompileError(expr.index, "Start of match is not " - "currently supported for patterns which match an " - "empty buffer."); - } - + "currently supported for patterns which match an " + "empty buffer."); + } + dumpDotWrapper(g, expr, "01_initial", cc.grey); assert(allMatchStatesHaveReports(g)); - - /* ensure utf8 starts at cp boundary */ + + /* ensure utf8 starts at cp boundary */ ensureCodePointStart(rm, g, expr); - + if (can_never_match(g)) { throw CompileError(expr.index, "Pattern can never match."); } - + bool hamming = expr.hamm_distance > 0; u32 e_dist = hamming ? 
expr.hamm_distance : expr.edit_distance; - + DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false"); // validate graph's suitability for fuzzing before resolving asserts @@ -367,10 +367,10 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { if (can_never_match(g)) { throw CompileError(expr.index, "Pattern can never match."); - } - + } + optimiseVirtualStarts(g); /* good for som */ - + propagateExtendedParams(g, expr, rm); reduceExtendedParams(g, rm, som); @@ -387,61 +387,61 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { })) { // We have at least one report with a minimum length constraint, which // we currently use SOM to satisfy. - som = SOM_LEFT; - ssm.somPrecision(8); - } - - if (som) { - rose->setSom(); - } - - // first, we can perform graph work that can be done on an individual - // expression basis. - + som = SOM_LEFT; + ssm.somPrecision(8); + } + + if (som) { + rose->setSom(); + } + + // first, we can perform graph work that can be done on an individual + // expression basis. + if (expr.utf8) { relaxForbiddenUtf8(g, expr); - } - + } + if (all_of_in(all_reports(g), [&](ReportID id) { const auto &report = rm.getReport(id); return report.ekey != INVALID_EKEY && !report.minLength && !report.minOffset; })) { - // In highlander mode: if we don't have constraints on our reports that - // may prevent us accepting our first match (i.e. extended params) we - // can prune the other out-edges of all vertices connected to accept. + // In highlander mode: if we don't have constraints on our reports that + // may prevent us accepting our first match (i.e. extended params) we + // can prune the other out-edges of all vertices connected to accept. // TODO: shift the report checking down into pruneHighlanderAccepts() // to allow us to handle the parts we can in mixed cases. 
pruneHighlanderAccepts(g, rm); - } - + } + dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey); - - // If we're a vacuous pattern, we can handle this early. + + // If we're a vacuous pattern, we can handle this early. if (splitOffVacuous(boundary, rm, g, expr)) { - DEBUG_PRINTF("split off vacuous\n"); - } - - // We might be done at this point: if we've run out of vertices, we can - // stop processing. + DEBUG_PRINTF("split off vacuous\n"); + } + + // We might be done at this point: if we've run out of vertices, we can + // stop processing. if (num_vertices(g) == N_SPECIALS) { - DEBUG_PRINTF("all vertices claimed by vacuous handling\n"); - return true; - } - - // Now that vacuous edges have been removed, update the min width exclusive - // of boundary reports. + DEBUG_PRINTF("all vertices claimed by vacuous handling\n"); + return true; + } + + // Now that vacuous edges have been removed, update the min width exclusive + // of boundary reports. minWidth = min(minWidth, findMinWidth(g)); - - // Add the pattern to the small write builder. + + // Add the pattern to the small write builder. smwr->add(g, expr); - - if (!som) { + + if (!som) { removeSiblingsOfStartDotStar(g); - } - + } + dumpDotWrapper(g, expr, "03_early", cc.grey); - + // Perform a reduction pass to merge sibling character classes together. if (cc.grey.performGraphSimplification) { removeRedundancy(g, som); @@ -450,177 +450,177 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { dumpDotWrapper(g, expr, "04_reduced", cc.grey); - // If we've got some literals that span the graph from start to accept, we - // can split them off into Rose from here. - if (!som) { + // If we've got some literals that span the graph from start to accept, we + // can split them off into Rose from here. + if (!som) { if (splitOffLiterals(*this, g)) { - DEBUG_PRINTF("some vertices claimed by literals\n"); - } - } - - // We might be done at this point: if we've run out of vertices, we can - // stop processing. 
+ DEBUG_PRINTF("some vertices claimed by literals\n"); + } + } + + // We might be done at this point: if we've run out of vertices, we can + // stop processing. if (num_vertices(g) == N_SPECIALS) { - DEBUG_PRINTF("all vertices claimed before calc components\n"); - return true; - } - + DEBUG_PRINTF("all vertices claimed before calc components\n"); + return true; + } + // Split the graph into a set of connected components and process those. // Note: this invalidates g_ptr. - + auto g_comp = calcComponents(std::move(g_ptr), cc.grey); - assert(!g_comp.empty()); - - if (!som) { + assert(!g_comp.empty()); + + if (!som) { for (auto &gc : g_comp) { assert(gc); reformLeadingDots(*gc); - } - + } + recalcComponents(g_comp, cc.grey); - } - + } + if (processComponents(*this, expr, g_comp, som)) { - return true; - } - - // If we're in prefiltering mode, we can run the prefilter reductions and - // have another shot at accepting the graph. - + return true; + } + + // If we're in prefiltering mode, we can run the prefilter reductions and + // have another shot at accepting the graph. + if (cc.grey.prefilterReductions && expr.prefilter) { for (auto &gc : g_comp) { if (!gc) { - continue; - } + continue; + } prefilterReductions(*gc, cc); - } - + } + if (processComponents(*this, expr, g_comp, som)) { - return true; - } - } - - // We must have components that could not be compiled. - for (u32 i = 0; i < g_comp.size(); i++) { - if (g_comp[i]) { - DEBUG_PRINTF("could not compile component %u with %zu vertices\n", - i, num_vertices(*g_comp[i])); + return true; + } + } + + // We must have components that could not be compiled. + for (u32 i = 0; i < g_comp.size(); i++) { + if (g_comp[i]) { + DEBUG_PRINTF("could not compile component %u with %zu vertices\n", + i, num_vertices(*g_comp[i])); throw CompileError(expr.index, "Pattern is too large."); - } - } - - assert(0); // should have thrown. - return false; -} - -/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. 
*/ + } + } + + assert(0); // should have thrown. + return false; +} + +/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */ bool NG::addHolder(NGHolder &g) { DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); assert(allMatchStatesHaveReports(g)); assert(hasCorrectlyNumberedVertices(g)); - - /* We don't update the global minWidth here as we care about the min width - * of the whole pattern - not a just a prefix of it. */ - - bool prefilter = false; + + /* We don't update the global minWidth here as we care about the min width + * of the whole pattern - not a just a prefix of it. */ + + bool prefilter = false; //dumpDotComp(comp, g, *this, 20, "prefix_init"); - - som_type som = SOM_NONE; /* the prefixes created by the SOM code do not - themselves track som */ - bool utf8 = false; // handling done earlier + + som_type som = SOM_NONE; /* the prefixes created by the SOM code do not + themselves track som */ + bool utf8 = false; // handling done earlier reduceGraph(g, som, utf8, cc); - - // There may be redundant regions that we can remove - if (cc.grey.performGraphSimplification) { + + // There may be redundant regions that we can remove + if (cc.grey.performGraphSimplification) { removeRegionRedundancy(g, som); - } - - // "Short Exhaustible Passthrough" patterns always become outfixes. + } + + // "Short Exhaustible Passthrough" patterns always become outfixes. 
if (isSEP(g, rm, cc.grey)) { - DEBUG_PRINTF("graph is SEP\n"); + DEBUG_PRINTF("graph is SEP\n"); if (rose->addOutfix(g)) { - return true; - } - } - + return true; + } + } + if (splitOffAnchoredAcyclic(*rose, g, cc)) { - return true; - } - + return true; + } + if (handleSmallLiteralSets(*rose, g, cc) || handleFixedWidth(*rose, g, cc.grey)) { - return true; - } - + return true; + } + if (handleDecoratedLiterals(*rose, g, cc)) { - return true; - } - + return true; + } + if (doViolet(*rose, g, prefilter, false, rm, cc)) { - return true; - } + return true; + } if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { - return true; - } + return true; + } if (doViolet(*rose, g, prefilter, true, rm, cc)) { - return true; - } - - DEBUG_PRINTF("trying for outfix\n"); + return true; + } + + DEBUG_PRINTF("trying for outfix\n"); if (rose->addOutfix(g)) { - DEBUG_PRINTF("ok\n"); - return true; - } - DEBUG_PRINTF("trying for outfix - failed\n"); - DEBUG_PRINTF("nobody would take us\n"); - return false; -} - -bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, + DEBUG_PRINTF("ok\n"); + return true; + } + DEBUG_PRINTF("trying for outfix - failed\n"); + DEBUG_PRINTF("nobody would take us\n"); + return false; +} + +bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, u32 external_report, bool highlander, som_type som, bool quiet) { - assert(!literal.empty()); - - if (!cc.grey.shortcutLiterals) { - return false; - } - - // We can't natively handle arbitrary literals with mixed case sensitivity - // in Rose -- they require mechanisms like benefits masks, which have - // length limits etc. Better to let those go through full graph processing. - if (mixed_sensitivity(literal)) { - DEBUG_PRINTF("mixed sensitivity\n"); - return false; - } - - // Register external report and validate highlander constraints. 
- rm.registerExtReport(external_report, - external_report_info(highlander, expr_index)); - - ReportID id; - if (som) { - assert(!highlander); // not allowed, checked earlier. - Report r = makeSomRelativeCallback(external_report, 0, literal.length()); - id = rm.getInternalId(r); - rose->setSom(); - } else { - u32 ekey = highlander ? rm.getExhaustibleKey(external_report) - : INVALID_EKEY; + assert(!literal.empty()); + + if (!cc.grey.shortcutLiterals) { + return false; + } + + // We can't natively handle arbitrary literals with mixed case sensitivity + // in Rose -- they require mechanisms like benefits masks, which have + // length limits etc. Better to let those go through full graph processing. + if (mixed_sensitivity(literal)) { + DEBUG_PRINTF("mixed sensitivity\n"); + return false; + } + + // Register external report and validate highlander constraints. + rm.registerExtReport(external_report, + external_report_info(highlander, expr_index)); + + ReportID id; + if (som) { + assert(!highlander); // not allowed, checked earlier. + Report r = makeSomRelativeCallback(external_report, 0, literal.length()); + id = rm.getInternalId(r); + rose->setSom(); + } else { + u32 ekey = highlander ? 
rm.getExhaustibleKey(external_report) + : INVALID_EKEY; Report r = makeECallback(external_report, 0, ekey, quiet); - id = rm.getInternalId(r); - } - - DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n", - dumpString(literal).c_str(), id); - - rose->add(false, false, literal, {id}); - - minWidth = min(minWidth, depth(literal.length())); - + id = rm.getInternalId(r); + } + + DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n", + dumpString(literal).c_str(), id); + + rose->add(false, false, literal, {id}); + + minWidth = min(minWidth, depth(literal.length())); + /* inform small write handler about this literal */ smwr->add(literal, id); - - return true; -} - -} // namespace ue2 + + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.h b/contrib/libs/hyperscan/src/nfagraph/ng.h index a5a9077d4f..ed908e9a8d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng.h @@ -1,110 +1,110 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file * \brief NG declaration. - */ - -#ifndef NG_H -#define NG_H - -#include "ng_holder.h" -#include "ue2common.h" -#include "parser/position.h" -#include "som/slot_manager.h" -#include "som/som.h" -#include "util/boundary_reports.h" -#include "util/compile_context.h" -#include "util/depth.h" -#include "util/graph.h" + */ + +#ifndef NG_H +#define NG_H + +#include "ng_holder.h" +#include "ue2common.h" +#include "parser/position.h" +#include "som/slot_manager.h" +#include "som/som.h" +#include "util/boundary_reports.h" +#include "util/compile_context.h" +#include "util/depth.h" +#include "util/graph.h" #include "util/noncopyable.h" -#include "util/report_manager.h" - -#include <deque> -#include <map> -#include <memory> -#include <utility> -#include <vector> - -namespace ue2 { - -struct CompileContext; -struct ue2_literal; - +#include "util/report_manager.h" + +#include <deque> +#include <map> +#include <memory> +#include <utility> +#include <vector> + +namespace ue2 { + +struct CompileContext; +struct ue2_literal; + class ExpressionInfo; -class RoseBuild; -class SmallWriteBuild; - +class RoseBuild; +class SmallWriteBuild; + class NG : noncopyable { -public: +public: NG(const CompileContext &in_cc, size_t num_patterns, unsigned in_somPrecision); - ~NG(); - - /** \brief Consumes a pattern, returns false or throws a CompileError - * exception if the graph cannot be consumed. 
*/ + ~NG(); + + /** \brief Consumes a pattern, returns false or throws a CompileError + * exception if the graph cannot be consumed. */ bool addGraph(ExpressionInfo &expr, std::unique_ptr<NGHolder> g_ptr); - - /** \brief Consumes a graph, cut-down version of addGraph for use by SOM - * processing. */ - bool addHolder(NGHolder &h); - + + /** \brief Consumes a graph, cut-down version of addGraph for use by SOM + * processing. */ + bool addHolder(NGHolder &h); + /** \brief Adds a literal to Rose, used by literal shortcut passes (instead * of using \ref addGraph) */ - bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, + bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, bool highlander, som_type som, bool quiet); - - /** \brief Maximum history in bytes available for use by SOM reverse NFAs, - * a hack for pattern support (see UE-1903). This is always set to the max - * "lookbehind" length. */ - const u32 maxSomRevHistoryAvailable; - - /** \brief The length of the shortest corpus which can match a pattern - * contained in the NG (excluding the boundary reports used by vacuous - * patterns, which give an effective minWidth of zero). */ - depth minWidth; - - ReportManager rm; - SomSlotManager ssm; - BoundaryReports boundary; - const CompileContext cc; - + + /** \brief Maximum history in bytes available for use by SOM reverse NFAs, + * a hack for pattern support (see UE-1903). This is always set to the max + * "lookbehind" length. */ + const u32 maxSomRevHistoryAvailable; + + /** \brief The length of the shortest corpus which can match a pattern + * contained in the NG (excluding the boundary reports used by vacuous + * patterns, which give an effective minWidth of zero). */ + depth minWidth; + + ReportManager rm; + SomSlotManager ssm; + BoundaryReports boundary; + const CompileContext cc; + const std::unique_ptr<SmallWriteBuild> smwr; //!< SmallWrite builder. - const std::unique_ptr<RoseBuild> rose; //!< Rose builder. 
-}; - -/** \brief Run graph reduction passes. - * - * Shared with the small write compiler. - */ + const std::unique_ptr<RoseBuild> rose; //!< Rose builder. +}; + +/** \brief Run graph reduction passes. + * + * Shared with the small write compiler. + */ void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc); - -} // namespace ue2 - -#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp index 22e3e49609..6547c7a8e1 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp @@ -1,67 +1,67 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Anchored acyclic graph -> DFA analysis. - */ -#include "ng_anchored_acyclic.h" - -#include "ng_holder.h" -#include "ng_reports.h" -#include "ng_util.h" -#include "ue2common.h" -#include "rose/rose_build.h" -#include "util/compile_context.h" - -namespace ue2 { - -bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h, - const CompileContext &cc) { - if (!cc.grey.allowAnchoredAcyclic) { - return false; - } - - if (!isAnchored(h)) { - DEBUG_PRINTF("fail, not anchored\n"); - return false; - } - - if (!isAcyclic(h)) { - DEBUG_PRINTF("fail, not acyclic\n"); - return false; - } - - if (rose.addAnchoredAcyclic(h)) { - return true; - } else { - DEBUG_PRINTF("failed to add anchored nfa\n"); - return false; - } -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Anchored acyclic graph -> DFA analysis. 
+ */ +#include "ng_anchored_acyclic.h" + +#include "ng_holder.h" +#include "ng_reports.h" +#include "ng_util.h" +#include "ue2common.h" +#include "rose/rose_build.h" +#include "util/compile_context.h" + +namespace ue2 { + +bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h, + const CompileContext &cc) { + if (!cc.grey.allowAnchoredAcyclic) { + return false; + } + + if (!isAnchored(h)) { + DEBUG_PRINTF("fail, not anchored\n"); + return false; + } + + if (!isAcyclic(h)) { + DEBUG_PRINTF("fail, not acyclic\n"); + return false; + } + + if (rose.addAnchoredAcyclic(h)) { + return true; + } else { + DEBUG_PRINTF("failed to add anchored nfa\n"); + return false; + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h index f9bc5d772e..fa4e6199b4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h @@ -1,49 +1,49 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Anchored acyclic graph -> DFA analysis. - */ - -#ifndef NG_ANCHORED_ACYCLIC_H -#define NG_ANCHORED_ACYCLIC_H - -namespace ue2 { - -class NGHolder; -class RoseBuild; -struct CompileContext; - -/** \brief Attempt to consume the entire pattern in graph \a h as an anchored - * acyclic DFA. Returns true if successful. */ -bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h, - const CompileContext &cc); - -} // namespace ue2 - -#endif // NG_ANCHORED_ACYCLIC_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Anchored acyclic graph -> DFA analysis. + */ + +#ifndef NG_ANCHORED_ACYCLIC_H +#define NG_ANCHORED_ACYCLIC_H + +namespace ue2 { + +class NGHolder; +class RoseBuild; +struct CompileContext; + +/** \brief Attempt to consume the entire pattern in graph \a h as an anchored + * acyclic DFA. Returns true if successful. 
*/ +bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h, + const CompileContext &cc); + +} // namespace ue2 + +#endif // NG_ANCHORED_ACYCLIC_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp index 9a13376d19..9a0abb124c 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp @@ -1,651 +1,651 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Analysis pass to reform leading dots. - * - * We have found that many regexes found in the wild use an anchored dot-repeat - * to represent an unanchored pattern, particularly if they have been used with - * a regex engine that assumes that a pattern is anchored. This pass reforms - * patterns that begin with sequences of dots into a more standard form. - * - * In addition, both anchored and unanchored patterns with dot repeats as - * prefixes will have these prefixes reformed into a canonical form, which some - * later analyses depend upon. 
- */ -#include "ng_anchored_dots.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" -#include "util/depth.h" -#include "util/graph_range.h" - -#include <algorithm> -#include <queue> -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -static -bool findStarts(const NGHolder &g, set<NFAVertex> &anchored, - set<NFAVertex> &unanchored) { - // Populate unanchored map - for (auto v : adjacent_vertices_range(g.startDs, g)) { - if (is_special(v, g)) { - continue; - } - unanchored.insert(v); - } - - // Populate anchored map - for (auto v : adjacent_vertices_range(g.start, g)) { - if (is_special(v, g)) { - continue; - } - anchored.insert(v); - } - - if (unanchored == anchored) { - anchored.clear(); - } else if (!unanchored.empty() && !anchored.empty()) { - return false; - } - - return !anchored.empty() || !unanchored.empty(); -} - -namespace { -class DotInfo { -public: - DotInfo(NFAVertex v, bool se, u32 idx) - : vertex(v), hasSelfLoop(se), index(idx) {} - - bool operator<(const DotInfo &other) const { - if (hasSelfLoop != other.hasSelfLoop) - return hasSelfLoop < other.hasSelfLoop; - // tie break with vertex id: lowest ID wins - return index > other.index; - } - - NFAVertex vertex; - bool hasSelfLoop; - u32 index; -}; -} - -// Returns nullptr if all vertices in the given set are not dots. -// We can only pick one dot vertex, so we go for a dot-star if it exists, -// otherwise the dot without a self-edge with the lowest ID. 
-static -NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts, - set<NFAVertex> &otherV) { - priority_queue<DotInfo> dotq; - for (auto v : starts) { - if (is_dot(v, g)) { - u32 idx = g[v].index; - dotq.push(DotInfo(v, hasSelfLoop(v, g), idx)); - } - } - - if (dotq.empty()) { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Analysis pass to reform leading dots. 
+ * + * We have found that many regexes found in the wild use an anchored dot-repeat + * to represent an unanchored pattern, particularly if they have been used with + * a regex engine that assumes that a pattern is anchored. This pass reforms + * patterns that begin with sequences of dots into a more standard form. + * + * In addition, both anchored and unanchored patterns with dot repeats as + * prefixes will have these prefixes reformed into a canonical form, which some + * later analyses depend upon. + */ +#include "ng_anchored_dots.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" +#include "util/depth.h" +#include "util/graph_range.h" + +#include <algorithm> +#include <queue> +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +static +bool findStarts(const NGHolder &g, set<NFAVertex> &anchored, + set<NFAVertex> &unanchored) { + // Populate unanchored map + for (auto v : adjacent_vertices_range(g.startDs, g)) { + if (is_special(v, g)) { + continue; + } + unanchored.insert(v); + } + + // Populate anchored map + for (auto v : adjacent_vertices_range(g.start, g)) { + if (is_special(v, g)) { + continue; + } + anchored.insert(v); + } + + if (unanchored == anchored) { + anchored.clear(); + } else if (!unanchored.empty() && !anchored.empty()) { + return false; + } + + return !anchored.empty() || !unanchored.empty(); +} + +namespace { +class DotInfo { +public: + DotInfo(NFAVertex v, bool se, u32 idx) + : vertex(v), hasSelfLoop(se), index(idx) {} + + bool operator<(const DotInfo &other) const { + if (hasSelfLoop != other.hasSelfLoop) + return hasSelfLoop < other.hasSelfLoop; + // tie break with vertex id: lowest ID wins + return index > other.index; + } + + NFAVertex vertex; + bool hasSelfLoop; + u32 index; +}; +} + +// Returns nullptr if all vertices in the given set are not dots. 
+// We can only pick one dot vertex, so we go for a dot-star if it exists, +// otherwise the dot without a self-edge with the lowest ID. +static +NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts, + set<NFAVertex> &otherV) { + priority_queue<DotInfo> dotq; + for (auto v : starts) { + if (is_dot(v, g)) { + u32 idx = g[v].index; + dotq.push(DotInfo(v, hasSelfLoop(v, g), idx)); + } + } + + if (dotq.empty()) { return NGHolder::null_vertex(); - } - - const DotInfo &dot = dotq.top(); - otherV = starts; - otherV.erase(dot.vertex); - DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index, - dot.hasSelfLoop ? "has self-edge" : "no self-edge"); - DEBUG_PRINTF("%zu other vertices\n", otherV.size()); - return dot.vertex; -} - -// Returns true if the given vertex is only preceded by start. If start is -// graph.startDs (i.e. unanchored), the given vertex can also be connected to -// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored. -static -bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g, - bool selfLoopIsAcceptable) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (selfLoopIsAcceptable && u == v) { - continue; - } else if (u == start) { - continue; - } else if (start == g.startDs && u == g.start) { - continue; - } else { - return false; - } - } - return true; -} - -// Note: this will only remove the anchored first dot in the chain -- any other -// removable nodes will be handled by the unanchored case below. 
-static -void reformAnchoredRepeatsComponent(NGHolder &g, - set<NFAVertex> &compAnchoredStarts, - set<NFAVertex> &compUnanchoredStarts, - set<NFAVertex> &dead, depth *startBegin, - depth *startEnd) { - // anchored cases can not have any unanchored starts - if (!compUnanchoredStarts.empty()) { - DEBUG_PRINTF("we have unanchored starts, skipping\n"); - return; - } - + } + + const DotInfo &dot = dotq.top(); + otherV = starts; + otherV.erase(dot.vertex); + DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index, + dot.hasSelfLoop ? "has self-edge" : "no self-edge"); + DEBUG_PRINTF("%zu other vertices\n", otherV.size()); + return dot.vertex; +} + +// Returns true if the given vertex is only preceded by start. If start is +// graph.startDs (i.e. unanchored), the given vertex can also be connected to +// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored. +static +bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g, + bool selfLoopIsAcceptable) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (selfLoopIsAcceptable && u == v) { + continue; + } else if (u == start) { + continue; + } else if (start == g.startDs && u == g.start) { + continue; + } else { + return false; + } + } + return true; +} + +// Note: this will only remove the anchored first dot in the chain -- any other +// removable nodes will be handled by the unanchored case below. 
+static +void reformAnchoredRepeatsComponent(NGHolder &g, + set<NFAVertex> &compAnchoredStarts, + set<NFAVertex> &compUnanchoredStarts, + set<NFAVertex> &dead, depth *startBegin, + depth *startEnd) { + // anchored cases can not have any unanchored starts + if (!compUnanchoredStarts.empty()) { + DEBUG_PRINTF("we have unanchored starts, skipping\n"); + return; + } + NFAVertex dotV = NGHolder::null_vertex(); - set<NFAVertex> otherV; - dotV = findReformable(g, compAnchoredStarts, otherV); + set<NFAVertex> otherV; + dotV = findReformable(g, compAnchoredStarts, otherV); if (dotV == NGHolder::null_vertex()) { - DEBUG_PRINTF("no candidate reformable dot found.\n"); - return; - } - - NFAEdge loopEdge; - bool selfLoop = false; - bool bustOut = false; - - for (const auto &e : out_edges_range(dotV, g)) { - NFAVertex t = target(e, g); - if (t == dotV) { - selfLoop = true; - loopEdge = e; - continue; - } - - if (is_special(t, g)) { - bustOut = true; - break; - } - - if (!otherV.empty() && otherV.find(t) == otherV.end()) { - bustOut = true; - break; - } - } - - if (bustOut) { - DEBUG_PRINTF("busting out\n"); - return; - } - - if (!isStartNode(dotV, g.start, g, true)) { + DEBUG_PRINTF("no candidate reformable dot found.\n"); + return; + } + + NFAEdge loopEdge; + bool selfLoop = false; + bool bustOut = false; + + for (const auto &e : out_edges_range(dotV, g)) { + NFAVertex t = target(e, g); + if (t == dotV) { + selfLoop = true; + loopEdge = e; + continue; + } + + if (is_special(t, g)) { + bustOut = true; + break; + } + + if (!otherV.empty() && otherV.find(t) == otherV.end()) { + bustOut = true; + break; + } + } + + if (bustOut) { + DEBUG_PRINTF("busting out\n"); + return; + } + + if (!isStartNode(dotV, g.start, g, true)) { DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index); - return; - } - - /* get bounds */ - depth min; + return; + } + + /* get bounds */ + depth min; depth max(1); - - if (selfLoop) { - // A self-loop indicates that this is a '.+' or '.*' - max = 
depth::infinity(); - } - - if (!otherV.empty()) { - /* We require that the successors of the dot node are are the same - * as the start vertex. TODO: remember why. - */ - if (selfLoop) { - if (otherV.size() != out_degree(dotV, g) - 1) { - return; - } - } else { - if (otherV.size() != out_degree(dotV, g)) { - return; - } - } - + + if (selfLoop) { + // A self-loop indicates that this is a '.+' or '.*' + max = depth::infinity(); + } + + if (!otherV.empty()) { + /* We require that the successors of the dot node are are the same + * as the start vertex. TODO: remember why. + */ + if (selfLoop) { + if (otherV.size() != out_degree(dotV, g) - 1) { + return; + } + } else { + if (otherV.size() != out_degree(dotV, g)) { + return; + } + } + min = depth(0); - } else { + } else { min = depth(1); - } - - *startBegin = min; - *startEnd = max; - - for (auto t : adjacent_vertices_range(dotV, g)) { - if (t != dotV) { - add_edge_if_not_present(g.startDs, t, g); - add_edge_if_not_present(g.start, t, g); - compUnanchoredStarts.insert(t); - } - } - - for (auto v : otherV) { - remove_edge(g.start, v, g); - } - + } + + *startBegin = min; + *startEnd = max; + + for (auto t : adjacent_vertices_range(dotV, g)) { + if (t != dotV) { + add_edge_if_not_present(g.startDs, t, g); + add_edge_if_not_present(g.start, t, g); + compUnanchoredStarts.insert(t); + } + } + + for (auto v : otherV) { + remove_edge(g.start, v, g); + } + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); - clear_vertex(dotV, g); - dead.insert(dotV); - compAnchoredStarts.erase(dotV); -} - -static -void reformUnanchoredRepeatsComponent(NGHolder &g, - set<NFAVertex> &compAnchoredStarts, - set<NFAVertex> &compUnanchoredStarts, - set<NFAVertex> &dead, - depth *startBegin, depth *startEnd) { - // unanchored cases can not have any anchored starts - if (!compAnchoredStarts.empty()) { - DEBUG_PRINTF("we have anchored starts, skipping\n"); - return; - } - - while (true) { + clear_vertex(dotV, g); + dead.insert(dotV); + 
compAnchoredStarts.erase(dotV); +} + +static +void reformUnanchoredRepeatsComponent(NGHolder &g, + set<NFAVertex> &compAnchoredStarts, + set<NFAVertex> &compUnanchoredStarts, + set<NFAVertex> &dead, + depth *startBegin, depth *startEnd) { + // unanchored cases can not have any anchored starts + if (!compAnchoredStarts.empty()) { + DEBUG_PRINTF("we have anchored starts, skipping\n"); + return; + } + + while (true) { NFAVertex dotV = NGHolder::null_vertex(); - set<NFAVertex> otherV; - dotV = findReformable(g, compUnanchoredStarts, otherV); + set<NFAVertex> otherV; + dotV = findReformable(g, compUnanchoredStarts, otherV); if (dotV == NGHolder::null_vertex()) { - DEBUG_PRINTF("no candidate reformable dot found.\n"); - return; - } - - NFAEdge loopEdge; - bool selfLoop = false; - bool bustOut = false; - - for (const auto &e : out_edges_range(dotV, g)) { - NFAVertex t = target(e, g); - - if (t == dotV) { - selfLoop = true; - loopEdge = e; - continue; - } - - if (is_special(t, g)) { - bustOut = true; - break; - } - - if (!otherV.empty() && otherV.find(t) == otherV.end()) { - bustOut = true; - break; - } - } - - if (bustOut) { - DEBUG_PRINTF("busting out\n"); - if (!selfLoop) { - return; - } - - for (auto v : otherV) { - if (!edge(dotV, v, g).second) { - return; - } - } - - // A self-loop indicates that this is a '.+' or '.*' + DEBUG_PRINTF("no candidate reformable dot found.\n"); + return; + } + + NFAEdge loopEdge; + bool selfLoop = false; + bool bustOut = false; + + for (const auto &e : out_edges_range(dotV, g)) { + NFAVertex t = target(e, g); + + if (t == dotV) { + selfLoop = true; + loopEdge = e; + continue; + } + + if (is_special(t, g)) { + bustOut = true; + break; + } + + if (!otherV.empty() && otherV.find(t) == otherV.end()) { + bustOut = true; + break; + } + } + + if (bustOut) { + DEBUG_PRINTF("busting out\n"); + if (!selfLoop) { + return; + } + + for (auto v : otherV) { + if (!edge(dotV, v, g).second) { + return; + } + } + + // A self-loop indicates that this is a 
'.+' or '.*' DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index); - *startEnd = depth::infinity(); - remove_edge(dotV, dotV, g); - return; - } - - if (!isStartNode(dotV, g.startDs, g, true)) { + *startEnd = depth::infinity(); + remove_edge(dotV, dotV, g); + return; + } + + if (!isStartNode(dotV, g.startDs, g, true)) { DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index); - return; - } - - /* get bounds */ + return; + } + + /* get bounds */ depth min(1); depth max(1); - - if (selfLoop) { - // A self-loop indicates that this is a '.+' or '.*' - DEBUG_PRINTF("self-loop detected\n"); - max = depth::infinity(); - } - - if (!otherV.empty()) { - if (!selfLoop && otherV.size() != out_degree(dotV, g)) { - return; - } - - if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) { - return; - } - - if (min > depth(1)) { - /* this is not a case we can handle */ - DEBUG_PRINTF("min greater than one, skipping\n"); - return; - } + + if (selfLoop) { + // A self-loop indicates that this is a '.+' or '.*' + DEBUG_PRINTF("self-loop detected\n"); + max = depth::infinity(); + } + + if (!otherV.empty()) { + if (!selfLoop && otherV.size() != out_degree(dotV, g)) { + return; + } + + if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) { + return; + } + + if (min > depth(1)) { + /* this is not a case we can handle */ + DEBUG_PRINTF("min greater than one, skipping\n"); + return; + } min = depth(0); - } - - *startBegin += min; - *startEnd += max; - - for (auto v : otherV) { - remove_edge(g.start, v, g); - remove_edge(g.startDs, v, g); - } - - compUnanchoredStarts.clear(); - for (auto t : adjacent_vertices_range(dotV, g)) { - if (t != dotV) { + } + + *startBegin += min; + *startEnd += max; + + for (auto v : otherV) { + remove_edge(g.start, v, g); + remove_edge(g.startDs, v, g); + } + + compUnanchoredStarts.clear(); + for (auto t : adjacent_vertices_range(dotV, g)) { + if (t != dotV) { DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index); - add_edge(g.startDs, t, 
g); - add_edge(g.start, t, g); - compUnanchoredStarts.insert(t); - } - } - + add_edge(g.startDs, t, g); + add_edge(g.start, t, g); + compUnanchoredStarts.insert(t); + } + } + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); - dead.insert(dotV); - clear_vertex(dotV, g); - compUnanchoredStarts.erase(dotV); - } -} - -// for t to be another optional dot, it must have only in-edges from v and from -// starts -static -bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) { - if (!is_dot(t, g)) { - return false; - } - - bool found_v = false, found_start = false; - - for (auto u : inv_adjacent_vertices_range(t, g)) { - if (u == v) { - found_v = true; - } else if (u == g.start || u == g.startDs) { - found_start = true; - } else { - return false; - } - } - - return found_v && found_start; -} - -static -bool gatherParticipants(const NGHolder &g, - NFAVertex start, NFAVertex initialDot, - set<NFAVertex> &dots, set<NFAVertex> &succ) { - // Walk the graph downwards from the initial dot; each dot will have: - // 1) a single optional dot successor, or - // 2) N successors (our terminating case) - dots.insert(initialDot); - NFAVertex v = initialDot; - - while (out_degree(v, g) == 1) { - NFAVertex t = *(adjacent_vertices(v, g).first); - // for t to be another optional dot, it must have only in-edges from v - // and from starts - if (isOptionalDot(t, v, g)) { - // another dot; bail if we've seen it once already - if (dots.find(t) != dots.end()) { + dead.insert(dotV); + clear_vertex(dotV, g); + compUnanchoredStarts.erase(dotV); + } +} + +// for t to be another optional dot, it must have only in-edges from v and from +// starts +static +bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) { + if (!is_dot(t, g)) { + return false; + } + + bool found_v = false, found_start = false; + + for (auto u : inv_adjacent_vertices_range(t, g)) { + if (u == v) { + found_v = true; + } else if (u == g.start || u == g.startDs) { + found_start = true; + } else { + return false; 
+ } + } + + return found_v && found_start; +} + +static +bool gatherParticipants(const NGHolder &g, + NFAVertex start, NFAVertex initialDot, + set<NFAVertex> &dots, set<NFAVertex> &succ) { + // Walk the graph downwards from the initial dot; each dot will have: + // 1) a single optional dot successor, or + // 2) N successors (our terminating case) + dots.insert(initialDot); + NFAVertex v = initialDot; + + while (out_degree(v, g) == 1) { + NFAVertex t = *(adjacent_vertices(v, g).first); + // for t to be another optional dot, it must have only in-edges from v + // and from starts + if (isOptionalDot(t, v, g)) { + // another dot; bail if we've seen it once already + if (dots.find(t) != dots.end()) { DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index); - return false; - } - dots.insert(t); - v = t; - continue; - } - // otherwise, we found a terminating dot state - break; - } - - // Our terminating states are the successors of v. - // All of these MUST have an edge from start as well. - for (auto w : adjacent_vertices_range(v, g)) { - succ.insert(w); - if (!edge(start, w, g).second) { + return false; + } + dots.insert(t); + v = t; + continue; + } + // otherwise, we found a terminating dot state + break; + } + + // Our terminating states are the successors of v. + // All of these MUST have an edge from start as well. + for (auto w : adjacent_vertices_range(v, g)) { + succ.insert(w); + if (!edge(start, w, g).second) { DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n", - g[w].index); - return false; - } - } - - /* All the non chained v connected to start must be in succ as well - * TODO: remember why (and document). 
*/ - for (auto u : adjacent_vertices_range(start, g)) { - if (is_special(u, g)) { - continue; - } - if (!contains(dots, u) && !contains(succ, u)) { - return false; - } - } - - return !succ.empty(); -} - -static -void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, - set<NFAVertex> &dead, UNUSED depth *startBegin, - depth *startEnd) { - // Handle optional dot repeat prefixes, e.g. - // /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs - // Note that this code assumes that fixed repeats ('^.{5,20}') have been - // pruned already, down (in this case) to '^.{0,15}'. - - // The first of our optional dots must be connected to start. The jump edge - // past it will be verified in gatherParticipants(). If start is - // graph.start, it should not be connected to startDs. + g[w].index); + return false; + } + } + + /* All the non chained v connected to start must be in succ as well + * TODO: remember why (and document). */ + for (auto u : adjacent_vertices_range(start, g)) { + if (is_special(u, g)) { + continue; + } + if (!contains(dots, u) && !contains(succ, u)) { + return false; + } + } + + return !succ.empty(); +} + +static +void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, + set<NFAVertex> &dead, UNUSED depth *startBegin, + depth *startEnd) { + // Handle optional dot repeat prefixes, e.g. + // /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs + // Note that this code assumes that fixed repeats ('^.{5,20}') have been + // pruned already, down (in this case) to '^.{0,15}'. + + // The first of our optional dots must be connected to start. The jump edge + // past it will be verified in gatherParticipants(). If start is + // graph.start, it should not be connected to startDs. 
NFAVertex initialDot = NGHolder::null_vertex(); - for (auto v : adjacent_vertices_range(start, g)) { - if (is_special(v, g)) { - continue; - } - if (is_dot(v, g) && isStartNode(v, start, g, false)) { - if (initialDot) { - return; - } - initialDot = v; + for (auto v : adjacent_vertices_range(start, g)) { + if (is_special(v, g)) { + continue; + } + if (is_dot(v, g) && isStartNode(v, start, g, false)) { + if (initialDot) { + return; + } + initialDot = v; DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index); - } - } - - if (!initialDot) { - return; - } - - // Collect all the other optional dot vertices and the successor vertices - // by walking down the graph from initialDot - set<NFAVertex> dots, succ; - if (!gatherParticipants(g, start, initialDot, dots, succ)) { - DEBUG_PRINTF("gatherParticipants failed\n"); - return; - } - - DEBUG_PRINTF("optional dot repeat with %zu participants, " - "terminating in %zu non-dot nodes\n", - dots.size(), succ.size()); - - // Remove all the participants and set the start offset - dead.insert(dots.begin(), dots.end()); - - DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(), - startEnd->str().c_str()); - - if (start == g.start && startEnd->is_infinite()) { + } + } + + if (!initialDot) { + return; + } + + // Collect all the other optional dot vertices and the successor vertices + // by walking down the graph from initialDot + set<NFAVertex> dots, succ; + if (!gatherParticipants(g, start, initialDot, dots, succ)) { + DEBUG_PRINTF("gatherParticipants failed\n"); + return; + } + + DEBUG_PRINTF("optional dot repeat with %zu participants, " + "terminating in %zu non-dot nodes\n", + dots.size(), succ.size()); + + // Remove all the participants and set the start offset + dead.insert(dots.begin(), dots.end()); + + DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(), + startEnd->str().c_str()); + + if (start == g.start && startEnd->is_infinite()) { *startEnd = depth(dots.size()); - } else if 
(startEnd->is_finite()) { - *startEnd += dots.size(); - } - assert(startEnd->is_reachable()); - - // Connect our successor vertices to both start and startDs. + } else if (startEnd->is_finite()) { + *startEnd += dots.size(); + } + assert(startEnd->is_reachable()); + + // Connect our successor vertices to both start and startDs. for (auto v : succ) { - add_edge_if_not_present(g.start, v, g); - add_edge_if_not_present(g.startDs, v, g); - } -} - -static -void deleteVertices(set<NFAVertex> &dead, NGHolder &g) { - if (!dead.empty()) { - DEBUG_PRINTF("pruning %zu vertices\n", dead.size()); - remove_vertices(dead, g); - } - dead.clear(); -} - -static -void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) { - DEBUG_PRINTF("component\n"); - set<NFAVertex> anchored, unanchored, dead; - if (!findStarts(g, anchored, unanchored)) { - DEBUG_PRINTF("no starts\n"); - return; - } - - reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin, - startEnd); - deleteVertices(dead, g); - - reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin, - startEnd); - deleteVertices(dead, g); -} - -static -void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) { - DEBUG_PRINTF("collapseVariableRepeats\n"); - set<NFAVertex> dead; - - collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd); - deleteVertices(dead, g); - - collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd); - deleteVertices(dead, g); -} - -static -void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs, - depth min_repeat, depth max_repeat) { - const bool unbounded = max_repeat.is_infinite(); - if (unbounded) { - max_repeat = min_repeat; - } - - assert(max_repeat.is_finite()); - - NFAVertex u = lhs; - - if (!min_repeat && unbounded) { - NFAVertex v = add_vertex(g); - add_edge(u, v, g); - g[v].char_reach.setall(); - - for (auto w : rhs) { - add_edge(lhs, w, g); - } - } - - for (u32 i = 0; i < min_repeat; i++) { - 
NFAVertex v = add_vertex(g); - add_edge(u, v, g); - g[v].char_reach.setall(); - u = v; - } - - NFAVertex split = u; - /* lhs now split point for optional */ - for (u32 i = min_repeat; i < max_repeat; i++) { - NFAVertex v = add_vertex(g); - add_edge(u, v, g); - if (u != split) { - add_edge(split, v, g); - } - g[v].char_reach.setall(); - u = v; - } - - if (unbounded) { - add_edge(u, u, g); - } - - for (auto w : rhs) { - add_edge(u, w, g); - if (split != u) { - add_edge(split, w, g); - } - } -} - -static -void restoreLeadingDots(NGHolder &g, const depth &startBegin, - const depth &startEnd) { - if (startBegin == depth(0) && startEnd.is_infinite()) { - return; - } - DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(), - startEnd.str().c_str()); - - for (UNUSED auto v : adjacent_vertices_range(g.start, g)) { - assert(edge(g.startDs, v, g).second); - } - clear_out_edges(g.start, g); - add_edge(g.start, g.startDs, g); - - const bool unbounded = startEnd.is_infinite(); - - NFAVertex root = unbounded ? 
g.startDs : g.start; - - vector<NFAVertex> rhs; - insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g)); - rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end()); - for (auto v : rhs) { - remove_edge(g.startDs, v, g); - } - - addDotsBetween(g, root, rhs, startBegin, startEnd); + add_edge_if_not_present(g.start, v, g); + add_edge_if_not_present(g.startDs, v, g); + } +} + +static +void deleteVertices(set<NFAVertex> &dead, NGHolder &g) { + if (!dead.empty()) { + DEBUG_PRINTF("pruning %zu vertices\n", dead.size()); + remove_vertices(dead, g); + } + dead.clear(); +} + +static +void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) { + DEBUG_PRINTF("component\n"); + set<NFAVertex> anchored, unanchored, dead; + if (!findStarts(g, anchored, unanchored)) { + DEBUG_PRINTF("no starts\n"); + return; + } + + reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin, + startEnd); + deleteVertices(dead, g); + + reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin, + startEnd); + deleteVertices(dead, g); +} + +static +void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) { + DEBUG_PRINTF("collapseVariableRepeats\n"); + set<NFAVertex> dead; + + collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd); + deleteVertices(dead, g); + + collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd); + deleteVertices(dead, g); +} + +static +void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs, + depth min_repeat, depth max_repeat) { + const bool unbounded = max_repeat.is_infinite(); + if (unbounded) { + max_repeat = min_repeat; + } + + assert(max_repeat.is_finite()); + + NFAVertex u = lhs; + + if (!min_repeat && unbounded) { + NFAVertex v = add_vertex(g); + add_edge(u, v, g); + g[v].char_reach.setall(); + + for (auto w : rhs) { + add_edge(lhs, w, g); + } + } + + for (u32 i = 0; i < min_repeat; i++) { + NFAVertex v = add_vertex(g); + add_edge(u, v, g); + 
g[v].char_reach.setall(); + u = v; + } + + NFAVertex split = u; + /* lhs now split point for optional */ + for (u32 i = min_repeat; i < max_repeat; i++) { + NFAVertex v = add_vertex(g); + add_edge(u, v, g); + if (u != split) { + add_edge(split, v, g); + } + g[v].char_reach.setall(); + u = v; + } + + if (unbounded) { + add_edge(u, u, g); + } + + for (auto w : rhs) { + add_edge(u, w, g); + if (split != u) { + add_edge(split, w, g); + } + } +} + +static +void restoreLeadingDots(NGHolder &g, const depth &startBegin, + const depth &startEnd) { + if (startBegin == depth(0) && startEnd.is_infinite()) { + return; + } + DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(), + startEnd.str().c_str()); + + for (UNUSED auto v : adjacent_vertices_range(g.start, g)) { + assert(edge(g.startDs, v, g).second); + } + clear_out_edges(g.start, g); + add_edge(g.start, g.startDs, g); + + const bool unbounded = startEnd.is_infinite(); + + NFAVertex root = unbounded ? g.startDs : g.start; + + vector<NFAVertex> rhs; + insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g)); + rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end()); + for (auto v : rhs) { + remove_edge(g.startDs, v, g); + } + + addDotsBetween(g, root, rhs, startBegin, startEnd); renumber_vertices(g); renumber_edges(g); -} - -// Entry point. -void reformLeadingDots(NGHolder &g) { - depth startBegin(0); - depth startEnd = depth::infinity(); - - reformAnchoredRepeats(g, &startBegin, &startEnd); - collapseVariableRepeats(g, &startBegin, &startEnd); - restoreLeadingDots(g, startBegin, startEnd); -} - -} // namespace ue2 +} + +// Entry point. 
+void reformLeadingDots(NGHolder &g) { + depth startBegin(0); + depth startEnd = depth::infinity(); + + reformAnchoredRepeats(g, &startBegin, &startEnd); + collapseVariableRepeats(g, &startBegin, &startEnd); + restoreLeadingDots(g, startBegin, startEnd); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h index 8454c31941..d5ffee2d79 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h @@ -1,45 +1,45 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Analysis pass to reform leading dots. - */ - -#ifndef NG_ANCHORED_BOUNDED_REPEATS_H -#define NG_ANCHORED_BOUNDED_REPEATS_H - -namespace ue2 { - -class NGHolder; - -/* should not be used if SoM is required */ -void reformLeadingDots(NGHolder &g); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Analysis pass to reform leading dots. + */ + +#ifndef NG_ANCHORED_BOUNDED_REPEATS_H +#define NG_ANCHORED_BOUNDED_REPEATS_H + +namespace ue2 { + +class NGHolder; + +/* should not be used if SoM is required */ +void reformLeadingDots(NGHolder &g); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp index 8812afadb7..24d4ecace1 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp @@ -1,558 +1,558 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Resolve special assert vertices. - * - * The assert resolution algorithm proceeds by iterating over those edges with - * assertion flags, considering source and target vertices of each edge. If a - * vertex has a superset of the reachability demanded by the assertion on the - * edge, it is split into alternatives providing the word and non-word paths - * through that vertex. - * - * A great deal of the complexity in the resolveAsserts pass is devoted to - * handling these assertions when the UCP flag is specified (meaning \\w and \\W - * are implemented with Unicode properties, rather than their ASCII - * interpretation) and the prefiltering flag is also used. Complete, - * non-prefiltering UCP support is not available yet. - */ -#include "ng_asserts.h" - -#include "ng.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_util.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Resolve special assert vertices. + * + * The assert resolution algorithm proceeds by iterating over those edges with + * assertion flags, considering source and target vertices of each edge. If a + * vertex has a superset of the reachability demanded by the assertion on the + * edge, it is split into alternatives providing the word and non-word paths + * through that vertex. + * + * A great deal of the complexity in the resolveAsserts pass is devoted to + * handling these assertions when the UCP flag is specified (meaning \\w and \\W + * are implemented with Unicode properties, rather than their ASCII + * interpretation) and the prefiltering flag is also used. Complete, + * non-prefiltering UCP support is not available yet. 
+ */ +#include "ng_asserts.h" + +#include "ng.h" +#include "ng_prune.h" +#include "ng_redundancy.h" +#include "ng_util.h" #include "compiler/compiler.h" -#include "parser/position.h" // for POS flags -#include "util/bitutils.h" // for findAndClearLSB_32 -#include "util/boundary_reports.h" -#include "util/container.h" -#include "util/compile_context.h" -#include "util/compile_error.h" -#include "util/graph_range.h" -#include "util/report_manager.h" -#include "util/unicode_def.h" - -#include <queue> - -using namespace std; - -namespace ue2 { - -/** \brief Hard limit on the maximum number of vertices we'll clone before we - * throw up our hands and report 'Pattern too large.' */ -static const size_t MAX_CLONED_VERTICES = 2048; - -/** \brief The definition of \\w, since we use it everywhere in here. */ -static const CharReach CHARREACH_WORD(CharReach('a', 'z') | - CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_')); - -/** \brief \\W is the inverse of \\w */ -static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD); - -/** \brief Prefiltering definition of \\w for UCP mode. - * - * Includes all high bytes as to capture all non-ASCII, however depending on - * direction only continuers or starters are strictly required - as the input - * is well-formed, this laxness will not cost us. */ -static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD - | CharReach(128, 255)); - -/** \brief Prefiltering definition of \\W for UCP Mode. - * - * (non-word already includes high bytes) */ -static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD); - -/** \brief Find all the edges with assertion flags. 
*/ -static -vector<NFAEdge> getAsserts(const NGHolder &g) { - vector<NFAEdge> out; - for (const auto &e : edges_range(g)) { - if (g[e].assert_flags) { - out.push_back(e); - } - } - return out; -} - -static -void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) { +#include "parser/position.h" // for POS flags +#include "util/bitutils.h" // for findAndClearLSB_32 +#include "util/boundary_reports.h" +#include "util/container.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/graph_range.h" +#include "util/report_manager.h" +#include "util/unicode_def.h" + +#include <queue> + +using namespace std; + +namespace ue2 { + +/** \brief Hard limit on the maximum number of vertices we'll clone before we + * throw up our hands and report 'Pattern too large.' */ +static const size_t MAX_CLONED_VERTICES = 2048; + +/** \brief The definition of \\w, since we use it everywhere in here. */ +static const CharReach CHARREACH_WORD(CharReach('a', 'z') | + CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_')); + +/** \brief \\W is the inverse of \\w */ +static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD); + +/** \brief Prefiltering definition of \\w for UCP mode. + * + * Includes all high bytes as to capture all non-ASCII, however depending on + * direction only continuers or starters are strictly required - as the input + * is well-formed, this laxness will not cost us. */ +static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD + | CharReach(128, 255)); + +/** \brief Prefiltering definition of \\W for UCP Mode. + * + * (non-word already includes high bytes) */ +static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD); + +/** \brief Find all the edges with assertion flags. 
*/ +static +vector<NFAEdge> getAsserts(const NGHolder &g) { + vector<NFAEdge> out; + for (const auto &e : edges_range(g)) { + if (g[e].assert_flags) { + out.push_back(e); + } + } + return out; +} + +static +void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) { DEBUG_PRINTF("%zu needs splitting\n", g[v].index); - to_split->emplace(g[v].index, v); -} - -/** \brief Find vertices that need to be split due to an assertion edge. - * - * A vertex needs to be split if has an edge to/from it with an assert with a - * restriction on the relevant end. */ -static -void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts, - map<u32, NFAVertex> *to_split, - map<u32, NFAVertex> *to_split_ucp) { - for (const auto &e : asserts) { - NFAVertex u = source(e, g); - NFAVertex v = target(e, g); - u32 flags = g[e].assert_flags; - assert(flags); - - const CharReach &u_cr = g[u].char_reach; - const CharReach &v_cr = g[v].char_reach; - - bool ucp_assert = flags & UCP_ASSERT_FLAGS; - bool normal_assert = flags & NON_UCP_ASSERT_FLAGS; - /* In reality, an expression can only be entirely ucp or not ucp */ - assert(ucp_assert != normal_assert); - - if (normal_assert) { - /* assume any flag results in us have to split if the vertex is not - * a subset of word or completely disjoint from it. We could be more - * nuanced if flags is a disjunction of multiple assertions. 
*/ - if (!u_cr.isSubsetOf(CHARREACH_WORD) - && !u_cr.isSubsetOf(CHARREACH_NONWORD) - && u != g.start) { /* start is always considered a nonword */ - addToSplit(g, u, to_split); - } - - if (!v_cr.isSubsetOf(CHARREACH_WORD) - && !v_cr.isSubsetOf(CHARREACH_NONWORD) - && v != g.accept /* accept require special handling, done on a - * per edge basis in resolve asserts - */ - && v != g.acceptEod) { /* eod is always considered a nonword */ - addToSplit(g, v, to_split); - } - } - - if (ucp_assert) { - /* note: the ucp prefilter crs overlap - requires a bit more care */ - if (u == g.start) { /* start never needs to be split, - * treat nonword */ - } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) { - if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) - && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { - addToSplit(g, u, to_split_ucp); - } - } else { - assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP); - if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) - && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { - addToSplit(g, u, to_split_ucp); - } - } - - if (v == g.acceptEod /* eod is always considered a nonword */ - || v == g.accept) { /* accept require special handling, done on - * a per edge basis in resolve asserts */ - } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) { - if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) - && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { - addToSplit(g, v, to_split_ucp); - } - } else { - assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP); - if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) - && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { - addToSplit(g, v, to_split_ucp); - } - } - } - } -} - -static + to_split->emplace(g[v].index, v); +} + +/** \brief Find vertices that need to be split due to an assertion edge. + * + * A vertex needs to be split if has an edge to/from it with an assert with a + * restriction on the relevant end. 
*/ +static +void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts, + map<u32, NFAVertex> *to_split, + map<u32, NFAVertex> *to_split_ucp) { + for (const auto &e : asserts) { + NFAVertex u = source(e, g); + NFAVertex v = target(e, g); + u32 flags = g[e].assert_flags; + assert(flags); + + const CharReach &u_cr = g[u].char_reach; + const CharReach &v_cr = g[v].char_reach; + + bool ucp_assert = flags & UCP_ASSERT_FLAGS; + bool normal_assert = flags & NON_UCP_ASSERT_FLAGS; + /* In reality, an expression can only be entirely ucp or not ucp */ + assert(ucp_assert != normal_assert); + + if (normal_assert) { + /* assume any flag results in us have to split if the vertex is not + * a subset of word or completely disjoint from it. We could be more + * nuanced if flags is a disjunction of multiple assertions. */ + if (!u_cr.isSubsetOf(CHARREACH_WORD) + && !u_cr.isSubsetOf(CHARREACH_NONWORD) + && u != g.start) { /* start is always considered a nonword */ + addToSplit(g, u, to_split); + } + + if (!v_cr.isSubsetOf(CHARREACH_WORD) + && !v_cr.isSubsetOf(CHARREACH_NONWORD) + && v != g.accept /* accept require special handling, done on a + * per edge basis in resolve asserts + */ + && v != g.acceptEod) { /* eod is always considered a nonword */ + addToSplit(g, v, to_split); + } + } + + if (ucp_assert) { + /* note: the ucp prefilter crs overlap - requires a bit more care */ + if (u == g.start) { /* start never needs to be split, + * treat nonword */ + } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) { + if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) + && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { + addToSplit(g, u, to_split_ucp); + } + } else { + assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP); + if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) + && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { + addToSplit(g, u, to_split_ucp); + } + } + + if (v == g.acceptEod /* eod is always considered a nonword */ + || v == g.accept) { /* accept require special handling, done 
on + * a per edge basis in resolve asserts */ + } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) { + if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) + && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { + addToSplit(g, v, to_split_ucp); + } + } else { + assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP); + if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) + && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { + addToSplit(g, v, to_split_ucp); + } + } + } + } +} + +static void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, NFAVertex v, s32 adj) { - // Don't try and set the report ID of a special vertex. - assert(!is_special(v, g)); - - // If there's a report set already, we're replacing it. - g[v].reports.clear(); - + // Don't try and set the report ID of a special vertex. + assert(!is_special(v, g)); + + // If there's a report set already, we're replacing it. + g[v].reports.clear(); + Report ir = rm.getBasicInternalReport(expr, adj); - - g[v].reports.insert(rm.getInternalId(ir)); + + g[v].reports.insert(rm.getInternalId(ir)); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); -} - -static +} + +static NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, NFAVertex v, const CharReach &cr_mask) { - NFAVertex clone = clone_vertex(g, v); - g[clone].char_reach &= cr_mask; - clone_out_edges(g, v, clone); - clone_in_edges(g, v, clone); - - if (v == g.startDs) { + NFAVertex clone = clone_vertex(g, v); + g[clone].char_reach &= cr_mask; + clone_out_edges(g, v, clone); + clone_in_edges(g, v, clone); + + if (v == g.startDs) { if (expr.utf8) { - g[clone].char_reach &= ~UTF_START_CR; - } - - DEBUG_PRINTF("marked as virt\n"); - g[clone].assert_flags = POS_FLAG_VIRTUAL_START; - + g[clone].char_reach &= ~UTF_START_CR; + } + + DEBUG_PRINTF("marked as virt\n"); + g[clone].assert_flags = POS_FLAG_VIRTUAL_START; + setReportId(rm, g, expr, clone, 0); - } - - return clone; -} - -static + } + + return clone; +} + +static 
void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, NFAVertex v, bool ucp) { - assert(v != g.start); - assert(v != g.accept); - assert(v != g.acceptEod); + assert(v != g.start); + assert(v != g.accept); + assert(v != g.acceptEod); DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp); - - CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; - CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; - - auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; - - // Split v into word/nonword vertices with only asserting out-edges. + + CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; + CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; + + auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; + + // Split v into word/nonword vertices with only asserting out-edges. NFAVertex w_out = makeClone(rm, g, expr, v, cr_word); NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword); - remove_out_edge_if(w_out, has_no_assert, g); - remove_out_edge_if(nw_out, has_no_assert, g); - - // Split v into word/nonword vertices with only asserting in-edges. + remove_out_edge_if(w_out, has_no_assert, g); + remove_out_edge_if(nw_out, has_no_assert, g); + + // Split v into word/nonword vertices with only asserting in-edges. NFAVertex w_in = makeClone(rm, g, expr, v, cr_word); NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword); - remove_in_edge_if(w_in, has_no_assert, g); - remove_in_edge_if(nw_in, has_no_assert, g); - - // Prune edges with asserts from original v. - auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; }; - remove_in_edge_if(v, has_assert, g); - remove_out_edge_if(v, has_assert, g); -} - -static + remove_in_edge_if(w_in, has_no_assert, g); + remove_in_edge_if(nw_in, has_no_assert, g); + + // Prune edges with asserts from original v. 
+ auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; }; + remove_in_edge_if(v, has_assert, g); + remove_out_edge_if(v, has_assert, g); +} + +static void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, set<NFAEdge> *dead) { - for (const auto &e : edges_range(g)) { - u32 flags = g[e].assert_flags; - if (!flags) { - continue; - } - - NFAVertex u = source(e, g); - NFAVertex v = target(e, g); - - assert(u != g.startDs); - - const CharReach &u_cr = g[u].char_reach; - const CharReach &v_cr = g[v].char_reach; - - bool impassable = true; - bool ucp = flags & UCP_ASSERT_FLAGS; + for (const auto &e : edges_range(g)) { + u32 flags = g[e].assert_flags; + if (!flags) { + continue; + } + + NFAVertex u = source(e, g); + NFAVertex v = target(e, g); + + assert(u != g.startDs); + + const CharReach &u_cr = g[u].char_reach; + const CharReach &v_cr = g[v].char_reach; + + bool impassable = true; + bool ucp = flags & UCP_ASSERT_FLAGS; DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n", g[u].index, g[v].index, flags, (int)ucp); - while (flags && impassable) { - u32 flag = 1U << findAndClearLSB_32(&flags); - switch (flag) { - case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: - case POS_FLAG_ASSERT_NONWORD_TO_WORD: - if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) { - continue; - } - break; - case POS_FLAG_ASSERT_WORD_TO_NONWORD: - case POS_FLAG_ASSERT_WORD_TO_WORD: - if ((u_cr & CHARREACH_WORD).none() || u == g.start) { - continue; - } - break; - case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: - case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: - if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) { - continue; - } - break; - case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: - case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: - if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) { - continue; - } - break; - default: - assert(0); - } - - if (v == g.accept) { - /* accept special will need to be treated specially later */ - impassable = false; - 
continue; - } - - switch (flag) { - case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: - case POS_FLAG_ASSERT_WORD_TO_NONWORD: - if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) { - continue; - } - break; - case POS_FLAG_ASSERT_WORD_TO_WORD: - case POS_FLAG_ASSERT_NONWORD_TO_WORD: - if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) { - continue; - } - break; - case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: - case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: - if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any() - && v != g.acceptEod) { - continue; - } - break; - case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: - case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: - if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any() - || v == g.acceptEod) { - continue; - } - break; - default: - assert(0); - } - impassable = false; - } - - if (impassable) { - dead->insert(e); - } else if (v == g.accept && !ucp) { - bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start; - UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start; - assert(u_w != u_nw); - bool v_w = false; - bool v_nw = false; - - flags = g[e].assert_flags; - if (u_w) { - v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD; - v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD; - } else { - v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD; - v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD; - } - assert(v_w || v_nw); - if (v_w && v_nw) { - /* edge is effectively unconditional */ - g[e].assert_flags = 0; - } else if (v_w) { - /* need to add a word byte */ - NFAVertex vv = add_vertex(g); + while (flags && impassable) { + u32 flag = 1U << findAndClearLSB_32(&flags); + switch (flag) { + case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: + case POS_FLAG_ASSERT_NONWORD_TO_WORD: + if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) { + continue; + } + break; + case POS_FLAG_ASSERT_WORD_TO_NONWORD: + case POS_FLAG_ASSERT_WORD_TO_WORD: + if ((u_cr & CHARREACH_WORD).none() || u == g.start) { + continue; + } + break; + case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: + case 
POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: + if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) { + continue; + } + break; + case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: + case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: + if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) { + continue; + } + break; + default: + assert(0); + } + + if (v == g.accept) { + /* accept special will need to be treated specially later */ + impassable = false; + continue; + } + + switch (flag) { + case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: + case POS_FLAG_ASSERT_WORD_TO_NONWORD: + if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) { + continue; + } + break; + case POS_FLAG_ASSERT_WORD_TO_WORD: + case POS_FLAG_ASSERT_NONWORD_TO_WORD: + if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) { + continue; + } + break; + case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: + case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: + if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any() + && v != g.acceptEod) { + continue; + } + break; + case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: + case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: + if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any() + || v == g.acceptEod) { + continue; + } + break; + default: + assert(0); + } + impassable = false; + } + + if (impassable) { + dead->insert(e); + } else if (v == g.accept && !ucp) { + bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start; + UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start; + assert(u_w != u_nw); + bool v_w = false; + bool v_nw = false; + + flags = g[e].assert_flags; + if (u_w) { + v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD; + v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD; + } else { + v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD; + v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD; + } + assert(v_w || v_nw); + if (v_w && v_nw) { + /* edge is effectively unconditional */ + g[e].assert_flags = 0; + } else if (v_w) { + /* need to add a word byte */ + NFAVertex vv = add_vertex(g); setReportId(rm, g, expr, vv, -1); - 
g[vv].char_reach = CHARREACH_WORD; - add_edge(vv, g.accept, g); - g[e].assert_flags = 0; - add_edge(u, vv, g[e], g); - dead->insert(e); - } else { - /* need to add a non word byte or see eod */ - NFAVertex vv = add_vertex(g); + g[vv].char_reach = CHARREACH_WORD; + add_edge(vv, g.accept, g); + g[e].assert_flags = 0; + add_edge(u, vv, g[e], g); + dead->insert(e); + } else { + /* need to add a non word byte or see eod */ + NFAVertex vv = add_vertex(g); setReportId(rm, g, expr, vv, -1); - g[vv].char_reach = CHARREACH_NONWORD; - add_edge(vv, g.accept, g); - g[e].assert_flags = 0; - add_edge(u, vv, g[e], g); + g[vv].char_reach = CHARREACH_NONWORD; + add_edge(vv, g.accept, g); + g[e].assert_flags = 0; + add_edge(u, vv, g[e], g); /* there may already be a different edge from start to eod if so * we need to make it unconditional and alive */ if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { - g[start_eod].assert_flags = 0; - dead->erase(start_eod); + g[start_eod].assert_flags = 0; + dead->erase(start_eod); } else { add_edge(u, g.acceptEod, g[e], g); - } - dead->insert(e); - } - } else if (v == g.accept && ucp) { - DEBUG_PRINTF("resolving ucp assert to accept\n"); - assert(u_cr.any()); - bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any() - && u != g.start; - bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any() - || u == g.start; - assert(u_w || u_nw); - - bool v_w = false; - bool v_nw = false; - - flags = g[e].assert_flags; - if (u_w) { - v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP; - v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP; - } - if (u_nw) { - v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP; - v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP; - } - assert(v_w || v_nw); - if (v_w && v_nw) { - /* edge is effectively unconditional */ - g[e].assert_flags = 0; - } else if (v_w) { - /* need to add a word byte */ - NFAVertex vv = add_vertex(g); + } + dead->insert(e); + } + } else if (v == g.accept && ucp) { + DEBUG_PRINTF("resolving ucp assert to 
accept\n"); + assert(u_cr.any()); + bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any() + && u != g.start; + bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any() + || u == g.start; + assert(u_w || u_nw); + + bool v_w = false; + bool v_nw = false; + + flags = g[e].assert_flags; + if (u_w) { + v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP; + v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP; + } + if (u_nw) { + v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP; + v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP; + } + assert(v_w || v_nw); + if (v_w && v_nw) { + /* edge is effectively unconditional */ + g[e].assert_flags = 0; + } else if (v_w) { + /* need to add a word byte */ + NFAVertex vv = add_vertex(g); setReportId(rm, g, expr, vv, -1); - g[vv].char_reach = CHARREACH_WORD_UCP_PRE; - add_edge(vv, g.accept, g); - g[e].assert_flags = 0; - add_edge(u, vv, g[e], g); - dead->insert(e); - } else { - /* need to add a non word byte or see eod */ - NFAVertex vv = add_vertex(g); + g[vv].char_reach = CHARREACH_WORD_UCP_PRE; + add_edge(vv, g.accept, g); + g[e].assert_flags = 0; + add_edge(u, vv, g[e], g); + dead->insert(e); + } else { + /* need to add a non word byte or see eod */ + NFAVertex vv = add_vertex(g); setReportId(rm, g, expr, vv, -1); - g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; - add_edge(vv, g.accept, g); - g[e].assert_flags = 0; - add_edge(u, vv, g[e], g); + g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; + add_edge(vv, g.accept, g); + g[e].assert_flags = 0; + add_edge(u, vv, g[e], g); /* there may already be a different edge from start to eod if so * we need to make it unconditional and alive */ if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { - g[start_eod].assert_flags = 0; - dead->erase(start_eod); + g[start_eod].assert_flags = 0; + dead->erase(start_eod); } else { add_edge(u, g.acceptEod, g[e], g); - } - dead->insert(e); - } - } else { - /* we can remove the asserts as we have partitioned the vertices - * into w/nw around the assert edges - */ - 
g[e].assert_flags = 0; - } - } -} - + } + dead->insert(e); + } + } else { + /* we can remove the asserts as we have partitioned the vertices + * into w/nw around the assert edges + */ + g[e].assert_flags = 0; + } + } +} + void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { - vector<NFAEdge> asserts = getAsserts(g); - if (asserts.empty()) { - return; - } - - map<u32, NFAVertex> to_split; /* by index, for determinism */ - map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */ - findSplitters(g, asserts, &to_split, &to_split_ucp); - if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { + vector<NFAEdge> asserts = getAsserts(g); + if (asserts.empty()) { + return; + } + + map<u32, NFAVertex> to_split; /* by index, for determinism */ + map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */ + findSplitters(g, asserts, &to_split, &to_split_ucp); + if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { throw CompileError(expr.index, "Pattern is too large."); - } - - for (const auto &m : to_split) { - assert(!contains(to_split_ucp, m.first)); + } + + for (const auto &m : to_split) { + assert(!contains(to_split_ucp, m.first)); splitVertex(rm, g, expr, m.second, false); - } - - for (const auto &m : to_split_ucp) { + } + + for (const auto &m : to_split_ucp) { splitVertex(rm, g, expr, m.second, true); - } - - set<NFAEdge> dead; + } + + set<NFAEdge> dead; resolveEdges(rm, g, expr, &dead); - - remove_edges(dead, g); + + remove_edges(dead, g); renumber_vertices(g); - pruneUseless(g); - pruneEmptyVertices(g); - + pruneUseless(g); + pruneEmptyVertices(g); + renumber_vertices(g); renumber_edges(g); - clearReports(g); -} - + clearReports(g); +} + void ensureCodePointStart(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { - /* In utf8 mode there is an implicit assertion that we start at codepoint - * boundaries. Assert resolution handles the badness coming from asserts. 
- * The only other source of trouble is startDs->accept connections. - */ + /* In utf8 mode there is an implicit assertion that we start at codepoint + * boundaries. Assert resolution handles the badness coming from asserts. + * The only other source of trouble is startDs->accept connections. + */ NFAEdge orig = edge(g.startDs, g.accept, g); if (expr.utf8 && orig) { DEBUG_PRINTF("rectifying %u\n", expr.report); Report ir = rm.getBasicInternalReport(expr); - ReportID rep = rm.getInternalId(ir); - - NFAVertex v_a = add_vertex(g); - g[v_a].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_a].char_reach = UTF_ASCII_CR; - add_edge(v_a, g.accept, g[orig], g); - - NFAVertex v_2 = add_vertex(g); - g[v_2].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX); - - NFAVertex v_3 = add_vertex(g); - g[v_3].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX); - - NFAVertex v_4 = add_vertex(g); - g[v_4].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX); - - NFAVertex v_c = add_vertex(g); - g[v_c].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_c].char_reach = UTF_CONT_CR; - add_edge(v_c, g.accept, g[orig], g); - - add_edge(v_2, v_c, g); - - NFAVertex v_3c = add_vertex(g); - g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_3c].char_reach = UTF_CONT_CR; - add_edge(v_3c, v_c, g); - add_edge(v_3, v_3c, g); - - NFAVertex v_4c = add_vertex(g); - g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START; - g[v_4c].char_reach = UTF_CONT_CR; - add_edge(v_4c, v_3c, g); - add_edge(v_4, v_4c, g); - - g[v_a].reports.insert(rep); - g[v_c].reports.insert(rep); - - add_edge(g.start, v_a, g); - add_edge(g.startDs, v_a, g); - add_edge(g.start, v_2, g); - add_edge(g.startDs, v_2, g); - add_edge(g.start, v_3, g); - add_edge(g.startDs, v_3, g); - add_edge(g.start, v_4, g); - add_edge(g.startDs, v_4, g); - remove_edge(orig, g); + ReportID rep = 
rm.getInternalId(ir); + + NFAVertex v_a = add_vertex(g); + g[v_a].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_a].char_reach = UTF_ASCII_CR; + add_edge(v_a, g.accept, g[orig], g); + + NFAVertex v_2 = add_vertex(g); + g[v_2].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX); + + NFAVertex v_3 = add_vertex(g); + g[v_3].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX); + + NFAVertex v_4 = add_vertex(g); + g[v_4].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX); + + NFAVertex v_c = add_vertex(g); + g[v_c].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_c].char_reach = UTF_CONT_CR; + add_edge(v_c, g.accept, g[orig], g); + + add_edge(v_2, v_c, g); + + NFAVertex v_3c = add_vertex(g); + g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_3c].char_reach = UTF_CONT_CR; + add_edge(v_3c, v_c, g); + add_edge(v_3, v_3c, g); + + NFAVertex v_4c = add_vertex(g); + g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START; + g[v_4c].char_reach = UTF_CONT_CR; + add_edge(v_4c, v_3c, g); + add_edge(v_4, v_4c, g); + + g[v_a].reports.insert(rep); + g[v_c].reports.insert(rep); + + add_edge(g.start, v_a, g); + add_edge(g.startDs, v_a, g); + add_edge(g.start, v_2, g); + add_edge(g.startDs, v_2, g); + add_edge(g.start, v_3, g); + add_edge(g.startDs, v_3, g); + add_edge(g.start, v_4, g); + add_edge(g.startDs, v_4, g); + remove_edge(orig, g); renumber_edges(g); clearReports(g); - } -} - -} // namespace ue2 + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h index 2534f57147..edbc3d5d80 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h @@ -1,50 +1,50 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Resolve special assert vertices. - */ - -#ifndef NG_ASSERTS_H -#define NG_ASSERTS_H - -namespace ue2 { - -struct BoundaryReports; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Resolve special assert vertices. 
+ */ + +#ifndef NG_ASSERTS_H +#define NG_ASSERTS_H + +namespace ue2 { + +struct BoundaryReports; class ExpressionInfo; class NGHolder; -class ReportManager; - +class ReportManager; + void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); - + void ensureCodePointStart(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); - -} // namespace ue2 - -#endif // NG_ASSERTS_H + +} // namespace ue2 + +#endif // NG_ASSERTS_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp index 60f667f491..33edad8bef 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp @@ -1,278 +1,278 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief: NFA Graph Builder: used by Glushkov construction to construct an + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief: NFA Graph Builder: used by Glushkov construction to construct an * NGHolder from a parsed expression. - */ + */ #include "ng_builder.h" -#include "grey.h" -#include "ng.h" -#include "ng_util.h" -#include "ue2common.h" -#include "compiler/compiler.h" // for ParsedExpression -#include "util/compile_error.h" -#include "util/make_unique.h" - -#include <cassert> - -using namespace std; - -namespace ue2 { - -namespace { - -/** Concrete implementation of NFABuilder interface. 
*/ -class NFABuilderImpl : public NFABuilder { -public: - NFABuilderImpl(ReportManager &rm, const Grey &grey, - const ParsedExpression &expr); - - ~NFABuilderImpl() override; - - Position makePositions(size_t nPositions) override; - Position getStart() const override; - Position getStartDotStar() const override; - Position getAccept() const override; - Position getAcceptEOD() const override; - - bool isSpecialState(Position p) const override; - - void setNodeReportID(Position position, int offsetAdjust) override; - void addCharReach(Position position, const CharReach &cr) override; - void setAssertFlag(Position position, u32 flag) override; - u32 getAssertFlag(Position position) override; - - void addVertex(Position p) override; - - void addEdge(Position start, Position end) override; - - bool hasEdge(Position start, Position end) const override; - - u32 numVertices() const override { return vertIdx; } - - void cloneRegion(Position first, Position last, - unsigned posOffset) override; - +#include "grey.h" +#include "ng.h" +#include "ng_util.h" +#include "ue2common.h" +#include "compiler/compiler.h" // for ParsedExpression +#include "util/compile_error.h" +#include "util/make_unique.h" + +#include <cassert> + +using namespace std; + +namespace ue2 { + +namespace { + +/** Concrete implementation of NFABuilder interface. 
*/ +class NFABuilderImpl : public NFABuilder { +public: + NFABuilderImpl(ReportManager &rm, const Grey &grey, + const ParsedExpression &expr); + + ~NFABuilderImpl() override; + + Position makePositions(size_t nPositions) override; + Position getStart() const override; + Position getStartDotStar() const override; + Position getAccept() const override; + Position getAcceptEOD() const override; + + bool isSpecialState(Position p) const override; + + void setNodeReportID(Position position, int offsetAdjust) override; + void addCharReach(Position position, const CharReach &cr) override; + void setAssertFlag(Position position, u32 flag) override; + u32 getAssertFlag(Position position) override; + + void addVertex(Position p) override; + + void addEdge(Position start, Position end) override; + + bool hasEdge(Position start, Position end) const override; + + u32 numVertices() const override { return vertIdx; } + + void cloneRegion(Position first, Position last, + unsigned posOffset) override; + BuiltExpression getGraph() override; - -private: - /** fetch a vertex given its Position ID. */ - NFAVertex getVertex(Position pos) const; - - /** \brief Internal convenience function to add an edge (u, v). */ - pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v); - - /** \brief We use the ReportManager to hand out new internal reports. */ - ReportManager &rm; - - /** \brief Greybox: used for resource limits. */ - const Grey &grey; - + +private: + /** fetch a vertex given its Position ID. */ + NFAVertex getVertex(Position pos) const; + + /** \brief Internal convenience function to add an edge (u, v). */ + pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v); + + /** \brief We use the ReportManager to hand out new internal reports. */ + ReportManager &rm; + + /** \brief Greybox: used for resource limits. */ + const Grey &grey; + /** \brief Underlying graph. */ unique_ptr<NGHolder> graph; - + /** \brief Underlying expression info. 
*/ ExpressionInfo expr; - /** \brief mapping from position to vertex. Use \ref getVertex for access. - * */ - vector<NFAVertex> id2vertex; - - /** \brief Index of next vertex. */ - u32 vertIdx; -}; // class NFABuilderImpl - -} // namespace - -NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, + /** \brief mapping from position to vertex. Use \ref getVertex for access. + * */ + vector<NFAVertex> id2vertex; + + /** \brief Index of next vertex. */ + u32 vertIdx; +}; // class NFABuilderImpl + +} // namespace + +NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, const ParsedExpression &parsed) : rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()), expr(parsed.expr), vertIdx(N_SPECIALS) { - - // Reserve space for a reasonably-sized NFA - id2vertex.reserve(64); - id2vertex.resize(N_SPECIALS); - id2vertex[NODE_START] = graph->start; - id2vertex[NODE_START_DOTSTAR] = graph->startDs; - id2vertex[NODE_ACCEPT] = graph->accept; - id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod; -} - -NFABuilderImpl::~NFABuilderImpl() { - // empty -} - -NFAVertex NFABuilderImpl::getVertex(Position pos) const { - assert(id2vertex.size() >= pos); - const NFAVertex v = id2vertex[pos]; + + // Reserve space for a reasonably-sized NFA + id2vertex.reserve(64); + id2vertex.resize(N_SPECIALS); + id2vertex[NODE_START] = graph->start; + id2vertex[NODE_START_DOTSTAR] = graph->startDs; + id2vertex[NODE_ACCEPT] = graph->accept; + id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod; +} + +NFABuilderImpl::~NFABuilderImpl() { + // empty +} + +NFAVertex NFABuilderImpl::getVertex(Position pos) const { + assert(id2vertex.size() >= pos); + const NFAVertex v = id2vertex[pos]; assert(v != NGHolder::null_vertex()); assert((*graph)[v].index == pos); - return v; -} - -void NFABuilderImpl::addVertex(Position pos) { - // Enforce resource limit. 
- if (pos > grey.limitGraphVertices) { - throw CompileError("Pattern too large."); - } - - NFAVertex v = add_vertex(*graph); - if (id2vertex.size() <= pos) { - id2vertex.resize(pos + 1); - } - id2vertex[pos] = v; + return v; +} + +void NFABuilderImpl::addVertex(Position pos) { + // Enforce resource limit. + if (pos > grey.limitGraphVertices) { + throw CompileError("Pattern too large."); + } + + NFAVertex v = add_vertex(*graph); + if (id2vertex.size() <= pos) { + id2vertex.resize(pos + 1); + } + id2vertex[pos] = v; (*graph)[v].index = pos; -} - +} + BuiltExpression NFABuilderImpl::getGraph() { - DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n", - num_vertices(*graph), num_edges(*graph)); - - if (num_edges(*graph) > grey.limitGraphEdges) { - throw CompileError("Pattern too large."); - } - if (num_vertices(*graph) > grey.limitGraphVertices) { - throw CompileError("Pattern too large."); - } - + DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n", + num_vertices(*graph), num_edges(*graph)); + + if (num_edges(*graph) > grey.limitGraphEdges) { + throw CompileError("Pattern too large."); + } + if (num_vertices(*graph) > grey.limitGraphVertices) { + throw CompileError("Pattern too large."); + } + return { expr, move(graph) }; -} - -void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { +} + +void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { Report ir = rm.getBasicInternalReport(expr, offsetAdjust); - DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n", + DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n", pos, expr.report, offsetAdjust, ir.ekey); - - NFAVertex v = getVertex(pos); - auto &reports = (*graph)[v].reports; - reports.clear(); - reports.insert(rm.getInternalId(ir)); -} - -void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) { - NFAVertex v = getVertex(pos); + + NFAVertex v = getVertex(pos); + auto &reports = (*graph)[v].reports; + reports.clear(); + 
reports.insert(rm.getInternalId(ir)); +} + +void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) { + NFAVertex v = getVertex(pos); (*graph)[v].char_reach |= cr; -} - -void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) { - NFAVertex v = getVertex(pos); +} + +void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) { + NFAVertex v = getVertex(pos); (*graph)[v].assert_flags |= flag; -} - -u32 NFABuilderImpl::getAssertFlag(Position pos) { - NFAVertex v = getVertex(pos); +} + +u32 NFABuilderImpl::getAssertFlag(Position pos) { + NFAVertex v = getVertex(pos); return (*graph)[v].assert_flags; -} - -pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) { - // assert that the edge doesn't already exist +} + +pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) { + // assert that the edge doesn't already exist assert(edge(u, v, *graph).second == false); - + return add_edge(u, v, *graph); -} - -void NFABuilderImpl::addEdge(Position startPos, Position endPos) { - DEBUG_PRINTF("%u -> %u\n", startPos, endPos); - assert(startPos < vertIdx); - assert(endPos < vertIdx); - - NFAVertex u = getVertex(startPos); - NFAVertex v = getVertex(endPos); - - if ((u == graph->start || u == graph->startDs) && v == graph->startDs) { - /* standard special -> special edges already exist */ +} + +void NFABuilderImpl::addEdge(Position startPos, Position endPos) { + DEBUG_PRINTF("%u -> %u\n", startPos, endPos); + assert(startPos < vertIdx); + assert(endPos < vertIdx); + + NFAVertex u = getVertex(startPos); + NFAVertex v = getVertex(endPos); + + if ((u == graph->start || u == graph->startDs) && v == graph->startDs) { + /* standard special -> special edges already exist */ assert(edge(u, v, *graph).second == true); - return; - } - + return; + } + assert(edge(u, v, *graph).second == false); - addEdge(u, v); -} - -bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const { + addEdge(u, v); +} + +bool NFABuilderImpl::hasEdge(Position 
startPos, Position endPos) const { return edge(getVertex(startPos), getVertex(endPos), *graph).second; -} - -Position NFABuilderImpl::getStart() const { - return NODE_START; -} - -Position NFABuilderImpl::getStartDotStar() const { - return NODE_START_DOTSTAR; -} - -Position NFABuilderImpl::getAccept() const { - return NODE_ACCEPT; -} - -Position NFABuilderImpl::getAcceptEOD() const { - return NODE_ACCEPT_EOD; -} - -bool NFABuilderImpl::isSpecialState(Position p) const { - return (p == NODE_START || p == NODE_START_DOTSTAR || - p == NODE_ACCEPT || p == NODE_ACCEPT_EOD); -} - -Position NFABuilderImpl::makePositions(size_t nPositions) { - Position base = vertIdx; - for (size_t i = 0; i < nPositions; i++) { - addVertex(vertIdx++); - } - DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base); - return base; -} - -void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) { +} + +Position NFABuilderImpl::getStart() const { + return NODE_START; +} + +Position NFABuilderImpl::getStartDotStar() const { + return NODE_START_DOTSTAR; +} + +Position NFABuilderImpl::getAccept() const { + return NODE_ACCEPT; +} + +Position NFABuilderImpl::getAcceptEOD() const { + return NODE_ACCEPT_EOD; +} + +bool NFABuilderImpl::isSpecialState(Position p) const { + return (p == NODE_START || p == NODE_START_DOTSTAR || + p == NODE_ACCEPT || p == NODE_ACCEPT_EOD); +} + +Position NFABuilderImpl::makePositions(size_t nPositions) { + Position base = vertIdx; + for (size_t i = 0; i < nPositions; i++) { + addVertex(vertIdx++); + } + DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base); + return base; +} + +void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) { NGHolder &g = *graph; - assert(posOffset > 0); - - // walk the nodes between first and last and copy their vertex properties - DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last, - posOffset); - for (Position i = first; i <= last; ++i) { - 
NFAVertex orig = getVertex(i); - Position destIdx = i + posOffset; - assert(destIdx < vertIdx); - NFAVertex dest = getVertex(destIdx); - g[dest] = g[orig]; // all properties - g[dest].index = destIdx; - } -} - -unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc, - const ParsedExpression &expr) { - return ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr); -} - -NFABuilder::~NFABuilder() { } - -} // namespace ue2 + assert(posOffset > 0); + + // walk the nodes between first and last and copy their vertex properties + DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last, + posOffset); + for (Position i = first; i <= last; ++i) { + NFAVertex orig = getVertex(i); + Position destIdx = i + posOffset; + assert(destIdx < vertIdx); + NFAVertex dest = getVertex(destIdx); + g[dest] = g[orig]; // all properties + g[dest].index = destIdx; + } +} + +unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc, + const ParsedExpression &expr) { + return ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr); +} + +NFABuilder::~NFABuilder() { } + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h index 9f71b62235..7158620e70 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h @@ -1,99 +1,99 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief: NFA Graph Builder: used by Glushkov construction to construct an + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief: NFA Graph Builder: used by Glushkov construction to construct an * NGHolder from a parsed expression. - */ - -#ifndef NG_BUILDER_H -#define NG_BUILDER_H - -#include "ue2common.h" - -#include "parser/position.h" + */ + +#ifndef NG_BUILDER_H +#define NG_BUILDER_H + +#include "ue2common.h" + +#include "parser/position.h" #include "util/noncopyable.h" - -#include <memory> - -namespace ue2 { - -class CharReach; -class ReportManager; + +#include <memory> + +namespace ue2 { + +class CharReach; +class ReportManager; struct BuiltExpression; -struct CompileContext; - -class ParsedExpression; - -/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct - * one. Used by GlushkovBuildState. */ +struct CompileContext; + +class ParsedExpression; + +/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct + * one. Used by GlushkovBuildState. 
*/ class NFABuilder : noncopyable { -public: - virtual ~NFABuilder(); - - virtual Position makePositions(size_t nPositions) = 0; - virtual Position getStart() const = 0; - virtual Position getStartDotStar() const = 0; - virtual Position getAccept() const = 0; - virtual Position getAcceptEOD() const = 0; - - virtual bool isSpecialState(Position p) const = 0; - - virtual void setNodeReportID(Position position, int offsetAdjust) = 0; - virtual void addCharReach(Position position, const CharReach &cr) = 0; - - /* or-in vertex assertions */ - virtual void setAssertFlag(Position position, u32 flag) = 0; - virtual u32 getAssertFlag(Position position) = 0; - - virtual void addVertex(Position p) = 0; - - virtual void addEdge(Position start, Position end) = 0; - - virtual bool hasEdge(Position start, Position end) const = 0; - - virtual u32 numVertices() const = 0; - - virtual void cloneRegion(Position first, Position last, - unsigned posOffset) = 0; - - /** +public: + virtual ~NFABuilder(); + + virtual Position makePositions(size_t nPositions) = 0; + virtual Position getStart() const = 0; + virtual Position getStartDotStar() const = 0; + virtual Position getAccept() const = 0; + virtual Position getAcceptEOD() const = 0; + + virtual bool isSpecialState(Position p) const = 0; + + virtual void setNodeReportID(Position position, int offsetAdjust) = 0; + virtual void addCharReach(Position position, const CharReach &cr) = 0; + + /* or-in vertex assertions */ + virtual void setAssertFlag(Position position, u32 flag) = 0; + virtual u32 getAssertFlag(Position position) = 0; + + virtual void addVertex(Position p) = 0; + + virtual void addEdge(Position start, Position end) = 0; + + virtual bool hasEdge(Position start, Position end) const = 0; + + virtual u32 numVertices() const = 0; + + virtual void cloneRegion(Position first, Position last, + unsigned posOffset) = 0; + + /** * \brief Returns the built NGHolder graph and ExpressionInfo. 
- * Note that this builder cannot be used after this call. - */ + * Note that this builder cannot be used after this call. + */ virtual BuiltExpression getGraph() = 0; -}; - -/** Construct a usable NFABuilder. */ -std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, - const CompileContext &cc, - const ParsedExpression &expr); - -} // namespace ue2 - -#endif +}; + +/** Construct a usable NFABuilder. */ +std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, + const CompileContext &cc, + const ParsedExpression &expr); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp index 3e9454eeed..3474ca9875 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp @@ -1,232 +1,232 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Splits an NFA graph into its connected components. - * - * This pass takes a NGHolder and splits its graph into a set of connected - * components, returning them as individual NGHolder graphs. For example, the - * graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split - * into four NGHolders, representing these four components: - * - * - /foo.*bar/ - * - /[a-z]{7,13}/ - * - /hatstand/ - * - /teakettle$/ - * - * The pass operates by creating an undirected graph from the input graph, and - * then using the BGL's connected_components algorithm to do the work, cloning - * the identified components into their own graphs. A "shell" of vertices - * is identified and removed first from the head and tail of the graph, in - * order to handle cases where there is a common head/tail region. - * - * Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/, - * are not split, as later optimisations will handle these cases efficiently. - */ -#include "ng_calc_components.h" - -#include "ng_depth.h" -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_util.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Splits an NFA graph into its connected components. + * + * This pass takes a NGHolder and splits its graph into a set of connected + * components, returning them as individual NGHolder graphs. For example, the + * graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split + * into four NGHolders, representing these four components: + * + * - /foo.*bar/ + * - /[a-z]{7,13}/ + * - /hatstand/ + * - /teakettle$/ + * + * The pass operates by creating an undirected graph from the input graph, and + * then using the BGL's connected_components algorithm to do the work, cloning + * the identified components into their own graphs. 
A "shell" of vertices + * is identified and removed first from the head and tail of the graph, in + * order to handle cases where there is a common head/tail region. + * + * Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/, + * are not split, as later optimisations will handle these cases efficiently. + */ +#include "ng_calc_components.h" + +#include "ng_depth.h" +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_util.h" #include "grey.h" -#include "ue2common.h" -#include "util/graph_range.h" +#include "ue2common.h" +#include "util/graph_range.h" #include "util/graph_undirected.h" -#include "util/make_unique.h" - -#include <map> -#include <vector> - -#include <boost/graph/connected_components.hpp> +#include "util/make_unique.h" + +#include <map> +#include <vector> + +#include <boost/graph/connected_components.hpp> #include <boost/graph/filtered_graph.hpp> - -using namespace std; - -namespace ue2 { - -static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3; -static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3; - -/** - * \brief Returns true if the whole graph is just an alternation of character - * classes. - */ -bool isAlternationOfClasses(const NGHolder &g) { - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - // Vertex must have in edges from starts only. - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_any_start(u, g)) { - return false; - } - } - // Vertex must have out edges to accepts only. - for (auto w : adjacent_vertices_range(v, g)) { - if (!is_any_accept(w, g)) { - return false; - } - } - } - - DEBUG_PRINTF("alternation of single states, treating as one comp\n"); - return true; -} - -/** - * \brief Compute initial max distance to v from start (i.e. ignoring its own - * self-loop). 
- */ -static -depth max_dist_from_start(const NGHolder &g, - const vector<NFAVertexBidiDepth> &depths, - NFAVertex v) { - depth max_depth(0); - for (const auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - const auto &d = depths.at(g[u].index); - if (d.fromStart.max.is_reachable()) { - max_depth = max(max_depth, d.fromStart.max); - } - if (d.fromStartDotStar.max.is_reachable()) { - max_depth = max(max_depth, d.fromStartDotStar.max); - } - } - return max_depth + 1; -} - -/** - * \brief Compute initial max depth from v from accept (i.e. ignoring its own - * self-loop). - */ -static -depth max_dist_to_accept(const NGHolder &g, - const vector<NFAVertexBidiDepth> &depths, - NFAVertex v) { - depth max_depth(0); - for (const auto w : adjacent_vertices_range(v, g)) { - if (w == v) { - continue; - } - const auto &d = depths.at(g[w].index); - if (d.toAccept.max.is_reachable()) { - max_depth = max(max_depth, d.toAccept.max); - } - if (d.toAcceptEod.max.is_reachable()) { - max_depth = max(max_depth, d.toAcceptEod.max); - } - } - return max_depth + 1; -} - -static -flat_set<NFAVertex> findHeadShell(const NGHolder &g, - const vector<NFAVertexBidiDepth> &depths, - const depth &max_dist) { - flat_set<NFAVertex> shell; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - if (max_dist_from_start(g, depths, v) <= max_dist) { - shell.insert(v); - } - } - - for (UNUSED auto v : shell) { + +using namespace std; + +namespace ue2 { + +static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3; +static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3; + +/** + * \brief Returns true if the whole graph is just an alternation of character + * classes. + */ +bool isAlternationOfClasses(const NGHolder &g) { + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + // Vertex must have in edges from starts only. 
+ for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_any_start(u, g)) { + return false; + } + } + // Vertex must have out edges to accepts only. + for (auto w : adjacent_vertices_range(v, g)) { + if (!is_any_accept(w, g)) { + return false; + } + } + } + + DEBUG_PRINTF("alternation of single states, treating as one comp\n"); + return true; +} + +/** + * \brief Compute initial max distance to v from start (i.e. ignoring its own + * self-loop). + */ +static +depth max_dist_from_start(const NGHolder &g, + const vector<NFAVertexBidiDepth> &depths, + NFAVertex v) { + depth max_depth(0); + for (const auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + const auto &d = depths.at(g[u].index); + if (d.fromStart.max.is_reachable()) { + max_depth = max(max_depth, d.fromStart.max); + } + if (d.fromStartDotStar.max.is_reachable()) { + max_depth = max(max_depth, d.fromStartDotStar.max); + } + } + return max_depth + 1; +} + +/** + * \brief Compute initial max depth from v from accept (i.e. ignoring its own + * self-loop). 
+ */ +static +depth max_dist_to_accept(const NGHolder &g, + const vector<NFAVertexBidiDepth> &depths, + NFAVertex v) { + depth max_depth(0); + for (const auto w : adjacent_vertices_range(v, g)) { + if (w == v) { + continue; + } + const auto &d = depths.at(g[w].index); + if (d.toAccept.max.is_reachable()) { + max_depth = max(max_depth, d.toAccept.max); + } + if (d.toAcceptEod.max.is_reachable()) { + max_depth = max(max_depth, d.toAcceptEod.max); + } + } + return max_depth + 1; +} + +static +flat_set<NFAVertex> findHeadShell(const NGHolder &g, + const vector<NFAVertexBidiDepth> &depths, + const depth &max_dist) { + flat_set<NFAVertex> shell; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + if (max_dist_from_start(g, depths, v) <= max_dist) { + shell.insert(v); + } + } + + for (UNUSED auto v : shell) { DEBUG_PRINTF("shell: %zu\n", g[v].index); - } - - return shell; -} - -static -flat_set<NFAVertex> findTailShell(const NGHolder &g, - const vector<NFAVertexBidiDepth> &depths, - const depth &max_dist) { - flat_set<NFAVertex> shell; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - if (max_dist_to_accept(g, depths, v) <= max_dist) { - shell.insert(v); - } - } - - for (UNUSED auto v : shell) { + } + + return shell; +} + +static +flat_set<NFAVertex> findTailShell(const NGHolder &g, + const vector<NFAVertexBidiDepth> &depths, + const depth &max_dist) { + flat_set<NFAVertex> shell; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + if (max_dist_to_accept(g, depths, v) <= max_dist) { + shell.insert(v); + } + } + + for (UNUSED auto v : shell) { DEBUG_PRINTF("shell: %zu\n", g[v].index); - } - - return shell; -} - -static -vector<NFAEdge> findShellEdges(const NGHolder &g, - const flat_set<NFAVertex> &head_shell, - const flat_set<NFAVertex> &tail_shell) { - vector<NFAEdge> shell_edges; - - for (const auto &e : edges_range(g)) { - auto u = source(e, g); - auto v = target(e, g); - - if 
(v == g.startDs && is_any_start(u, g)) { - continue; - } - if (u == g.accept && v == g.acceptEod) { - continue; - } - - if ((is_special(u, g) || contains(head_shell, u)) && - (is_special(v, g) || contains(tail_shell, v))) { + } + + return shell; +} + +static +vector<NFAEdge> findShellEdges(const NGHolder &g, + const flat_set<NFAVertex> &head_shell, + const flat_set<NFAVertex> &tail_shell) { + vector<NFAEdge> shell_edges; + + for (const auto &e : edges_range(g)) { + auto u = source(e, g); + auto v = target(e, g); + + if (v == g.startDs && is_any_start(u, g)) { + continue; + } + if (u == g.accept && v == g.acceptEod) { + continue; + } + + if ((is_special(u, g) || contains(head_shell, u)) && + (is_special(v, g) || contains(tail_shell, v))) { DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index, g[v].index); - shell_edges.push_back(e); - } - } - - return shell_edges; -} - + shell_edges.push_back(e); + } + } + + return shell_edges; +} + template<typename GetAdjRange> bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell, GetAdjRange adj_range_func) { if (shell.empty()) { DEBUG_PRINTF("no shell\n"); return false; - } + } NFAVertex exit_vertex = NGHolder::null_vertex(); for (auto u : shell) { @@ -246,62 +246,62 @@ bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell, } return true; -} - +} + /** * True if all edges out of vertices in the head shell lead to at most a single * outside vertex, or the inverse for the tail shell. 
*/ -static +static bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell, const flat_set<NFAVertex> &tail_shell) { if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) { DEBUG_PRINTF("head shell has only one path through it\n"); return true; - } + } if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) { DEBUG_PRINTF("tail shell has only one path into it\n"); return true; } return false; -} - -/** - * Common code called by calc- and recalc- below. Splits the given holder into - * one or more connected components, adding them to the comps deque. - */ -static +} + +/** + * Common code called by calc- and recalc- below. Splits the given holder into + * one or more connected components, adding them to the comps deque. + */ +static void splitIntoComponents(unique_ptr<NGHolder> g, deque<unique_ptr<NGHolder>> &comps, - const depth &max_head_depth, - const depth &max_tail_depth, bool *shell_comp) { + const depth &max_head_depth, + const depth &max_tail_depth, bool *shell_comp) { DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g)); - - assert(shell_comp); - *shell_comp = false; - - // Compute "shell" head and tail subgraphs. + + assert(shell_comp); + *shell_comp = false; + + // Compute "shell" head and tail subgraphs. auto depths = calcBidiDepths(*g); auto head_shell = findHeadShell(*g, depths, max_head_depth); auto tail_shell = findTailShell(*g, depths, max_tail_depth); - for (auto v : head_shell) { - tail_shell.erase(v); - } - + for (auto v : head_shell) { + tail_shell.erase(v); + } + if (head_shell.size() + tail_shell.size() + N_SPECIALS >= num_vertices(*g)) { - DEBUG_PRINTF("all in shell component\n"); + DEBUG_PRINTF("all in shell component\n"); comps.push_back(std::move(g)); - *shell_comp = true; - return; - } - + *shell_comp = true; + return; + } + // Find edges connecting the head and tail shells directly. 
vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell); - - DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", - head_shell.size(), tail_shell.size(), shell_edges.size()); - + + DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", + head_shell.size(), tail_shell.size(), shell_edges.size()); + // If there are no shell edges and only one path out of the head shell or // into the tail shell, we aren't going to find more than one component. if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) { @@ -309,152 +309,152 @@ void splitIntoComponents(unique_ptr<NGHolder> g, comps.push_back(std::move(g)); return; } - + auto ug = make_undirected_graph(*g); - + // Filter specials and shell vertices from undirected graph. unordered_set<NFAVertex> bad_vertices( {g->start, g->startDs, g->accept, g->acceptEod}); bad_vertices.insert(head_shell.begin(), head_shell.end()); bad_vertices.insert(tail_shell.begin(), tail_shell.end()); - + auto filtered_ug = boost::make_filtered_graph( ug, boost::keep_all(), make_bad_vertex_filter(&bad_vertices)); - + // Actually run the connected components algorithm. map<NFAVertex, u32> split_components; - const u32 num = connected_components( + const u32 num = connected_components( filtered_ug, boost::make_assoc_property_map(split_components)); - - assert(num > 0); - if (num == 1 && shell_edges.empty()) { - DEBUG_PRINTF("single component\n"); + + assert(num > 0); + if (num == 1 && shell_edges.empty()) { + DEBUG_PRINTF("single component\n"); comps.push_back(std::move(g)); - return; - } - - DEBUG_PRINTF("broke graph into %u components\n", num); - - vector<deque<NFAVertex>> verts(num); - - // Collect vertex lists per component. - for (const auto &m : split_components) { + return; + } + + DEBUG_PRINTF("broke graph into %u components\n", num); + + vector<deque<NFAVertex>> verts(num); + + // Collect vertex lists per component. 
+ for (const auto &m : split_components) { NFAVertex v = m.first; - u32 c = m.second; - verts[c].push_back(v); + u32 c = m.second; + verts[c].push_back(v); DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); - } - + } + unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder - for (auto &vv : verts) { - // Shells are in every component. - vv.insert(vv.end(), begin(head_shell), end(head_shell)); - vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - + for (auto &vv : verts) { + // Shells are in every component. + vv.insert(vv.end(), begin(head_shell), end(head_shell)); + vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); + /* Sort for determinism. Still required as NFAUndirectedVertex have * no deterministic ordering (split_components map). */ sort(begin(vv), end(vv)); - - auto gc = ue2::make_unique<NGHolder>(); - v_map.clear(); + + auto gc = ue2::make_unique<NGHolder>(); + v_map.clear(); fillHolder(gc.get(), *g, vv, &v_map); - - // Remove shell edges, which will get their own component. - for (const auto &e : shell_edges) { + + // Remove shell edges, which will get their own component. + for (const auto &e : shell_edges) { auto cu = v_map.at(source(e, *g)); auto cv = v_map.at(target(e, *g)); - assert(edge(cu, cv, *gc).second); - remove_edge(cu, cv, *gc); - } - - pruneUseless(*gc); - DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(), - num_vertices(*gc)); - comps.push_back(move(gc)); - } - - // Another component to handle the direct shell-to-shell edges. 
- if (!shell_edges.empty()) { - deque<NFAVertex> vv; - vv.insert(vv.end(), begin(head_shell), end(head_shell)); - vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - - auto gc = ue2::make_unique<NGHolder>(); - v_map.clear(); + assert(edge(cu, cv, *gc).second); + remove_edge(cu, cv, *gc); + } + + pruneUseless(*gc); + DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(), + num_vertices(*gc)); + comps.push_back(move(gc)); + } + + // Another component to handle the direct shell-to-shell edges. + if (!shell_edges.empty()) { + deque<NFAVertex> vv; + vv.insert(vv.end(), begin(head_shell), end(head_shell)); + vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); + + auto gc = ue2::make_unique<NGHolder>(); + v_map.clear(); fillHolder(gc.get(), *g, vv, &v_map); - - pruneUseless(*gc); - DEBUG_PRINTF("shell edge component %zu has %zu vertices\n", - comps.size(), num_vertices(*gc)); - comps.push_back(move(gc)); - *shell_comp = true; - } - + + pruneUseless(*gc); + DEBUG_PRINTF("shell edge component %zu has %zu vertices\n", + comps.size(), num_vertices(*gc)); + comps.push_back(move(gc)); + *shell_comp = true; + } + // Ensure that only vertices with accept edges have reports. for (auto &gc : comps) { assert(gc); clearReports(*gc); } - // We should never produce empty component graphs. - assert(all_of(begin(comps), end(comps), - [](const unique_ptr<NGHolder> &g_comp) { - return num_vertices(*g_comp) > N_SPECIALS; - })); -} - + // We should never produce empty component graphs. + assert(all_of(begin(comps), end(comps), + [](const unique_ptr<NGHolder> &g_comp) { + return num_vertices(*g_comp) > N_SPECIALS; + })); +} + deque<unique_ptr<NGHolder>> calcComponents(unique_ptr<NGHolder> g, const Grey &grey) { - deque<unique_ptr<NGHolder>> comps; - - // For trivial cases, we needn't bother running the full - // connected_components algorithm. 
+ deque<unique_ptr<NGHolder>> comps; + + // For trivial cases, we needn't bother running the full + // connected_components algorithm. if (!grey.calcComponents || isAlternationOfClasses(*g)) { comps.push_back(std::move(g)); - return comps; - } - - bool shell_comp = false; + return comps; + } + + bool shell_comp = false; splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH), depth(MAX_TAIL_SHELL_DEPTH), &shell_comp); - - if (shell_comp) { - DEBUG_PRINTF("re-running on shell comp\n"); - assert(!comps.empty()); + + if (shell_comp) { + DEBUG_PRINTF("re-running on shell comp\n"); + assert(!comps.empty()); auto sc = std::move(comps.back()); - comps.pop_back(); + comps.pop_back(); splitIntoComponents(std::move(sc), comps, depth(0), depth(0), &shell_comp); - } - - DEBUG_PRINTF("finished; split into %zu components\n", comps.size()); - return comps; -} - + } + + DEBUG_PRINTF("finished; split into %zu components\n", comps.size()); + return comps; +} + void recalcComponents(deque<unique_ptr<NGHolder>> &comps, const Grey &grey) { if (!grey.calcComponents) { return; } - deque<unique_ptr<NGHolder>> out; - - for (auto &gc : comps) { - if (!gc) { - continue; // graph has been consumed already. - } - - if (isAlternationOfClasses(*gc)) { + deque<unique_ptr<NGHolder>> out; + + for (auto &gc : comps) { + if (!gc) { + continue; // graph has been consumed already. + } + + if (isAlternationOfClasses(*gc)) { out.push_back(std::move(gc)); - continue; - } - + continue; + } + auto gc_comps = calcComponents(std::move(gc), grey); out.insert(end(out), std::make_move_iterator(begin(gc_comps)), std::make_move_iterator(end(gc_comps))); - } - - // Replace comps with our recalculated list. - comps.swap(out); -} - -} // namespace ue2 + } + + // Replace comps with our recalculated list. 
+ comps.swap(out); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h index 1bcdc5f81e..3c9cc08c24 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h @@ -1,54 +1,54 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Splits an NFA graph into its connected components. 
- */ - -#ifndef NG_CALC_COMPONENTS_H -#define NG_CALC_COMPONENTS_H - -#include <deque> -#include <memory> - -namespace ue2 { - -class NGHolder; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Splits an NFA graph into its connected components. 
+ */ + +#ifndef NG_CALC_COMPONENTS_H +#define NG_CALC_COMPONENTS_H + +#include <deque> +#include <memory> + +namespace ue2 { + +class NGHolder; struct Grey; - -bool isAlternationOfClasses(const NGHolder &g); - + +bool isAlternationOfClasses(const NGHolder &g); + std::deque<std::unique_ptr<NGHolder>> calcComponents(std::unique_ptr<NGHolder> g, const Grey &grey); - + void recalcComponents(std::deque<std::unique_ptr<NGHolder>> &comps, const Grey &grey); - -} // namespace ue2 - -#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp index 0b24bf07a8..8d84acfd9e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp @@ -1,213 +1,213 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths - * leading to a cyclic repeat. - * - * This is a graph reduction pass intended to remove vertices that are - * redundant because they lead solely to a cyclic vertex with a superset of - * their character reachability. For example, in this pattern: - * - * /(abc|def|abcghi).*0123/s - * - * The vertices for 'ghi' can be removed due to the presence of the dot-star - * repeat. - * - * Algorithm: - * - * for each cyclic vertex V: - * for each proper predecessor U of V: - * let S be the set of successors of U that are successors of V - * (including V itself) - * for each successor W of U not in S: - * perform a DFS forward from W, stopping exploration when a vertex - * in S is encountered; - * if a vertex with reach not in reach(V) or an accept is encountered: - * fail and continue to the next W. - * else: - * remove (U, W) - * - * NOTE: the following code is templated not just for fun, but so that we can - * run this analysis both forward and in reverse over the graph. 
- */ -#include "ng_cyclic_redundancy.h" - -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths + * leading to a cyclic repeat. + * + * This is a graph reduction pass intended to remove vertices that are + * redundant because they lead solely to a cyclic vertex with a superset of + * their character reachability. 
For example, in this pattern: + * + * /(abc|def|abcghi).*0123/s + * + * The vertices for 'ghi' can be removed due to the presence of the dot-star + * repeat. + * + * Algorithm: + * + * for each cyclic vertex V: + * for each proper predecessor U of V: + * let S be the set of successors of U that are successors of V + * (including V itself) + * for each successor W of U not in S: + * perform a DFS forward from W, stopping exploration when a vertex + * in S is encountered; + * if a vertex with reach not in reach(V) or an accept is encountered: + * fail and continue to the next W. + * else: + * remove (U, W) + * + * NOTE: the following code is templated not just for fun, but so that we can + * run this analysis both forward and in reverse over the graph. + */ +#include "ng_cyclic_redundancy.h" + +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph_range.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" - + #include <algorithm> -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/reverse_graph.hpp> - -using namespace std; -using boost::reverse_graph; - -namespace ue2 { - -namespace { - -// Terminator function for depth first traversal, tells us not to explore -// beyond vertices in set S. -template<class Vertex, class Graph> -class VertexInSet { - public: - explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {} - bool operator()(const Vertex &v, const Graph&) const { - return contains(verts, v); - } - - private: - const flat_set<Vertex> &verts; -}; - -struct SearchFailed {}; - -// Visitor for depth first traversal, throws an error if we encounter a vertex -// with bad reach or a report. 
-class SearchVisitor : public boost::default_dfs_visitor { - public: - explicit SearchVisitor(const CharReach &r) : cr(r) {} - - template<class Vertex, class Graph> - void discover_vertex(const Vertex &v, const Graph &g) const { +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/reverse_graph.hpp> + +using namespace std; +using boost::reverse_graph; + +namespace ue2 { + +namespace { + +// Terminator function for depth first traversal, tells us not to explore +// beyond vertices in set S. +template<class Vertex, class Graph> +class VertexInSet { + public: + explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {} + bool operator()(const Vertex &v, const Graph&) const { + return contains(verts, v); + } + + private: + const flat_set<Vertex> &verts; +}; + +struct SearchFailed {}; + +// Visitor for depth first traversal, throws an error if we encounter a vertex +// with bad reach or a report. +class SearchVisitor : public boost::default_dfs_visitor { + public: + explicit SearchVisitor(const CharReach &r) : cr(r) {} + + template<class Vertex, class Graph> + void discover_vertex(const Vertex &v, const Graph &g) const { DEBUG_PRINTF("vertex %zu\n", g[v].index); - if (is_special(v, g)) { - DEBUG_PRINTF("start or accept\n"); - throw SearchFailed(); - } - - if (g[v].assert_flags) { - DEBUG_PRINTF("assert flags\n"); - throw SearchFailed(); - } - - const CharReach &vcr = g[v].char_reach; - if (vcr != (vcr & cr)) { - DEBUG_PRINTF("bad reach\n"); - throw SearchFailed(); - } - } - - private: - const CharReach &cr; -}; - -} // namespace - + if (is_special(v, g)) { + DEBUG_PRINTF("start or accept\n"); + throw SearchFailed(); + } + + if (g[v].assert_flags) { + DEBUG_PRINTF("assert flags\n"); + throw SearchFailed(); + } + + const CharReach &vcr = g[v].char_reach; + if (vcr != (vcr & cr)) { + DEBUG_PRINTF("bad reach\n"); + throw SearchFailed(); + } + } + + private: + const CharReach &cr; +}; + +} // namespace + template<class Graph, class ColorMap> -static -bool 
searchForward(const Graph &g, const CharReach &reach, +static +bool searchForward(const Graph &g, const CharReach &reach, ColorMap &colours, - const flat_set<typename Graph::vertex_descriptor> &s, - typename Graph::vertex_descriptor w) { + const flat_set<typename Graph::vertex_descriptor> &s, + typename Graph::vertex_descriptor w) { colours.fill(small_color::white); - try { + try { depth_first_visit(g, w, SearchVisitor(reach), colours, VertexInSet<typename Graph::vertex_descriptor, Graph>(s)); } catch (SearchFailed &) { - return false; - } - - return true; -} - -static + return false; + } + + return true; +} + +static NFAEdge to_raw(const NFAEdge &e, const NGHolder &) { - return e; -} - -static + return e; +} + +static NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e, const reverse_graph<NGHolder, NGHolder &> &g) { return get(boost::edge_underlying, g, e); -} - -/* returns true if we did stuff */ -template<class Graph> -static -bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, - NGHolder &raw) { - bool did_stuff = false; - - const CharReach &reach = g[v].char_reach; - - typedef typename Graph::vertex_descriptor vertex_descriptor; - +} + +/* returns true if we did stuff */ +template<class Graph> +static +bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, + NGHolder &raw) { + bool did_stuff = false; + + const CharReach &reach = g[v].char_reach; + + typedef typename Graph::vertex_descriptor vertex_descriptor; + // Colour map used for depth_first_visit(). auto colours = make_small_color_map(g); - // precalc successors of v. - flat_set<vertex_descriptor> succ_v; - insert(&succ_v, adjacent_vertices(v, g)); - - flat_set<vertex_descriptor> s; - - for (const auto &e : in_edges_range(v, g)) { - vertex_descriptor u = source(e, g); - if (u == v) { - continue; - } - if (is_any_accept(u, g)) { - continue; - } - + // precalc successors of v. 
+ flat_set<vertex_descriptor> succ_v; + insert(&succ_v, adjacent_vertices(v, g)); + + flat_set<vertex_descriptor> s; + + for (const auto &e : in_edges_range(v, g)) { + vertex_descriptor u = source(e, g); + if (u == v) { + continue; + } + if (is_any_accept(u, g)) { + continue; + } + DEBUG_PRINTF("- checking u %zu\n", g[u].index); - - // let s be intersection(succ(u), succ(v)) - s.clear(); - for (auto b : adjacent_vertices_range(u, g)) { - if (contains(succ_v, b)) { - s.insert(b); - } - } - - for (const auto &e_u : make_vector_from(out_edges(u, g))) { - vertex_descriptor w = target(e_u, g); - if (is_special(w, g) || contains(s, w)) { - continue; - } - - const CharReach &w_reach = g[w].char_reach; - if (!w_reach.isSubsetOf(reach)) { - continue; - } - + + // let s be intersection(succ(u), succ(v)) + s.clear(); + for (auto b : adjacent_vertices_range(u, g)) { + if (contains(succ_v, b)) { + s.insert(b); + } + } + + for (const auto &e_u : make_vector_from(out_edges(u, g))) { + vertex_descriptor w = target(e_u, g); + if (is_special(w, g) || contains(s, w)) { + continue; + } + + const CharReach &w_reach = g[w].char_reach; + if (!w_reach.isSubsetOf(reach)) { + continue; + } + DEBUG_PRINTF(" - checking w %zu\n", g[w].index); - + if (!searchForward(g, reach, colours, succ_v, w)) { continue; - } + } DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index); /* we are currently iterating over the in-edges of v, so it @@ -215,50 +215,50 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, assert(w != v); /* as v is in s */ remove_edge(to_raw(e_u, g), raw); did_stuff = true; - } - } - - return did_stuff; -} - -template<class Graph> -static -bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { - bool did_stuff = false; - - for (auto v : vertices_range(g)) { - if (is_special(v, g) || !edge(v, v, g).second) { - continue; - } - + } + } + + return did_stuff; +} + +template<class Graph> +static +bool cyclicPathRedundancyPass(Graph &g, NGHolder 
&raw) { + bool did_stuff = false; + + for (auto v : vertices_range(g)) { + if (is_special(v, g) || !edge(v, v, g).second) { + continue; + } + DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index); - did_stuff |= removeCyclicPathRedundancy(g, v, raw); - } - - return did_stuff; -} - -bool removeCyclicPathRedundancy(NGHolder &g) { + did_stuff |= removeCyclicPathRedundancy(g, v, raw); + } + + return did_stuff; +} + +bool removeCyclicPathRedundancy(NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - // Forward pass. + // Forward pass. bool f_changed = cyclicPathRedundancyPass(g, g); - if (f_changed) { - DEBUG_PRINTF("edges removed by forward pass\n"); - pruneUseless(g); - } - - // Reverse pass. - DEBUG_PRINTF("REVERSE PASS\n"); + if (f_changed) { + DEBUG_PRINTF("edges removed by forward pass\n"); + pruneUseless(g); + } + + // Reverse pass. + DEBUG_PRINTF("REVERSE PASS\n"); typedef reverse_graph<NGHolder, NGHolder &> RevGraph; RevGraph revg(g); - bool r_changed = cyclicPathRedundancyPass(revg, g); - if (r_changed) { - DEBUG_PRINTF("edges removed by reverse pass\n"); - pruneUseless(g); - } - - return f_changed || r_changed; -} - -} // namespace ue2 + bool r_changed = cyclicPathRedundancyPass(revg, g); + if (r_changed) { + DEBUG_PRINTF("edges removed by reverse pass\n"); + pruneUseless(g); + } + + return f_changed || r_changed; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h index 3ce07c6688..9a83c49361 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h @@ -1,45 +1,45 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of 
conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths - * leading to a cyclic repeat. - */ - -#ifndef NG_CYCLIC_REDUNDANCY_H -#define NG_CYCLIC_REDUNDANCY_H - -namespace ue2 { - -class NGHolder; - -bool removeCyclicPathRedundancy(NGHolder &g); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths + * leading to a cyclic repeat. 
+ */ + +#ifndef NG_CYCLIC_REDUNDANCY_H +#define NG_CYCLIC_REDUNDANCY_H + +namespace ue2 { + +class NGHolder; + +bool removeCyclicPathRedundancy(NGHolder &g); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp index 6c90326ce4..e952ff445e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp @@ -1,398 +1,398 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief NFA graph vertex depth calculations. - */ -#include "ng_depth.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/graph_range.h" + * \brief NFA graph vertex depth calculations. + */ +#include "ng_depth.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" - -#include <deque> -#include <vector> - + +#include <deque> +#include <vector> + #include <boost/graph/breadth_first_search.hpp> -#include <boost/graph/dag_shortest_paths.hpp> -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/filtered_graph.hpp> +#include <boost/graph/dag_shortest_paths.hpp> +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/filtered_graph.hpp> #include <boost/graph/property_maps/constant_property_map.hpp> -#include <boost/graph/reverse_graph.hpp> -#include <boost/graph/topological_sort.hpp> +#include <boost/graph/reverse_graph.hpp> +#include <boost/graph/topological_sort.hpp> #include <boost/range/adaptor/reversed.hpp> - -using namespace std; -using boost::filtered_graph; + +using namespace std; +using boost::filtered_graph; using boost::make_filtered_graph; -using boost::make_constant_property; -using boost::reverse_graph; +using boost::make_constant_property; +using boost::reverse_graph; using boost::adaptors::reverse; - -namespace ue2 { - -namespace { - -/** Distance value used to indicate that the vertex can't be 
reached. */ -static constexpr int DIST_UNREACHABLE = INT_MAX; - -/** - * Distance value used to indicate that the distance to a vertex is infinite - * (for example, it's the max distance and there's a cycle in the path) or so - * large that we should consider it effectively infinite. - */ -static constexpr int DIST_INFINITY = INT_MAX - 1; - -// -// Filters -// - -template <class GraphT> -struct NodeFilter { - typedef typename GraphT::edge_descriptor EdgeT; - NodeFilter() {} // BGL filters must be default-constructible. - NodeFilter(const vector<bool> *bad_in, const GraphT *g_in) - : bad(bad_in), g(g_in) { } - bool operator()(const EdgeT &e) const { - assert(g && bad); - - u32 src_idx = (*g)[source(e, *g)].index; - u32 tar_idx = (*g)[target(e, *g)].index; - - if (tar_idx == NODE_START_DOTSTAR) { - return false; - } - - return !(*bad)[src_idx] && !(*bad)[tar_idx]; - } - -private: - const vector<bool> *bad = nullptr; - const GraphT *g = nullptr; -}; - -template <class GraphT> -struct StartFilter { - typedef typename GraphT::edge_descriptor EdgeT; - StartFilter() {} // BGL filters must be default-constructible. - explicit StartFilter(const GraphT *g_in) : g(g_in) { } - bool operator()(const EdgeT &e) const { - assert(g); - - u32 src_idx = (*g)[source(e, *g)].index; - u32 tar_idx = (*g)[target(e, *g)].index; - - // Remove our stylised edges from anchored start to startDs. - if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) { - return false; - } - // Also remove the equivalent in the reversed direction. - if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) { - return false; - } - return true; - } - -private: - const GraphT *g = nullptr; -}; - -} // namespace - + +namespace ue2 { + +namespace { + +/** Distance value used to indicate that the vertex can't be reached. 
*/ +static constexpr int DIST_UNREACHABLE = INT_MAX; + +/** + * Distance value used to indicate that the distance to a vertex is infinite + * (for example, it's the max distance and there's a cycle in the path) or so + * large that we should consider it effectively infinite. + */ +static constexpr int DIST_INFINITY = INT_MAX - 1; + +// +// Filters +// + +template <class GraphT> +struct NodeFilter { + typedef typename GraphT::edge_descriptor EdgeT; + NodeFilter() {} // BGL filters must be default-constructible. + NodeFilter(const vector<bool> *bad_in, const GraphT *g_in) + : bad(bad_in), g(g_in) { } + bool operator()(const EdgeT &e) const { + assert(g && bad); + + u32 src_idx = (*g)[source(e, *g)].index; + u32 tar_idx = (*g)[target(e, *g)].index; + + if (tar_idx == NODE_START_DOTSTAR) { + return false; + } + + return !(*bad)[src_idx] && !(*bad)[tar_idx]; + } + +private: + const vector<bool> *bad = nullptr; + const GraphT *g = nullptr; +}; + +template <class GraphT> +struct StartFilter { + typedef typename GraphT::edge_descriptor EdgeT; + StartFilter() {} // BGL filters must be default-constructible. + explicit StartFilter(const GraphT *g_in) : g(g_in) { } + bool operator()(const EdgeT &e) const { + assert(g); + + u32 src_idx = (*g)[source(e, *g)].index; + u32 tar_idx = (*g)[target(e, *g)].index; + + // Remove our stylised edges from anchored start to startDs. + if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) { + return false; + } + // Also remove the equivalent in the reversed direction. 
+ if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) { + return false; + } + return true; + } + +private: + const GraphT *g = nullptr; +}; + +} // namespace + template<class Graph> -static +static vector<bool> findLoopReachable(const Graph &g, const typename Graph::vertex_descriptor src) { vector<bool> deadNodes(num_vertices(g)); - + using Edge = typename Graph::edge_descriptor; using Vertex = typename Graph::vertex_descriptor; using EdgeSet = set<Edge>; - EdgeSet deadEdges; - BackEdges<EdgeSet> be(deadEdges); - + EdgeSet deadEdges; + BackEdges<EdgeSet> be(deadEdges); + auto colors = make_small_color_map(g); - + depth_first_search(g, be, colors, src); auto af = make_bad_edge_filter(&deadEdges); auto acyclic_g = make_filtered_graph(g, af); - + vector<Vertex> topoOrder; /* actually reverse topological order */ - topoOrder.reserve(deadNodes.size()); + topoOrder.reserve(deadNodes.size()); topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors)); - - for (const auto &e : deadEdges) { + + for (const auto &e : deadEdges) { size_t srcIdx = g[source(e, g)].index; - if (srcIdx != NODE_START_DOTSTAR) { - deadNodes[srcIdx] = true; - } - } - + if (srcIdx != NODE_START_DOTSTAR) { + deadNodes[srcIdx] = true; + } + } + for (auto v : reverse(topoOrder)) { - for (const auto &e : in_edges_range(v, g)) { - if (deadNodes[g[source(e, g)].index]) { - deadNodes[g[v].index] = true; - break; - } - } - } + for (const auto &e : in_edges_range(v, g)) { + if (deadNodes[g[source(e, g)].index]) { + deadNodes[g[v].index] = true; + break; + } + } + } return deadNodes; -} - -template <class GraphT> -static +} + +template <class GraphT> +static void calcDepthFromSource(const GraphT &g, - typename GraphT::vertex_descriptor srcVertex, + typename GraphT::vertex_descriptor srcVertex, const vector<bool> &deadNodes, vector<int> &dMin, vector<int> &dMax) { - typedef typename GraphT::edge_descriptor EdgeT; - + typedef typename GraphT::edge_descriptor EdgeT; + const size_t numVerts = 
num_vertices(g); - - NodeFilter<GraphT> nf(&deadNodes, &g); - StartFilter<GraphT> sf(&g); - - /* minimum distance needs to run on a graph with .*start unreachable - * from start */ - typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph; - const StartFilteredGraph mindist_g(g, sf); - - /* maximum distance needs to run on a graph without cycles & nodes - * reachable from cycles */ - typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph; - const NodeFilteredGraph maxdist_g(g, nf); - - // Record distance of each vertex from source using one of the following - // algorithms. - - /* note: filtered graphs have same num_{vertices,edges} as base */ - - dMin.assign(numVerts, DIST_UNREACHABLE); - dMax.assign(numVerts, DIST_UNREACHABLE); - dMin[mindist_g[srcVertex].index] = 0; - - using boost::make_iterator_property_map; - + + NodeFilter<GraphT> nf(&deadNodes, &g); + StartFilter<GraphT> sf(&g); + + /* minimum distance needs to run on a graph with .*start unreachable + * from start */ + typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph; + const StartFilteredGraph mindist_g(g, sf); + + /* maximum distance needs to run on a graph without cycles & nodes + * reachable from cycles */ + typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph; + const NodeFilteredGraph maxdist_g(g, nf); + + // Record distance of each vertex from source using one of the following + // algorithms. 
+ + /* note: filtered graphs have same num_{vertices,edges} as base */ + + dMin.assign(numVerts, DIST_UNREACHABLE); + dMax.assign(numVerts, DIST_UNREACHABLE); + dMin[mindist_g[srcVertex].index] = 0; + + using boost::make_iterator_property_map; + auto min_index_map = get(vertex_index, mindist_g); - - breadth_first_search(mindist_g, srcVertex, - visitor(make_bfs_visitor(record_distances( + + breadth_first_search(mindist_g, srcVertex, + visitor(make_bfs_visitor(record_distances( make_iterator_property_map(dMin.begin(), min_index_map), boost::on_tree_edge()))) .color_map(make_small_color_map(mindist_g))); - + auto max_index_map = get(vertex_index, maxdist_g); - - dag_shortest_paths(maxdist_g, srcVertex, + + dag_shortest_paths(maxdist_g, srcVertex, distance_map(make_iterator_property_map(dMax.begin(), max_index_map)) .weight_map(make_constant_property<EdgeT>(-1)) .color_map(make_small_color_map(maxdist_g))); - - for (size_t i = 0; i < numVerts; i++) { - if (dMin[i] > DIST_UNREACHABLE) { - dMin[i] = DIST_UNREACHABLE; - } - DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]); - if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) { - dMax[i] = -DIST_INFINITY; /* max depths currently negative */ - DEBUG_PRINTF("bumping max to %d\n", dMax[i]); - } else if (dMax[i] >= DIST_UNREACHABLE - || dMax[i] < -DIST_UNREACHABLE) { - dMax[i] = -DIST_UNREACHABLE; - DEBUG_PRINTF("bumping max to %d\n", dMax[i]); - } - } -} - -/** - * \brief Convert the integer distance we use in our shortest path calculations - * to a \ref depth value. 
- */ -static -depth depthFromDistance(int val) { - assert(val >= 0); - if (val >= DIST_UNREACHABLE) { - return depth::unreachable(); - } else if (val == DIST_INFINITY) { - return depth::infinity(); - } - return depth((u32)val); -} - -static -DepthMinMax getDepths(u32 idx, const vector<int> &dMin, - const vector<int> &dMax) { - DepthMinMax d(depthFromDistance(dMin[idx]), - depthFromDistance(-1 * dMax[idx])); - DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str()); - assert(d.min <= d.max); - return d; -} - -template<class Graph, class Output> -static + + for (size_t i = 0; i < numVerts; i++) { + if (dMin[i] > DIST_UNREACHABLE) { + dMin[i] = DIST_UNREACHABLE; + } + DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]); + if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) { + dMax[i] = -DIST_INFINITY; /* max depths currently negative */ + DEBUG_PRINTF("bumping max to %d\n", dMax[i]); + } else if (dMax[i] >= DIST_UNREACHABLE + || dMax[i] < -DIST_UNREACHABLE) { + dMax[i] = -DIST_UNREACHABLE; + DEBUG_PRINTF("bumping max to %d\n", dMax[i]); + } + } +} + +/** + * \brief Convert the integer distance we use in our shortest path calculations + * to a \ref depth value. 
+ */ +static +depth depthFromDistance(int val) { + assert(val >= 0); + if (val >= DIST_UNREACHABLE) { + return depth::unreachable(); + } else if (val == DIST_INFINITY) { + return depth::infinity(); + } + return depth((u32)val); +} + +static +DepthMinMax getDepths(u32 idx, const vector<int> &dMin, + const vector<int> &dMax) { + DepthMinMax d(depthFromDistance(dMin[idx]), + depthFromDistance(-1 * dMax[idx])); + DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str()); + assert(d.min <= d.max); + return d; +} + +template<class Graph, class Output> +static void calcAndStoreDepth(const Graph &g, - const typename Graph::vertex_descriptor src, - const vector<bool> &deadNodes, - vector<int> &dMin /* util */, - vector<int> &dMax /* util */, - vector<Output> &depths, - DepthMinMax Output::*store) { + const typename Graph::vertex_descriptor src, + const vector<bool> &deadNodes, + vector<int> &dMin /* util */, + vector<int> &dMax /* util */, + vector<Output> &depths, + DepthMinMax Output::*store) { calcDepthFromSource(g, src, deadNodes, dMin, dMax); - - for (auto v : vertices_range(g)) { - u32 idx = g[v].index; - assert(idx < depths.size()); - Output &d = depths.at(idx); - d.*store = getDepths(idx, dMin, dMax); - } -} - + + for (auto v : vertices_range(g)) { + u32 idx = g[v].index; + assert(idx < depths.size()); + Output &d = depths.at(idx); + d.*store = getDepths(idx, dMin, dMax); + } +} + vector<NFAVertexDepth> calcDepths(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - const size_t numVertices = num_vertices(g); - + assert(hasCorrectlyNumberedVertices(g)); + const size_t numVertices = num_vertices(g); + vector<NFAVertexDepth> depths(numVertices); - vector<int> dMin; - vector<int> dMax; - - /* - * create a filtered graph for max depth calculations: all nodes/edges - * reachable from a loop need to be removed - */ + vector<int> dMin; + vector<int> dMax; + + /* + * create a filtered graph for max depth calculations: all nodes/edges + * reachable from a loop 
need to be removed + */ auto deadNodes = findLoopReachable(g, g.start); - - DEBUG_PRINTF("doing start\n"); + + DEBUG_PRINTF("doing start\n"); calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, &NFAVertexDepth::fromStart); - DEBUG_PRINTF("doing startds\n"); + DEBUG_PRINTF("doing startds\n"); calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexDepth::fromStartDotStar); return depths; -} - +} + vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - const size_t numVertices = num_vertices(g); - + assert(hasCorrectlyNumberedVertices(g)); + const size_t numVertices = num_vertices(g); + vector<NFAVertexRevDepth> depths(numVertices); - vector<int> dMin; - vector<int> dMax; - - /* reverse the graph before walking it */ + vector<int> dMin; + vector<int> dMax; + + /* reverse the graph before walking it */ typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; const RevNFAGraph rg(g); - + assert(num_vertices(g) == num_vertices(rg)); - /* - * create a filtered graph for max depth calculations: all nodes/edges - * reachable from a loop need to be removed - */ + /* + * create a filtered graph for max depth calculations: all nodes/edges + * reachable from a loop need to be removed + */ auto deadNodes = findLoopReachable(rg, g.acceptEod); - - DEBUG_PRINTF("doing accept\n"); - calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( + + DEBUG_PRINTF("doing accept\n"); + calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( rg, g.accept, deadNodes, dMin, dMax, depths, - &NFAVertexRevDepth::toAccept); - DEBUG_PRINTF("doing accepteod\n"); - deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. - calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( + &NFAVertexRevDepth::toAccept); + DEBUG_PRINTF("doing accepteod\n"); + deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. 
+ calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( rg, g.acceptEod, deadNodes, dMin, dMax, depths, - &NFAVertexRevDepth::toAcceptEod); + &NFAVertexRevDepth::toAcceptEod); return depths; -} - +} + vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - const size_t numVertices = num_vertices(g); - + assert(hasCorrectlyNumberedVertices(g)); + const size_t numVertices = num_vertices(g); + vector<NFAVertexBidiDepth> depths(numVertices); - vector<int> dMin; - vector<int> dMax; - - /* - * create a filtered graph for max depth calculations: all nodes/edges - * reachable from a loop need to be removed - */ + vector<int> dMin; + vector<int> dMax; + + /* + * create a filtered graph for max depth calculations: all nodes/edges + * reachable from a loop need to be removed + */ auto deadNodes = findLoopReachable(g, g.start); - - DEBUG_PRINTF("doing start\n"); + + DEBUG_PRINTF("doing start\n"); calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>( g, g.start, deadNodes, dMin, dMax, depths, - &NFAVertexBidiDepth::fromStart); - DEBUG_PRINTF("doing startds\n"); + &NFAVertexBidiDepth::fromStart); + DEBUG_PRINTF("doing startds\n"); calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>( g, g.startDs, deadNodes, dMin, dMax, depths, - &NFAVertexBidiDepth::fromStartDotStar); - - /* Now go backwards */ + &NFAVertexBidiDepth::fromStartDotStar); + + /* Now go backwards */ typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; const RevNFAGraph rg(g); deadNodes = findLoopReachable(rg, g.acceptEod); - - DEBUG_PRINTF("doing accept\n"); - calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>( + + DEBUG_PRINTF("doing accept\n"); + calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>( rg, g.accept, deadNodes, dMin, dMax, depths, - &NFAVertexBidiDepth::toAccept); - DEBUG_PRINTF("doing accepteod\n"); - deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. 
- calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>( + &NFAVertexBidiDepth::toAccept); + DEBUG_PRINTF("doing accepteod\n"); + deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. + calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>( rg, g.acceptEod, deadNodes, dMin, dMax, depths, - &NFAVertexBidiDepth::toAcceptEod); + &NFAVertexBidiDepth::toAcceptEod); return depths; -} - +} + vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src) { - assert(hasCorrectlyNumberedVertices(g)); - const size_t numVertices = num_vertices(g); - + assert(hasCorrectlyNumberedVertices(g)); + const size_t numVertices = num_vertices(g); + auto deadNodes = findLoopReachable(g, g.start); - - vector<int> dMin, dMax; + + vector<int> dMin, dMax; calcDepthFromSource(g, src, deadNodes, dMin, dMax); - + vector<DepthMinMax> depths(numVertices); - - for (auto v : vertices_range(g)) { + + for (auto v : vertices_range(g)) { auto idx = g[v].index; - depths.at(idx) = getDepths(idx, dMin, dMax); - } + depths.at(idx) = getDepths(idx, dMin, dMax); + } return depths; -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h index 36cca87e84..418e5e4412 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h @@ -1,99 +1,99 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief NFA graph vertex depth calculations. - */ - + * \brief NFA graph vertex depth calculations. + */ + #ifndef NG_DEPTH_H #define NG_DEPTH_H - + #include "ue2common.h" -#include "nfagraph/ng_holder.h" -#include "util/depth.h" - -#include <vector> - -namespace ue2 { - -/** - * \brief Encapsulates min/max depths relative to the start and startDs - * vertices. - */ -struct NFAVertexDepth { - DepthMinMax fromStart; - DepthMinMax fromStartDotStar; -}; - -/** - * \brief Encapsulates min/max depths relative to the accept and acceptEod - * vertices. - */ -struct NFAVertexRevDepth { - DepthMinMax toAccept; - DepthMinMax toAcceptEod; -}; - -/** - * \brief Encapsulates min/max depths relative to all of our special vertices. - */ +#include "nfagraph/ng_holder.h" +#include "util/depth.h" + +#include <vector> + +namespace ue2 { + +/** + * \brief Encapsulates min/max depths relative to the start and startDs + * vertices. + */ +struct NFAVertexDepth { + DepthMinMax fromStart; + DepthMinMax fromStartDotStar; +}; + +/** + * \brief Encapsulates min/max depths relative to the accept and acceptEod + * vertices. + */ +struct NFAVertexRevDepth { + DepthMinMax toAccept; + DepthMinMax toAcceptEod; +}; + +/** + * \brief Encapsulates min/max depths relative to all of our special vertices. 
+ */ struct NFAVertexBidiDepth { DepthMinMax fromStart; DepthMinMax fromStartDotStar; DepthMinMax toAccept; DepthMinMax toAcceptEod; -}; - -/** +}; + +/** * \brief Calculate depths from start and startDs. Returns them in a vector, * indexed by vertex index. - */ + */ std::vector<NFAVertexDepth> calcDepths(const NGHolder &g); - -/** + +/** * \brief Calculate depths to accept and acceptEod. Returns them in a vector, * indexed by vertex index. - */ + */ std::vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g); - -/** + +/** * \brief Calculate depths to/from all special vertices. Returns them in a * vector, indexed by vertex index. - */ + */ std::vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g); - + /** * \brief Calculate the (min, max) depths from the given \p src to every vertex * in the graph and return them in a vector, indexed by \p vertex_index. */ std::vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src); - -} // namespace ue2 - + +} // namespace ue2 + #endif // NG_DEPTH_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp index d6a064d12f..2589881009 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp @@ -1,73 +1,73 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Calculate dominator and post-dominator trees. - * - * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm. - */ -#include "ng_dominators.h" - -#include "ue2common.h" -#include "ng_holder.h" -#include "ng_util.h" - -#include <boost-patched/graph/dominator_tree.hpp> // locally patched version + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Calculate dominator and post-dominator trees. + * + * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm. 
+ */ +#include "ng_dominators.h" + +#include "ue2common.h" +#include "ng_holder.h" +#include "ng_util.h" + +#include <boost-patched/graph/dominator_tree.hpp> // locally patched version #include <boost-patched/graph/reverse_graph.hpp> - -using namespace std; -using boost::make_assoc_property_map; -using boost::make_iterator_property_map; - -namespace ue2 { - -template <class Graph> + +using namespace std; +using boost::make_assoc_property_map; +using boost::make_iterator_property_map; + +namespace ue2 { + +template <class Graph> unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g, typename Graph::vertex_descriptor source) { using Vertex = typename Graph::vertex_descriptor; - const size_t num_verts = num_vertices(g); - auto index_map = get(&NFAGraphVertexProps::index, g); - - vector<size_t> dfnum(num_verts, 0); + const size_t num_verts = num_vertices(g); + auto index_map = get(&NFAGraphVertexProps::index, g); + + vector<size_t> dfnum(num_verts, 0); vector<Vertex> parents(num_verts, Graph::null_vertex()); - - auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map); - auto parent_map = make_iterator_property_map(parents.begin(), index_map); + + auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map); + auto parent_map = make_iterator_property_map(parents.begin(), index_map); vector<Vertex> vertices_by_dfnum(num_verts, Graph::null_vertex()); - - // Output map. + + // Output map. 
vector<Vertex> doms(num_verts, Graph::null_vertex()); auto dom_map = make_iterator_property_map(doms.begin(), index_map); - - boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, - parent_map, vertices_by_dfnum, - dom_map); - + + boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, + parent_map, vertices_by_dfnum, + dom_map); + /* Translate back to an NFAVertex map */ unordered_map<NFAVertex, NFAVertex> doms2; doms2.reserve(num_verts); @@ -78,17 +78,17 @@ unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g, } } return doms2; -} - +} + unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); + assert(hasCorrectlyNumberedVertices(g)); return calcDominators(g, g.start); -} - +} + unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); + assert(hasCorrectlyNumberedVertices(g)); return calcDominators(boost::reverse_graph<NGHolder, const NGHolder &>(g), - g.acceptEod); -} - -} // namespace ue2 + g.acceptEod); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h index f505b7e471..eefc7e93df 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h @@ -1,50 +1,50 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Calculate dominator and post-dominator trees. - * - * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm. - */ - -#ifndef NG_DOMINATORS_H -#define NG_DOMINATORS_H - -#include "ng_holder.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Calculate dominator and post-dominator trees. + * + * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm. + */ + +#ifndef NG_DOMINATORS_H +#define NG_DOMINATORS_H + +#include "ng_holder.h" + #include <unordered_map> -namespace ue2 { - +namespace ue2 { + std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g); - + std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g); - -} // namespace ue2 - -#endif // NG_DOMINATORS_H + +} // namespace ue2 + +#endif // NG_DOMINATORS_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h index 3e12d1d22e..6b22ac2e21 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h @@ -1,175 +1,175 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Dump code for NFA graphs. - */ - -#ifndef NG_DUMP_H -#define NG_DUMP_H - -#include "grey.h" -#include "ng_holder.h" // for graph types -#include "ue2common.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Dump code for NFA graphs. + */ + +#ifndef NG_DUMP_H +#define NG_DUMP_H + +#include "grey.h" +#include "ng_holder.h" // for graph types +#include "ue2common.h" + #include <unordered_map> -#ifdef DUMP_SUPPORT -#include <fstream> -#endif - -struct RoseEngine; - -namespace ue2 { - -class NGHolder; -class NG; +#ifdef DUMP_SUPPORT +#include <fstream> +#endif + +struct RoseEngine; + +namespace ue2 { + +class NGHolder; +class NG; class ExpressionInfo; -class ReportManager; - -// Implementations for stubs below -- all have the suffix "Impl". - -#ifdef DUMP_SUPPORT - -template <typename GraphT> -void dumpGraphImpl(const char *name, const GraphT &g); - -template <typename GraphT> -void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm); - +class ReportManager; + +// Implementations for stubs below -- all have the suffix "Impl". 
+ +#ifdef DUMP_SUPPORT + +template <typename GraphT> +void dumpGraphImpl(const char *name, const GraphT &g); + +template <typename GraphT> +void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm); + void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, const char *name, const Grey &grey); - -void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp, - const Grey &grey); - -void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr, - u32 comp, u32 plan, const Grey &grey); - -void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, - const char *stageName, const Grey &grey); - -// Variant that takes a region map as well. -void dumpHolderImpl(const NGHolder &h, + +void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp, + const Grey &grey); + +void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr, + u32 comp, u32 plan, const Grey &grey); + +void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, + const char *stageName, const Grey &grey); + +// Variant that takes a region map as well. +void dumpHolderImpl(const NGHolder &h, const std::unordered_map<NFAVertex, u32> &region_map, - unsigned int stageNumber, const char *stageName, - const Grey &grey); - -template <typename GraphT> -static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) { - dumpGraphImpl(name, g); -} - -#endif // DUMP_SUPPORT - -// Stubs which call through to dump code if compiled in. - -UNUSED static inline + unsigned int stageNumber, const char *stageName, + const Grey &grey); + +template <typename GraphT> +static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) { + dumpGraphImpl(name, g); +} + +#endif // DUMP_SUPPORT + +// Stubs which call through to dump code if compiled in. 
+ +UNUSED static inline void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr, UNUSED const char *name, UNUSED const Grey &grey) { -#ifdef DUMP_SUPPORT +#ifdef DUMP_SUPPORT dumpDotWrapperImpl(g, expr, name, grey); -#endif -} - -UNUSED static inline -void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name, - UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) { -#ifdef DUMP_SUPPORT - dumpComponentImpl(h, name, expr, comp, grey); -#endif -} - -UNUSED static inline -void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name, - UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan, - UNUSED const Grey &grey) { -#ifdef DUMP_SUPPORT - dumpSomSubComponentImpl(h, name, expr, comp, plan, grey); -#endif -} - -UNUSED static inline -void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber, - UNUSED const char *name, UNUSED const Grey &grey) { -#ifdef DUMP_SUPPORT - dumpHolderImpl(h, stageNumber, name, grey); -#endif -} - -UNUSED static inline -void dumpHolder(UNUSED const NGHolder &h, +#endif +} + +UNUSED static inline +void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name, + UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) { +#ifdef DUMP_SUPPORT + dumpComponentImpl(h, name, expr, comp, grey); +#endif +} + +UNUSED static inline +void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name, + UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan, + UNUSED const Grey &grey) { +#ifdef DUMP_SUPPORT + dumpSomSubComponentImpl(h, name, expr, comp, plan, grey); +#endif +} + +UNUSED static inline +void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber, + UNUSED const char *name, UNUSED const Grey &grey) { +#ifdef DUMP_SUPPORT + dumpHolderImpl(h, stageNumber, name, grey); +#endif +} + +UNUSED static inline +void dumpHolder(UNUSED const NGHolder &h, UNUSED const std::unordered_map<NFAVertex, u32> &region_map, - UNUSED unsigned int stageNumber, UNUSED const char 
*name, - UNUSED const Grey &grey) { -#ifdef DUMP_SUPPORT - dumpHolderImpl(h, region_map, stageNumber, name, grey); -#endif -} - -#ifdef DUMP_SUPPORT -void dumpReportManager(const ReportManager &rm, const Grey &grey); -void dumpSmallWrite(const RoseEngine *rose, const Grey &grey); -#else -static UNUSED -void dumpReportManager(const ReportManager &, const Grey &) { -} -static UNUSED -void dumpSmallWrite(const RoseEngine *, const Grey &) { -} -#endif - -#ifdef DUMP_SUPPORT -// replace boost's graphviz writer -template <typename GraphT, typename WriterT, typename VertexID> -static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w, - const VertexID &vertex_id) { - const std::string delimiter(" -> "); - out << "digraph G {" << std::endl; - - typename boost::graph_traits<GraphT>::vertex_iterator i, end; - for(boost::tie(i,end) = vertices(g); i != end; ++i) { - out << get(vertex_id, *i); - w(out, *i); // print vertex attributes - out << ";" << std::endl; - } - typename boost::graph_traits<GraphT>::edge_iterator ei, edge_end; - for(boost::tie(ei, edge_end) = edges(g); ei != edge_end; ++ei) { - out << (get(vertex_id, source(*ei, g))) << delimiter - << (get(vertex_id, target(*ei, g))) << " "; - w(out, *ei); // print edge attributes - out << ";" << std::endl; - } - out << "}" << std::endl; -} - -#endif // DUMP_SUPPORT - -} // namespace ue2 - -#endif // NG_DUMP_H + UNUSED unsigned int stageNumber, UNUSED const char *name, + UNUSED const Grey &grey) { +#ifdef DUMP_SUPPORT + dumpHolderImpl(h, region_map, stageNumber, name, grey); +#endif +} + +#ifdef DUMP_SUPPORT +void dumpReportManager(const ReportManager &rm, const Grey &grey); +void dumpSmallWrite(const RoseEngine *rose, const Grey &grey); +#else +static UNUSED +void dumpReportManager(const ReportManager &, const Grey &) { +} +static UNUSED +void dumpSmallWrite(const RoseEngine *, const Grey &) { +} +#endif + +#ifdef DUMP_SUPPORT +// replace boost's graphviz writer +template <typename GraphT, typename WriterT, 
typename VertexID> +static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w, + const VertexID &vertex_id) { + const std::string delimiter(" -> "); + out << "digraph G {" << std::endl; + + typename boost::graph_traits<GraphT>::vertex_iterator i, end; + for(boost::tie(i,end) = vertices(g); i != end; ++i) { + out << get(vertex_id, *i); + w(out, *i); // print vertex attributes + out << ";" << std::endl; + } + typename boost::graph_traits<GraphT>::edge_iterator ei, edge_end; + for(boost::tie(ei, edge_end) = edges(g); ei != edge_end; ++ei) { + out << (get(vertex_id, source(*ei, g))) << delimiter + << (get(vertex_id, target(*ei, g))) << " "; + w(out, *ei); // print edge attributes + out << ";" << std::endl; + } + out << "}" << std::endl; +} + +#endif // DUMP_SUPPORT + +} // namespace ue2 + +#endif // NG_DUMP_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp index b8354bd42a..ed2de70598 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp @@ -1,186 +1,186 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Edge redundancy graph reductions. - */ -#include "ng_edge_redundancy.h" - -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "ue2common.h" -#include "parser/position.h" -#include "util/compile_context.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Edge redundancy graph reductions. + */ +#include "ng_edge_redundancy.h" + +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "ue2common.h" +#include "parser/position.h" +#include "util/compile_context.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph_range.h" - -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -/* reverse edge redundancy removal is possible but is not implemented as it - * regressed rose pattern support in the regression suite: 19026 - 19027 - * (foo.{1,5}b?ar) - * - * If rose becomes smarter we can reimplement. 
- */ - -static never_inline -bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad, - const set<NFAVertex> &happy) { - /* need to check if for each vertex in sad if it has an edge to a happy - * vertex */ - for (auto u : sad) { - bool ok = false; - for (auto v : adjacent_vertices_range(u, g)) { - if (contains(happy, v)) { - ok = true; - break; - } - } - - if (!ok) { - return false; - } - } - - return true; -} - -static never_inline -bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad, - const set<NFAVertex> &happy) { - /* need to check if for each vertex in sad if it has an edge to a happy - * vertex */ - for (auto v : sad) { - bool ok = false; - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (contains(happy, u)) { - ok = true; - break; - } - } - - if (!ok) { - return false; - } - } - - return true; -} - -/** \brief Redundant self-loop removal. - * - * A self loop on a vertex v can be removed if: - * - * For every vertex u in pred(v) either: - * 1: u has a self loop and cr(v) subset of cr(u) - * OR - * 2: u has an edge to vertex satisfying criterion 1 - * - * Note: we remove all dead loops at the end of the pass and do not check the - * live status of the loops we are depending on during the analysis. - * - * We don't end up in situations where we remove a group of loops which depend - * on each other as: - * - * - there must be at least one vertex not in the group which is a pred of some - * member of the group (as we don't remove loops on specials) - * - * For each pred vertex of the group: - * - the vertex must be 'sad' as it is not part of the group - * - therefore it must have edges to each member of the group (to happy, trans) - * - therefore the group is enabled simultaneously - * - due to internal group edges, all members will still be active after the - * next character. - * - * Actually, the vertex redundancy code will merge the entire group into one - * cyclic state. 
- */ -static -bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) { - unsigned dead_count = 0; - - set<NFAVertex> happy; - set<NFAVertex> sad; - - for (auto v : vertices_range(g)) { - if (is_special(v, g) || !hasSelfLoop(v, g)) { - continue; - } - - const CharReach &cr_v = g[v].char_reach; - - happy.clear(); - sad.clear(); - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - if (!hasSelfLoop(u, g)) { - sad.insert(u); - continue; - } - - if (ignore_starts) { - if (u == g.startDs || is_virtual_start(u, g)) { - sad.insert(u); - continue; - } - } - - const CharReach &cr_u = g[u].char_reach; - - if ((cr_u & cr_v) != cr_v) { - sad.insert(u); - continue; - } - - happy.insert(u); - } - - if (!happy.empty() && checkVerticesFwd(g, sad, happy)) { - dead_count++; - remove_edge(v, v, g); - } - } - - DEBUG_PRINTF("found %u removable edges.\n", dead_count); - return dead_count; -} - +#include "util/graph_range.h" + +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +/* reverse edge redundancy removal is possible but is not implemented as it + * regressed rose pattern support in the regression suite: 19026 - 19027 + * (foo.{1,5}b?ar) + * + * If rose becomes smarter we can reimplement. 
+ */ + +static never_inline +bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad, + const set<NFAVertex> &happy) { + /* need to check if for each vertex in sad if it has an edge to a happy + * vertex */ + for (auto u : sad) { + bool ok = false; + for (auto v : adjacent_vertices_range(u, g)) { + if (contains(happy, v)) { + ok = true; + break; + } + } + + if (!ok) { + return false; + } + } + + return true; +} + +static never_inline +bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad, + const set<NFAVertex> &happy) { + /* need to check if for each vertex in sad if it has an edge to a happy + * vertex */ + for (auto v : sad) { + bool ok = false; + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (contains(happy, u)) { + ok = true; + break; + } + } + + if (!ok) { + return false; + } + } + + return true; +} + +/** \brief Redundant self-loop removal. + * + * A self loop on a vertex v can be removed if: + * + * For every vertex u in pred(v) either: + * 1: u has a self loop and cr(v) subset of cr(u) + * OR + * 2: u has an edge to vertex satisfying criterion 1 + * + * Note: we remove all dead loops at the end of the pass and do not check the + * live status of the loops we are depending on during the analysis. + * + * We don't end up in situations where we remove a group of loops which depend + * on each other as: + * + * - there must be at least one vertex not in the group which is a pred of some + * member of the group (as we don't remove loops on specials) + * + * For each pred vertex of the group: + * - the vertex must be 'sad' as it is not part of the group + * - therefore it must have edges to each member of the group (to happy, trans) + * - therefore the group is enabled simultaneously + * - due to internal group edges, all members will still be active after the + * next character. + * + * Actually, the vertex redundancy code will merge the entire group into one + * cyclic state. 
+ */ +static +bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) { + unsigned dead_count = 0; + + set<NFAVertex> happy; + set<NFAVertex> sad; + + for (auto v : vertices_range(g)) { + if (is_special(v, g) || !hasSelfLoop(v, g)) { + continue; + } + + const CharReach &cr_v = g[v].char_reach; + + happy.clear(); + sad.clear(); + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + + if (!hasSelfLoop(u, g)) { + sad.insert(u); + continue; + } + + if (ignore_starts) { + if (u == g.startDs || is_virtual_start(u, g)) { + sad.insert(u); + continue; + } + } + + const CharReach &cr_u = g[u].char_reach; + + if ((cr_u & cr_v) != cr_v) { + sad.insert(u); + continue; + } + + happy.insert(u); + } + + if (!happy.empty() && checkVerticesFwd(g, sad, happy)) { + dead_count++; + remove_edge(v, v, g); + } + } + + DEBUG_PRINTF("found %u removable edges.\n", dead_count); + return dead_count; +} + static bool checkReportsRev(const NGHolder &g, NFAVertex v, const set<NFAVertex> &happy) { @@ -203,336 +203,336 @@ bool checkReportsRev(const NGHolder &g, NFAVertex v, return is_subset_of(g[v].reports, happy_reports); } -/** \brief Redundant self-loop removal (reverse version). - * - * A self loop on a vertex v can be removed if: - * - * For every vertex u in succ(v) either: - * 1: u has a self loop and cr(v) is a subset of cr(u). - * OR - * 2: u is not an accept and u has an edge from a vertex satisfying - * criterion 1. - * OR - * 3: u is in an accept and u has an edge from a vertex v' satisfying - * criterion 1 and report(v) == report(v'). 
- */ -static -bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) { - unsigned dead_count = 0; - - set<NFAVertex> happy; - set<NFAVertex> sad; - - for (auto v : vertices_range(g)) { - if (is_special(v, g) || !hasSelfLoop(v, g)) { - continue; - } - - const CharReach &cr_v = g[v].char_reach; - - happy.clear(); - sad.clear(); - - for (auto u : adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - if (!hasSelfLoop(u, g)) { - sad.insert(u); - continue; - } - - assert(!is_special(u, g)); - - const CharReach &cr_u = g[u].char_reach; - - if (!cr_v.isSubsetOf(cr_u)) { - sad.insert(u); - continue; - } - - happy.insert(u); - } - +/** \brief Redundant self-loop removal (reverse version). + * + * A self loop on a vertex v can be removed if: + * + * For every vertex u in succ(v) either: + * 1: u has a self loop and cr(v) is a subset of cr(u). + * OR + * 2: u is not an accept and u has an edge from a vertex satisfying + * criterion 1. + * OR + * 3: u is in an accept and u has an edge from a vertex v' satisfying + * criterion 1 and report(v) == report(v'). 
+ */ +static +bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) { + unsigned dead_count = 0; + + set<NFAVertex> happy; + set<NFAVertex> sad; + + for (auto v : vertices_range(g)) { + if (is_special(v, g) || !hasSelfLoop(v, g)) { + continue; + } + + const CharReach &cr_v = g[v].char_reach; + + happy.clear(); + sad.clear(); + + for (auto u : adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + + if (!hasSelfLoop(u, g)) { + sad.insert(u); + continue; + } + + assert(!is_special(u, g)); + + const CharReach &cr_u = g[u].char_reach; + + if (!cr_v.isSubsetOf(cr_u)) { + sad.insert(u); + continue; + } + + happy.insert(u); + } + if (!happy.empty() && checkVerticesRev(g, sad, happy) && checkReportsRev(g, v, happy)) { - dead_count++; - remove_edge(v, v, g); - } - } - - DEBUG_PRINTF("found %u removable edges.\n", dead_count); - return dead_count; -} - -static -bool parentsSubsetOf(const NGHolder &g, NFAVertex v, - const flat_set<NFAVertex> &other_parents, NFAVertex other, - map<NFAVertex, bool> &done) { - map<NFAVertex, bool>::const_iterator dit = done.find(v); - if (dit != done.end()) { - return dit->second; - } - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v && contains(other_parents, other)) { - continue; - } - - if (!contains(other_parents, u)) { - done[v] = false; - return false; - } - } - - done[v] = true; - return true; -} - -static -bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src, - const flat_set<NFAVertex> &fixed_parents, - const NFAEdge &candidate, - map<NFAVertex, bool> &done) { - NFAVertex w = source(candidate, g); - NFAVertex v = target(candidate, g); - const CharReach &cr_w = g[w].char_reach; - const CharReach &cr_u = g[fixed_src].char_reach; - - /* There is no reason why self loops cannot be considered by this - * transformation but the removal is already handled by many other - * transformations. 
*/ - if (w == v) { - return false; - } - - if (is_special(w, g)) { - return false; - } - - if (!cr_w.isSubsetOf(cr_u)) { - return false; - } - - /* check that each parent of w is also a parent of u */ - if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) { - return false; - } - + dead_count++; + remove_edge(v, v, g); + } + } + + DEBUG_PRINTF("found %u removable edges.\n", dead_count); + return dead_count; +} + +static +bool parentsSubsetOf(const NGHolder &g, NFAVertex v, + const flat_set<NFAVertex> &other_parents, NFAVertex other, + map<NFAVertex, bool> &done) { + map<NFAVertex, bool>::const_iterator dit = done.find(v); + if (dit != done.end()) { + return dit->second; + } + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v && contains(other_parents, other)) { + continue; + } + + if (!contains(other_parents, u)) { + done[v] = false; + return false; + } + } + + done[v] = true; + return true; +} + +static +bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src, + const flat_set<NFAVertex> &fixed_parents, + const NFAEdge &candidate, + map<NFAVertex, bool> &done) { + NFAVertex w = source(candidate, g); + NFAVertex v = target(candidate, g); + const CharReach &cr_w = g[w].char_reach; + const CharReach &cr_u = g[fixed_src].char_reach; + + /* There is no reason why self loops cannot be considered by this + * transformation but the removal is already handled by many other + * transformations. 
*/ + if (w == v) { + return false; + } + + if (is_special(w, g)) { + return false; + } + + if (!cr_w.isSubsetOf(cr_u)) { + return false; + } + + /* check that each parent of w is also a parent of u */ + if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) { + return false; + } + DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n", g[w].index, g[v].index, g[fixed_src].index, g[v].index); - return true; -} - -static never_inline -void checkLargeOutU(const NGHolder &g, NFAVertex u, - const flat_set<NFAVertex> &parents_u, - flat_set<NFAVertex> &possible_w, - map<NFAVertex, bool> &done, - set<NFAEdge> *dead) { - /* only vertices with at least one parent in common with u need to be - * considered, and we also only consider potential siblings with subset - * reach. */ - possible_w.clear(); - const CharReach &cr_u = g[u].char_reach; - for (auto p : parents_u) { - for (auto v : adjacent_vertices_range(p, g)) { - const CharReach &cr_w = g[v].char_reach; - if (cr_w.isSubsetOf(cr_u)) { - possible_w.insert(v); - } - } - } - - // If there's only one, it's us, and we have no work to do. 
- if (possible_w.size() <= 1) { - assert(possible_w.empty() || *possible_w.begin() == u); - return; - } - - for (const auto &e : out_edges_range(u, g)) { - const NFAVertex v = target(e, g); - - if (is_special(v, g)) { - continue; - } - - if (contains(*dead, e)) { - continue; - } - - /* Now need check to find any edges which can be removed due to the - * existence of edge e */ - for (const auto &e2 : in_edges_range(v, g)) { - if (e == e2 || contains(*dead, e2)) { - continue; - } - - const NFAVertex w = source(e2, g); - if (!contains(possible_w, w)) { - continue; - } - - if (checkFwdCandidate(g, u, parents_u, e2, done)) { - dead->insert(e2); - } - } - } -} - -static never_inline -void checkSmallOutU(const NGHolder &g, NFAVertex u, - const flat_set<NFAVertex> &parents_u, - map<NFAVertex, bool> &done, - set<NFAEdge> *dead) { - for (const auto &e : out_edges_range(u, g)) { - const NFAVertex v = target(e, g); - - if (is_special(v, g)) { - continue; - } - - if (contains(*dead, e)) { - continue; - } - - /* Now need check to find any edges which can be removed due to the - * existence of edge e */ - for (const auto &e2 : in_edges_range(v, g)) { - if (e == e2 || contains(*dead, e2)) { - continue; - } - - if (checkFwdCandidate(g, u, parents_u, e2, done)) { - dead->insert(e2); - } - } - } -} - -/** \brief Forward edge redundancy pass. - * - * An edge e from w to v is redundant if there exists an edge e' such that: - * e' is from u to v - * and: reach(w) is a subset of reach(u) - * and: proper_pred(w) is a subset of pred(u) - * and: self_loop(w) implies self_loop(u) or edge from (w to u) - * - * Note: edges to accepts also require report ID checks. 
- */ -static -bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) { - set<NFAEdge> dead; - map<NFAVertex, bool> done; - flat_set<NFAVertex> parents_u; - flat_set<NFAVertex> possible_w; - - for (auto u : vertices_range(g)) { - if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) { - continue; - } - - parents_u.clear(); - pred(g, u, &parents_u); - - done.clear(); + return true; +} + +static never_inline +void checkLargeOutU(const NGHolder &g, NFAVertex u, + const flat_set<NFAVertex> &parents_u, + flat_set<NFAVertex> &possible_w, + map<NFAVertex, bool> &done, + set<NFAEdge> *dead) { + /* only vertices with at least one parent in common with u need to be + * considered, and we also only consider potential siblings with subset + * reach. */ + possible_w.clear(); + const CharReach &cr_u = g[u].char_reach; + for (auto p : parents_u) { + for (auto v : adjacent_vertices_range(p, g)) { + const CharReach &cr_w = g[v].char_reach; + if (cr_w.isSubsetOf(cr_u)) { + possible_w.insert(v); + } + } + } + + // If there's only one, it's us, and we have no work to do. 
+ if (possible_w.size() <= 1) { + assert(possible_w.empty() || *possible_w.begin() == u); + return; + } + + for (const auto &e : out_edges_range(u, g)) { + const NFAVertex v = target(e, g); + + if (is_special(v, g)) { + continue; + } + + if (contains(*dead, e)) { + continue; + } + + /* Now need check to find any edges which can be removed due to the + * existence of edge e */ + for (const auto &e2 : in_edges_range(v, g)) { + if (e == e2 || contains(*dead, e2)) { + continue; + } + + const NFAVertex w = source(e2, g); + if (!contains(possible_w, w)) { + continue; + } + + if (checkFwdCandidate(g, u, parents_u, e2, done)) { + dead->insert(e2); + } + } + } +} + +static never_inline +void checkSmallOutU(const NGHolder &g, NFAVertex u, + const flat_set<NFAVertex> &parents_u, + map<NFAVertex, bool> &done, + set<NFAEdge> *dead) { + for (const auto &e : out_edges_range(u, g)) { + const NFAVertex v = target(e, g); + + if (is_special(v, g)) { + continue; + } + + if (contains(*dead, e)) { + continue; + } + + /* Now need check to find any edges which can be removed due to the + * existence of edge e */ + for (const auto &e2 : in_edges_range(v, g)) { + if (e == e2 || contains(*dead, e2)) { + continue; + } + + if (checkFwdCandidate(g, u, parents_u, e2, done)) { + dead->insert(e2); + } + } + } +} + +/** \brief Forward edge redundancy pass. + * + * An edge e from w to v is redundant if there exists an edge e' such that: + * e' is from u to v + * and: reach(w) is a subset of reach(u) + * and: proper_pred(w) is a subset of pred(u) + * and: self_loop(w) implies self_loop(u) or edge from (w to u) + * + * Note: edges to accepts also require report ID checks. 
+ */ +static +bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) { + set<NFAEdge> dead; + map<NFAVertex, bool> done; + flat_set<NFAVertex> parents_u; + flat_set<NFAVertex> possible_w; + + for (auto u : vertices_range(g)) { + if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) { + continue; + } + + parents_u.clear(); + pred(g, u, &parents_u); + + done.clear(); if (out_degree(u, g) > 1) { - checkLargeOutU(g, u, parents_u, possible_w, done, &dead); - } else { - checkSmallOutU(g, u, parents_u, done, &dead); - } - } - - if (dead.empty()) { - return false; - } - - DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size()); - remove_edges(dead, g); - pruneUseless(g); - return true; -} - -/** Entry point: Runs all the edge redundancy passes. If SoM is tracked, - * don't consider startDs or virtual starts as cyclic vertices. */ -bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) { - if (!cc.grey.removeEdgeRedundancy) { - return false; - } - - bool changed = false; - changed |= removeEdgeRedundancyNearCyclesFwd(g, som); - changed |= removeEdgeRedundancyNearCyclesRev(g); - changed |= removeEdgeRedundancyFwd(g, som); - return changed; -} - -/** \brief Removes optional stuff from the front of floating patterns, since it's - * redundant with startDs. - * - * For each successor of startDs, remove any in-edges that aren't from either - * start or startDs. 
This allows us to prune redundant vertices at the start of - * a pattern: - * - * /(hat)?stand --> /stand/ - * - */ -bool removeSiblingsOfStartDotStar(NGHolder &g) { - vector<NFAEdge> dead; - - for (auto v : adjacent_vertices_range(g.startDs, g)) { + checkLargeOutU(g, u, parents_u, possible_w, done, &dead); + } else { + checkSmallOutU(g, u, parents_u, done, &dead); + } + } + + if (dead.empty()) { + return false; + } + + DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size()); + remove_edges(dead, g); + pruneUseless(g); + return true; +} + +/** Entry point: Runs all the edge redundancy passes. If SoM is tracked, + * don't consider startDs or virtual starts as cyclic vertices. */ +bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) { + if (!cc.grey.removeEdgeRedundancy) { + return false; + } + + bool changed = false; + changed |= removeEdgeRedundancyNearCyclesFwd(g, som); + changed |= removeEdgeRedundancyNearCyclesRev(g); + changed |= removeEdgeRedundancyFwd(g, som); + return changed; +} + +/** \brief Removes optional stuff from the front of floating patterns, since it's + * redundant with startDs. + * + * For each successor of startDs, remove any in-edges that aren't from either + * start or startDs. 
This allows us to prune redundant vertices at the start of + * a pattern: + * + * /(hat)?stand --> /stand/ + * + */ +bool removeSiblingsOfStartDotStar(NGHolder &g) { + vector<NFAEdge> dead; + + for (auto v : adjacent_vertices_range(g.startDs, g)) { DEBUG_PRINTF("checking %zu\n", g[v].index); - if (is_special(v, g)) { - continue; - } - - for (const auto &e : in_edges_range(v, g)) { - NFAVertex u = source(e, g); - if (is_special(u, g)) { - continue; - } + if (is_special(v, g)) { + continue; + } + + for (const auto &e : in_edges_range(v, g)) { + NFAVertex u = source(e, g); + if (is_special(u, g)) { + continue; + } DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index); - dead.push_back(e); - } - } - - if (dead.empty()) { - return false; - } - - DEBUG_PRINTF("found %zu removable edges.\n", dead.size()); - remove_edges(dead, g); - pruneUseless(g); - return true; -} - -/** Removes all edges into virtual starts other than those from start/startDs, - * providing there is an edge from startDs. This operation is an optimisation - * for SOM mode. (see UE-1544) */ -bool optimiseVirtualStarts(NGHolder &g) { - vector<NFAEdge> dead; - for (auto v : adjacent_vertices_range(g.startDs, g)) { - u32 flags = g[v].assert_flags; - if (!(flags & POS_FLAG_VIRTUAL_START)) { - continue; - } - - for (const auto &e : in_edges_range(v, g)) { - if (!is_any_start(source(e, g), g)) { - dead.push_back(e); - } - } - } - - if (dead.empty()) { - return false; - } - - DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size()); - remove_edges(dead, g); - pruneUseless(g); - return true; -} - -} // namespace ue2 + dead.push_back(e); + } + } + + if (dead.empty()) { + return false; + } + + DEBUG_PRINTF("found %zu removable edges.\n", dead.size()); + remove_edges(dead, g); + pruneUseless(g); + return true; +} + +/** Removes all edges into virtual starts other than those from start/startDs, + * providing there is an edge from startDs. This operation is an optimisation + * for SOM mode. 
(see UE-1544) */ +bool optimiseVirtualStarts(NGHolder &g) { + vector<NFAEdge> dead; + for (auto v : adjacent_vertices_range(g.startDs, g)) { + u32 flags = g[v].assert_flags; + if (!(flags & POS_FLAG_VIRTUAL_START)) { + continue; + } + + for (const auto &e : in_edges_range(v, g)) { + if (!is_any_start(source(e, g), g)) { + dead.push_back(e); + } + } + } + + if (dead.empty()) { + return false; + } + + DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size()); + remove_edges(dead, g); + pruneUseless(g); + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h index 08cf31f26c..f589ff727e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h @@ -1,65 +1,65 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Edge redundancy graph reductions. - */ -#ifndef NG_EDGE_REDUNDANCY_H -#define NG_EDGE_REDUNDANCY_H - -#include "som/som.h" - -namespace ue2 { - -class NGHolder; -struct CompileContext; - -/** \brief Entry point: Runs all the edge redundancy passes. */ -bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc); - -/** \brief Removes optional stuff from the front of floating patterns, since - * it's redundant with startDs. - * - * For each successor of startDs, remove any in-edges that aren't from either - * start or startDs. This allows us to prune redundant vertices at the start of - * a pattern: - * - * /(hat)?stand --> /stand/ - * - */ -bool removeSiblingsOfStartDotStar(NGHolder &g); - -/** \brief Removes all edges into virtual starts other than those from - * start/startDs, providing there is an edge from startDs. - * - * This operation is an optimisation for SOM mode. (see UE-1544) */ -bool optimiseVirtualStarts(NGHolder &g); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Edge redundancy graph reductions. + */ +#ifndef NG_EDGE_REDUNDANCY_H +#define NG_EDGE_REDUNDANCY_H + +#include "som/som.h" + +namespace ue2 { + +class NGHolder; +struct CompileContext; + +/** \brief Entry point: Runs all the edge redundancy passes. */ +bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc); + +/** \brief Removes optional stuff from the front of floating patterns, since + * it's redundant with startDs. + * + * For each successor of startDs, remove any in-edges that aren't from either + * start or startDs. 
This allows us to prune redundant vertices at the start of + * a pattern: + * + * /(hat)?stand --> /stand/ + * + */ +bool removeSiblingsOfStartDotStar(NGHolder &g); + +/** \brief Removes all edges into virtual starts other than those from + * start/startDs, providing there is an edge from startDs. + * + * This operation is an optimisation for SOM mode. (see UE-1544) */ +bool optimiseVirtualStarts(NGHolder &g); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp index fba8ce7b74..90d6fd8b75 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp @@ -1,317 +1,317 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Equivalence class graph reduction pass. - */ - -#include "ng_equivalence.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_holder.h" -#include "ng_util.h" -#include "util/compile_context.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Equivalence class graph reduction pass. + */ + +#include "ng_equivalence.h" + +#include "grey.h" +#include "ng_depth.h" +#include "ng_holder.h" +#include "ng_util.h" +#include "util/compile_context.h" #include "util/flat_containers.h" -#include "util/graph_range.h" +#include "util/graph_range.h" #include "util/make_unique.h" #include "util/unordered.h" - -#include <algorithm> + +#include <algorithm> #include <memory> -#include <set> -#include <stack> -#include <vector> - -using namespace std; - -namespace ue2 { - -enum EquivalenceType { +#include <set> +#include <stack> +#include <vector> + +using namespace std; + +namespace ue2 { + +enum EquivalenceType { LEFT_EQUIVALENCE, - RIGHT_EQUIVALENCE, -}; - -namespace { -class VertexInfo; - -// custom comparison functor for unordered_set and flat_set -struct VertexInfoPtrCmp { - // for flat_set - bool operator()(const VertexInfo *a, const VertexInfo *b) const; -}; - + RIGHT_EQUIVALENCE, +}; + +namespace { +class VertexInfo; + +// custom comparison functor for unordered_set and flat_set +struct VertexInfoPtrCmp { + // for flat_set + bool operator()(const VertexInfo *a, const VertexInfo *b) const; +}; + using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>; -/** Precalculated (and maintained) information about a vertex. 
*/ -class VertexInfo { -public: - VertexInfo(NFAVertex v_in, const NGHolder &g) +/** Precalculated (and maintained) information about a vertex. */ +class VertexInfo { +public: + VertexInfo(NFAVertex v_in, const NGHolder &g) : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), - equivalence_class(~0), vertex_flags(g[v].assert_flags) {} - + equivalence_class(~0), vertex_flags(g[v].assert_flags) {} + VertexInfoSet pred; //!< predecessors of this vertex VertexInfoSet succ; //!< successors of this vertex - NFAVertex v; + NFAVertex v; size_t vert_index; - CharReach cr; - CharReach pred_cr; - CharReach succ_cr; + CharReach cr; + CharReach pred_cr; + CharReach succ_cr; flat_set<u32> edge_tops; /**< tops on edge from start */ - unsigned equivalence_class; - unsigned vertex_flags; -}; - -// compare two vertex info pointers on their vertex index -bool VertexInfoPtrCmp::operator()(const VertexInfo *a, - const VertexInfo *b) const { - return a->vert_index < b->vert_index; -} - -// to avoid traversing infomap each time we need to check the class during -// partitioning, we will cache the information pertaining to a particular class -class ClassInfo { -public: - struct ClassDepth { - ClassDepth() {} - ClassDepth(const NFAVertexDepth &d) - : d1(d.fromStart), d2(d.fromStartDotStar) {} - ClassDepth(const NFAVertexRevDepth &rd) - : d1(rd.toAccept), d2(rd.toAcceptEod) {} - DepthMinMax d1; - DepthMinMax d2; - }; + unsigned equivalence_class; + unsigned vertex_flags; +}; + +// compare two vertex info pointers on their vertex index +bool VertexInfoPtrCmp::operator()(const VertexInfo *a, + const VertexInfo *b) const { + return a->vert_index < b->vert_index; +} + +// to avoid traversing infomap each time we need to check the class during +// partitioning, we will cache the information pertaining to a particular class +class ClassInfo { +public: + struct ClassDepth { + ClassDepth() {} + ClassDepth(const NFAVertexDepth &d) + : d1(d.fromStart), d2(d.fromStartDotStar) {} + ClassDepth(const 
NFAVertexRevDepth &rd) + : d1(rd.toAccept), d2(rd.toAcceptEod) {} + DepthMinMax d1; + DepthMinMax d2; + }; ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in, - EquivalenceType eq) + EquivalenceType eq) : /* reports only matter for right-equiv */ rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()), vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr), adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr), /* treat non-special vertices the same */ node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {} - + bool operator==(const ClassInfo &b) const { return node_type == b.node_type && depth.d1 == b.depth.d1 && depth.d2 == b.depth.d2 && cr == b.cr && adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops && vertex_flags == b.vertex_flags && rs == b.rs; } - + size_t hash() const { return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1, depth.d2); - } - -private: - flat_set<ReportID> rs; /* for right equiv only */ - unsigned vertex_flags; + } + +private: + flat_set<ReportID> rs; /* for right equiv only */ + unsigned vertex_flags; flat_set<u32> edge_tops; - CharReach cr; - CharReach adjacent_cr; - unsigned node_type; - ClassDepth depth; -}; - -// work queue class. this contraption has two goals: -// 1. uniqueness of elements -// 2. 
FILO operation -class WorkQueue { -public: - explicit WorkQueue(unsigned c) { - q.reserve(c); - } - // unique push - void push(unsigned id) { - if (ids.insert(id).second) { - q.push_back(id); - } - } - - // pop - unsigned pop() { - unsigned id = q.back(); - ids.erase(id); - q.pop_back(); - return id; - } - - void append(WorkQueue &other) { - for (const auto &e : other) { - push(e); - } - } - - void clear() { - ids.clear(); - q.clear(); - } - - bool empty() const { - return ids.empty(); - } - - vector<unsigned>::const_iterator begin() const { - return q.begin(); - } - - vector<unsigned>::const_iterator end() const { - return q.end(); - } - - size_t capacity() const { - return q.capacity(); - } -private: + CharReach cr; + CharReach adjacent_cr; + unsigned node_type; + ClassDepth depth; +}; + +// work queue class. this contraption has two goals: +// 1. uniqueness of elements +// 2. FILO operation +class WorkQueue { +public: + explicit WorkQueue(unsigned c) { + q.reserve(c); + } + // unique push + void push(unsigned id) { + if (ids.insert(id).second) { + q.push_back(id); + } + } + + // pop + unsigned pop() { + unsigned id = q.back(); + ids.erase(id); + q.pop_back(); + return id; + } + + void append(WorkQueue &other) { + for (const auto &e : other) { + push(e); + } + } + + void clear() { + ids.clear(); + q.clear(); + } + + bool empty() const { + return ids.empty(); + } + + vector<unsigned>::const_iterator begin() const { + return q.begin(); + } + + vector<unsigned>::const_iterator end() const { + return q.end(); + } + + size_t capacity() const { + return q.capacity(); + } +private: unordered_set<unsigned> ids; //!< stores id's, for uniqueness - vector<unsigned> q; //!< vector of id's that we use as FILO. 
-}; - -} - -static -bool outIsIrreducible(NFAVertex &v, const NGHolder &g) { - unsigned nonSpecialVertices = 0; - for (auto w : adjacent_vertices_range(v, g)) { - if (!is_special(w, g) && w != v) { - nonSpecialVertices++; - } - } - return nonSpecialVertices == 1; -} - -static -bool inIsIrreducible(NFAVertex &v, const NGHolder &g) { - unsigned nonSpecialVertices = 0; - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_special(u, g) && u != v) { - nonSpecialVertices++; - } - } - return nonSpecialVertices == 1; -} - -/** Cheaply check whether this graph can't be reduced at all, because it is - * just a chain of vertices with no other edges. */ -static -bool isIrreducible(const NGHolder &g) { - for (auto v : vertices_range(g)) { - // skip specials - if (is_special(v, g)) { - continue; - } - - // we want meaningful in_degree to be 1. we also want to make sure we - // don't count self-loop + 1 incoming edge as not irreducible - if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) { - return false; - } - // we want meaningful out_degree to be 1. we also want to make sure we - // don't count self-loop + 1 outgoing edge as not irreducible - if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) { - return false; - } - } - - return true; -} - -#ifndef NDEBUG -static -bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) { - for (const auto &e : in_edges_range(v, g)) { - if (g[e].assert_flags != 0) { - return true; - } - } - for (const auto &e : out_edges_range(v, g)) { - if (g[e].assert_flags != 0) { - return true; - } - } - return false; -} -#endif - -// populate VertexInfo table -static + vector<unsigned> q; //!< vector of id's that we use as FILO. 
+}; + +} + +static +bool outIsIrreducible(NFAVertex &v, const NGHolder &g) { + unsigned nonSpecialVertices = 0; + for (auto w : adjacent_vertices_range(v, g)) { + if (!is_special(w, g) && w != v) { + nonSpecialVertices++; + } + } + return nonSpecialVertices == 1; +} + +static +bool inIsIrreducible(NFAVertex &v, const NGHolder &g) { + unsigned nonSpecialVertices = 0; + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_special(u, g) && u != v) { + nonSpecialVertices++; + } + } + return nonSpecialVertices == 1; +} + +/** Cheaply check whether this graph can't be reduced at all, because it is + * just a chain of vertices with no other edges. */ +static +bool isIrreducible(const NGHolder &g) { + for (auto v : vertices_range(g)) { + // skip specials + if (is_special(v, g)) { + continue; + } + + // we want meaningful in_degree to be 1. we also want to make sure we + // don't count self-loop + 1 incoming edge as not irreducible + if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) { + return false; + } + // we want meaningful out_degree to be 1. 
we also want to make sure we + // don't count self-loop + 1 outgoing edge as not irreducible + if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) { + return false; + } + } + + return true; +} + +#ifndef NDEBUG +static +bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) { + for (const auto &e : in_edges_range(v, g)) { + if (g[e].assert_flags != 0) { + return true; + } + } + for (const auto &e : out_edges_range(v, g)) { + if (g[e].assert_flags != 0) { + return true; + } + } + return false; +} +#endif + +// populate VertexInfo table +static vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) { const size_t num_verts = num_vertices(g); vector<unique_ptr<VertexInfo>> infos; infos.reserve(num_verts * 2); - vector<VertexInfo *> vertex_map; // indexed by vertex_index property + vector<VertexInfo *> vertex_map; // indexed by vertex_index property vertex_map.resize(num_verts); - - for (auto v : vertices_range(g)) { + + for (auto v : vertices_range(g)) { infos.push_back(std::make_unique<VertexInfo>(v, g)); vertex_map[g[v].index] = infos.back().get(); } - + // now, go through each vertex and populate its predecessor and successor // lists for (auto &vi : infos) { assert(vi); NFAVertex v = vi->v; - - // find predecessors + + // find predecessors for (const auto &e : in_edges_range(v, g)) { - NFAVertex u = source(e, g); + NFAVertex u = source(e, g); VertexInfo *u_vi = vertex_map[g[u].index]; - + vi->pred_cr |= u_vi->cr; vi->pred.insert(u_vi); - - // also set up edge tops - if (is_triggered(g) && u == g.start) { + + // also set up edge tops + if (is_triggered(g) && u == g.start) { vi->edge_tops = g[e].tops; - } - } - - // find successors + } + } + + // find successors for (auto w : adjacent_vertices_range(v, g)) { VertexInfo *w_vi = vertex_map[g[w].index]; vi->succ_cr |= w_vi->cr; vi->succ.insert(w_vi); - } + } assert(!hasEdgeAsserts(vi->v, g)); - } + } return infos; -} - -// store equivalence class in VertexInfo for each vertex -static +} + +// store equivalence 
class in VertexInfo for each vertex +static vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos, WorkQueue &work_queue, const NGHolder &g, EquivalenceType eq) { const size_t num_verts = infos.size(); - + vector<VertexInfoSet> classes; ue2_unordered_map<ClassInfo, unsigned> classinfomap; @@ -320,323 +320,323 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos, classes.reserve(num_verts); classinfomap.reserve(num_verts); - // get distances from start (or accept) for all vertices - // only one of them is used at a time, never both - vector<NFAVertexDepth> depths; - vector<NFAVertexRevDepth> rdepths; - - if (eq == LEFT_EQUIVALENCE) { + // get distances from start (or accept) for all vertices + // only one of them is used at a time, never both + vector<NFAVertexDepth> depths; + vector<NFAVertexRevDepth> rdepths; + + if (eq == LEFT_EQUIVALENCE) { depths = calcDepths(g); - } else { + } else { rdepths = calcRevDepths(g); - } - - // partition the graph based on CharReach + } + + // partition the graph based on CharReach for (auto &vi : infos) { assert(vi); - ClassInfo::ClassDepth depth; - - if (eq == LEFT_EQUIVALENCE) { + ClassInfo::ClassDepth depth; + + if (eq == LEFT_EQUIVALENCE) { depth = depths[vi->vert_index]; - } else { + } else { depth = rdepths[vi->vert_index]; - } + } ClassInfo ci(g, *vi, depth, eq); - - auto ii = classinfomap.find(ci); - if (ii == classinfomap.end()) { + + auto ii = classinfomap.find(ci); + if (ii == classinfomap.end()) { // vertex is in a new equivalence class by itself. unsigned eq_class = classes.size(); vi->equivalence_class = eq_class; classes.push_back({vi.get()}); classinfomap.emplace(move(ci), eq_class); - } else { + } else { // vertex is added to an existing class. 
- unsigned eq_class = ii->second; + unsigned eq_class = ii->second; vi->equivalence_class = eq_class; classes.at(eq_class).insert(vi.get()); - - // we now know that this particular class has more than one - // vertex, so we add it to the work queue - work_queue.push(eq_class); - } - } + + // we now know that this particular class has more than one + // vertex, so we add it to the work queue + work_queue.push(eq_class); + } + } DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size()); return classes; -} - -// generalized equivalence processing (left and right) -// basically, goes through every vertex in a class and checks if all successor or -// predecessor classes match in all vertices. if classes mismatch, a vertex is -// split into a separate class, along with all vertices having the same set of -// successor/predecessor classes. the opposite side (successors for left -// equivalence, predecessors for right equivalence) classes get revalidated in -// case of a split. -static +} + +// generalized equivalence processing (left and right) +// basically, goes through every vertex in a class and checks if all successor or +// predecessor classes match in all vertices. if classes mismatch, a vertex is +// split into a separate class, along with all vertices having the same set of +// successor/predecessor classes. the opposite side (successors for left +// equivalence, predecessors for right equivalence) classes get revalidated in +// case of a split. 
+static void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue, - EquivalenceType eq_type) { - // now, go through the work queue until it's empty - map<flat_set<unsigned>, VertexInfoSet> tentative_classmap; - flat_set<unsigned> cur_classes; - // local work queue, to store classes we want to revalidate in case of split - WorkQueue reval_queue(work_queue.capacity()); - - while (!work_queue.empty()) { - // dequeue our class from the work queue - unsigned cur_class = work_queue.pop(); - - // get all vertices in current equivalence class + EquivalenceType eq_type) { + // now, go through the work queue until it's empty + map<flat_set<unsigned>, VertexInfoSet> tentative_classmap; + flat_set<unsigned> cur_classes; + // local work queue, to store classes we want to revalidate in case of split + WorkQueue reval_queue(work_queue.capacity()); + + while (!work_queue.empty()) { + // dequeue our class from the work queue + unsigned cur_class = work_queue.pop(); + + // get all vertices in current equivalence class VertexInfoSet &cur_class_vertices = classes.at(cur_class); - - if (cur_class_vertices.size() < 2) { - continue; - } - - // clear data from previous iterations - tentative_classmap.clear(); - - DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n", - cur_class, cur_class_vertices.size()); - - // go through vertices in this class - for (VertexInfo *vi : cur_class_vertices) { - cur_classes.clear(); - - // get vertex lists for equivalence vertices and vertices for - // revalidation in case of split - const auto &eq_vertices = - (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ; - const auto &reval_vertices = - (eq_type == LEFT_EQUIVALENCE) ? 
vi->succ : vi->pred; - - // go through equivalence and note the classes - for (const VertexInfo *tmp : eq_vertices) { - cur_classes.insert(tmp->equivalence_class); - } - - // note all the classes that need to be reevaluated - for (const VertexInfo *tmp : reval_vertices) { - reval_queue.push(tmp->equivalence_class); - } - - VertexInfoSet &tentative_classes = tentative_classmap[cur_classes]; - tentative_classes.insert(vi); - } - - // if we found more than one class, split and revalidate everything - if (tentative_classmap.size() > 1) { - auto tmi = tentative_classmap.begin(); - - // start from the second class - for (++tmi; tmi != tentative_classmap.end(); ++tmi) { - const VertexInfoSet &vertices_to_split = tmi->second; + + if (cur_class_vertices.size() < 2) { + continue; + } + + // clear data from previous iterations + tentative_classmap.clear(); + + DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n", + cur_class, cur_class_vertices.size()); + + // go through vertices in this class + for (VertexInfo *vi : cur_class_vertices) { + cur_classes.clear(); + + // get vertex lists for equivalence vertices and vertices for + // revalidation in case of split + const auto &eq_vertices = + (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ; + const auto &reval_vertices = + (eq_type == LEFT_EQUIVALENCE) ? 
vi->succ : vi->pred; + + // go through equivalence and note the classes + for (const VertexInfo *tmp : eq_vertices) { + cur_classes.insert(tmp->equivalence_class); + } + + // note all the classes that need to be reevaluated + for (const VertexInfo *tmp : reval_vertices) { + reval_queue.push(tmp->equivalence_class); + } + + VertexInfoSet &tentative_classes = tentative_classmap[cur_classes]; + tentative_classes.insert(vi); + } + + // if we found more than one class, split and revalidate everything + if (tentative_classmap.size() > 1) { + auto tmi = tentative_classmap.begin(); + + // start from the second class + for (++tmi; tmi != tentative_classmap.end(); ++tmi) { + const VertexInfoSet &vertices_to_split = tmi->second; unsigned new_class = classes.size(); VertexInfoSet new_class_vertices; - - for (VertexInfo *vi : vertices_to_split) { - vi->equivalence_class = new_class; + + for (VertexInfo *vi : vertices_to_split) { + vi->equivalence_class = new_class; // note: we cannot use the cur_class_vertices ref, as it is // invalidated by modifications to the classes vector. classes[cur_class].erase(vi); - new_class_vertices.insert(vi); - } + new_class_vertices.insert(vi); + } classes.push_back(move(new_class_vertices)); if (contains(tmi->first, cur_class)) { - reval_queue.push(new_class); - } - } - work_queue.append(reval_queue); - } - reval_queue.clear(); - } -} - -static -bool require_separate_eod_vertex(const VertexInfoSet &vert_infos, - const NGHolder &g) { - /* We require separate eod and normal accept vertices for a class if we have - * both normal accepts and eod accepts AND the reports are different for eod - * and non-eod reports. 
*/ - - flat_set<ReportID> non_eod; - flat_set<ReportID> eod; - - for (const VertexInfo *vi : vert_infos) { - NFAVertex v = vi->v; - - if (edge(v, g.accept, g).second) { - insert(&non_eod, g[v].reports); - } - - if (edge(v, g.acceptEod, g).second) { - insert(&eod, g[v].reports); - } - } - - if (non_eod.empty() || eod.empty()) { - return false; - } - - return non_eod != eod; - -} - -static + reval_queue.push(new_class); + } + } + work_queue.append(reval_queue); + } + reval_queue.clear(); + } +} + +static +bool require_separate_eod_vertex(const VertexInfoSet &vert_infos, + const NGHolder &g) { + /* We require separate eod and normal accept vertices for a class if we have + * both normal accepts and eod accepts AND the reports are different for eod + * and non-eod reports. */ + + flat_set<ReportID> non_eod; + flat_set<ReportID> eod; + + for (const VertexInfo *vi : vert_infos) { + NFAVertex v = vi->v; + + if (edge(v, g.accept, g).second) { + insert(&non_eod, g[v].reports); + } + + if (edge(v, g.acceptEod, g).second) { + insert(&eod, g[v].reports); + } + } + + if (non_eod.empty() || eod.empty()) { + return false; + } + + return non_eod != eod; + +} + +static void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, unsigned eq_class, VertexInfoSet &cur_class_vertices, set<NFAVertex> *toRemove) { - DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a " - "single vertex.\n", cur_class_vertices.size(), eq_class); - - // replace equivalence class with a single vertex: - // 1. create new vertex with matching properties - // 2. wire all predecessors to new vertex - // 2a. update info for new vertex with new predecessors - // 2b. update each predecessor's successor list - // 3. wire all successors to new vertex - // 3a. update info for new vertex with new successors - // 3b. update each successor's predecessor list - // 4. 
remove old vertex - - // any differences between vertex properties were resolved during - // initial partitioning, so we assume that every vertex in equivalence - // class has the same CharReach et al. - // so, we find the first vertex in our class and get all its properties - - /* For left equivalence, if the members have different reporting behaviour - * we sometimes require two vertices to be created (one connected to accept - * and one to accepteod) */ - - NFAVertex old_v = (*cur_class_vertices.begin())->v; - NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same - * props */ - g[new_v].reports.clear(); /* populated as we pull in succs */ - - // store this vertex in our global vertex list + DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a " + "single vertex.\n", cur_class_vertices.size(), eq_class); + + // replace equivalence class with a single vertex: + // 1. create new vertex with matching properties + // 2. wire all predecessors to new vertex + // 2a. update info for new vertex with new predecessors + // 2b. update each predecessor's successor list + // 3. wire all successors to new vertex + // 3a. update info for new vertex with new successors + // 3b. update each successor's predecessor list + // 4. remove old vertex + + // any differences between vertex properties were resolved during + // initial partitioning, so we assume that every vertex in equivalence + // class has the same CharReach et al. 
+ // so, we find the first vertex in our class and get all its properties + + /* For left equivalence, if the members have different reporting behaviour + * we sometimes require two vertices to be created (one connected to accept + * and one to accepteod) */ + + NFAVertex old_v = (*cur_class_vertices.begin())->v; + NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same + * props */ + g[new_v].reports.clear(); /* populated as we pull in succs */ + + // store this vertex in our global vertex list infos.push_back(std::make_unique<VertexInfo>(new_v, g)); VertexInfo *new_vertex_info = infos.back().get(); - - NFAVertex new_v_eod = NGHolder::null_vertex(); - VertexInfo *new_vertex_info_eod = nullptr; - - if (require_separate_eod_vertex(cur_class_vertices, g)) { - new_v_eod = clone_vertex(g, old_v); - g[new_v_eod].reports.clear(); + + NFAVertex new_v_eod = NGHolder::null_vertex(); + VertexInfo *new_vertex_info_eod = nullptr; + + if (require_separate_eod_vertex(cur_class_vertices, g)) { + new_v_eod = clone_vertex(g, old_v); + g[new_v_eod].reports.clear(); infos.push_back(std::make_unique<VertexInfo>(new_v_eod, g)); new_vertex_info_eod = infos.back().get(); - } - + } + const auto &edgetops = (*cur_class_vertices.begin())->edge_tops; - for (VertexInfo *old_vertex_info : cur_class_vertices) { - assert(old_vertex_info->equivalence_class == eq_class); - - // mark this vertex for removal - toRemove->insert(old_vertex_info->v); - - // for each predecessor, add edge to new vertex and update info - for (VertexInfo *pred_info : old_vertex_info->pred) { - // update info for new vertex - new_vertex_info->pred.insert(pred_info); - if (new_vertex_info_eod) { - new_vertex_info_eod->pred.insert(pred_info); - } - - // update info for predecessor - pred_info->succ.erase(old_vertex_info); - - // if edge doesn't exist, create it + for (VertexInfo *old_vertex_info : cur_class_vertices) { + assert(old_vertex_info->equivalence_class == eq_class); + + // mark this vertex for 
removal + toRemove->insert(old_vertex_info->v); + + // for each predecessor, add edge to new vertex and update info + for (VertexInfo *pred_info : old_vertex_info->pred) { + // update info for new vertex + new_vertex_info->pred.insert(pred_info); + if (new_vertex_info_eod) { + new_vertex_info_eod->pred.insert(pred_info); + } + + // update info for predecessor + pred_info->succ.erase(old_vertex_info); + + // if edge doesn't exist, create it NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g); - + // put edge tops, if applicable if (!edgetops.empty()) { assert(g[e].tops.empty() || g[e].tops == edgetops); g[e].tops = edgetops; - } - - pred_info->succ.insert(new_vertex_info); - - if (new_v_eod) { - NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, + } + + pred_info->succ.insert(new_vertex_info); + + if (new_v_eod) { + NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, g); - + // put edge tops, if applicable if (!edgetops.empty()) { assert(g[e].tops.empty() || g[e].tops == edgetops); g[ee].tops = edgetops; - } - - pred_info->succ.insert(new_vertex_info_eod); - } - } - - // for each successor, add edge from new vertex and update info - for (VertexInfo *succ_info : old_vertex_info->succ) { - NFAVertex succ_v = succ_info->v; - - // update info for successor - succ_info->pred.erase(old_vertex_info); - - if (new_v_eod && succ_v == g.acceptEod) { - // update info for new vertex - new_vertex_info_eod->succ.insert(succ_info); - insert(&g[new_v_eod].reports, - g[old_vertex_info->v].reports); - - add_edge_if_not_present(new_v_eod, succ_v, g); - succ_info->pred.insert(new_vertex_info_eod); - } else { - // update info for new vertex - new_vertex_info->succ.insert(succ_info); - - // if edge doesn't exist, create it - add_edge_if_not_present(new_v, succ_v, g); - succ_info->pred.insert(new_vertex_info); - - if (is_any_accept(succ_v, g)) { - insert(&g[new_v].reports, - g[old_vertex_info->v].reports); - } - } - } - } - - // update classmap - 
new_vertex_info->equivalence_class = eq_class; - cur_class_vertices.insert(new_vertex_info); -} - -// walk through vertices of an equivalence class and replace them with a single -// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the -// report behaviour with a single vertex). -static + } + + pred_info->succ.insert(new_vertex_info_eod); + } + } + + // for each successor, add edge from new vertex and update info + for (VertexInfo *succ_info : old_vertex_info->succ) { + NFAVertex succ_v = succ_info->v; + + // update info for successor + succ_info->pred.erase(old_vertex_info); + + if (new_v_eod && succ_v == g.acceptEod) { + // update info for new vertex + new_vertex_info_eod->succ.insert(succ_info); + insert(&g[new_v_eod].reports, + g[old_vertex_info->v].reports); + + add_edge_if_not_present(new_v_eod, succ_v, g); + succ_info->pred.insert(new_vertex_info_eod); + } else { + // update info for new vertex + new_vertex_info->succ.insert(succ_info); + + // if edge doesn't exist, create it + add_edge_if_not_present(new_v, succ_v, g); + succ_info->pred.insert(new_vertex_info); + + if (is_any_accept(succ_v, g)) { + insert(&g[new_v].reports, + g[old_vertex_info->v].reports); + } + } + } + } + + // update classmap + new_vertex_info->equivalence_class = eq_class; + cur_class_vertices.insert(new_vertex_info); +} + +// walk through vertices of an equivalence class and replace them with a single +// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the +// report behaviour with a single vertex). 
+static bool mergeEquivalentClasses(vector<VertexInfoSet> &classes, vector<unique_ptr<VertexInfo>> &infos, - NGHolder &g) { - bool merged = false; - set<NFAVertex> toRemove; - - // go through all classes and merge classes with more than one vertex + NGHolder &g) { + bool merged = false; + set<NFAVertex> toRemove; + + // go through all classes and merge classes with more than one vertex for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) { - // get all vertices in current equivalence class + // get all vertices in current equivalence class VertexInfoSet &cur_class_vertices = classes[eq_class]; - - // we don't care for single-vertex classes - if (cur_class_vertices.size() > 1) { - merged = true; - mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove); - } - } - - // remove all dead vertices - DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size()); - remove_vertices(toRemove, g); - - return merged; -} - + + // we don't care for single-vertex classes + if (cur_class_vertices.size() > 1) { + merged = true; + mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove); + } + } + + // remove all dead vertices + DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size()); + remove_vertices(toRemove, g); + + return merged; +} + static bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { // create a list of equivalence classes to check @@ -657,26 +657,26 @@ bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { return mergeEquivalentClasses(classes, infos, g); } -bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { - if (!cc.grey.equivalenceEnable) { - DEBUG_PRINTF("equivalence processing disabled in grey box\n"); - return false; - } +bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { + if (!cc.grey.equivalenceEnable) { + DEBUG_PRINTF("equivalence processing disabled in grey box\n"); + return false; + } renumber_vertices(g); - - // Cheap check: if all the non-special vertices have 
in-degree one and - // out-degree one, there's no redundancy in this here graph and we can - // vamoose. - if (isIrreducible(g)) { - DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n"); - return false; - } - - // take note if we have merged any vertices - bool merge = false; + + // Cheap check: if all the non-special vertices have in-degree one and + // out-degree one, there's no redundancy in this here graph and we can + // vamoose. + if (isIrreducible(g)) { + DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n"); + return false; + } + + // take note if we have merged any vertices + bool merge = false; merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE); merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE); - return merge; -} - -} // namespace ue2 + return merge; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h index ef8f92e7e3..d716841e94 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h @@ -1,47 +1,47 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Equivalence class graph reduction pass. - */ - -#ifndef NG_EQUIVALENCE_H_ -#define NG_EQUIVALENCE_H_ - -namespace ue2 { - -class NGHolder; -struct CompileContext; - -/** Attempt to make the NFA graph \p g smaller by performing a number of local - * transformations. */ -bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc); - -} // namespace ue2 - -#endif /* NG_EQUIVALENCE_H_ */ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Equivalence class graph reduction pass. + */ + +#ifndef NG_EQUIVALENCE_H_ +#define NG_EQUIVALENCE_H_ + +namespace ue2 { + +class NGHolder; +struct CompileContext; + +/** Attempt to make the NFA graph \p g smaller by performing a number of local + * transformations. */ +bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc); + +} // namespace ue2 + +#endif /* NG_EQUIVALENCE_H_ */ diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp index 9d90489471..9ef0f01ce7 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp @@ -1,328 +1,328 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Execute an NFA over a given input, returning the set of states that - * are active afterwards. - * - * Note: although our external interfaces for execute_graph() use std::set, we - * use a dynamic bitset containing the vertex indices internally for - * performance. 
- */ -#include "ng_execute.h" - -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" -#include "util/ue2string.h" - -#include <sstream> -#include <string> - -#include <boost/dynamic_bitset.hpp> -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/reverse_graph.hpp> - -using namespace std; -using boost::dynamic_bitset; - -namespace ue2 { - -struct StateInfo { - StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {} + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Execute an NFA over a given input, returning the set of states that + * are active afterwards. + * + * Note: although our external interfaces for execute_graph() use std::set, we + * use a dynamic bitset containing the vertex indices internally for + * performance. + */ +#include "ng_execute.h" + +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" +#include "util/ue2string.h" + +#include <sstream> +#include <string> + +#include <boost/dynamic_bitset.hpp> +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/reverse_graph.hpp> + +using namespace std; +using boost::dynamic_bitset; + +namespace ue2 { + +struct StateInfo { + StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {} StateInfo() : vertex(NGHolder::null_vertex()) {} - NFAVertex vertex; - CharReach reach; -}; - -#ifdef DEBUG -static -std::string dumpStates(const dynamic_bitset<> &s) { - std::ostringstream oss; - for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) { - oss << i << " "; - } - return oss.str(); -} -#endif - -static -void step(const NGHolder &g, const vector<StateInfo> &info, - const dynamic_bitset<> &in, dynamic_bitset<> *out) { - out->reset(); - for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { - NFAVertex u = info[i].vertex; - for (auto v : 
adjacent_vertices_range(u, g)) { - out->set(g[v].index); - } - } -} - -static -void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states, - const CharReach &cr) { - for (size_t i = states->find_first(); i != states->npos; - i = states->find_next(i)) { - if ((info[i].reach & cr).none()) { - states->reset(i); - } - } -} - -template<typename inputT> -static -void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info, - const inputT &input, dynamic_bitset<> *states, - bool kill_sds) { - dynamic_bitset<> &curr = *states; - dynamic_bitset<> next(curr.size()); - DEBUG_PRINTF("%zu states in\n", states->count()); - - for (const auto &e : input) { - DEBUG_PRINTF("processing %s\n", describeClass(e).c_str()); - step(g, info, curr, &next); - if (kill_sds) { - next.reset(NODE_START_DOTSTAR); - } - filter_by_reach(info, &next, e); - next.swap(curr); - - if (curr.empty()) { - DEBUG_PRINTF("went dead\n"); - break; - } - } - - DEBUG_PRINTF("%zu states out\n", states->size()); -} - -static -dynamic_bitset<> makeStateBitset(const NGHolder &g, - const flat_set<NFAVertex> &in) { - dynamic_bitset<> work_states(num_vertices(g)); - for (const auto &v : in) { - u32 idx = g[v].index; - work_states.set(idx); - } - return work_states; -} - -static -flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in, - const vector<StateInfo> &info) { - flat_set<NFAVertex> out; - for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { - out.insert(info[i].vertex); - } - return out; -} - -static -vector<StateInfo> makeInfoTable(const NGHolder &g) { - vector<StateInfo> info(num_vertices(g)); - for (auto v : vertices_range(g)) { - u32 idx = g[v].index; - const CharReach &cr = g[v].char_reach; - assert(idx < info.size()); - info[idx] = StateInfo(v, cr); - } - return info; -} - -flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input, - const flat_set<NFAVertex> &initial_states, - bool kill_sds) { - assert(hasCorrectlyNumberedVertices(g)); - - 
auto info = makeInfoTable(g); - auto work_states = makeStateBitset(g, initial_states); - - execute_graph_i(g, info, input, &work_states, kill_sds); - - return getVertices(work_states, info); -} - -flat_set<NFAVertex> execute_graph(const NGHolder &g, - const vector<CharReach> &input, - const flat_set<NFAVertex> &initial_states) { - assert(hasCorrectlyNumberedVertices(g)); - - auto info = makeInfoTable(g); - auto work_states = makeStateBitset(g, initial_states); - - execute_graph_i(g, info, input, &work_states, false); - - return getVertices(work_states, info); -} - -namespace { -class eg_visitor : public boost::default_dfs_visitor { -public: - eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in, - const NGHolder &input_g_in, - map<NFAVertex, dynamic_bitset<> > &states_in) - : vertex_count(num_vertices(running_g_in)), running_g(running_g_in), - info(info_in), input_g(input_g_in), states(states_in), - succs(vertex_count) {} - + NFAVertex vertex; + CharReach reach; +}; + +#ifdef DEBUG +static +std::string dumpStates(const dynamic_bitset<> &s) { + std::ostringstream oss; + for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) { + oss << i << " "; + } + return oss.str(); +} +#endif + +static +void step(const NGHolder &g, const vector<StateInfo> &info, + const dynamic_bitset<> &in, dynamic_bitset<> *out) { + out->reset(); + for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { + NFAVertex u = info[i].vertex; + for (auto v : adjacent_vertices_range(u, g)) { + out->set(g[v].index); + } + } +} + +static +void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states, + const CharReach &cr) { + for (size_t i = states->find_first(); i != states->npos; + i = states->find_next(i)) { + if ((info[i].reach & cr).none()) { + states->reset(i); + } + } +} + +template<typename inputT> +static +void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info, + const inputT &input, dynamic_bitset<> *states, + bool 
kill_sds) { + dynamic_bitset<> &curr = *states; + dynamic_bitset<> next(curr.size()); + DEBUG_PRINTF("%zu states in\n", states->count()); + + for (const auto &e : input) { + DEBUG_PRINTF("processing %s\n", describeClass(e).c_str()); + step(g, info, curr, &next); + if (kill_sds) { + next.reset(NODE_START_DOTSTAR); + } + filter_by_reach(info, &next, e); + next.swap(curr); + + if (curr.empty()) { + DEBUG_PRINTF("went dead\n"); + break; + } + } + + DEBUG_PRINTF("%zu states out\n", states->size()); +} + +static +dynamic_bitset<> makeStateBitset(const NGHolder &g, + const flat_set<NFAVertex> &in) { + dynamic_bitset<> work_states(num_vertices(g)); + for (const auto &v : in) { + u32 idx = g[v].index; + work_states.set(idx); + } + return work_states; +} + +static +flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in, + const vector<StateInfo> &info) { + flat_set<NFAVertex> out; + for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { + out.insert(info[i].vertex); + } + return out; +} + +static +vector<StateInfo> makeInfoTable(const NGHolder &g) { + vector<StateInfo> info(num_vertices(g)); + for (auto v : vertices_range(g)) { + u32 idx = g[v].index; + const CharReach &cr = g[v].char_reach; + assert(idx < info.size()); + info[idx] = StateInfo(v, cr); + } + return info; +} + +flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input, + const flat_set<NFAVertex> &initial_states, + bool kill_sds) { + assert(hasCorrectlyNumberedVertices(g)); + + auto info = makeInfoTable(g); + auto work_states = makeStateBitset(g, initial_states); + + execute_graph_i(g, info, input, &work_states, kill_sds); + + return getVertices(work_states, info); +} + +flat_set<NFAVertex> execute_graph(const NGHolder &g, + const vector<CharReach> &input, + const flat_set<NFAVertex> &initial_states) { + assert(hasCorrectlyNumberedVertices(g)); + + auto info = makeInfoTable(g); + auto work_states = makeStateBitset(g, initial_states); + + execute_graph_i(g, info, input, 
&work_states, false); + + return getVertices(work_states, info); +} + +namespace { +class eg_visitor : public boost::default_dfs_visitor { +public: + eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in, + const NGHolder &input_g_in, + map<NFAVertex, dynamic_bitset<> > &states_in) + : vertex_count(num_vertices(running_g_in)), running_g(running_g_in), + info(info_in), input_g(input_g_in), states(states_in), + succs(vertex_count) {} + void finish_vertex(NFAVertex input_v, const boost::reverse_graph<NGHolder, const NGHolder &> &) { - if (input_v == input_g.accept) { - return; - } - assert(input_v != input_g.acceptEod); - + if (input_v == input_g.accept) { + return; + } + assert(input_v != input_g.acceptEod); + DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index); - - /* finish vertex is called on vertex --> implies that all its parents - * (in the forward graph) are also finished. Our parents will have - * pushed all of their successors for us into our stateset. */ - states[input_v].resize(vertex_count); - dynamic_bitset<> our_states = states[input_v]; - states[input_v].reset(); - - filter_by_reach(info, &our_states, - input_g[input_v].char_reach); - - if (input_v != input_g.startDs && - edge(input_v, input_v, input_g).second) { - bool changed; - do { - DEBUG_PRINTF("actually not finished -> have self loop\n"); - succs.reset(); - step(running_g, info, our_states, &succs); - filter_by_reach(info, &succs, - input_g[input_v].char_reach); - dynamic_bitset<> our_states2 = our_states | succs; - changed = our_states2 != our_states; - our_states.swap(our_states2); - } while (changed); - } - - DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str()); - - succs.reset(); - step(running_g, info, our_states, &succs); - - /* we need to push into all our (forward) children their successors - * from us. 
*/ - for (auto v : adjacent_vertices_range(input_v, input_g)) { + + /* finish vertex is called on vertex --> implies that all its parents + * (in the forward graph) are also finished. Our parents will have + * pushed all of their successors for us into our stateset. */ + states[input_v].resize(vertex_count); + dynamic_bitset<> our_states = states[input_v]; + states[input_v].reset(); + + filter_by_reach(info, &our_states, + input_g[input_v].char_reach); + + if (input_v != input_g.startDs && + edge(input_v, input_v, input_g).second) { + bool changed; + do { + DEBUG_PRINTF("actually not finished -> have self loop\n"); + succs.reset(); + step(running_g, info, our_states, &succs); + filter_by_reach(info, &succs, + input_g[input_v].char_reach); + dynamic_bitset<> our_states2 = our_states | succs; + changed = our_states2 != our_states; + our_states.swap(our_states2); + } while (changed); + } + + DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str()); + + succs.reset(); + step(running_g, info, our_states, &succs); + + /* we need to push into all our (forward) children their successors + * from us. */ + for (auto v : adjacent_vertices_range(input_v, input_g)) { DEBUG_PRINTF("pushing our states to pstate %zu\n", - input_g[v].index); - if (v == input_g.startDs) { - /* no need for intra start edges */ - continue; - } - - states[v].resize(vertex_count); // May not yet exist - - if (v != input_g.accept) { - states[v] |= succs; - } else { - /* accept is a magical pseudo state which does not consume - * characters and we are using to collect the output states. We - * must fill it with our states rather than our succs. 
*/ - DEBUG_PRINTF("prev outputted rstates: %s\n", - dumpStates(states[v]).c_str()); - DEBUG_PRINTF("outputted rstates: %s\n", - dumpStates(our_states).c_str()); - - states[v] |= our_states; - - DEBUG_PRINTF("new outputted rstates: %s\n", - dumpStates(states[v]).c_str()); - } - } - - /* note: the states at this vertex are no longer required */ - } - -private: - const size_t vertex_count; - const NGHolder &running_g; - const vector<StateInfo> &info; - const NGHolder &input_g; - map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of - states in running_g */ - dynamic_bitset<> succs; // temp use internally -}; -} // namespace - -flat_set<NFAVertex> execute_graph(const NGHolder &running_g, - const NGHolder &input_dag, - const flat_set<NFAVertex> &input_start_states, - const flat_set<NFAVertex> &initial_states) { - DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n", - num_vertices(running_g), num_vertices(input_dag)); - assert(hasCorrectlyNumberedVertices(running_g)); - assert(in_degree(input_dag.acceptEod, input_dag) == 1); - - map<NFAVertex, boost::default_color_type> colours; - /* could just a topo order, but really it is time to pull a slightly bigger - * gun: DFS */ + input_g[v].index); + if (v == input_g.startDs) { + /* no need for intra start edges */ + continue; + } + + states[v].resize(vertex_count); // May not yet exist + + if (v != input_g.accept) { + states[v] |= succs; + } else { + /* accept is a magical pseudo state which does not consume + * characters and we are using to collect the output states. We + * must fill it with our states rather than our succs. 
*/ + DEBUG_PRINTF("prev outputted rstates: %s\n", + dumpStates(states[v]).c_str()); + DEBUG_PRINTF("outputted rstates: %s\n", + dumpStates(our_states).c_str()); + + states[v] |= our_states; + + DEBUG_PRINTF("new outputted rstates: %s\n", + dumpStates(states[v]).c_str()); + } + } + + /* note: the states at this vertex are no longer required */ + } + +private: + const size_t vertex_count; + const NGHolder &running_g; + const vector<StateInfo> &info; + const NGHolder &input_g; + map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of + states in running_g */ + dynamic_bitset<> succs; // temp use internally +}; +} // namespace + +flat_set<NFAVertex> execute_graph(const NGHolder &running_g, + const NGHolder &input_dag, + const flat_set<NFAVertex> &input_start_states, + const flat_set<NFAVertex> &initial_states) { + DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n", + num_vertices(running_g), num_vertices(input_dag)); + assert(hasCorrectlyNumberedVertices(running_g)); + assert(in_degree(input_dag.acceptEod, input_dag) == 1); + + map<NFAVertex, boost::default_color_type> colours; + /* could just a topo order, but really it is time to pull a slightly bigger + * gun: DFS */ boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag); - map<NFAVertex, dynamic_bitset<> > dfs_states; - - auto info = makeInfoTable(running_g); - auto input_fs = makeStateBitset(running_g, initial_states); - - for (auto v : input_start_states) { - dfs_states[v] = input_fs; - } - - depth_first_visit(revg, input_dag.accept, - eg_visitor(running_g, info, input_dag, dfs_states), - make_assoc_property_map(colours)); - - auto states = getVertices(dfs_states[input_dag.accept], info); - -#ifdef DEBUG - DEBUG_PRINTF(" output rstates:"); - for (const auto &v : states) { + map<NFAVertex, dynamic_bitset<> > dfs_states; + + auto info = makeInfoTable(running_g); + auto input_fs = makeStateBitset(running_g, initial_states); + + for (auto v : input_start_states) { + 
dfs_states[v] = input_fs; + } + + depth_first_visit(revg, input_dag.accept, + eg_visitor(running_g, info, input_dag, dfs_states), + make_assoc_property_map(colours)); + + auto states = getVertices(dfs_states[input_dag.accept], info); + +#ifdef DEBUG + DEBUG_PRINTF(" output rstates:"); + for (const auto &v : states) { printf(" %zu", running_g[v].index); - } - printf("\n"); -#endif - - return states; -} - -flat_set<NFAVertex> execute_graph(const NGHolder &running_g, - const NGHolder &input_dag, - const flat_set<NFAVertex> &initial_states) { - auto input_start_states = {input_dag.start, input_dag.startDs}; - return execute_graph(running_g, input_dag, input_start_states, - initial_states); -} - + } + printf("\n"); +#endif + + return states; +} + +flat_set<NFAVertex> execute_graph(const NGHolder &running_g, + const NGHolder &input_dag, + const flat_set<NFAVertex> &initial_states) { + auto input_start_states = {input_dag.start, input_dag.startDs}; + return execute_graph(running_g, input_dag, input_start_states, + initial_states); +} + static bool can_die_early(const NGHolder &g, const vector<StateInfo> &info, const dynamic_bitset<> &s, @@ -368,4 +368,4 @@ bool can_die_early(const NGHolder &g, u32 age_limit) { age_limit); } -} // namespace ue2 +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h index 32f5520d33..17625b2aa3 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h @@ -1,72 +1,72 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Execute an NFA over a given input, returning the set of states that - * are active afterwards. - */ - -#ifndef NG_EXECUTE_H -#define NG_EXECUTE_H - -#include "ng_holder.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Execute an NFA over a given input, returning the set of states that + * are active afterwards. + */ + +#ifndef NG_EXECUTE_H +#define NG_EXECUTE_H + +#include "ng_holder.h" #include "util/flat_containers.h" - -#include <vector> - -namespace ue2 { - -class CharReach; -struct ue2_literal; - -flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input, - const flat_set<NFAVertex> &initial, - bool kill_sds = false); - -flat_set<NFAVertex> execute_graph(const NGHolder &g, - const std::vector<CharReach> &input, - const flat_set<NFAVertex> &initial); - -/** on exit, states contains any state which may still be enabled after - * receiving an input which corresponds to some path through the input_dag from - * start or startDs to accept. input_dag MUST be acyclic aside from self-loops. 
- */ -flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag, - const flat_set<NFAVertex> &initial); - -/* as above, but able to specify the source states for the input graph */ -flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag, - const flat_set<NFAVertex> &input_start_states, - const flat_set<NFAVertex> &initial); - + +#include <vector> + +namespace ue2 { + +class CharReach; +struct ue2_literal; + +flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input, + const flat_set<NFAVertex> &initial, + bool kill_sds = false); + +flat_set<NFAVertex> execute_graph(const NGHolder &g, + const std::vector<CharReach> &input, + const flat_set<NFAVertex> &initial); + +/** on exit, states contains any state which may still be enabled after + * receiving an input which corresponds to some path through the input_dag from + * start or startDs to accept. input_dag MUST be acyclic aside from self-loops. + */ +flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag, + const flat_set<NFAVertex> &initial); + +/* as above, but able to specify the source states for the input graph */ +flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag, + const flat_set<NFAVertex> &input_start_states, + const flat_set<NFAVertex> &initial); + /* returns true if it is possible for the nfa to die within age_limit bytes */ bool can_die_early(const NGHolder &g, u32 age_limit); -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp index f8abbd04a2..378c22bf82 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp @@ -1,102 +1,102 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted 
provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file * \brief Code for discovering properties of an NFA graph used by * hs_expression_info(). 
- */ -#include "ng_expr_info.h" - -#include "hs_internal.h" -#include "ng.h" -#include "ng_asserts.h" -#include "ng_depth.h" -#include "ng_edge_redundancy.h" + */ +#include "ng_expr_info.h" + +#include "hs_internal.h" +#include "ng.h" +#include "ng_asserts.h" +#include "ng_depth.h" +#include "ng_edge_redundancy.h" #include "ng_extparam.h" #include "ng_fuzzy.h" -#include "ng_holder.h" +#include "ng_holder.h" #include "ng_prune.h" -#include "ng_reports.h" -#include "ng_util.h" -#include "ue2common.h" +#include "ng_reports.h" +#include "ng_util.h" +#include "ue2common.h" #include "compiler/expression_info.h" -#include "parser/position.h" // for POS flags -#include "util/boundary_reports.h" -#include "util/compile_context.h" -#include "util/depth.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/report_manager.h" - -#include <limits.h> -#include <set> - -using namespace std; - -namespace ue2 { - -/* get rid of leading \b and multiline ^ vertices */ -static +#include "parser/position.h" // for POS flags +#include "util/boundary_reports.h" +#include "util/compile_context.h" +#include "util/depth.h" +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/report_manager.h" + +#include <limits.h> +#include <set> + +using namespace std; + +namespace ue2 { + +/* get rid of leading \b and multiline ^ vertices */ +static void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) { - vector<NFAVertex> victims; - + vector<NFAVertex> victims; + for (auto v : adjacent_vertices_range(root, g)) { if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { - DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n"); - victims.push_back(v); - } - } - - for (auto u : victims) { + DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n"); + victims.push_back(v); + } + } + + for (auto u : victims) { for (auto v : adjacent_vertices_range(u, g)) { add_edge_if_not_present(root, v, g); - } - } - + } + } + remove_vertices(victims, g); -} - -static +} + 
+static void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, - const vector<DepthMinMax> &depths, DepthMinMax &info) { + const vector<DepthMinMax> &depths, DepthMinMax &info) { if (is_any_accept(v, g)) { - return; - } + return; + } if (is_any_start(v, g)) { info.min = depth(0); - info.max = max(info.max, depth(0)); - return; - } - + info.max = max(info.max, depth(0)); + return; + } + u32 idx = g[v].index; - assert(idx < depths.size()); - const DepthMinMax &d = depths.at(idx); - + assert(idx < depths.size()); + const DepthMinMax &d = depths.at(idx); + for (ReportID report_id : g[v].reports) { const Report &report = rm.getReport(report_id); assert(report.type == EXTERNAL_CALLBACK); @@ -126,24 +126,24 @@ void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, rd.str().c_str()); info = unionDepthMinMax(info, rd); - } -} - -static + } +} + +static bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { for (const auto &report_id : all_reports(g)) { - if (rm.getReport(report_id).offsetAdjust) { - return true; - } - } - return false; -} - + if (rm.getReport(report_id).offsetAdjust) { + return true; + } + } + return false; +} + void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, NGHolder &g, ExpressionInfo &expr, hs_expr_info *info) { - assert(info); - + assert(info); + // remove reports that aren't on vertices connected to accept. clearReports(g); @@ -154,16 +154,16 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, * match those in NG::addGraph(). */ - /* ensure utf8 starts at cp boundary */ + /* ensure utf8 starts at cp boundary */ ensureCodePointStart(rm, g, expr); - + if (can_never_match(g)) { throw CompileError(expr.index, "Pattern can never match."); } - + bool hamming = expr.hamm_distance > 0; u32 e_dist = hamming ? 
expr.hamm_distance : expr.edit_distance; - + // validate graph's suitability for fuzzing validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); @@ -189,30 +189,30 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, auto depths = calcDepthsFrom(g, g.start); - DepthMinMax d; - + DepthMinMax d; + for (auto u : inv_adjacent_vertices_range(g.accept, g)) { checkVertex(rm, g, u, depths, d); - } - + } + for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) { checkVertex(rm, g, u, depths, d); - } - - if (d.max.is_finite()) { - info->max_width = d.max; - } else { - info->max_width = UINT_MAX; - } - if (d.min.is_finite()) { - info->min_width = d.min; - } else { - info->min_width = UINT_MAX; - } - + } + + if (d.max.is_finite()) { + info->max_width = d.max; + } else { + info->max_width = UINT_MAX; + } + if (d.min.is_finite()) { + info->min_width = d.min; + } else { + info->min_width = UINT_MAX; + } + info->unordered_matches = hasOffsetAdjust(rm, g); info->matches_at_eod = can_match_at_eod(g); info->matches_only_at_eod = can_only_match_at_eod(g); -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h index f9bd680939..9500338f55 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h @@ -1,51 +1,51 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file * \brief Code for discovering properties of an expression used by - * hs_expression_info. - */ - -#ifndef NG_EXPR_INFO_H -#define NG_EXPR_INFO_H - -struct hs_expr_info; - -namespace ue2 { - + * hs_expression_info. + */ + +#ifndef NG_EXPR_INFO_H +#define NG_EXPR_INFO_H + +struct hs_expr_info; + +namespace ue2 { + class ExpressionInfo; class NGHolder; -class ReportManager; +class ReportManager; struct CompileContext; - + void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, NGHolder &g, ExpressionInfo &expr, hs_expr_info *info); - -} // namespace ue2 - -#endif // NG_EXPR_INFO_H + +} // namespace ue2 + +#endif // NG_EXPR_INFO_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp index 6eb23113f3..cee47ffe70 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp @@ -1,74 +1,74 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Propagate extended parameters to vertex reports and reduce graph if - * possible. - * - * This code handles the propagation of the extension parameters specified by + * \brief Propagate extended parameters to vertex reports and reduce graph if + * possible. + * + * This code handles the propagation of the extension parameters specified by * the user with the \ref hs_expr_ext structure into the reports on the graph's - * vertices. - * - * There are also some analyses that prune edges that cannot contribute to a - * match given these constraints, or transform the graph in order to make a - * constraint implicit. - */ + * vertices. + * + * There are also some analyses that prune edges that cannot contribute to a + * match given these constraints, or transform the graph in order to make a + * constraint implicit. 
+ */ #include "ng_extparam.h" -#include "ng.h" -#include "ng_depth.h" -#include "ng_dump.h" -#include "ng_prune.h" -#include "ng_reports.h" -#include "ng_som_util.h" -#include "ng_width.h" -#include "ng_util.h" -#include "ue2common.h" +#include "ng.h" +#include "ng_depth.h" +#include "ng_dump.h" +#include "ng_prune.h" +#include "ng_reports.h" +#include "ng_som_util.h" +#include "ng_width.h" +#include "ng_util.h" +#include "ue2common.h" #include "compiler/compiler.h" -#include "parser/position.h" -#include "util/compile_context.h" -#include "util/compile_error.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" - -#include <sstream> -#include <string> - -using namespace std; - -namespace ue2 { - -static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000; -static const u32 MAX_MINLENGTH_TO_CONVERT = 2000; - +#include "parser/position.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" + +#include <sstream> +#include <string> + +using namespace std; + +namespace ue2 { + +static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000; +static const u32 MAX_MINLENGTH_TO_CONVERT = 2000; + /** True if all the given reports have the same extparam bounds. */ template<typename Container> bool hasSameBounds(const Container &reports, const ReportManager &rm) { @@ -91,82 +91,82 @@ bool hasSameBounds(const Container &reports, const ReportManager &rm) { * \brief Find the (min, max) offset adjustment for the reports on a given * vertex. 
*/ -static -pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm, - const NGHolder &g, NFAVertex v) { - s32 minAdj = 0, maxAdj = 0; - const auto &reports = g[v].reports; - for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) { - const Report &ir = rm.getReport(*ri); - if (ri == reports.begin()) { - minAdj = ir.offsetAdjust; - maxAdj = ir.offsetAdjust; - } else { - minAdj = min(minAdj, ir.offsetAdjust); - maxAdj = max(maxAdj, ir.offsetAdjust); - } - } - - return make_pair(minAdj, maxAdj); -} - -/** \brief Find the (min, max) length of any match for the given holder. */ -static -DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { - DepthMinMax match_depths; - - vector<DepthMinMax> depths = getDistancesFromSOM(g); - - pair<s32, s32> adj; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - u32 idx = g[v].index; - DepthMinMax d = depths[idx]; // copy - adj = getMinMaxOffsetAdjust(rm, g, v); - DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx, - d.str().c_str(), adj.first, adj.second); - d.min += adj.first; - d.max += adj.second; - match_depths = unionDepthMinMax(match_depths, d); - } - - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } - u32 idx = g[v].index; - DepthMinMax d = depths[idx]; // copy - adj = getMinMaxOffsetAdjust(rm, g, v); - DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx, - d.str().c_str(), adj.first, adj.second); - d.min += adj.first; - d.max += adj.second; - match_depths = unionDepthMinMax(match_depths, d); - } - - DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str()); - - assert(match_depths.min.is_reachable()); - assert(match_depths.max.is_reachable()); - return match_depths; -} - +static +pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm, + const NGHolder &g, NFAVertex v) { + s32 minAdj = 0, maxAdj = 0; + const auto &reports = g[v].reports; + for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) { 
+ const Report &ir = rm.getReport(*ri); + if (ri == reports.begin()) { + minAdj = ir.offsetAdjust; + maxAdj = ir.offsetAdjust; + } else { + minAdj = min(minAdj, ir.offsetAdjust); + maxAdj = max(maxAdj, ir.offsetAdjust); + } + } + + return make_pair(minAdj, maxAdj); +} + +/** \brief Find the (min, max) length of any match for the given holder. */ +static +DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { + DepthMinMax match_depths; + + vector<DepthMinMax> depths = getDistancesFromSOM(g); + + pair<s32, s32> adj; + + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + u32 idx = g[v].index; + DepthMinMax d = depths[idx]; // copy + adj = getMinMaxOffsetAdjust(rm, g, v); + DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx, + d.str().c_str(), adj.first, adj.second); + d.min += adj.first; + d.max += adj.second; + match_depths = unionDepthMinMax(match_depths, d); + } + + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } + u32 idx = g[v].index; + DepthMinMax d = depths[idx]; // copy + adj = getMinMaxOffsetAdjust(rm, g, v); + DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx, + d.str().c_str(), adj.first, adj.second); + d.min += adj.first; + d.max += adj.second; + match_depths = unionDepthMinMax(match_depths, d); + } + + DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str()); + + assert(match_depths.min.is_reachable()); + assert(match_depths.max.is_reachable()); + return match_depths; +} + template<typename Function> void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen, Function func) { - for (auto v : inv_adjacent_vertices_range(accept, g)) { - if (v == g.accept) { + for (auto v : inv_adjacent_vertices_range(accept, g)) { + if (v == g.accept) { // Don't operate on accept: the accept->acceptEod edge is stylised. 
- assert(accept == g.acceptEod); + assert(accept == g.acceptEod); assert(g[v].reports.empty()); - continue; - } - + continue; + } + if (!seen.insert(v).second) { continue; // We have already processed v. - } - - auto &reports = g[v].reports; + } + + auto &reports = g[v].reports; if (reports.empty()) { continue; } @@ -177,7 +177,7 @@ void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen, reports = std::move(new_reports); } } - + /** * Generic function for replacing all the reports in the graph. * @@ -190,7 +190,7 @@ void replaceReports(NGHolder &g, Function func) { replaceReports(g, g.accept, seen, func); replaceReports(g, g.acceptEod, seen, func); } - + /** \brief Replace the graph's reports with new reports that specify bounds. */ static void updateReportBounds(ReportManager &rm, NGHolder &g, @@ -199,9 +199,9 @@ void updateReportBounds(ReportManager &rm, NGHolder &g, replaceReports(g, [&](NFAVertex, ReportID id) { Report report = rm.getReport(id); // make a copy assert(!report.hasBounds()); - + // Note that we need to cope with offset adjustment here. 
- + report.minOffset = expr.min_offset - report.offsetAdjust; if (expr.max_offset == MAX_OFFSET) { report.maxOffset = MAX_OFFSET; @@ -209,30 +209,30 @@ void updateReportBounds(ReportManager &rm, NGHolder &g, report.maxOffset = expr.max_offset - report.offsetAdjust; } assert(report.maxOffset >= report.minOffset); - + report.minLength = expr.min_length; if (expr.min_length && !expr.som) { report.quashSom = true; - } - + } + DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " "min_length=%llu\n", id, report.minOffset, report.maxOffset, report.minLength); return rm.getInternalId(report); }); -} - -static -bool hasVirtualStarts(const NGHolder &g) { - for (auto v : adjacent_vertices_range(g.start, g)) { - if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { - return true; - } - } - return false; -} - +} + +static +bool hasVirtualStarts(const NGHolder &g) { + for (auto v : adjacent_vertices_range(g.start, g)) { + if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { + return true; + } + } + return false; +} + /** Set the min_length param for all reports to zero. */ static void clearMinLengthParam(NGHolder &g, ReportManager &rm) { @@ -272,11 +272,11 @@ void clearOffsetParams(NGHolder &g, ReportManager &rm) { * can use that knowledge to anchor it which will limit its lifespan. Note that * we can't use this transformation if there's a min_length, as it's currently * handled using "sly SOM". - * - * Note that it is possible to handle graphs that have a combination of - * anchored and unanchored paths, but it's too tricky for the moment. - */ -static + * + * Note that it is possible to handle graphs that have a combination of + * anchored and unanchored paths, but it's too tricky for the moment. 
+ */ +static bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { if (!isFloating(g)) { return false; @@ -303,99 +303,99 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { const depth minWidth = findMinWidth(g); const depth maxWidth = findMaxWidth(g); - assert(minWidth <= maxWidth); - assert(maxWidth.is_reachable()); - + assert(minWidth <= maxWidth); + assert(maxWidth.is_reachable()); + const auto &first_report = rm.getReport(*reports.begin()); const auto min_offset = first_report.minOffset; const auto max_offset = first_report.maxOffset; assert(max_offset < MAX_OFFSET); - DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", + DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", minWidth.str().c_str(), maxWidth.str().c_str(), min_offset, max_offset); - + if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { - return false; - } - + return false; + } + if (max_offset < minWidth) { - assert(0); - return false; - } - - // If the pattern has virtual starts, we probably don't want to touch it. - if (hasVirtualStarts(g)) { - DEBUG_PRINTF("virtual starts, bailing\n"); - return false; - } - - // Similarly, bail if the pattern is vacuous. TODO: this could be done, we - // would just need to be a little careful with reports. - if (isVacuous(g)) { - DEBUG_PRINTF("vacuous, bailing\n"); - return false; - } - - u32 min_bound, max_bound; - if (maxWidth.is_infinite()) { - min_bound = 0; + assert(0); + return false; + } + + // If the pattern has virtual starts, we probably don't want to touch it. + if (hasVirtualStarts(g)) { + DEBUG_PRINTF("virtual starts, bailing\n"); + return false; + } + + // Similarly, bail if the pattern is vacuous. TODO: this could be done, we + // would just need to be a little careful with reports. 
+ if (isVacuous(g)) { + DEBUG_PRINTF("vacuous, bailing\n"); + return false; + } + + u32 min_bound, max_bound; + if (maxWidth.is_infinite()) { + min_bound = 0; max_bound = max_offset - minWidth; - } else { + } else { min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; max_bound = max_offset - minWidth; - } - - DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); - - vector<NFAVertex> initials; - for (auto v : adjacent_vertices_range(g.startDs, g)) { - if (v == g.startDs) { - continue; - } - initials.push_back(v); - } - if (initials.empty()) { - DEBUG_PRINTF("no initial vertices\n"); - return false; - } - - // Wire up 'min_offset' mandatory dots from anchored start. - NFAVertex u = g.start; - for (u32 i = 0; i < min_bound; i++) { - NFAVertex v = add_vertex(g); - g[v].char_reach.setall(); - add_edge(u, v, g); - u = v; - } - - NFAVertex head = u; - - // Wire up optional dots for (max_offset - min_offset). - for (u32 i = 0; i < max_bound - min_bound; i++) { - NFAVertex v = add_vertex(g); - g[v].char_reach.setall(); - if (head != u) { - add_edge(head, v, g); - } - add_edge(u, v, g); - u = v; - } - - // Remove edges from starts and wire both head and u to our initials. - for (auto v : initials) { - remove_edge(g.startDs, v, g); - remove_edge(g.start, v, g); - - if (head != u) { - add_edge(head, v, g); - } - add_edge(u, v, g); - } - + } + + DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); + + vector<NFAVertex> initials; + for (auto v : adjacent_vertices_range(g.startDs, g)) { + if (v == g.startDs) { + continue; + } + initials.push_back(v); + } + if (initials.empty()) { + DEBUG_PRINTF("no initial vertices\n"); + return false; + } + + // Wire up 'min_offset' mandatory dots from anchored start. + NFAVertex u = g.start; + for (u32 i = 0; i < min_bound; i++) { + NFAVertex v = add_vertex(g); + g[v].char_reach.setall(); + add_edge(u, v, g); + u = v; + } + + NFAVertex head = u; + + // Wire up optional dots for (max_offset - min_offset). 
+ for (u32 i = 0; i < max_bound - min_bound; i++) { + NFAVertex v = add_vertex(g); + g[v].char_reach.setall(); + if (head != u) { + add_edge(head, v, g); + } + add_edge(u, v, g); + u = v; + } + + // Remove edges from starts and wire both head and u to our initials. + for (auto v : initials) { + remove_edge(g.startDs, v, g); + remove_edge(g.start, v, g); + + if (head != u) { + add_edge(head, v, g); + } + add_edge(u, v, g); + } + renumber_vertices(g); renumber_edges(g); - + if (minWidth == maxWidth) { // For a fixed width pattern, we can retire the offsets as // they are implicit in the graph now. @@ -403,68 +403,68 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { } clearReports(g); - return true; -} - -static -NFAVertex findSingleCyclic(const NGHolder &g) { + return true; +} + +static +NFAVertex findSingleCyclic(const NGHolder &g) { NFAVertex v = NGHolder::null_vertex(); - for (const auto &e : edges_range(g)) { - if (source(e, g) == target(e, g)) { - if (source(e, g) == g.startDs) { - continue; - } + for (const auto &e : edges_range(g)) { + if (source(e, g) == target(e, g)) { + if (source(e, g) == g.startDs) { + continue; + } if (v != NGHolder::null_vertex()) { - // More than one cyclic vertex. + // More than one cyclic vertex. 
return NGHolder::null_vertex(); - } - v = source(e, g); - } - } - + } + v = source(e, g); + } + } + if (v != NGHolder::null_vertex()) { DEBUG_PRINTF("cyclic is %zu\n", g[v].index); - assert(!is_special(v, g)); - } - return v; -} - -static + assert(!is_special(v, g)); + } + return v; +} + +static bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, - int *adjust) { - const auto &reports = all_reports(g); - if (reports.empty()) { - assert(0); - return false; - } - - int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust; - for (auto report : reports) { - const Report &ir = rm.getReport(report); - if (ir.offsetAdjust != offsetAdjust) { - DEBUG_PRINTF("different adjusts!\n"); - return false; - } - } - - *adjust = offsetAdjust; - return true; -} - + int *adjust) { + const auto &reports = all_reports(g); + if (reports.empty()) { + assert(0); + return false; + } + + int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust; + for (auto report : reports) { + const Report &ir = rm.getReport(report); + if (ir.offsetAdjust != offsetAdjust) { + DEBUG_PRINTF("different adjusts!\n"); + return false; + } + } + + *adjust = offsetAdjust; + return true; +} + /** * If the pattern has a min_length and is of "ratchet" form with one unbounded - * repeat, that repeat can become a bounded repeat. - * - * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ - */ -static + * repeat, that repeat can become a bounded repeat. + * + * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ + */ +static bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { const auto &reports = all_reports(g); - + if (reports.empty()) { - return false; - } - + return false; + } + if (!hasSameBounds(reports, rm)) { DEBUG_PRINTF("mixed report bounds\n"); return false; @@ -475,249 +475,249 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { return false; } - // If the pattern has virtual starts, we probably don't want to touch it. 
- if (hasVirtualStarts(g)) { - DEBUG_PRINTF("virtual starts, bailing\n"); - return false; - } - - // The graph must contain a single cyclic vertex (other than startDs), and - // that vertex can have one pred and one successor. - NFAVertex cyclic = findSingleCyclic(g); + // If the pattern has virtual starts, we probably don't want to touch it. + if (hasVirtualStarts(g)) { + DEBUG_PRINTF("virtual starts, bailing\n"); + return false; + } + + // The graph must contain a single cyclic vertex (other than startDs), and + // that vertex can have one pred and one successor. + NFAVertex cyclic = findSingleCyclic(g); if (cyclic == NGHolder::null_vertex()) { - return false; - } - + return false; + } + NGHolder::adjacency_iterator ai, ae; - tie(ai, ae) = adjacent_vertices(g.start, g); - if (*ai == g.startDs) { - ++ai; - } - NFAVertex v = *ai; - if (++ai != ae) { - DEBUG_PRINTF("more than one initial vertex\n"); - return false; - } - - u32 width = 0; - - // Walk from the start vertex to the cyclic state and ensure we have a - // chain of vertices. - while (v != cyclic) { + tie(ai, ae) = adjacent_vertices(g.start, g); + if (*ai == g.startDs) { + ++ai; + } + NFAVertex v = *ai; + if (++ai != ae) { + DEBUG_PRINTF("more than one initial vertex\n"); + return false; + } + + u32 width = 0; + + // Walk from the start vertex to the cyclic state and ensure we have a + // chain of vertices. + while (v != cyclic) { DEBUG_PRINTF("vertex %zu\n", g[v].index); - width++; + width++; auto succ = succs(v, g); - if (contains(succ, cyclic)) { - if (succ.size() == 1) { - v = cyclic; - } else if (succ.size() == 2) { - // Cyclic and jump edge. 
- succ.erase(cyclic); - NFAVertex v2 = *succ.begin(); - if (!edge(cyclic, v2, g).second) { - DEBUG_PRINTF("bad form\n"); - return false; - } - v = cyclic; - } else { - DEBUG_PRINTF("bad form\n"); - return false; - } - } else { - if (succ.size() != 1) { - DEBUG_PRINTF("bad form\n"); - return false; - } - v = *succ.begin(); - } - } - - // Check the cyclic state is A-OK. - v = getSoleDestVertex(g, cyclic); + if (contains(succ, cyclic)) { + if (succ.size() == 1) { + v = cyclic; + } else if (succ.size() == 2) { + // Cyclic and jump edge. + succ.erase(cyclic); + NFAVertex v2 = *succ.begin(); + if (!edge(cyclic, v2, g).second) { + DEBUG_PRINTF("bad form\n"); + return false; + } + v = cyclic; + } else { + DEBUG_PRINTF("bad form\n"); + return false; + } + } else { + if (succ.size() != 1) { + DEBUG_PRINTF("bad form\n"); + return false; + } + v = *succ.begin(); + } + } + + // Check the cyclic state is A-OK. + v = getSoleDestVertex(g, cyclic); if (v == NGHolder::null_vertex()) { - DEBUG_PRINTF("cyclic has more than one successor\n"); - return false; - } - - // Walk from the cyclic state to an accept and ensure we have a chain of - // vertices. - while (!is_any_accept(v, g)) { + DEBUG_PRINTF("cyclic has more than one successor\n"); + return false; + } + + // Walk from the cyclic state to an accept and ensure we have a chain of + // vertices. 
+ while (!is_any_accept(v, g)) { DEBUG_PRINTF("vertex %zu\n", g[v].index); - width++; + width++; auto succ = succs(v, g); - if (succ.size() != 1) { - DEBUG_PRINTF("bad form\n"); - return false; - } - v = *succ.begin(); - } - - int offsetAdjust = 0; - if (!hasOffsetAdjust(rm, g, &offsetAdjust)) { - return false; - } - DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust); - width += offsetAdjust; - + if (succ.size() != 1) { + DEBUG_PRINTF("bad form\n"); + return false; + } + v = *succ.begin(); + } + + int offsetAdjust = 0; + if (!hasOffsetAdjust(rm, g, &offsetAdjust)) { + return false; + } + DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust); + width += offsetAdjust; + DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, - g[cyclic].index); - + g[cyclic].index); + if (width >= min_length) { - DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", + DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", min_length, width); clearMinLengthParam(g, rm); - return true; - } - - vector<NFAVertex> preds; - vector<NFAEdge> dead; - for (auto u : inv_adjacent_vertices_range(cyclic, g)) { + return true; + } + + vector<NFAVertex> preds; + vector<NFAEdge> dead; + for (auto u : inv_adjacent_vertices_range(cyclic, g)) { DEBUG_PRINTF("pred %zu\n", g[u].index); - if (u == cyclic) { - continue; - } - preds.push_back(u); - - // We want to delete the out-edges of each predecessor, but need to - // make sure we don't delete the startDs self loop. - for (const auto &e : out_edges_range(u, g)) { - if (target(e, g) != g.startDs) { - dead.push_back(e); - } - } - } - - remove_edges(dead, g); - - assert(!preds.empty()); - - const CharReach &cr = g[cyclic].char_reach; - + if (u == cyclic) { + continue; + } + preds.push_back(u); + + // We want to delete the out-edges of each predecessor, but need to + // make sure we don't delete the startDs self loop. 
+ for (const auto &e : out_edges_range(u, g)) { + if (target(e, g) != g.startDs) { + dead.push_back(e); + } + } + } + + remove_edges(dead, g); + + assert(!preds.empty()); + + const CharReach &cr = g[cyclic].char_reach; + for (u32 i = 0; i < min_length - width - 1; ++i) { - v = add_vertex(g); - g[v].char_reach = cr; - - for (auto u : preds) { - add_edge(u, v, g); - } - preds.clear(); - preds.push_back(v); - } - assert(!preds.empty()); - for (auto u : preds) { - add_edge(u, cyclic, g); - } - + v = add_vertex(g); + g[v].char_reach = cr; + + for (auto u : preds) { + add_edge(u, v, g); + } + preds.clear(); + preds.push_back(v); + } + assert(!preds.empty()); + for (auto u : preds) { + add_edge(u, cyclic, g); + } + renumber_vertices(g); renumber_edges(g); clearMinLengthParam(g, rm); - clearReports(g); - return true; -} - -static + clearReports(g); + return true; +} + +static bool hasExtParams(const ExpressionInfo &expr) { if (expr.min_length != 0) { - return true; - } + return true; + } if (expr.min_offset != 0) { - return true; - } + return true; + } if (expr.max_offset != MAX_OFFSET) { - return true; - } - return false; -} - -static -const depth& maxDistToAccept(const NFAVertexBidiDepth &d) { - if (d.toAccept.max.is_unreachable()) { - return d.toAcceptEod.max; - } else if (d.toAcceptEod.max.is_unreachable()) { - return d.toAccept.max; - } - return max(d.toAccept.max, d.toAcceptEod.max); -} - -static -const depth& minDistFromStart(const NFAVertexBidiDepth &d) { - return min(d.fromStartDotStar.min, d.fromStart.min); -} - -static -const depth& minDistToAccept(const NFAVertexBidiDepth &d) { - return min(d.toAccept.min, d.toAcceptEod.min); -} - -static + return true; + } + return false; +} + +static +const depth& maxDistToAccept(const NFAVertexBidiDepth &d) { + if (d.toAccept.max.is_unreachable()) { + return d.toAcceptEod.max; + } else if (d.toAcceptEod.max.is_unreachable()) { + return d.toAccept.max; + } + return max(d.toAccept.max, d.toAcceptEod.max); +} + +static +const 
depth& minDistFromStart(const NFAVertexBidiDepth &d) { + return min(d.fromStartDotStar.min, d.fromStart.min); +} + +static +const depth& minDistToAccept(const NFAVertexBidiDepth &d) { + return min(d.toAccept.min, d.toAcceptEod.min); +} + +static bool isEdgePrunable(const NGHolder &g, const Report &report, - const vector<NFAVertexBidiDepth> &depths, - const NFAEdge &e) { - const NFAVertex u = source(e, g); - const NFAVertex v = target(e, g); - + const vector<NFAVertexBidiDepth> &depths, + const NFAEdge &e) { + const NFAVertex u = source(e, g); + const NFAVertex v = target(e, g); + DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); - - // Leave our special-to-special edges alone. - if (is_special(u, g) && is_special(v, g)) { - DEBUG_PRINTF("ignoring special-to-special\n"); - return false; - } - - // We must be careful around start: we don't want to remove (start, v) if - // (startDs, v) exists as well, since later code will assume the presence - // of both edges, but other cases are OK. - if (u == g.start && edge(g.startDs, v, g).second) { - DEBUG_PRINTF("ignoring unanchored start edge\n"); - return false; - } - - u32 u_idx = g[u].index; - u32 v_idx = g[v].index; - assert(u_idx < depths.size() && v_idx < depths.size()); - - const NFAVertexBidiDepth &du = depths.at(u_idx); - const NFAVertexBidiDepth &dv = depths.at(v_idx); - + + // Leave our special-to-special edges alone. + if (is_special(u, g) && is_special(v, g)) { + DEBUG_PRINTF("ignoring special-to-special\n"); + return false; + } + + // We must be careful around start: we don't want to remove (start, v) if + // (startDs, v) exists as well, since later code will assume the presence + // of both edges, but other cases are OK. 
+ if (u == g.start && edge(g.startDs, v, g).second) { + DEBUG_PRINTF("ignoring unanchored start edge\n"); + return false; + } + + u32 u_idx = g[u].index; + u32 v_idx = g[v].index; + assert(u_idx < depths.size() && v_idx < depths.size()); + + const NFAVertexBidiDepth &du = depths.at(u_idx); + const NFAVertexBidiDepth &dv = depths.at(v_idx); + if (report.minOffset) { depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv); if (max_offset.is_finite() && max_offset < report.minOffset) { - DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); - return true; - } - } - + DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); + return true; + } + } + if (report.maxOffset != MAX_OFFSET) { - depth min_offset = minDistFromStart(du) + minDistToAccept(dv); - assert(min_offset.is_finite()); - + depth min_offset = minDistFromStart(du) + minDistToAccept(dv); + assert(min_offset.is_finite()); + if (min_offset > report.maxOffset) { - DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); - return true; - } - } - + DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); + return true; + } + } + if (report.minLength && is_any_accept(v, g)) { - // Simple take on min_length. If we're an edge to accept and our max - // dist from start is too small, we can be pruned. + // Simple take on min_length. If we're an edge to accept and our max + // dist from start is too small, we can be pruned. 
const depth &width = maxDistFromInit(du); if (width.is_finite() && width < report.minLength) { - DEBUG_PRINTF("max width %s from start too small for min_length\n", - width.str().c_str()); - return true; - } - } - - return false; -} - -static + DEBUG_PRINTF("max width %s from start too small for min_length\n", + width.str().c_str()); + return true; + } + } + + return false; +} + +static void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { const auto &reports = all_reports(g); if (reports.empty()) { return; } - + if (!hasSameBounds(reports, rm)) { DEBUG_PRINTF("report bounds vary\n"); return; @@ -727,32 +727,32 @@ void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { auto depths = calcBidiDepths(g); - vector<NFAEdge> dead; - - for (const auto &e : edges_range(g)) { + vector<NFAEdge> dead; + + for (const auto &e : edges_range(g)) { if (isEdgePrunable(g, report, depths, e)) { - DEBUG_PRINTF("pruning\n"); - dead.push_back(e); - } - } - - if (dead.empty()) { - return; - } - - remove_edges(dead, g); - pruneUseless(g); + DEBUG_PRINTF("pruning\n"); + dead.push_back(e); + } + } + + if (dead.empty()) { + return; + } + + remove_edges(dead, g); + pruneUseless(g); clearReports(g); -} - +} + /** * Remove vacuous edges in graphs where the min_offset or min_length * constraints dictate that they can never produce a match. 
*/ -static +static void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { - vector<NFAEdge> dead; - + vector<NFAEdge> dead; + auto has_min_offset = [&](NFAVertex v) { assert(!g[v].reports.empty()); // must be reporter return all_of_in(g[v].reports, [&](ReportID id) { @@ -767,157 +767,157 @@ void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { }); }; - for (const auto &e : edges_range(g)) { - const NFAVertex u = source(e, g); - const NFAVertex v = target(e, g); - + for (const auto &e : edges_range(g)) { + const NFAVertex u = source(e, g); + const NFAVertex v = target(e, g); + // Special case: Crudely remove vacuous edges from start in graphs with // a min_offset. if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { - DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); - dead.push_back(e); - continue; - } - - // If a min_length is set, vacuous edges can be removed. + DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); + dead.push_back(e); + continue; + } + + // If a min_length is set, vacuous edges can be removed. 
if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { - DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); - dead.push_back(e); - continue; - } - } - - if (dead.empty()) { - return; - } - + DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); + dead.push_back(e); + continue; + } + } + + if (dead.empty()) { + return; + } + DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); - remove_edges(dead, g); - pruneUseless(g); + remove_edges(dead, g); + pruneUseless(g); clearReports(g); -} - -static +} + +static void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, - const ReportManager &rm, NFAVertex accept) { - vector<NFAEdge> dead; - - for (const auto &e : in_edges_range(accept, g)) { - NFAVertex v = source(e, g); - if (v == g.accept) { - assert(accept == g.acceptEod); // stylised edge - continue; - } - + const ReportManager &rm, NFAVertex accept) { + vector<NFAEdge> dead; + + for (const auto &e : in_edges_range(accept, g)) { + NFAVertex v = source(e, g); + if (v == g.accept) { + assert(accept == g.acceptEod); // stylised edge + continue; + } + if (!hasSameBounds(g[v].reports, rm)) { continue; } const auto &report = rm.getReport(*g[v].reports.begin()); - u32 idx = g[v].index; - DepthMinMax d = depths[idx]; // copy - pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v); - DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx, - d.str().c_str(), adj.first, adj.second); - d.min += adj.first; - d.max += adj.second; - + u32 idx = g[v].index; + DepthMinMax d = depths[idx]; // copy + pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v); + DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx, + d.str().c_str(), adj.first, adj.second); + d.min += adj.first; + d.max += adj.second; + if (d.max.is_finite() && d.max < report.minLength) { - DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", + DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", d.max.str().c_str(), report.minLength); - dead.push_back(e); - 
continue; - } - + dead.push_back(e); + continue; + } + if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { - DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", + DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", d.min.str().c_str(), report.maxOffset); - dead.push_back(e); - continue; - } - } - - remove_edges(dead, g); -} - + dead.push_back(e); + continue; + } + } + + remove_edges(dead, g); +} + /** * Remove edges to accepts that can never produce a match long enough to * satisfy our min_length and max_offset constraints. */ -static +static void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { if (!any_of_in(all_reports(g), [&](ReportID id) { return rm.getReport(id).minLength > 0; })) { - return; - } - - vector<DepthMinMax> depths = getDistancesFromSOM(g); - - pruneUnmatchable(g, depths, rm, g.accept); - pruneUnmatchable(g, depths, rm, g.acceptEod); - - pruneUseless(g); + return; + } + + vector<DepthMinMax> depths = getDistancesFromSOM(g); + + pruneUnmatchable(g, depths, rm, g.accept); + pruneUnmatchable(g, depths, rm, g.acceptEod); + + pruneUseless(g); clearReports(g); -} - -static -bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { +} + +static +bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { return any_of_in(all_reports(g), [&rm](ReportID id) { return rm.getReport(id).offsetAdjust != 0; }); -} - +} + void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, ReportManager &rm) { if (!hasExtParams(expr)) { - return; - } - - depth minWidth = findMinWidth(g); - depth maxWidth = findMaxWidth(g); - bool is_anchored = !has_proper_successor(g.startDs, g) - && out_degree(g.start, g); - - DepthMinMax match_depths = findMatchLengths(rm, g); - DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); - + return; + } + + depth minWidth = findMinWidth(g); + depth maxWidth = findMaxWidth(g); + bool is_anchored = !has_proper_successor(g.startDs, g) + && out_degree(g.start, g); 
+ + DepthMinMax match_depths = findMatchLengths(rm, g); + DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); + if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { - ostringstream oss; - oss << "Expression is anchored and cannot satisfy min_offset=" + ostringstream oss; + oss << "Expression is anchored and cannot satisfy min_offset=" << expr.min_offset << " as it can only produce matches of length " - << maxWidth << " bytes at most."; + << maxWidth << " bytes at most."; throw CompileError(expr.index, oss.str()); - } - + } + if (minWidth > expr.max_offset) { - ostringstream oss; + ostringstream oss; oss << "Expression has max_offset=" << expr.max_offset << " but requires " << minWidth << " bytes to match."; throw CompileError(expr.index, oss.str()); - } - + } + if (maxWidth.is_finite() && match_depths.max < expr.min_length) { - ostringstream oss; + ostringstream oss; oss << "Expression has min_length=" << expr.min_length << " but can " - "only produce matches of length " << match_depths.max << - " bytes at most."; + "only produce matches of length " << match_depths.max << + " bytes at most."; throw CompileError(expr.index, oss.str()); - } - + } + if (expr.min_length && expr.min_length <= match_depths.min) { - DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", + DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", expr.min_length); expr.min_length = 0; - } - + } + if (!hasExtParams(expr)) { - return; - } - + return; + } + updateReportBounds(rm, g, expr); } - + /** * If the pattern is completely anchored and has a min_length set, this can * be converted to a min_offset. 
@@ -926,8 +926,8 @@ static void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { if (has_proper_successor(g.startDs, g)) { return; // not wholly anchored - } - + } + replaceReports(g, [&rm](NFAVertex, ReportID id) { const auto &report = rm.getReport(id); if (report.minLength) { @@ -984,52 +984,52 @@ void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { DEBUG_PRINTF("no extparam bounds\n"); return; - } - + } + DEBUG_PRINTF("graph has extparam bounds\n"); - + pruneVacuousEdges(g, rm); if (can_never_match(g)) { return; } - + pruneUnmatchable(g, rm); if (can_never_match(g)) { return; - } - + } + if (!hasOffsetAdjustments(rm, g)) { pruneExtUnreachable(g, rm); if (can_never_match(g)) { return; } - } - + } + replaceMinLengthWithOffset(g, rm); if (can_never_match(g)) { - return; - } - - // If the pattern has a min_length and is of "ratchet" form with one - // unbounded repeat, that repeat can become a bounded repeat. - // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ + return; + } + + // If the pattern has a min_length and is of "ratchet" form with one + // unbounded repeat, that repeat can become a bounded repeat. + // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ transformMinLengthToRepeat(g, rm); if (can_never_match(g)) { return; - } - - // If the pattern is unanchored, has a max_offset and has not asked for - // SOM, we can use that knowledge to anchor it which will limit its - // lifespan. Note that we can't use this transformation if there's a - // min_length, as it's currently handled using "sly SOM". + } + + // If the pattern is unanchored, has a max_offset and has not asked for + // SOM, we can use that knowledge to anchor it which will limit its + // lifespan. Note that we can't use this transformation if there's a + // min_length, as it's currently handled using "sly SOM". 
if (som == SOM_NONE) { anchorPatternWithBoundedRepeat(g, rm); if (can_never_match(g)) { return; - } - } - + } + } + removeUnneededOffsetBounds(g, rm); -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h index ae818075c0..43543b1255 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h @@ -1,47 +1,47 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Propagate extended parameters to vertex reports and reduce graph if - * possible. - */ - -#ifndef NG_EXTPARAM_H -#define NG_EXTPARAM_H - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Propagate extended parameters to vertex reports and reduce graph if + * possible. + */ + +#ifndef NG_EXTPARAM_H +#define NG_EXTPARAM_H + #include "som/som.h" -namespace ue2 { - +namespace ue2 { + class ExpressionInfo; class NGHolder; -class ReportManager; - +class ReportManager; + /** * \brief Propagate extended parameter information to vertex reports. Will * throw CompileError if this expression's extended parameters are not @@ -52,13 +52,13 @@ class ReportManager; */ void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, ReportManager &rm); - + /** * \brief Perform graph reductions (if possible) to do with extended parameter * constraints on reports. 
*/ void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som); -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp index 8fb264d8a9..01fb0090c6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp @@ -1,142 +1,142 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose mask construction from NGHolder. - */ -#include "ng_fixed_width.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ng_util.h" -#include "rose/rose_build.h" -#include "util/container.h" -#include "ue2common.h" - -#include <algorithm> -#include <iterator> -#include <set> - -using namespace std; - -namespace ue2 { - -static -bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose mask construction from NGHolder. + */ +#include "ng_fixed_width.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_util.h" +#include "rose/rose_build.h" +#include "util/container.h" +#include "ue2common.h" + +#include <algorithm> +#include <iterator> +#include <set> + +using namespace std; + +namespace ue2 { + +static +bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored, flat_set<ReportID> *reports) { - DEBUG_PRINTF("looking for a mask pattern\n"); - set<NFAVertex> s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); - - set<NFAVertex> sds_succ; - insert(&sds_succ, adjacent_vertices(g.startDs, g)); - - *anchored = sds_succ.size() == 1; /* sds itself */ - bool floating = is_subset_of(s_succ, sds_succ); - - DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(), - *anchored ? " anchored" : "", floating ? " floating" : ""); - if (!*anchored && !floating) { - DEBUG_PRINTF("semi-anchored\n"); - return false; - } - - set<NFAVertex> &succs = *anchored ? s_succ : sds_succ; - succs.erase(g.startDs); - if (succs.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } - - NFAVertex u = *anchored ? 
g.start : g.startDs; - NFAVertex v = *succs.begin(); - - while (true) { + DEBUG_PRINTF("looking for a mask pattern\n"); + set<NFAVertex> s_succ; + insert(&s_succ, adjacent_vertices(g.start, g)); + + set<NFAVertex> sds_succ; + insert(&sds_succ, adjacent_vertices(g.startDs, g)); + + *anchored = sds_succ.size() == 1; /* sds itself */ + bool floating = is_subset_of(s_succ, sds_succ); + + DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(), + *anchored ? " anchored" : "", floating ? " floating" : ""); + if (!*anchored && !floating) { + DEBUG_PRINTF("semi-anchored\n"); + return false; + } + + set<NFAVertex> &succs = *anchored ? s_succ : sds_succ; + succs.erase(g.startDs); + if (succs.size() != 1) { + DEBUG_PRINTF("branchy root\n"); + return false; + } + + NFAVertex u = *anchored ? g.start : g.startDs; + NFAVertex v = *succs.begin(); + + while (true) { DEBUG_PRINTF("validating vertex %zu\n", g[v].index); - - assert(v != g.acceptEod); - - // If we've reached an accept, we MAY have found a valid Rose pattern - if (v == g.accept) { - DEBUG_PRINTF("accept\n"); - insert(reports, g[u].reports); - return true; - } - - mask->push_back(g[v].char_reach); - - if (out_degree(v, g) != 1) { - DEBUG_PRINTF("out_degree != 1\n"); - return false; /* not a chain */ - } - - u = v; - v = *adjacent_vertices(v, g).first; - - if (in_degree(v, g) != 1) { - DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path - * to case accept (large cycles), - * ensures term */ - return false; - } - } -} - -bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) { - if (!grey.roseMasks) { - return false; - } - - if (in_degree(g.acceptEod,g) != 1) { - DEBUG_PRINTF("EOD anchoring not supported\n"); - return false; - } - + + assert(v != g.acceptEod); + + // If we've reached an accept, we MAY have found a valid Rose pattern + if (v == g.accept) { + DEBUG_PRINTF("accept\n"); + insert(reports, g[u].reports); + return true; + } + + mask->push_back(g[v].char_reach); + + 
if (out_degree(v, g) != 1) { + DEBUG_PRINTF("out_degree != 1\n"); + return false; /* not a chain */ + } + + u = v; + v = *adjacent_vertices(v, g).first; + + if (in_degree(v, g) != 1) { + DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path + * to case accept (large cycles), + * ensures term */ + return false; + } + } +} + +bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) { + if (!grey.roseMasks) { + return false; + } + + if (in_degree(g.acceptEod,g) != 1) { + DEBUG_PRINTF("EOD anchoring not supported\n"); + return false; + } + flat_set<ReportID> reports; - bool anchored = false; - vector<CharReach> mask; - - if (!findMask(g, &mask, &anchored, &reports)) { - return false; - } - - DEBUG_PRINTF("%smasky masky\n", anchored ? "anchored " : ""); - - assert(!mask.empty()); - assert(!reports.empty()); - - if (rose.add(anchored, mask, reports)) { - DEBUG_PRINTF("added as rose mask\n"); - return true; - } else { - DEBUG_PRINTF("failed to add masky\n"); - return false; - } -} - -} // namespace ue2 + bool anchored = false; + vector<CharReach> mask; + + if (!findMask(g, &mask, &anchored, &reports)) { + return false; + } + + DEBUG_PRINTF("%smasky masky\n", anchored ? 
"anchored " : ""); + + assert(!mask.empty()); + assert(!reports.empty()); + + if (rose.add(anchored, mask, reports)) { + DEBUG_PRINTF("added as rose mask\n"); + return true; + } else { + DEBUG_PRINTF("failed to add masky\n"); + return false; + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h index d8286742cd..7a2d0fff3b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h @@ -1,46 +1,46 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose mask construction from NGHolder. - */ - -#ifndef NG_FIXED_WIDTH_H -#define NG_FIXED_WIDTH_H - -namespace ue2 { - -class RoseBuild; -class NGHolder; -struct Grey; - -bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey); - -} // namespace ue2 - -#endif // NG_FIXED_WIDTH_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose mask construction from NGHolder. + */ + +#ifndef NG_FIXED_WIDTH_H +#define NG_FIXED_WIDTH_H + +namespace ue2 { + +class RoseBuild; +class NGHolder; +struct Grey; + +bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey); + +} // namespace ue2 + +#endif // NG_FIXED_WIDTH_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp index 8054544772..f6594616c4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp @@ -1,124 +1,124 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Build code for Haig SOM DFA. - */ -#include "ng_haig.h" - -#include "grey.h" -#include "nfa/goughcompile.h" -#include "ng_holder.h" -#include "ng_mcclellan_internal.h" -#include "ng_som_util.h" -#include "ng_squash.h" -#include "util/bitfield.h" -#include "util/container.h" -#include "util/determinise.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Build code for Haig SOM DFA. + */ +#include "ng_haig.h" + +#include "grey.h" +#include "nfa/goughcompile.h" +#include "ng_holder.h" +#include "ng_mcclellan_internal.h" +#include "ng_som_util.h" +#include "ng_squash.h" +#include "util/bitfield.h" +#include "util/container.h" +#include "util/determinise.h" #include "util/flat_containers.h" #include "util/graph.h" -#include "util/graph_range.h" +#include "util/graph_range.h" #include "util/hash_dynamic_bitset.h" -#include "util/make_unique.h" +#include "util/make_unique.h" #include "util/unordered.h" - -#include <algorithm> -#include <functional> -#include <map> -#include <set> -#include <vector> -#include <boost/dynamic_bitset.hpp> - -using namespace std; -using boost::dynamic_bitset; - -namespace ue2 { - -#define NFA_STATE_LIMIT 256 - -#define HAIG_MAX_NFA_STATE 600 -#define HAIG_MAX_LIVE_SOM_SLOTS 32 - -namespace { -struct haig_too_wide { -}; - -template<typename stateset> -static + +#include <algorithm> +#include <functional> +#include <map> +#include <set> +#include <vector> +#include <boost/dynamic_bitset.hpp> + +using namespace std; +using boost::dynamic_bitset; + +namespace ue2 { + +#define 
NFA_STATE_LIMIT 256 + +#define HAIG_MAX_NFA_STATE 600 +#define HAIG_MAX_LIVE_SOM_SLOTS 32 + +namespace { +struct haig_too_wide { +}; + +template<typename stateset> +static void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, - stateset *init, stateset *initDS, - vector<NFAVertex> *v_by_index) { + stateset *init, stateset *initDS, + vector<NFAVertex> *v_by_index) { DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str()); - for (auto v : vertices_range(g)) { + for (auto v : vertices_range(g)) { if (contains(unused, v)) { - continue; - } + continue; + } u32 v_index = g[v].index; - if (is_any_start(v, g)) { - init->set(v_index); - if (hasSelfLoop(v, g) || is_triggered(g)) { - DEBUG_PRINTF("setting %u\n", v_index); - initDS->set(v_index); - } - } - assert(v_index < init->size()); - } - - v_by_index->clear(); + if (is_any_start(v, g)) { + init->set(v_index); + if (hasSelfLoop(v, g) || is_triggered(g)) { + DEBUG_PRINTF("setting %u\n", v_index); + initDS->set(v_index); + } + } + assert(v_index < init->size()); + } + + v_by_index->clear(); v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); - - for (auto v : vertices_range(g)) { - u32 v_index = g[v].index; + + for (auto v : vertices_range(g)) { + u32 v_index = g[v].index; assert((*v_by_index)[v_index] == NGHolder::null_vertex()); - (*v_by_index)[v_index] = v; - } -} - -template<typename StateSet> -void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - accept->set(g[v].index); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } - acceptEod->set(g[v].index); - } -} - + (*v_by_index)[v_index] = v; + } +} + +template<typename StateSet> +void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + accept->set(g[v].index); + } + for (auto v : 
inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } + acceptEod->set(g[v].index); + } +} + template<typename Automaton_Traits> -class Automaton_Base { +class Automaton_Base { public: using StateSet = typename Automaton_Traits::StateSet; using StateMap = typename Automaton_Traits::StateMap; -protected: +protected: Automaton_Base(const NGHolder &graph_in, som_type som, const vector<vector<CharReach>> &triggers, bool unordered_som) @@ -131,50 +131,50 @@ protected: acceptEod(Automaton_Traits::init_states(numStates)), toppable(Automaton_Traits::init_states(numStates)), dead(Automaton_Traits::init_states(numStates)) { - calculateAlphabet(graph, alpha, unalpha, &alphasize); - assert(alphasize <= ALPHABET_SIZE); - + calculateAlphabet(graph, alpha, unalpha, &alphasize); + assert(alphasize <= ALPHABET_SIZE); + populateInit(graph, unused, &init, &initDS, &v_by_index); - populateAccepts(graph, &accept, &acceptEod); - - start_anchored = DEAD_STATE + 1; - if (initDS == init) { - start_floating = start_anchored; - } else if (initDS.any()) { - start_floating = start_anchored + 1; - } else { - start_floating = DEAD_STATE; - } - + populateAccepts(graph, &accept, &acceptEod); + + start_anchored = DEAD_STATE + 1; + if (initDS == init) { + start_floating = start_anchored; + } else if (initDS.any()) { + start_floating = start_anchored + 1; + } else { + start_floating = DEAD_STATE; + } + cr_by_index = populateCR(graph, v_by_index, alpha); - if (!unordered_som) { - for (const auto &sq : findSquashers(graph, som)) { - NFAVertex v = sq.first; - u32 vert_id = graph[v].index; - squash.set(vert_id); - squash_mask[vert_id] = shrinkStateSet(sq.second); - } - } - - if (is_triggered(graph)) { + if (!unordered_som) { + for (const auto &sq : findSquashers(graph, som)) { + NFAVertex v = sq.first; + u32 vert_id = graph[v].index; + squash.set(vert_id); + squash_mask[vert_id] = shrinkStateSet(sq.second); + } + } + + if (is_triggered(graph)) { dynamic_bitset<> 
temp(numStates); markToppableStarts(graph, unused, false, triggers, &temp); toppable = Automaton_Traits::copy_states(temp, numStates); - } - } - -private: - // Convert an NFAStateSet (as used by the squash code) into a StateSet. - StateSet shrinkStateSet(const NFAStateSet &in) const { + } + } + +private: + // Convert an NFAStateSet (as used by the squash code) into a StateSet. + StateSet shrinkStateSet(const NFAStateSet &in) const { StateSet out = Automaton_Traits::init_states(numStates); - for (size_t i = in.find_first(); i != in.npos && i < out.size(); - i = in.find_next(i)) { - out.set(i); - } - return out; - } - + for (size_t i = in.find_first(); i != in.npos && i < out.size(); + i = in.find_next(i)) { + out.set(i); + } + return out; + } + void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) { StateSet acc = in & (eod ? acceptEod : accept); for (size_t i = acc.find_first(); i != StateSet::npos; @@ -186,27 +186,27 @@ private: } } -public: - void transition(const StateSet &in, StateSet *next) { - transition_graph(*this, v_by_index, in, next); - } - - const vector<StateSet> initial() { +public: + void transition(const StateSet &in, StateSet *next) { + transition_graph(*this, v_by_index, in, next); + } + + const vector<StateSet> initial() { vector<StateSet> rv = {init}; - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(initDS); - } - return rv; - } - - void reports(const StateSet &in, flat_set<ReportID> &rv) { - reports_i(in, false, rv); - } - - void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { - reports_i(in, true, rv); - } - + if (start_floating != DEAD_STATE && start_floating != start_anchored) { + rv.push_back(initDS); + } + return rv; + } + + void reports(const StateSet &in, flat_set<ReportID> &rv) { + reports_i(in, false, rv); + } + + void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { + reports_i(in, true, rv); + } + static bool canPrune(const flat_set<ReportID> &) { return false; } 
const NGHolder &graph; @@ -223,40 +223,40 @@ public: u16 start_anchored; u16 start_floating; - vector<NFAVertex> v_by_index; - vector<CharReach> cr_by_index; /* pre alpha'ed */ - StateSet init; - StateSet initDS; - StateSet squash; /* states which allow us to mask out other states */ - StateSet accept; - StateSet acceptEod; - StateSet toppable; /* states which are allowed to be on when a top arrives, - * triggered dfas only */ - map<u32, StateSet> squash_mask; - StateSet dead; -}; - + vector<NFAVertex> v_by_index; + vector<CharReach> cr_by_index; /* pre alpha'ed */ + StateSet init; + StateSet initDS; + StateSet squash; /* states which allow us to mask out other states */ + StateSet accept; + StateSet acceptEod; + StateSet toppable; /* states which are allowed to be on when a top arrives, + * triggered dfas only */ + map<u32, StateSet> squash_mask; + StateSet dead; +}; + struct Big_Traits { using StateSet = dynamic_bitset<>; using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; - + static StateSet init_states(u32 num) { return StateSet(num); } - + static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { assert(in.size() == num); return in; } }; - + class Automaton_Big : public Automaton_Base<Big_Traits> { public: Automaton_Big(const NGHolder &graph_in, som_type som, const vector<vector<CharReach>> &triggers, bool unordered_som) : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; - + struct Graph_Traits { using StateSet = bitfield<NFA_STATE_LIMIT>; using StateMap = unordered_map<StateSet, dstate_id_t>; @@ -264,520 +264,520 @@ struct Graph_Traits { static StateSet init_states(UNUSED u32 num) { assert(num <= NFA_STATE_LIMIT); return StateSet(); - } - + } + static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { StateSet out = init_states(num); - for (size_t i = in.find_first(); i != in.npos && i < out.size(); - i = in.find_next(i)) { - out.set(i); - } - return out; - } + for (size_t i = in.find_first(); i != 
in.npos && i < out.size(); + i = in.find_next(i)) { + out.set(i); + } + return out; + } }; - + class Automaton_Graph : public Automaton_Base<Graph_Traits> { -public: +public: Automaton_Graph(const NGHolder &graph_in, som_type som, const vector<vector<CharReach>> &triggers, bool unordered_som) : Automaton_Base(graph_in, som, triggers, unordered_som) {} -}; - -class Automaton_Haig_Merge { -public: +}; + +class Automaton_Haig_Merge { +public: using StateSet = vector<u16>; using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; - - explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in) - : nfas(in.begin(), in.end()), dead(in.size()) { - calculateAlphabet(); - populateAsFs(); - } - - void populateAsFs(void) { - bool fs_same = true; - bool fs_dead = true; - - as.resize(nfas.size()); - fs.resize(nfas.size()); - for (u32 i = 0; i < nfas.size(); i++) { - as[i] = nfas[i]->start_anchored; - fs[i] = nfas[i]->start_floating; - - if (fs[i]) { - fs_dead = false; - } - - if (as[i] != fs[i]) { - fs_same = false; - } - } - - start_anchored = DEAD_STATE + 1; - if (fs_same) { - start_floating = start_anchored; - } else if (fs_dead) { - start_floating = DEAD_STATE; - } else { - start_floating = start_anchored + 1; - } - } - - void calculateAlphabet(void) { - DEBUG_PRINTF("calculating alphabet\n"); - vector<CharReach> esets(1, CharReach::dot()); - - for (const auto &haig : nfas) { - DEBUG_PRINTF("...next dfa alphabet\n"); - assert(haig); - const auto &alpha_remap = haig->alpha_remap; - - for (size_t i = 0; i < esets.size(); i++) { - assert(esets[i].any()); - if (esets[i].count() == 1) { - DEBUG_PRINTF("skipping singleton eq set\n"); - continue; - } - - CharReach t; - u8 leader_s = alpha_remap[esets[i].find_first()]; - - DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s); - - for (size_t s = esets[i].find_first(); - s != CharReach::npos; s = esets[i].find_next(s)) { - if (alpha_remap[s] != leader_s) { - t.set(s); - } - } - - if (t.any() && t != esets[i]) { - 
esets[i] &= ~t; - esets.push_back(t); - } - } - } - - alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); - } - - void transition(const StateSet &in, StateSet *next) { - u16 t[ALPHABET_SIZE]; - - for (u32 i = 0; i < alphasize; i++) { - next[i].resize(nfas.size()); - } - - for (u32 j = 0; j < nfas.size(); j++) { - getFullTransitionFromState(*nfas[j], in[j], t); - for (u32 i = 0; i < alphasize; i++) { - next[i][j]= t[unalpha[i]]; - } - } - } - - const vector<StateSet> initial() { - vector<StateSet> rv(1, as); - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(fs); - } - return rv; - } - -private: - void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set, - flat_set<ReportID> &r) { - for (u32 i = 0; i < nfas.size(); i++) { - const auto &rs = nfas[i]->states[in[i]].*r_set; - insert(&r, rs); - } - } - -public: - void reports(const StateSet &in, flat_set<ReportID> &rv) { - reports_i(in, &dstate::reports, rv); - } - void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { - reports_i(in, &dstate::reports_eod, rv); - } - - static bool canPrune(const flat_set<ReportID> &) { return false; } - -private: - vector<const raw_som_dfa *> nfas; - vector<dstate_id_t> as; - vector<dstate_id_t> fs; -public: - array<u16, ALPHABET_SIZE> alpha; - array<u16, ALPHABET_SIZE> unalpha; - u16 alphasize; - StateSet dead; - - u16 start_anchored; - u16 start_floating; -}; -} - -enum bslm_mode { - ONLY_EXISTING, - INCLUDE_INVALID -}; - -static -bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) { - return is_virtual_start(v, g) || is_any_start(v, g); -} - -static + + explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in) + : nfas(in.begin(), in.end()), dead(in.size()) { + calculateAlphabet(); + populateAsFs(); + } + + void populateAsFs(void) { + bool fs_same = true; + bool fs_dead = true; + + as.resize(nfas.size()); + fs.resize(nfas.size()); + for (u32 i = 0; i < nfas.size(); i++) { + as[i] = 
nfas[i]->start_anchored; + fs[i] = nfas[i]->start_floating; + + if (fs[i]) { + fs_dead = false; + } + + if (as[i] != fs[i]) { + fs_same = false; + } + } + + start_anchored = DEAD_STATE + 1; + if (fs_same) { + start_floating = start_anchored; + } else if (fs_dead) { + start_floating = DEAD_STATE; + } else { + start_floating = start_anchored + 1; + } + } + + void calculateAlphabet(void) { + DEBUG_PRINTF("calculating alphabet\n"); + vector<CharReach> esets(1, CharReach::dot()); + + for (const auto &haig : nfas) { + DEBUG_PRINTF("...next dfa alphabet\n"); + assert(haig); + const auto &alpha_remap = haig->alpha_remap; + + for (size_t i = 0; i < esets.size(); i++) { + assert(esets[i].any()); + if (esets[i].count() == 1) { + DEBUG_PRINTF("skipping singleton eq set\n"); + continue; + } + + CharReach t; + u8 leader_s = alpha_remap[esets[i].find_first()]; + + DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s); + + for (size_t s = esets[i].find_first(); + s != CharReach::npos; s = esets[i].find_next(s)) { + if (alpha_remap[s] != leader_s) { + t.set(s); + } + } + + if (t.any() && t != esets[i]) { + esets[i] &= ~t; + esets.push_back(t); + } + } + } + + alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); + } + + void transition(const StateSet &in, StateSet *next) { + u16 t[ALPHABET_SIZE]; + + for (u32 i = 0; i < alphasize; i++) { + next[i].resize(nfas.size()); + } + + for (u32 j = 0; j < nfas.size(); j++) { + getFullTransitionFromState(*nfas[j], in[j], t); + for (u32 i = 0; i < alphasize; i++) { + next[i][j]= t[unalpha[i]]; + } + } + } + + const vector<StateSet> initial() { + vector<StateSet> rv(1, as); + if (start_floating != DEAD_STATE && start_floating != start_anchored) { + rv.push_back(fs); + } + return rv; + } + +private: + void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set, + flat_set<ReportID> &r) { + for (u32 i = 0; i < nfas.size(); i++) { + const auto &rs = nfas[i]->states[in[i]].*r_set; + insert(&r, rs); + } + } + +public: + void 
reports(const StateSet &in, flat_set<ReportID> &rv) { + reports_i(in, &dstate::reports, rv); + } + void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { + reports_i(in, &dstate::reports_eod, rv); + } + + static bool canPrune(const flat_set<ReportID> &) { return false; } + +private: + vector<const raw_som_dfa *> nfas; + vector<dstate_id_t> as; + vector<dstate_id_t> fs; +public: + array<u16, ALPHABET_SIZE> alpha; + array<u16, ALPHABET_SIZE> unalpha; + u16 alphasize; + StateSet dead; + + u16 start_anchored; + u16 start_floating; +}; +} + +enum bslm_mode { + ONLY_EXISTING, + INCLUDE_INVALID +}; + +static +bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) { + return is_virtual_start(v, g) || is_any_start(v, g); +} + +static s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused, - NFAVertex v) { - if (is_triggered(g) && v == g.start) { + NFAVertex v) { + if (is_triggered(g) && v == g.start) { assert(!contains(unused, v)); - } else if (is_any_start_inc_virtual(v, g)) { - return CREATE_NEW_SOM; - } - - return g[v].index; -} - -template<typename stateset> -static -void haig_do_preds(const NGHolder &g, const stateset &nfa_states, - const vector<NFAVertex> &state_mapping, - som_tran_info &preds) { - for (size_t i = nfa_states.find_first(); i != stateset::npos; - i = nfa_states.find_next(i)) { - NFAVertex v = state_mapping[i]; - s32 slot_id = g[v].index; - + } else if (is_any_start_inc_virtual(v, g)) { + return CREATE_NEW_SOM; + } + + return g[v].index; +} + +template<typename stateset> +static +void haig_do_preds(const NGHolder &g, const stateset &nfa_states, + const vector<NFAVertex> &state_mapping, + som_tran_info &preds) { + for (size_t i = nfa_states.find_first(); i != stateset::npos; + i = nfa_states.find_next(i)) { + NFAVertex v = state_mapping[i]; + s32 slot_id = g[v].index; + DEBUG_PRINTF("d vertex %zu\n", g[v].index); - vector<u32> &out_map = preds[slot_id]; - for (auto u : inv_adjacent_vertices_range(v, g)) { - 
out_map.push_back(g[u].index); - } - - sort(out_map.begin(), out_map.end()); - assert(!out_map.empty() || v == g.start); - } -} - -template<typename stateset> -static + vector<u32> &out_map = preds[slot_id]; + for (auto u : inv_adjacent_vertices_range(v, g)) { + out_map.push_back(g[u].index); + } + + sort(out_map.begin(), out_map.end()); + assert(!out_map.empty() || v == g.start); + } +} + +template<typename stateset> +static void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused, - NFAVertex accept_v, const stateset &source_nfa_states, - const vector<NFAVertex> &state_mapping, - set<som_report> &out) { - for (size_t i = source_nfa_states.find_first(); i != stateset::npos; - i = source_nfa_states.find_next(i)) { - NFAVertex v = state_mapping[i]; - if (!edge(v, accept_v, g).second) { - continue; - } - for (ReportID report_id : g[v].reports) { + NFAVertex accept_v, const stateset &source_nfa_states, + const vector<NFAVertex> &state_mapping, + set<som_report> &out) { + for (size_t i = source_nfa_states.find_first(); i != stateset::npos; + i = source_nfa_states.find_next(i)) { + NFAVertex v = state_mapping[i]; + if (!edge(v, accept_v, g).second) { + continue; + } + for (ReportID report_id : g[v].reports) { out.insert(som_report(report_id, getSlotID(g, unused, v))); - } - } -} - -static -void haig_note_starts(const NGHolder &g, map<u32, u32> *out) { - if (is_triggered(g)) { - return; - } - - DEBUG_PRINTF("seeing who creates new som values\n"); - - vector<DepthMinMax> depths = getDistancesFromSOM(g); - - for (auto v : vertices_range(g)) { - if (is_any_start_inc_virtual(v, g)) { + } + } +} + +static +void haig_note_starts(const NGHolder &g, map<u32, u32> *out) { + if (is_triggered(g)) { + return; + } + + DEBUG_PRINTF("seeing who creates new som values\n"); + + vector<DepthMinMax> depths = getDistancesFromSOM(g); + + for (auto v : vertices_range(g)) { + if (is_any_start_inc_virtual(v, g)) { DEBUG_PRINTF("%zu creates new som value\n", g[v].index); - 
out->emplace(g[v].index, 0U); - continue; - } - - if (is_any_accept(v, g)) { - continue; - } - - const DepthMinMax &d = depths[g[v].index]; - if (d.min == d.max && d.min.is_finite()) { + out->emplace(g[v].index, 0U); + continue; + } + + if (is_any_accept(v, g)) { + continue; + } + + const DepthMinMax &d = depths[g[v].index]; + if (d.min == d.max && d.min.is_finite()) { DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min); - out->emplace(g[v].index, d.min); - } - } -} - -template<class Auto> -static + out->emplace(g[v].index, d.min); + } + } +} + +template<class Auto> +static bool doHaig(const NGHolder &g, som_type som, const vector<vector<CharReach>> &triggers, bool unordered_som, raw_som_dfa *rdfa) { - u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from - a fight */ + u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from + a fight */ using StateSet = typename Auto::StateSet; - vector<StateSet> nfa_state_map; + vector<StateSet> nfa_state_map; Auto n(g, som, triggers, unordered_som); - try { + try { if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { - DEBUG_PRINTF("state limit exceeded\n"); - return false; - } - } catch (haig_too_wide &) { - DEBUG_PRINTF("too many live som states\n"); - return false; - } - - rdfa->start_anchored = n.start_anchored; - rdfa->start_floating = n.start_floating; - rdfa->alpha_size = n.alphasize; - rdfa->alpha_remap = n.alpha; - - rdfa->state_som.reserve(rdfa->states.size()); - for (u32 i = 0; i < rdfa->states.size(); i++) { - rdfa->state_som.push_back(dstate_som()); - const StateSet &source_states = nfa_state_map[i]; - if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) { - DEBUG_PRINTF("too many live states\n"); - return false; - } - - DEBUG_PRINTF("generating som info for %u\n", i); - - haig_do_preds(g, source_states, n.v_by_index, - rdfa->state_som.back().preds); - + DEBUG_PRINTF("state limit exceeded\n"); + return false; + } + } catch (haig_too_wide &) { + 
DEBUG_PRINTF("too many live som states\n"); + return false; + } + + rdfa->start_anchored = n.start_anchored; + rdfa->start_floating = n.start_floating; + rdfa->alpha_size = n.alphasize; + rdfa->alpha_remap = n.alpha; + + rdfa->state_som.reserve(rdfa->states.size()); + for (u32 i = 0; i < rdfa->states.size(); i++) { + rdfa->state_som.push_back(dstate_som()); + const StateSet &source_states = nfa_state_map[i]; + if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) { + DEBUG_PRINTF("too many live states\n"); + return false; + } + + DEBUG_PRINTF("generating som info for %u\n", i); + + haig_do_preds(g, source_states, n.v_by_index, + rdfa->state_som.back().preds); + haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, - rdfa->state_som.back().reports); + rdfa->state_som.back().reports); haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, - rdfa->state_som.back().reports_eod); - } - - haig_note_starts(g, &rdfa->new_som_nfa_states); - - return true; -} - + rdfa->state_som.back().reports_eod); + } + + haig_note_starts(g, &rdfa->new_som_nfa_states); + + return true; +} + unique_ptr<raw_som_dfa> attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, const vector<vector<CharReach>> &triggers, const Grey &grey, bool unordered_som) { - assert(is_triggered(g) != triggers.empty()); - assert(!unordered_som || is_triggered(g)); - - if (!grey.allowGough) { - /* must be at least one engine capable of handling raw som dfas */ - return nullptr; - } - - DEBUG_PRINTF("attempting to build haig \n"); - assert(allMatchStatesHaveReports(g)); - assert(hasCorrectlyNumberedVertices(g)); - - u32 numStates = num_vertices(g); - if (numStates > HAIG_MAX_NFA_STATE) { - DEBUG_PRINTF("giving up... 
looks too big\n"); - return nullptr; - } - - auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START, - somPrecision); - - DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); - bool rv; - if (numStates <= NFA_STATE_LIMIT) { - /* fast path */ + assert(is_triggered(g) != triggers.empty()); + assert(!unordered_som || is_triggered(g)); + + if (!grey.allowGough) { + /* must be at least one engine capable of handling raw som dfas */ + return nullptr; + } + + DEBUG_PRINTF("attempting to build haig \n"); + assert(allMatchStatesHaveReports(g)); + assert(hasCorrectlyNumberedVertices(g)); + + u32 numStates = num_vertices(g); + if (numStates > HAIG_MAX_NFA_STATE) { + DEBUG_PRINTF("giving up... looks too big\n"); + return nullptr; + } + + auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START, + somPrecision); + + DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); + bool rv; + if (numStates <= NFA_STATE_LIMIT) { + /* fast path */ rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som, - rdfa.get()); - } else { - /* not the fast path */ + rdfa.get()); + } else { + /* not the fast path */ rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get()); - } - - if (!rv) { - return nullptr; - } - - DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n", - rdfa->start_anchored, rdfa->start_floating); - - assert(rdfa->kind == g.kind); - return rdfa; -} - -static -void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas, - const vector<u32> &per_dfa_adj, - const vector<dstate_id_t> &source_nfa_states, - som_tran_info &som_tran) { - for (u32 d = 0; d < dfas.size(); ++d) { - u32 adj = per_dfa_adj[d]; - - const som_tran_info &som_tran_d - = dfas[d]->state_som[source_nfa_states[d]].preds; - for (som_tran_info::const_iterator it = som_tran_d.begin(); - it != som_tran_d.end(); ++it) { - assert(it->first != CREATE_NEW_SOM); - u32 dest_slot = it->first < N_SPECIALS ? 
it->first - : it->first + adj; - vector<u32> &out = som_tran[dest_slot]; - - if (!out.empty()) { - /* stylised specials already done; it does not matter who builds - the preds */ - assert(dest_slot < N_SPECIALS); - continue; - } - for (vector<u32>::const_iterator jt = it->second.begin(); - jt != it->second.end(); ++jt) { - if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) { - out.push_back(*jt); - } else { - out.push_back(*jt + adj); - } - } - } - } -} - -static -void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas, - const vector<u32> &per_dfa_adj, - map<u32, u32> *out) { - for (u32 d = 0; d < dfas.size(); ++d) { - u32 adj = per_dfa_adj[d]; - const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states; - for (map<u32, u32>::const_iterator it = new_soms.begin(); - it != new_soms.end(); ++it) { - if (it->first < N_SPECIALS) { - assert(!it->second); - out->emplace(it->first, 0U); - } else { - assert(d + 1 >= per_dfa_adj.size() - || it->first + adj < per_dfa_adj[d + 1]); - out->emplace(it->first + adj, it->second); - } - } - } -} - -static never_inline -void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas, - const vector<u32> &per_dfa_adj, - const vector<dstate_id_t> &source_nfa_states, - bool eod, set<som_report> &out) { - for (u32 d = 0; d < dfas.size(); ++d) { - u32 adj = per_dfa_adj[d]; - - const set<som_report> &reps = eod - ? 
dfas[d]->state_som[source_nfa_states[d]].reports_eod - : dfas[d]->state_som[source_nfa_states[d]].reports; - for (set<som_report>::const_iterator it = reps.begin(); - it != reps.end(); ++it) { - u32 slot = it->slot; - if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) { - slot += adj; - } - out.insert(som_report(it->report, slot)); - } - } -} - -static -u32 total_slots_used(const raw_som_dfa &rdfa) { - u32 rv = 0; - for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin(); - it != rdfa.state_som.end(); ++it) { - for (som_tran_info::const_iterator jt = it->preds.begin(); - jt != it->preds.end(); ++jt) { - assert(jt->first != CREATE_NEW_SOM); - ENSURE_AT_LEAST(&rv, jt->first + 1); - } - } - const map<u32, u32> &new_soms = rdfa.new_som_nfa_states; - for (map<u32, u32>::const_iterator it = new_soms.begin(); - it != new_soms.end(); ++it) { - ENSURE_AT_LEAST(&rv, it->first + 1); - } - return rv; -} - -unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas, - u32 limit) { - assert(!dfas.empty()); - - Automaton_Haig_Merge n(dfas); - - DEBUG_PRINTF("merging %zu dfas\n", dfas.size()); - - bool unordered_som = false; - for (const auto &haig : dfas) { - assert(haig); - assert(haig->kind == dfas.front()->kind); - unordered_som |= haig->unordered_som_triggers; - if (haig->states.size() > limit) { - DEBUG_PRINTF("too many states!\n"); - return nullptr; - } - } - + } + + if (!rv) { + return nullptr; + } + + DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n", + rdfa->start_anchored, rdfa->start_floating); + + assert(rdfa->kind == g.kind); + return rdfa; +} + +static +void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas, + const vector<u32> &per_dfa_adj, + const vector<dstate_id_t> &source_nfa_states, + som_tran_info &som_tran) { + for (u32 d = 0; d < dfas.size(); ++d) { + u32 adj = per_dfa_adj[d]; + + const som_tran_info &som_tran_d + = dfas[d]->state_som[source_nfa_states[d]].preds; + for 
(som_tran_info::const_iterator it = som_tran_d.begin(); + it != som_tran_d.end(); ++it) { + assert(it->first != CREATE_NEW_SOM); + u32 dest_slot = it->first < N_SPECIALS ? it->first + : it->first + adj; + vector<u32> &out = som_tran[dest_slot]; + + if (!out.empty()) { + /* stylised specials already done; it does not matter who builds + the preds */ + assert(dest_slot < N_SPECIALS); + continue; + } + for (vector<u32>::const_iterator jt = it->second.begin(); + jt != it->second.end(); ++jt) { + if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) { + out.push_back(*jt); + } else { + out.push_back(*jt + adj); + } + } + } + } +} + +static +void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas, + const vector<u32> &per_dfa_adj, + map<u32, u32> *out) { + for (u32 d = 0; d < dfas.size(); ++d) { + u32 adj = per_dfa_adj[d]; + const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states; + for (map<u32, u32>::const_iterator it = new_soms.begin(); + it != new_soms.end(); ++it) { + if (it->first < N_SPECIALS) { + assert(!it->second); + out->emplace(it->first, 0U); + } else { + assert(d + 1 >= per_dfa_adj.size() + || it->first + adj < per_dfa_adj[d + 1]); + out->emplace(it->first + adj, it->second); + } + } + } +} + +static never_inline +void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas, + const vector<u32> &per_dfa_adj, + const vector<dstate_id_t> &source_nfa_states, + bool eod, set<som_report> &out) { + for (u32 d = 0; d < dfas.size(); ++d) { + u32 adj = per_dfa_adj[d]; + + const set<som_report> &reps = eod + ? 
dfas[d]->state_som[source_nfa_states[d]].reports_eod + : dfas[d]->state_som[source_nfa_states[d]].reports; + for (set<som_report>::const_iterator it = reps.begin(); + it != reps.end(); ++it) { + u32 slot = it->slot; + if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) { + slot += adj; + } + out.insert(som_report(it->report, slot)); + } + } +} + +static +u32 total_slots_used(const raw_som_dfa &rdfa) { + u32 rv = 0; + for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin(); + it != rdfa.state_som.end(); ++it) { + for (som_tran_info::const_iterator jt = it->preds.begin(); + jt != it->preds.end(); ++jt) { + assert(jt->first != CREATE_NEW_SOM); + ENSURE_AT_LEAST(&rv, jt->first + 1); + } + } + const map<u32, u32> &new_soms = rdfa.new_som_nfa_states; + for (map<u32, u32>::const_iterator it = new_soms.begin(); + it != new_soms.end(); ++it) { + ENSURE_AT_LEAST(&rv, it->first + 1); + } + return rv; +} + +unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas, + u32 limit) { + assert(!dfas.empty()); + + Automaton_Haig_Merge n(dfas); + + DEBUG_PRINTF("merging %zu dfas\n", dfas.size()); + + bool unordered_som = false; + for (const auto &haig : dfas) { + assert(haig); + assert(haig->kind == dfas.front()->kind); + unordered_som |= haig->unordered_som_triggers; + if (haig->states.size() > limit) { + DEBUG_PRINTF("too many states!\n"); + return nullptr; + } + } + using StateSet = Automaton_Haig_Merge::StateSet; - vector<StateSet> nfa_state_map; - auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som, - NODE_START, - dfas[0]->stream_som_loc_width); - + vector<StateSet> nfa_state_map; + auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som, + NODE_START, + dfas[0]->stream_som_loc_width); + if (!determinise(n, rdfa->states, limit, &nfa_state_map)) { DEBUG_PRINTF("state limit (%u) exceeded\n", limit); - return nullptr; /* over state limit */ - } - - rdfa->start_anchored = n.start_anchored; - rdfa->start_floating = 
n.start_floating; - rdfa->alpha_size = n.alphasize; - rdfa->alpha_remap = n.alpha; - - vector<u32> per_dfa_adj; - u32 curr_adj = 0; - for (const auto &haig : dfas) { - per_dfa_adj.push_back(curr_adj); - curr_adj += total_slots_used(*haig); - if (curr_adj < per_dfa_adj.back()) { - /* overflowed our som slot count */ - return nullptr; - } - } - - rdfa->state_som.reserve(rdfa->states.size()); - for (u32 i = 0; i < rdfa->states.size(); i++) { - rdfa->state_som.push_back(dstate_som()); - const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i]; - DEBUG_PRINTF("finishing state %u\n", i); - - haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states, - rdfa->state_som.back().preds); - - if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) { - DEBUG_PRINTF("som slot limit exceeded (%zu)\n", - rdfa->state_som.back().preds.size()); - return nullptr; - } - - haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states, - false /* not eod */, - rdfa->state_som.back().reports); - haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states, - true /* eod */, - rdfa->state_som.back().reports_eod); - } - - haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states); - - DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n", - rdfa->start_anchored, rdfa->start_floating); - - return rdfa; -} - -} // namespace ue2 + return nullptr; /* over state limit */ + } + + rdfa->start_anchored = n.start_anchored; + rdfa->start_floating = n.start_floating; + rdfa->alpha_size = n.alphasize; + rdfa->alpha_remap = n.alpha; + + vector<u32> per_dfa_adj; + u32 curr_adj = 0; + for (const auto &haig : dfas) { + per_dfa_adj.push_back(curr_adj); + curr_adj += total_slots_used(*haig); + if (curr_adj < per_dfa_adj.back()) { + /* overflowed our som slot count */ + return nullptr; + } + } + + rdfa->state_som.reserve(rdfa->states.size()); + for (u32 i = 0; i < rdfa->states.size(); i++) { + rdfa->state_som.push_back(dstate_som()); + const vector<dstate_id_t> &source_nfa_states = 
nfa_state_map[i]; + DEBUG_PRINTF("finishing state %u\n", i); + + haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states, + rdfa->state_som.back().preds); + + if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) { + DEBUG_PRINTF("som slot limit exceeded (%zu)\n", + rdfa->state_som.back().preds.size()); + return nullptr; + } + + haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states, + false /* not eod */, + rdfa->state_som.back().reports); + haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states, + true /* eod */, + rdfa->state_som.back().reports_eod); + } + + haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states); + + DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n", + rdfa->start_anchored, rdfa->start_floating); + + return rdfa; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h index baff2f5866..136c2a7ddf 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h @@ -1,68 +1,68 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Build code for Haig SOM DFA. - */ - -#ifndef NG_HAIG_H -#define NG_HAIG_H - -#include "ue2common.h" -#include "som/som.h" - -#include <memory> -#include <vector> - -namespace ue2 { - -class CharReach; -class NGHolder; -struct Grey; -struct raw_som_dfa; - -#define HAIG_FINAL_DFA_STATE_LIMIT 16383 -#define HAIG_HARD_DFA_STATE_LIMIT 8192 - -/* unordered_som_triggers being true indicates that a live haig may be subjected - * to later tops arriving with earlier soms (without the haig going dead in - * between) - */ - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Build code for Haig SOM DFA. 
+ */ + +#ifndef NG_HAIG_H +#define NG_HAIG_H + +#include "ue2common.h" +#include "som/som.h" + +#include <memory> +#include <vector> + +namespace ue2 { + +class CharReach; +class NGHolder; +struct Grey; +struct raw_som_dfa; + +#define HAIG_FINAL_DFA_STATE_LIMIT 16383 +#define HAIG_HARD_DFA_STATE_LIMIT 8192 + +/* unordered_som_triggers being true indicates that a live haig may be subjected + * to later tops arriving with earlier soms (without the haig going dead in + * between) + */ + std::unique_ptr<raw_som_dfa> attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, const std::vector<std::vector<CharReach>> &triggers, const Grey &grey, bool unordered_som_triggers = false); - -std::unique_ptr<raw_som_dfa> -attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas, - u32 limit = HAIG_HARD_DFA_STATE_LIMIT); - -} // namespace ue2 - -#endif + +std::unique_ptr<raw_som_dfa> +attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas, + u32 limit = HAIG_HARD_DFA_STATE_LIMIT); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp index a2fbb28863..cbe2aadc25 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp @@ -1,98 +1,98 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ng_holder.h" - -#include "ng_util.h" -#include "ue2common.h" - -using namespace std; - -namespace ue2 { - -// internal use only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ng_holder.h" + +#include "ng_util.h" +#include "ue2common.h" + +using namespace std; + +namespace ue2 { + +// internal use only static NFAVertex addSpecialVertex(NGHolder &g, SpecialNodes id) { NFAVertex v(add_vertex(g)); - g[v].index = id; - return v; -} - -NGHolder::NGHolder(nfa_kind k) + g[v].index = id; + return v; +} + +NGHolder::NGHolder(nfa_kind k) : kind (k), - // add initial special nodes + // add initial special nodes start(addSpecialVertex(*this, NODE_START)), startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)), accept(addSpecialVertex(*this, NODE_ACCEPT)), acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) { - - // wire up some fake edges for the stylized bits of the NFA - add_edge(start, startDs, *this); - add_edge(startDs, startDs, *this); - add_edge(accept, acceptEod, *this); - + + // wire up some fake edges for the stylized bits of the NFA + add_edge(start, startDs, *this); + add_edge(startDs, startDs, *this); + add_edge(accept, acceptEod, *this); + (*this)[start].char_reach.setall(); (*this)[startDs].char_reach.setall(); -} - -NGHolder::~NGHolder(void) { - DEBUG_PRINTF("destroying holder @ %p\n", this); -} - -void clear_graph(NGHolder &h) { 
+} + +NGHolder::~NGHolder(void) { + DEBUG_PRINTF("destroying holder @ %p\n", this); +} + +void clear_graph(NGHolder &h) { NGHolder::vertex_iterator vi, ve; - for (tie(vi, ve) = vertices(h); vi != ve;) { - NFAVertex v = *vi; - ++vi; - - clear_vertex(v, h); - if (!is_special(v, h)) { - remove_vertex(v, h); - } - } - - assert(num_vertices(h) == N_SPECIALS); + for (tie(vi, ve) = vertices(h); vi != ve;) { + NFAVertex v = *vi; + ++vi; + + clear_vertex(v, h); + if (!is_special(v, h)) { + remove_vertex(v, h); + } + } + + assert(num_vertices(h) == N_SPECIALS); renumber_vertices(h); /* ensure that we reset our next allocated index */ renumber_edges(h); - - // Recreate special stylised edges. - add_edge(h.start, h.startDs, h); - add_edge(h.startDs, h.startDs, h); - add_edge(h.accept, h.acceptEod, h); -} - -NFAVertex NGHolder::getSpecialVertex(u32 id) const { - switch (id) { + + // Recreate special stylised edges. + add_edge(h.start, h.startDs, h); + add_edge(h.startDs, h.startDs, h); + add_edge(h.accept, h.acceptEod, h); +} + +NFAVertex NGHolder::getSpecialVertex(u32 id) const { + switch (id) { case NODE_START: return start; case NODE_START_DOTSTAR: return startDs; case NODE_ACCEPT: return accept; case NODE_ACCEPT_EOD: return acceptEod; default: return null_vertex(); - } -} - -} + } +} + +} diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h index 36cf62447b..8edc534835 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h @@ -1,31 +1,31 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** \file * \brief Definition of the NGHolder type used for to represent general nfa * graphs as well as all associated types (vertex and edge properties, etc). @@ -34,17 +34,17 @@ * accepts. */ -#ifndef NG_HOLDER_H -#define NG_HOLDER_H - -#include "ue2common.h" -#include "nfa/nfa_kind.h" +#ifndef NG_HOLDER_H +#define NG_HOLDER_H + +#include "ue2common.h" +#include "nfa/nfa_kind.h" #include "util/charreach.h" #include "util/flat_containers.h" #include "util/ue2_graph.h" - -namespace ue2 { - + +namespace ue2 { + /** \brief Properties associated with each vertex in an NFAGraph. */ struct NFAGraphVertexProps { /** \brief Set of characters on which this vertex is reachable. */ @@ -95,139 +95,139 @@ enum SpecialNodes { N_SPECIALS }; -/** \brief Encapsulates an NFAGraph, stores special vertices and other - * metadata. - * - * When constructed, the graph will have the following stylised "special" - * edges: - * - * - (start, startDs) - * - (startDs, startDs) (self-loop) - * - (accept, acceptEod) - */ +/** \brief Encapsulates an NFAGraph, stores special vertices and other + * metadata. 
+ * + * When constructed, the graph will have the following stylised "special" + * edges: + * + * - (start, startDs) + * - (startDs, startDs) (self-loop) + * - (accept, acceptEod) + */ class NGHolder : public ue2_graph<NGHolder, NFAGraphVertexProps, NFAGraphEdgeProps> { -public: - explicit NGHolder(nfa_kind kind); +public: + explicit NGHolder(nfa_kind kind); NGHolder(void) : NGHolder(NFA_OUTFIX) {}; - virtual ~NGHolder(void); - + virtual ~NGHolder(void); + nfa_kind kind; /* Role that this plays in Rose */ - + static const size_t N_SPECIAL_VERTICES = N_SPECIALS; public: const vertex_descriptor start; //!< Anchored start vertex. const vertex_descriptor startDs; //!< Unanchored start-dotstar vertex. const vertex_descriptor accept; //!< Accept vertex. const vertex_descriptor acceptEod; //!< Accept at EOD vertex. - + vertex_descriptor getSpecialVertex(u32 id) const; }; - + typedef NGHolder::vertex_descriptor NFAVertex; typedef NGHolder::edge_descriptor NFAEdge; - -/** \brief True if the vertex \p v is one of our special vertices. */ -template <typename GraphT> + +/** \brief True if the vertex \p v is one of our special vertices. */ +template <typename GraphT> bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) { - return g[v].index < N_SPECIALS; -} - -/** - * \brief Clears all non-special vertices and edges from the graph. - * - * Note: not the same as the BGL's clear() function, which removes all vertices - * and edges. - */ -void clear_graph(NGHolder &h); - -/* - * \brief Clear and remove all of the vertices pointed to by the given iterator - * range. - * - * If renumber is false, no renumbering of vertex indices is done. - * - * Note: should not be called with iterators that will be invalidated by vertex - * removal (such as NFAGraph::vertex_iterator). 
- */ -template <class Iter> -void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) { - if (begin == end) { - return; - } - - for (Iter it = begin; it != end; ++it) { - NFAVertex v = *it; - if (!is_special(v, h)) { - clear_vertex(v, h); - remove_vertex(v, h); - } else { - assert(0); - } - } - - if (renumber) { + return g[v].index < N_SPECIALS; +} + +/** + * \brief Clears all non-special vertices and edges from the graph. + * + * Note: not the same as the BGL's clear() function, which removes all vertices + * and edges. + */ +void clear_graph(NGHolder &h); + +/* + * \brief Clear and remove all of the vertices pointed to by the given iterator + * range. + * + * If renumber is false, no renumbering of vertex indices is done. + * + * Note: should not be called with iterators that will be invalidated by vertex + * removal (such as NFAGraph::vertex_iterator). + */ +template <class Iter> +void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) { + if (begin == end) { + return; + } + + for (Iter it = begin; it != end; ++it) { + NFAVertex v = *it; + if (!is_special(v, h)) { + clear_vertex(v, h); + remove_vertex(v, h); + } else { + assert(0); + } + } + + if (renumber) { renumber_edges(h); renumber_vertices(h); - } -} - -/** \brief Clear and remove all of the vertices pointed to by the vertex - * descriptors in the given container. - * - * This is a convenience wrapper around the iterator variant above. - */ -template <class Container> -void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) { - remove_vertices(c.begin(), c.end(), h, renumber); -} - -/* - * \brief Clear and remove all of the edges pointed to by the given iterator - * range. - * - * If renumber is false, no renumbering of vertex indices is done. - * - * Note: should not be called with iterators that will be invalidated by vertex - * removal (such as NFAGraph::edge_iterator). 
- */ -template <class Iter> -void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) { - if (begin == end) { - return; - } - - for (Iter it = begin; it != end; ++it) { - const NFAEdge &e = *it; - remove_edge(e, h); - } - - if (renumber) { + } +} + +/** \brief Clear and remove all of the vertices pointed to by the vertex + * descriptors in the given container. + * + * This is a convenience wrapper around the iterator variant above. + */ +template <class Container> +void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) { + remove_vertices(c.begin(), c.end(), h, renumber); +} + +/* + * \brief Clear and remove all of the edges pointed to by the given iterator + * range. + * + * If renumber is false, no renumbering of vertex indices is done. + * + * Note: should not be called with iterators that will be invalidated by vertex + * removal (such as NFAGraph::edge_iterator). + */ +template <class Iter> +void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) { + if (begin == end) { + return; + } + + for (Iter it = begin; it != end; ++it) { + const NFAEdge &e = *it; + remove_edge(e, h); + } + + if (renumber) { renumber_edges(h); - } -} - + } +} + #define DEFAULT_TOP 0U -/** \brief Clear and remove all of the edges pointed to by the edge descriptors - * in the given container. - * - * This is a convenience wrapper around the iterator variant above. - */ -template <class Container> -void remove_edges(const Container &c, NGHolder &h, bool renumber = true) { - remove_edges(c.begin(), c.end(), h, renumber); -} - +/** \brief Clear and remove all of the edges pointed to by the edge descriptors + * in the given container. + * + * This is a convenience wrapper around the iterator variant above. 
+ */ +template <class Container> +void remove_edges(const Container &c, NGHolder &h, bool renumber = true) { + remove_edges(c.begin(), c.end(), h, renumber); +} + inline -bool is_triggered(const NGHolder &g) { - return is_triggered(g.kind); -} - +bool is_triggered(const NGHolder &g) { + return is_triggered(g.kind); +} + inline -bool generates_callbacks(const NGHolder &g) { - return generates_callbacks(g.kind); -} +bool generates_callbacks(const NGHolder &g) { + return generates_callbacks(g.kind); +} inline bool has_managed_reports(const NGHolder &g) { @@ -239,6 +239,6 @@ bool inspects_states_for_accepts(const NGHolder &g) { return inspects_states_for_accepts(g.kind); } -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp index 35a09d0ea2..3e013ad5cc 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp @@ -1,78 +1,78 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Loose equality testing for NGHolder graphs. - * - * Loose equality check for holders' graph structure and vertex_index, - * vertex_char_reach and (optionally reports). - */ -#include "ng_is_equal.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Loose equality testing for NGHolder graphs. + * + * Loose equality check for holders' graph structure and vertex_index, + * vertex_char_reach and (optionally reports). + */ +#include "ng_is_equal.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph_range.h" -#include "util/make_unique.h" - -using namespace std; - -namespace ue2 { - -namespace { -struct check_report { - virtual ~check_report() {} - virtual bool operator()(const flat_set<ReportID> &reports_a, - const flat_set<ReportID> &reports_b) const = 0; -}; - -struct full_check_report : public check_report { - bool operator()(const flat_set<ReportID> &reports_a, - const flat_set<ReportID> &reports_b) const override { - return reports_a == reports_b; - } -}; - -struct equiv_check_report : public check_report { - equiv_check_report(ReportID a_in, ReportID b_in) - : a_rep(a_in), b_rep(b_in) {} - - bool operator()(const flat_set<ReportID> &reports_a, - const flat_set<ReportID> &reports_b) const override { - return contains(reports_a, a_rep) == contains(reports_b, b_rep); - } 
-private: - ReportID a_rep; - ReportID b_rep; -}; +#include "util/graph_range.h" +#include "util/make_unique.h" + +using namespace std; + +namespace ue2 { + +namespace { +struct check_report { + virtual ~check_report() {} + virtual bool operator()(const flat_set<ReportID> &reports_a, + const flat_set<ReportID> &reports_b) const = 0; +}; + +struct full_check_report : public check_report { + bool operator()(const flat_set<ReportID> &reports_a, + const flat_set<ReportID> &reports_b) const override { + return reports_a == reports_b; + } +}; + +struct equiv_check_report : public check_report { + equiv_check_report(ReportID a_in, ReportID b_in) + : a_rep(a_in), b_rep(b_in) {} + + bool operator()(const flat_set<ReportID> &reports_a, + const flat_set<ReportID> &reports_b) const override { + return contains(reports_a, a_rep) == contains(reports_b, b_rep); + } +private: + ReportID a_rep; + ReportID b_rep; +}; /** Comparison functor used to sort by vertex_index. */ template<typename Graph> @@ -91,141 +91,141 @@ template<typename Graph> static VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) { return VertexIndexOrdering<Graph>(g); +} + } -} - -static -bool is_equal_i(const NGHolder &a, const NGHolder &b, - const check_report &check_rep) { - assert(hasCorrectlyNumberedVertices(a)); - assert(hasCorrectlyNumberedVertices(b)); - - size_t num_verts = num_vertices(a); - if (num_verts != num_vertices(b)) { - return false; - } - - vector<NFAVertex> vert_a; - vector<NFAVertex> vert_b; - vector<NFAVertex> adj_a; - vector<NFAVertex> adj_b; - - vert_a.reserve(num_verts); - vert_b.reserve(num_verts); - adj_a.reserve(num_verts); - adj_b.reserve(num_verts); - - insert(&vert_a, vert_a.end(), vertices(a)); - insert(&vert_b, vert_b.end(), vertices(b)); - - sort(vert_a.begin(), vert_a.end(), make_index_ordering(a)); - sort(vert_b.begin(), vert_b.end(), make_index_ordering(b)); - - for (size_t i = 0; i < vert_a.size(); i++) { - NFAVertex va = vert_a[i]; - NFAVertex vb = vert_b[i]; 
+static +bool is_equal_i(const NGHolder &a, const NGHolder &b, + const check_report &check_rep) { + assert(hasCorrectlyNumberedVertices(a)); + assert(hasCorrectlyNumberedVertices(b)); + + size_t num_verts = num_vertices(a); + if (num_verts != num_vertices(b)) { + return false; + } + + vector<NFAVertex> vert_a; + vector<NFAVertex> vert_b; + vector<NFAVertex> adj_a; + vector<NFAVertex> adj_b; + + vert_a.reserve(num_verts); + vert_b.reserve(num_verts); + adj_a.reserve(num_verts); + adj_b.reserve(num_verts); + + insert(&vert_a, vert_a.end(), vertices(a)); + insert(&vert_b, vert_b.end(), vertices(b)); + + sort(vert_a.begin(), vert_a.end(), make_index_ordering(a)); + sort(vert_b.begin(), vert_b.end(), make_index_ordering(b)); + + for (size_t i = 0; i < vert_a.size(); i++) { + NFAVertex va = vert_a[i]; + NFAVertex vb = vert_b[i]; DEBUG_PRINTF("vertex %zu\n", a[va].index); - - // Vertex index must be the same. - if (a[va].index != b[vb].index) { - DEBUG_PRINTF("bad index\n"); - return false; - } - - // Reach must be the same. - if (a[va].char_reach != b[vb].char_reach) { - DEBUG_PRINTF("bad reach\n"); - return false; - } - - if (!check_rep(a[va].reports, b[vb].reports)) { - DEBUG_PRINTF("bad reports\n"); - return false; - } - - // Other vertex properties may vary. - - /* Check successors */ - adj_a.clear(); - adj_b.clear(); - insert(&adj_a, adj_a.end(), adjacent_vertices(va, a)); - insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b)); - - if (adj_a.size() != adj_b.size()) { - DEBUG_PRINTF("bad adj\n"); - return false; - } - - sort(adj_a.begin(), adj_a.end(), make_index_ordering(a)); - sort(adj_b.begin(), adj_b.end(), make_index_ordering(b)); - - for (size_t j = 0; j < adj_a.size(); j++) { - if (a[adj_a[j]].index != b[adj_b[j]].index) { - DEBUG_PRINTF("bad adj\n"); - return false; - } - } - } - - /* check top for edges out of start */ + + // Vertex index must be the same. 
+ if (a[va].index != b[vb].index) { + DEBUG_PRINTF("bad index\n"); + return false; + } + + // Reach must be the same. + if (a[va].char_reach != b[vb].char_reach) { + DEBUG_PRINTF("bad reach\n"); + return false; + } + + if (!check_rep(a[va].reports, b[vb].reports)) { + DEBUG_PRINTF("bad reports\n"); + return false; + } + + // Other vertex properties may vary. + + /* Check successors */ + adj_a.clear(); + adj_b.clear(); + insert(&adj_a, adj_a.end(), adjacent_vertices(va, a)); + insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b)); + + if (adj_a.size() != adj_b.size()) { + DEBUG_PRINTF("bad adj\n"); + return false; + } + + sort(adj_a.begin(), adj_a.end(), make_index_ordering(a)); + sort(adj_b.begin(), adj_b.end(), make_index_ordering(b)); + + for (size_t j = 0; j < adj_a.size(); j++) { + if (a[adj_a[j]].index != b[adj_b[j]].index) { + DEBUG_PRINTF("bad adj\n"); + return false; + } + } + } + + /* check top for edges out of start */ vector<pair<u32, flat_set<u32>>> top_a; vector<pair<u32, flat_set<u32>>> top_b; - - for (const auto &e : out_edges_range(a.start, a)) { + + for (const auto &e : out_edges_range(a.start, a)) { top_a.emplace_back(a[target(e, a)].index, a[e].tops); - } - for (const auto &e : out_edges_range(b.start, b)) { + } + for (const auto &e : out_edges_range(b.start, b)) { top_b.emplace_back(b[target(e, b)].index, b[e].tops); - } - - sort(top_a.begin(), top_a.end()); - sort(top_b.begin(), top_b.end()); - - if (top_a != top_b) { - DEBUG_PRINTF("bad top\n"); - return false; - } - - DEBUG_PRINTF("good\n"); - return true; -} - -/** \brief loose hash of an NGHolder; equal if is_equal would return true. 
*/ -u64a hash_holder(const NGHolder &g) { - size_t rv = 0; - - for (auto v : vertices_range(g)) { + } + + sort(top_a.begin(), top_a.end()); + sort(top_b.begin(), top_b.end()); + + if (top_a != top_b) { + DEBUG_PRINTF("bad top\n"); + return false; + } + + DEBUG_PRINTF("good\n"); + return true; +} + +/** \brief loose hash of an NGHolder; equal if is_equal would return true. */ +u64a hash_holder(const NGHolder &g) { + size_t rv = 0; + + for (auto v : vertices_range(g)) { hash_combine(rv, g[v].index); hash_combine(rv, g[v].char_reach); - - for (auto w : adjacent_vertices_range(v, g)) { + + for (auto w : adjacent_vertices_range(v, g)) { hash_combine(rv, g[w].index); - } - } - - return rv; -} - -bool is_equal(const NGHolder &a, const NGHolder &b) { - DEBUG_PRINTF("testing %p %p\n", &a, &b); - - if (&a == &b) { - return true; - } - - return is_equal_i(a, b, full_check_report()); -} - -bool is_equal(const NGHolder &a, ReportID a_rep, - const NGHolder &b, ReportID b_rep) { - DEBUG_PRINTF("testing %p %p\n", &a, &b); - - if (&a == &b && a_rep == b_rep) { - return true; - } - - return is_equal_i(a, b, equiv_check_report(a_rep, b_rep)); -} - -} // namespace ue2 + } + } + + return rv; +} + +bool is_equal(const NGHolder &a, const NGHolder &b) { + DEBUG_PRINTF("testing %p %p\n", &a, &b); + + if (&a == &b) { + return true; + } + + return is_equal_i(a, b, full_check_report()); +} + +bool is_equal(const NGHolder &a, ReportID a_rep, + const NGHolder &b, ReportID b_rep) { + DEBUG_PRINTF("testing %p %p\n", &a, &b); + + if (&a == &b && a_rep == b_rep) { + return true; + } + + return is_equal_i(a, b, equiv_check_report(a_rep, b_rep)); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h index d8046270ff..baddc494b0 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h @@ -1,54 +1,54 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Loose equality testing for NGHolder graphs. - * - * Loose equality check for holders' graph structure and vertex_index, - * vertex_char_reach and (optionally reports). 
- */ - -#ifndef NG_IS_EQUAL_H -#define NG_IS_EQUAL_H - -#include "ue2common.h" - -#include <memory> - -namespace ue2 { - -class NGHolder; - -bool is_equal(const NGHolder &a, const NGHolder &b); -bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r); - -u64a hash_holder(const NGHolder &g); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Loose equality testing for NGHolder graphs. 
+ * + * Loose equality check for holders' graph structure and vertex_index, + * vertex_char_reach and (optionally reports). + */ + +#ifndef NG_IS_EQUAL_H +#define NG_IS_EQUAL_H + +#include "ue2common.h" + +#include <memory> + +namespace ue2 { + +class NGHolder; + +bool is_equal(const NGHolder &a, const NGHolder &b); +bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r); + +u64a hash_holder(const NGHolder &g); + // Util Functors struct NGHolderHasher { size_t operator()(const std::shared_ptr<const NGHolder> &h) const { @@ -59,13 +59,13 @@ struct NGHolderHasher { } }; -struct NGHolderEqual { - bool operator()(const std::shared_ptr<const NGHolder> &a, - const std::shared_ptr<const NGHolder> &b) const { - return is_equal(*a, *b); - } -}; - -} // namespace ue2 - -#endif // NG_IS_EQUAL_H +struct NGHolderEqual { + bool operator()(const std::shared_ptr<const NGHolder> &a, + const std::shared_ptr<const NGHolder> &b) const { + return is_equal(*a, *b); + } +}; + +} // namespace ue2 + +#endif // NG_IS_EQUAL_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp index d8ba503ce6..e6526a2414 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp @@ -1,349 +1,349 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Large Bounded Repeat (LBR) engine build code. - */ - -#include "ng_lbr.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ng_repeat.h" -#include "ng_reports.h" + * \brief Large Bounded Repeat (LBR) engine build code. + */ + +#include "ng_lbr.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_repeat.h" +#include "ng_reports.h" #include "nfa/castlecompile.h" -#include "nfa/lbr_internal.h" -#include "nfa/nfa_internal.h" -#include "nfa/repeatcompile.h" +#include "nfa/lbr_internal.h" +#include "nfa/nfa_internal.h" +#include "nfa/repeatcompile.h" #include "nfa/shufticompile.h" #include "nfa/trufflecompile.h" -#include "util/alloc.h" -#include "util/bitutils.h" // for lg2 -#include "util/compile_context.h" -#include "util/container.h" -#include "util/depth.h" -#include "util/dump_charclass.h" +#include "util/alloc.h" +#include "util/bitutils.h" // for lg2 +#include "util/compile_context.h" +#include "util/container.h" +#include "util/depth.h" +#include "util/dump_charclass.h" #include "util/report_manager.h" -#include "util/verify_types.h" - -using namespace std; - -namespace ue2 { - -static -u32 depth_to_u32(const depth &d) { - assert(d.is_reachable()); - if (d.is_infinite()) { - return REPEAT_INF; - } - - u32 d_val = d; - assert(d_val < REPEAT_INF); - return d_val; -} - -template<class LbrStruct> static -u64a* getTable(NFA *nfa) { - char *ptr = (char *)nfa + sizeof(struct NFA) + 
sizeof(LbrStruct) + - sizeof(RepeatInfo); - ptr = ROUNDUP_PTR(ptr, alignof(u64a)); - return (u64a *)ptr; -} - -template <class LbrStruct> static -void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) { - assert(nfa); - - RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod); - - DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n", - repeatTypeName(rtype), repeatMin.str().c_str(), - repeatMax.str().c_str()); - - // Fill the lbr_common structure first. Note that the RepeatInfo structure - // directly follows the LbrStruct. - const u32 info_offset = sizeof(LbrStruct); - c->repeatInfoOffset = info_offset; - c->report = report; - - RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset); - info->type = verify_u8(rtype); - info->repeatMin = depth_to_u32(repeatMin); - info->repeatMax = depth_to_u32(repeatMax); - info->stateSize = rsi.stateSize; - info->packedCtrlSize = rsi.packedCtrlSize; - info->horizon = rsi.horizon; - info->minPeriod = minPeriod; - copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes); - info->patchCount = rsi.patchCount; - info->patchSize = rsi.patchSize; - info->encodingSize = rsi.encodingSize; - info->patchesOffset = rsi.patchesOffset; - - // Fill the NFA structure. - nfa->nPositions = repeatMin; - nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); - nfa->scratchStateSize = (u32)sizeof(lbr_state); - nfa->minWidth = verify_u32(repeatMin); - nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0; - - // Fill the lbr table for sparse lbr model. - if (rtype == REPEAT_SPARSE_OPTIMAL_P) { - u64a *table = getTable<LbrStruct>(nfa); - // Adjust table length according to the optimal patch length. 
- size_t len = nfa->length; - assert((u32)repeatMax >= rsi.patchSize); - len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize); - nfa->length = verify_u32(len); - info->length = verify_u32(sizeof(RepeatInfo) - + sizeof(u64a) * (rsi.patchSize + 1)); - copy_bytes(table, rsi.table); - } -} - -template <class LbrStruct> static +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static +u32 depth_to_u32(const depth &d) { + assert(d.is_reachable()); + if (d.is_infinite()) { + return REPEAT_INF; + } + + u32 d_val = d; + assert(d_val < REPEAT_INF); + return d_val; +} + +template<class LbrStruct> static +u64a* getTable(NFA *nfa) { + char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) + + sizeof(RepeatInfo); + ptr = ROUNDUP_PTR(ptr, alignof(u64a)); + return (u64a *)ptr; +} + +template <class LbrStruct> static +void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) { + assert(nfa); + + RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod); + + DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n", + repeatTypeName(rtype), repeatMin.str().c_str(), + repeatMax.str().c_str()); + + // Fill the lbr_common structure first. Note that the RepeatInfo structure + // directly follows the LbrStruct. 
+ const u32 info_offset = sizeof(LbrStruct); + c->repeatInfoOffset = info_offset; + c->report = report; + + RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset); + info->type = verify_u8(rtype); + info->repeatMin = depth_to_u32(repeatMin); + info->repeatMax = depth_to_u32(repeatMax); + info->stateSize = rsi.stateSize; + info->packedCtrlSize = rsi.packedCtrlSize; + info->horizon = rsi.horizon; + info->minPeriod = minPeriod; + copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes); + info->patchCount = rsi.patchCount; + info->patchSize = rsi.patchSize; + info->encodingSize = rsi.encodingSize; + info->patchesOffset = rsi.patchesOffset; + + // Fill the NFA structure. + nfa->nPositions = repeatMin; + nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); + nfa->scratchStateSize = (u32)sizeof(lbr_state); + nfa->minWidth = verify_u32(repeatMin); + nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0; + + // Fill the lbr table for sparse lbr model. + if (rtype == REPEAT_SPARSE_OPTIMAL_P) { + u64a *table = getTable<LbrStruct>(nfa); + // Adjust table length according to the optimal patch length. 
+ size_t len = nfa->length; + assert((u32)repeatMax >= rsi.patchSize); + len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize); + nfa->length = verify_u32(len); + info->length = verify_u32(sizeof(RepeatInfo) + + sizeof(u64a) * (rsi.patchSize + 1)); + copy_bytes(table, rsi.table); + } +} + +template <class LbrStruct> static bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, const depth &repeatMax) { - size_t tableLen = 0; - if (rtype == REPEAT_SPARSE_OPTIMAL_P) { - tableLen = sizeof(u64a) * (repeatMax + 1); - } - size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + - tableLen + sizeof(u64a); + size_t tableLen = 0; + if (rtype == REPEAT_SPARSE_OPTIMAL_P) { + tableLen = sizeof(u64a) * (repeatMax + 1); + } + size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + + tableLen + sizeof(u64a); auto nfa = make_zeroed_bytecode_ptr<NFA>(len); - nfa->type = verify_u8(nfa_type); - nfa->length = verify_u32(len); - return nfa; -} - -static + nfa->type = verify_u8(nfa_type); + nfa->length = verify_u32(len); + return nfa; +} + +static bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { - if (!cr.all()) { - return nullptr; - } - - enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, - is_reset); + if (!cr.all()) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax); - struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get()); - - fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax, - minPeriod, rtype); - - DEBUG_PRINTF("built dot lbr\n"); - return nfa; -} - -static + struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get()); + + fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built dot lbr\n"); + 
return nfa; +} + +static bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { - const CharReach escapes(~cr); - - if (escapes.count() != 1) { - return nullptr; - } - - enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, - is_reset); + const CharReach escapes(~cr); + + if (escapes.count() != 1) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax); - struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); - lv->c = escapes.find_first(); - - fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax, - minPeriod, rtype); - - DEBUG_PRINTF("built verm lbr\n"); - return nfa; -} - -static + struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); + lv->c = escapes.find_first(); + + fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built verm lbr\n"); + return nfa; +} + +static bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { - const CharReach escapes(cr); - - if (escapes.count() != 1) { - return nullptr; - } - - enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, - is_reset); + const CharReach escapes(cr); + + if (escapes.count() != 1) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax); - struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); - lv->c = escapes.find_first(); - - fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax, - minPeriod, rtype); - - DEBUG_PRINTF("built negated verm lbr\n"); - return nfa; -} - -static + struct lbr_verm *lv = 
(struct lbr_verm *)getMutableImplNfa(nfa.get()); + lv->c = escapes.find_first(); + + fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built negated verm lbr\n"); + return nfa; +} + +static bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { - enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, - is_reset); + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax); - struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get()); - - fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax, - minPeriod, rtype); - + struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get()); + + fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax, + minPeriod, rtype); + if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { - return nullptr; - } - - DEBUG_PRINTF("built shuf lbr\n"); - return nfa; -} - -static + return nullptr; + } + + DEBUG_PRINTF("built shuf lbr\n"); + return nfa; +} + +static bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { - enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, - is_reset); + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax); - struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get()); - - fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax, - minPeriod, rtype); - + struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get()); + + fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax, + minPeriod, rtype); + truffleBuildMasks(~cr, (u8 
*)&lc->mask1, (u8 *)&lc->mask2); - - DEBUG_PRINTF("built truffle lbr\n"); - return nfa; -} - -static + + DEBUG_PRINTF("built truffle lbr\n"); + return nfa; +} + +static bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { - DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n", - repeatMin.str().c_str(), repeatMax.str().c_str(), - describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(), - report); - assert(repeatMin <= repeatMax); - assert(repeatMax.is_reachable()); - + DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n", + repeatMin.str().c_str(), repeatMax.str().c_str(), + describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(), + report); + assert(repeatMin <= repeatMax); + assert(repeatMax.is_reachable()); + auto nfa = buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); - - if (!nfa) { - nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, - report); - } - if (!nfa) { - nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, - report); - } - if (!nfa) { - nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, - report); - } - if (!nfa) { - nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset, - report); - } - - if (!nfa) { - assert(0); - return nullptr; - } - - return nfa; -} - + + if (!nfa) { + nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + + if (!nfa) { + assert(0); + return nullptr; + } + + return nfa; +} + bytecode_ptr<NFA> constructLBR(const CastleProto &proto, const vector<vector<CharReach>> &triggers, const CompileContext &cc, const ReportManager &rm) { - if 
(!cc.grey.allowLbr) { - return nullptr; - } - + if (!cc.grey.allowLbr) { + return nullptr; + } + if (proto.repeats.size() != 1) { return nullptr; } const PureRepeat &repeat = proto.repeats.begin()->second; - assert(!repeat.reach.none()); - - if (repeat.reports.size() != 1) { - DEBUG_PRINTF("too many reports\n"); - return nullptr; - } - - bool is_reset; - u32 min_period = minPeriod(triggers, repeat.reach, &is_reset); - - if (depth(min_period) > repeat.bounds.max) { - DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n"); - is_reset = true; - } - - ReportID report = *repeat.reports.begin(); + assert(!repeat.reach.none()); + + if (repeat.reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); + return nullptr; + } + + bool is_reset; + u32 min_period = minPeriod(triggers, repeat.reach, &is_reset); + + if (depth(min_period) > repeat.bounds.max) { + DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n"); + is_reset = true; + } + + ReportID report = *repeat.reports.begin(); if (has_managed_reports(proto.kind)) { report = rm.getProgramOffset(report); } - - DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str()); - return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max, - min_period, is_reset, report); -} - -/** \brief Construct an LBR engine from the given graph \p g. */ + + DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str()); + return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max, + min_period, is_reset, report); +} + +/** \brief Construct an LBR engine from the given graph \p g. 
*/ bytecode_ptr<NFA> constructLBR(const NGHolder &g, const vector<vector<CharReach>> &triggers, const CompileContext &cc, const ReportManager &rm) { - if (!cc.grey.allowLbr) { - return nullptr; - } - - PureRepeat repeat; - if (!isPureRepeat(g, repeat)) { + if (!cc.grey.allowLbr) { + return nullptr; + } + + PureRepeat repeat; + if (!isPureRepeat(g, repeat)) { + return nullptr; + } + if (repeat.reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); return nullptr; - } - if (repeat.reports.size() != 1) { - DEBUG_PRINTF("too many reports\n"); - return nullptr; - } - + } + CastleProto proto(g.kind, repeat); return constructLBR(proto, triggers, cc, rm); -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h index c181dbb9e7..55a77fcd1e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h @@ -1,71 +1,71 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Large Bounded Repeat (LBR) engine build code. - */ - -#ifndef NG_LBR_H -#define NG_LBR_H - -#include "ue2common.h" + * \brief Large Bounded Repeat (LBR) engine build code. + */ + +#ifndef NG_LBR_H +#define NG_LBR_H + +#include "ue2common.h" #include "util/bytecode_ptr.h" - -#include <memory> -#include <vector> - -struct NFA; - -namespace ue2 { - -class CharReach; -class NGHolder; -class ReportManager; + +#include <memory> +#include <vector> + +struct NFA; + +namespace ue2 { + +class CharReach; +class NGHolder; +class ReportManager; struct CastleProto; -struct CompileContext; -struct Grey; - -/** \brief Construct an LBR engine from the given graph \p g. */ +struct CompileContext; +struct Grey; + +/** \brief Construct an LBR engine from the given graph \p g. */ bytecode_ptr<NFA> -constructLBR(const NGHolder &g, - const std::vector<std::vector<CharReach>> &triggers, +constructLBR(const NGHolder &g, + const std::vector<std::vector<CharReach>> &triggers, const CompileContext &cc, const ReportManager &rm); - + /** * \brief Construct an LBR engine from the given CastleProto, which should * contain only one repeat. 
*/ bytecode_ptr<NFA> constructLBR(const CastleProto &proto, - const std::vector<std::vector<CharReach>> &triggers, + const std::vector<std::vector<CharReach>> &triggers, const CompileContext &cc, const ReportManager &rm); - -} // namespace ue2 - -#endif // NG_LBR_H + +} // namespace ue2 + +#endif // NG_LBR_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp index 2f0a55eab9..0f939f122f 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp @@ -1,198 +1,198 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Limex NFA construction code. - */ - -#include "ng_limex.h" - -#include "grey.h" -#include "ng_equivalence.h" -#include "ng_holder.h" -#include "ng_misc_opt.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_repeat.h" -#include "ng_reports.h" -#include "ng_restructuring.h" -#include "ng_squash.h" -#include "ng_util.h" -#include "ng_width.h" -#include "ue2common.h" -#include "nfa/limex_compile.h" -#include "nfa/limex_limits.h" -#include "nfa/nfa_internal.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/graph_range.h" + * \brief Limex NFA construction code. 
+ */ + +#include "ng_limex.h" + +#include "grey.h" +#include "ng_equivalence.h" +#include "ng_holder.h" +#include "ng_misc_opt.h" +#include "ng_prune.h" +#include "ng_redundancy.h" +#include "ng_repeat.h" +#include "ng_reports.h" +#include "ng_restructuring.h" +#include "ng_squash.h" +#include "ng_util.h" +#include "ng_width.h" +#include "ue2common.h" +#include "nfa/limex_compile.h" +#include "nfa/limex_limits.h" +#include "nfa/nfa_internal.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/graph_range.h" #include "util/report_manager.h" #include "util/flat_containers.h" -#include "util/verify_types.h" - +#include "util/verify_types.h" + #include <algorithm> -#include <map> +#include <map> #include <unordered_map> #include <unordered_set> -#include <vector> - +#include <vector> + #include <boost/range/adaptor/map.hpp> -using namespace std; +using namespace std; using boost::adaptors::map_values; using boost::adaptors::map_keys; - -namespace ue2 { - -#ifndef NDEBUG -// Some sanity checking for the graph; returns false if something is wrong. -// Only used in assertions. -static -bool sanityCheckGraph(const NGHolder &g, + +namespace ue2 { + +#ifndef NDEBUG +// Some sanity checking for the graph; returns false if something is wrong. +// Only used in assertions. +static +bool sanityCheckGraph(const NGHolder &g, const unordered_map<NFAVertex, u32> &state_ids) { unordered_set<u32> seen_states; - - for (auto v : vertices_range(g)) { - // Non-specials should have non-empty reachability. - if (!is_special(v, g)) { - if (g[v].char_reach.none()) { + + for (auto v : vertices_range(g)) { + // Non-specials should have non-empty reachability. + if (!is_special(v, g)) { + if (g[v].char_reach.none()) { DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index); - return false; - } - } - + return false; + } + } + // Vertices with edges to accept or acceptEod must have reports and // other vertices must not have them. 
- if (is_match_vertex(v, g) && v != g.accept) { - if (g[v].reports.empty()) { + if (is_match_vertex(v, g) && v != g.accept) { + if (g[v].reports.empty()) { DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index); - return false; - } + return false; + } } else if (!g[v].reports.empty()) { DEBUG_PRINTF("vertex %zu has reports but no accept edge\n", g[v].index); return false; - } - - // Participant vertices should have distinct state indices. - if (!contains(state_ids, v)) { + } + + // Participant vertices should have distinct state indices. + if (!contains(state_ids, v)) { DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index); - return false; - } - u32 s = state_ids.at(v); - if (s != NO_STATE && !seen_states.insert(s).second) { + return false; + } + u32 s = state_ids.at(v); + if (s != NO_STATE && !seen_states.insert(s).second) { DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s); - return false; - } - } - - return true; -} -#endif - -static + return false; + } + } + + return true; +} +#endif + +static unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g, const vector<BoundedRepeatData> &repeats) { auto squashMap = findSquashers(g); - filterSquashers(g, squashMap); - - /* We also filter out the cyclic states representing bounded repeats, as + filterSquashers(g, squashMap); + + /* We also filter out the cyclic states representing bounded repeats, as * they are not really cyclic -- they may turn off unexpectedly. */ - for (const auto &br : repeats) { + for (const auto &br : repeats) { if (br.repeatMax.is_finite()) { squashMap.erase(br.cyclic); } - } + } return squashMap; -} - -/** - * \brief Drop edges from start to vertices that also have an edge from - * startDs. - * - * Note that this also includes the (start, startDs) edge, which is not - * necessary for actual NFA implementation (and is actually something we don't - * want to affect state numbering, etc). 
- */ -static -void dropRedundantStartEdges(NGHolder &g) { - remove_out_edge_if(g.start, [&](const NFAEdge &e) { - return edge(g.startDs, target(e, g), g).second; - }, g); - - // Ensure that we always remove (start, startDs), even if startDs has had - // its self-loop removed as an optimization. - remove_edge(g.start, g.startDs, g); -} - -static +} + +/** + * \brief Drop edges from start to vertices that also have an edge from + * startDs. + * + * Note that this also includes the (start, startDs) edge, which is not + * necessary for actual NFA implementation (and is actually something we don't + * want to affect state numbering, etc). + */ +static +void dropRedundantStartEdges(NGHolder &g) { + remove_out_edge_if(g.start, [&](const NFAEdge &e) { + return edge(g.startDs, target(e, g), g).second; + }, g); + + // Ensure that we always remove (start, startDs), even if startDs has had + // its self-loop removed as an optimization. + remove_edge(g.start, g.startDs, g); +} + +static CharReach calcTopVertexReach(const flat_set<u32> &tops, const map<u32, CharReach> &top_reach) { CharReach top_cr; for (u32 t : tops) { - if (contains(top_reach, t)) { + if (contains(top_reach, t)) { top_cr |= top_reach.at(t); - } else { - top_cr = CharReach::dot(); + } else { + top_cr = CharReach::dot(); break; - } + } } return top_cr; } - + static NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops, const flat_set<NFAVertex> &succs, const map<u32, CharReach> &top_reach) { assert(!succs.empty()); assert(!tops.empty()); - + bool reporter = false; - + NFAVertex u = add_vertex(g[g.start], g); CharReach top_cr = calcTopVertexReach(tops, top_reach); g[u].char_reach = top_cr; - + for (auto v : succs) { if (v == g.accept || v == g.acceptEod) { reporter = true; } add_edge(u, v, g); } - + // Only retain reports (which we copied on add_vertex above) for new top // vertices connected to accepts. 
if (!reporter) { g[u].reports.clear(); } - + return u; } @@ -208,11 +208,11 @@ void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs, if (best == top_succs.end() || it->second.size() < best->second.size()) { best = it; - } - } + } + } assert(best != top_succs.end()); assert(!best->second.empty()); /* should already been pruned */ - + *picked_tops = { best->first }; *picked_succs = best->second; } else { @@ -224,16 +224,16 @@ void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs, || (it->second.size() == best->second.size() && it->second < best->second)) { best = it; - } - } + } + } assert(best != succ_tops.end()); assert(!best->second.empty()); /* should already been pruned */ *picked_succs = { best->first }; *picked_tops = best->second; - } + } } - + static void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, const map<u32, flat_set<NFAVertex>> &top_succs, @@ -462,153 +462,153 @@ void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out, } assert(unhandled_top_succs.empty()); - // We are completely replacing the start vertex, so clear its reports. - clear_out_edges(g.start, g); - add_edge(g.start, g.startDs, g); - g[g.start].reports.clear(); -} - -static -set<NFAVertex> findZombies(const NGHolder &h, - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, + // We are completely replacing the start vertex, so clear its reports. + clear_out_edges(g.start, g); + add_edge(g.start, g.startDs, g); + g[g.start].reports.clear(); +} + +static +set<NFAVertex> findZombies(const NGHolder &h, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, const unordered_map<NFAVertex, u32> &state_ids, - const CompileContext &cc) { - set<NFAVertex> zombies; - if (!cc.grey.allowZombies) { - return zombies; - } - - // We only use zombie masks in streaming mode. 
- if (!cc.streaming) { - return zombies; - } - - if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) { + const CompileContext &cc) { + set<NFAVertex> zombies; + if (!cc.grey.allowZombies) { + return zombies; + } + + // We only use zombie masks in streaming mode. + if (!cc.streaming) { + return zombies; + } + + if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) { DEBUG_PRINTF("cannot be made undead - bad reports\n"); - return zombies; - } - - for (auto u : inv_adjacent_vertices_range(h.accept, h)) { - assert(h[u].reports.size() == 1); - for (auto v : adjacent_vertices_range(u, h)) { - if (edge(v, h.accept, h).second - && h[v].char_reach.all()) { - if (!contains(br_cyclic, v)) { - goto ok; - } - - const BoundedRepeatSummary &sum = br_cyclic.at(v); - - if (u == v && sum.repeatMax.is_infinite()) { - goto ok; - } - - } - } - DEBUG_PRINTF("does not go to dot accept\n"); - return zombies; - ok:; - } - - for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) { - if (state_ids.at(v) != NO_STATE) { - zombies.insert(v); - } - } - return zombies; -} - -static + return zombies; + } + + for (auto u : inv_adjacent_vertices_range(h.accept, h)) { + assert(h[u].reports.size() == 1); + for (auto v : adjacent_vertices_range(u, h)) { + if (edge(v, h.accept, h).second + && h[v].char_reach.all()) { + if (!contains(br_cyclic, v)) { + goto ok; + } + + const BoundedRepeatSummary &sum = br_cyclic.at(v); + + if (u == v && sum.repeatMax.is_infinite()) { + goto ok; + } + + } + } + DEBUG_PRINTF("does not go to dot accept\n"); + return zombies; + ok:; + } + + for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) { + if (state_ids.at(v) != NO_STATE) { + zombies.insert(v); + } + } + return zombies; +} + +static void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) { - vector<NFAVertex> ordering; - for (auto &e : state_ids) { - if (e.second == NO_STATE) { - continue; - } - ordering.push_back(e.first); - } - - // Sort in reverse order by 
state ID. - sort(ordering.begin(), ordering.end(), - [&state_ids](NFAVertex a, NFAVertex b) { - return state_ids.at(a) > state_ids.at(b); - }); - - u32 stateNum = 0; - - for (const auto &v : ordering) { - DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum); - state_ids[v] = stateNum++; - } -} - -static -map<u32, CharReach> -findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) { - map<u32, CharReach> top_reach; - - for (const auto &m : triggers) { - const auto top = m.first; - CharReach cr; - for (const auto &trigger : m.second) { - if (trigger.empty()) { - // We don't know anything about this trigger. Assume it can - // have any reach. - cr.setall(); - break; - } - cr |= *trigger.rbegin(); - } - - top_reach.emplace(top, cr); - } - - return top_reach; -} - -static -unique_ptr<NGHolder> -prepareGraph(const NGHolder &h_in, const ReportManager *rm, - const map<u32, u32> &fixed_depth_tops, - const map<u32, vector<vector<CharReach>>> &triggers, - bool impl_test_only, const CompileContext &cc, + vector<NFAVertex> ordering; + for (auto &e : state_ids) { + if (e.second == NO_STATE) { + continue; + } + ordering.push_back(e.first); + } + + // Sort in reverse order by state ID. + sort(ordering.begin(), ordering.end(), + [&state_ids](NFAVertex a, NFAVertex b) { + return state_ids.at(a) > state_ids.at(b); + }); + + u32 stateNum = 0; + + for (const auto &v : ordering) { + DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum); + state_ids[v] = stateNum++; + } +} + +static +map<u32, CharReach> +findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) { + map<u32, CharReach> top_reach; + + for (const auto &m : triggers) { + const auto top = m.first; + CharReach cr; + for (const auto &trigger : m.second) { + if (trigger.empty()) { + // We don't know anything about this trigger. Assume it can + // have any reach. 
+ cr.setall(); + break; + } + cr |= *trigger.rbegin(); + } + + top_reach.emplace(top, cr); + } + + return top_reach; +} + +static +unique_ptr<NGHolder> +prepareGraph(const NGHolder &h_in, const ReportManager *rm, + const map<u32, u32> &fixed_depth_tops, + const map<u32, vector<vector<CharReach>>> &triggers, + bool impl_test_only, const CompileContext &cc, unordered_map<NFAVertex, u32> &state_ids, vector<BoundedRepeatData> &repeats, map<u32, set<NFAVertex>> &tops) { - assert(is_triggered(h_in) || fixed_depth_tops.empty()); - - unique_ptr<NGHolder> h = cloneHolder(h_in); - - // Bounded repeat handling. - analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming, - impl_test_only, cc.grey); - - // If we're building a rose/suffix, do the top dance. + assert(is_triggered(h_in) || fixed_depth_tops.empty()); + + unique_ptr<NGHolder> h = cloneHolder(h_in); + + // Bounded repeat handling. + analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming, + impl_test_only, cc.grey); + + // If we're building a rose/suffix, do the top dance. flat_set<NFAVertex> topVerts; - if (is_triggered(*h)) { - makeTopStates(*h, tops, findTopReach(triggers)); + if (is_triggered(*h)) { + makeTopStates(*h, tops, findTopReach(triggers)); for (const auto &vv : tops | map_values) { insert(&topVerts, vv); } - } - - dropRedundantStartEdges(*h); - - // Do state numbering + } + + dropRedundantStartEdges(*h); + + // Do state numbering state_ids = numberStates(*h, topVerts); - - // In debugging, we sometimes like to reverse the state numbering to stress - // the NFA construction code. - if (cc.grey.numberNFAStatesWrong) { - reverseStateOrdering(state_ids); - } - - assert(sanityCheckGraph(*h, state_ids)); - return h; -} - -static + + // In debugging, we sometimes like to reverse the state numbering to stress + // the NFA construction code. 
+ if (cc.grey.numberNFAStatesWrong) { + reverseStateOrdering(state_ids); + } + + assert(sanityCheckGraph(*h, state_ids)); + return h; +} + +static void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { for (const auto &v : vertices_range(h)) { auto &reports = h[v].reports; @@ -629,234 +629,234 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { static bytecode_ptr<NFA> -constructNFA(const NGHolder &h_in, const ReportManager *rm, - const map<u32, u32> &fixed_depth_tops, - const map<u32, vector<vector<CharReach>>> &triggers, +constructNFA(const NGHolder &h_in, const ReportManager *rm, + const map<u32, u32> &fixed_depth_tops, + const map<u32, vector<vector<CharReach>>> &triggers, bool compress_state, bool do_accel, bool impl_test_only, bool &fast, u32 hint, const CompileContext &cc) { if (!has_managed_reports(h_in)) { - rm = nullptr; - } else { - assert(rm); - } - + rm = nullptr; + } else { + assert(rm); + } + unordered_map<NFAVertex, u32> state_ids; - vector<BoundedRepeatData> repeats; + vector<BoundedRepeatData> repeats; map<u32, set<NFAVertex>> tops; - unique_ptr<NGHolder> h - = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc, - state_ids, repeats, tops); - - // Quick exit: if we've got an embarrassment of riches, i.e. more states - // than we can implement in our largest NFA model, bail here. + unique_ptr<NGHolder> h + = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc, + state_ids, repeats, tops); + + // Quick exit: if we've got an embarrassment of riches, i.e. more states + // than we can implement in our largest NFA model, bail here. 
u32 numStates = countStates(state_ids); - if (numStates > NFA_MAX_STATES) { - DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); - return nullptr; - } - - map<NFAVertex, BoundedRepeatSummary> br_cyclic; - for (const auto &br : repeats) { - br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); - } - + if (numStates > NFA_MAX_STATES) { + DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); + return nullptr; + } + + map<NFAVertex, BoundedRepeatSummary> br_cyclic; + for (const auto &br : repeats) { + br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); + } + unordered_map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> squashMap; - - // build map of squashed and squashers - if (cc.grey.squashNFA) { + + // build map of squashed and squashers + if (cc.grey.squashNFA) { squashMap = findSquashStates(*h, repeats); - - if (rm && cc.grey.highlanderSquash) { - reportSquashMap = findHighlanderSquashers(*h, *rm); - } - } - - set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc); - + + if (rm && cc.grey.highlanderSquash) { + reportSquashMap = findHighlanderSquashers(*h, *rm); + } + } + + set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc); + if (has_managed_reports(*h)) { assert(rm); remapReportsToPrograms(*h, *rm); } - if (!cc.streaming || !cc.grey.compressNFAState) { - compress_state = false; - } - - return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops, + if (!cc.streaming || !cc.grey.compressNFAState) { + compress_state = false; + } + + return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, do_accel, compress_state, fast, hint, cc); -} - +} + bytecode_ptr<NFA> -constructNFA(const NGHolder &h_in, const ReportManager *rm, - const map<u32, u32> &fixed_depth_tops, - const map<u32, vector<vector<CharReach>>> &triggers, +constructNFA(const NGHolder &h_in, const ReportManager *rm, + const map<u32, u32> 
&fixed_depth_tops, + const map<u32, vector<vector<CharReach>>> &triggers, bool compress_state, bool &fast, const CompileContext &cc) { - const u32 hint = INVALID_NFA; - const bool do_accel = cc.grey.accelerateNFA; - const bool impl_test_only = false; - return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state, + const u32 hint = INVALID_NFA; + const bool do_accel = cc.grey.accelerateNFA; + const bool impl_test_only = false; + return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state, do_accel, impl_test_only, fast, hint, cc); -} - -#ifndef RELEASE_BUILD -// Variant that allows a hint to be specified. +} + +#ifndef RELEASE_BUILD +// Variant that allows a hint to be specified. bytecode_ptr<NFA> -constructNFA(const NGHolder &h_in, const ReportManager *rm, - const map<u32, u32> &fixed_depth_tops, - const map<u32, vector<vector<CharReach>>> &triggers, +constructNFA(const NGHolder &h_in, const ReportManager *rm, + const map<u32, u32> &fixed_depth_tops, + const map<u32, vector<vector<CharReach>>> &triggers, bool compress_state, bool &fast, u32 hint, const CompileContext &cc) { - const bool do_accel = cc.grey.accelerateNFA; - const bool impl_test_only = false; + const bool do_accel = cc.grey.accelerateNFA; + const bool impl_test_only = false; return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state, do_accel, impl_test_only, fast, hint, cc); -} -#endif // RELEASE_BUILD - -static +} +#endif // RELEASE_BUILD + +static bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint, const CompileContext &cc) { - // Make a mutable copy of the graph that we can renumber etc. - NGHolder h; - cloneHolder(h, h_in); - assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ - - // Do state numbering. + // Make a mutable copy of the graph that we can renumber etc. + NGHolder h; + cloneHolder(h, h_in); + assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ + + // Do state numbering. 
auto state_ids = numberStates(h, {}); - - // Quick exit: if we've got an embarrassment of riches, i.e. more states - // than we can implement in our largest NFA model, bail here. + + // Quick exit: if we've got an embarrassment of riches, i.e. more states + // than we can implement in our largest NFA model, bail here. u32 numStates = countStates(state_ids); - if (numStates > NFA_MAX_STATES) { - DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); - return nullptr; - } - - assert(sanityCheckGraph(h, state_ids)); - + if (numStates > NFA_MAX_STATES) { + DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); + return nullptr; + } + + assert(sanityCheckGraph(h, state_ids)); + map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */ - set<NFAVertex> zombies; - vector<BoundedRepeatData> repeats; + set<NFAVertex> zombies; + vector<BoundedRepeatData> repeats; unordered_map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> squashMap; UNUSED bool fast = false; - - return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops, + + return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, false, false, fast, hint, cc); -} - +} + bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, const CompileContext &cc) { - u32 hint = INVALID_NFA; // no hint - return constructReversedNFA_i(h_in, hint, cc); -} - -#ifndef RELEASE_BUILD -// Variant that allows a hint to be specified. + u32 hint = INVALID_NFA; // no hint + return constructReversedNFA_i(h_in, hint, cc); +} + +#ifndef RELEASE_BUILD +// Variant that allows a hint to be specified. 
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint, const CompileContext &cc) { - return constructReversedNFA_i(h_in, hint, cc); -} -#endif // RELEASE_BUILD - -u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, - const CompileContext &cc) { + return constructReversedNFA_i(h_in, hint, cc); +} +#endif // RELEASE_BUILD + +u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, + const CompileContext &cc) { if (!cc.grey.allowLimExNFA) { return false; } assert(!can_never_match(g)); - // Quick check: we can always implement an NFA with less than NFA_MAX_STATES - // states. Note that top masks can generate extra states, so we account for - // those here too. + // Quick check: we can always implement an NFA with less than NFA_MAX_STATES + // states. Note that top masks can generate extra states, so we account for + // those here too. if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { - return true; - } - + return true; + } + if (!has_managed_reports(g)) { - rm = nullptr; - } else { - assert(rm); - } - - // The BEST way to tell if an NFA is implementable is to implement it! - const bool impl_test_only = true; - const map<u32, u32> fixed_depth_tops; // empty - const map<u32, vector<vector<CharReach>>> triggers; // empty - - /* Perform the first part of the construction process and see if the - * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can - * implement it as an NFA. */ - + rm = nullptr; + } else { + assert(rm); + } + + // The BEST way to tell if an NFA is implementable is to implement it! + const bool impl_test_only = true; + const map<u32, u32> fixed_depth_tops; // empty + const map<u32, vector<vector<CharReach>>> triggers; // empty + + /* Perform the first part of the construction process and see if the + * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can + * implement it as an NFA. 
*/ + unordered_map<NFAVertex, u32> state_ids; - vector<BoundedRepeatData> repeats; + vector<BoundedRepeatData> repeats; map<u32, set<NFAVertex>> tops; - unique_ptr<NGHolder> h - = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, - state_ids, repeats, tops); - assert(h); + unique_ptr<NGHolder> h + = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, + state_ids, repeats, tops); + assert(h); u32 numStates = countStates(state_ids); - if (numStates <= NFA_MAX_STATES) { - return numStates; - } - - return 0; -} - -void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm, - const CompileContext &cc) { - NGHolder g_pristine; - cloneHolder(g_pristine, g); - - reduceGraphEquivalences(g, cc); - - removeRedundancy(g, som); - + if (numStates <= NFA_MAX_STATES) { + return numStates; + } + + return 0; +} + +void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm, + const CompileContext &cc) { + NGHolder g_pristine; + cloneHolder(g_pristine, g); + + reduceGraphEquivalences(g, cc); + + removeRedundancy(g, som); + if (rm && has_managed_reports(g)) { - pruneHighlanderDominated(g, *rm); - } - - if (!isImplementableNFA(g, rm, cc)) { - DEBUG_PRINTF("reductions made graph unimplementable, roll back\n"); - clear_graph(g); - cloneHolder(g, g_pristine); - } -} - -u32 countAccelStates(const NGHolder &g, const ReportManager *rm, - const CompileContext &cc) { + pruneHighlanderDominated(g, *rm); + } + + if (!isImplementableNFA(g, rm, cc)) { + DEBUG_PRINTF("reductions made graph unimplementable, roll back\n"); + clear_graph(g); + cloneHolder(g, g_pristine); + } +} + +u32 countAccelStates(const NGHolder &g, const ReportManager *rm, + const CompileContext &cc) { if (!has_managed_reports(g)) { - rm = nullptr; - } else { - assert(rm); - } - - const bool impl_test_only = true; - const map<u32, u32> fixed_depth_tops; // empty - const map<u32, vector<vector<CharReach>>> triggers; // empty - + rm = nullptr; + } else { + 
assert(rm); + } + + const bool impl_test_only = true; + const map<u32, u32> fixed_depth_tops; // empty + const map<u32, vector<vector<CharReach>>> triggers; // empty + unordered_map<NFAVertex, u32> state_ids; - vector<BoundedRepeatData> repeats; + vector<BoundedRepeatData> repeats; map<u32, set<NFAVertex>> tops; - unique_ptr<NGHolder> h - = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, - state_ids, repeats, tops); - + unique_ptr<NGHolder> h + = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, + state_ids, repeats, tops); + if (!h || countStates(state_ids) > NFA_MAX_STATES) { - DEBUG_PRINTF("not constructible\n"); - return NFA_MAX_ACCEL_STATES + 1; - } - - assert(h->kind == g.kind); - - // Should have no bearing on accel calculation, so we leave these empty. - const set<NFAVertex> zombies; + DEBUG_PRINTF("not constructible\n"); + return NFA_MAX_ACCEL_STATES + 1; + } + + assert(h->kind == g.kind); + + // Should have no bearing on accel calculation, so we leave these empty. 
+ const set<NFAVertex> zombies; unordered_map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> squashMap; - - return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, - tops, zombies, cc); -} - -} // namespace ue2 + + return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, + tops, zombies, cc); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h index 7eba2eff06..58a05ecb3e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h @@ -1,147 +1,147 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Limex NFA construction code. - */ - -#ifndef NG_LIMEX_H -#define NG_LIMEX_H - -#include "ue2common.h" -#include "som/som.h" + * \brief Limex NFA construction code. + */ + +#ifndef NG_LIMEX_H +#define NG_LIMEX_H + +#include "ue2common.h" +#include "som/som.h" #include "util/bytecode_ptr.h" - -#include <map> -#include <memory> -#include <vector> - -struct NFA; - -namespace ue2 { - -class CharReach; -class NG; -class NGHolder; -class ReportManager; -struct CompileContext; - + +#include <map> +#include <memory> +#include <vector> + +struct NFA; + +namespace ue2 { + +class CharReach; +class NG; +class NGHolder; +class ReportManager; +struct CompileContext; + /** * \brief Determine if the given graph is implementable as an NFA. - * - * Returns zero if the NFA is not implementable (usually because it has too - * many states for any of our models). Otherwise returns the number of states. - * - * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and - * NFA_INFIX use unmanaged rose-local reports. - */ -u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, - const CompileContext &cc); - + * + * Returns zero if the NFA is not implementable (usually because it has too + * many states for any of our models). Otherwise returns the number of states. + * + * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and + * NFA_INFIX use unmanaged rose-local reports. 
+ */ +u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, + const CompileContext &cc); + /** * \brief Late-stage graph reductions. - * - * This will call \ref removeRedundancy and apply its changes to the given + * + * This will call \ref removeRedundancy and apply its changes to the given * holder only if it is implementable afterwards. */ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm, - const CompileContext &cc); - -/** - * \brief For a given graph, count the number of accel states it will have in - * an implementation. - * - * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an - * implementation would not be constructible. - */ -u32 countAccelStates(const NGHolder &g, const ReportManager *rm, - const CompileContext &cc); - + const CompileContext &cc); + +/** + * \brief For a given graph, count the number of accel states it will have in + * an implementation. + * + * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an + * implementation would not be constructible. + */ +u32 countAccelStates(const NGHolder &g, const ReportManager *rm, + const CompileContext &cc); + /** * \brief Construct an NFA from the given graph. - * - * Returns zero if the NFA is not implementable (usually because it has too - * many states for any of our models). Otherwise returns the number of states. - * - * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and - * NFA_INFIX use unmanaged rose-local reports. - * - * Note: this variant of the function allows a model to be specified with the - * \a hint parameter. - */ + * + * Returns zero if the NFA is not implementable (usually because it has too + * many states for any of our models). Otherwise returns the number of states. + * + * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and + * NFA_INFIX use unmanaged rose-local reports. 
+ * + * Note: this variant of the function allows a model to be specified with the + * \a hint parameter. + */ bytecode_ptr<NFA> -constructNFA(const NGHolder &g, const ReportManager *rm, - const std::map<u32, u32> &fixed_depth_tops, - const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, +constructNFA(const NGHolder &g, const ReportManager *rm, + const std::map<u32, u32> &fixed_depth_tops, + const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, bool compress_state, bool &fast, const CompileContext &cc); - + /** * \brief Build a reverse NFA from the graph given, which should have already - * been reversed. - * - * Used for reverse NFAs used in SOM mode. - */ + * been reversed. + * + * Used for reverse NFAs used in SOM mode. + */ bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, const CompileContext &cc); - -#ifndef RELEASE_BUILD - + +#ifndef RELEASE_BUILD + /** * \brief Construct an NFA (with model type hint) from the given graph. - * - * Returns zero if the NFA is not implementable (usually because it has too - * many states for any of our models). Otherwise returns the number of states. - * - * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and - * NFA_INFIX use unmanaged rose-local reports. - * - * Note: this variant of the function allows a model to be specified with the - * \a hint parameter. - */ + * + * Returns zero if the NFA is not implementable (usually because it has too + * many states for any of our models). Otherwise returns the number of states. + * + * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and + * NFA_INFIX use unmanaged rose-local reports. + * + * Note: this variant of the function allows a model to be specified with the + * \a hint parameter. 
+ */ bytecode_ptr<NFA> -constructNFA(const NGHolder &g, const ReportManager *rm, - const std::map<u32, u32> &fixed_depth_tops, - const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, +constructNFA(const NGHolder &g, const ReportManager *rm, + const std::map<u32, u32> &fixed_depth_tops, + const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, bool compress_state, bool &fast, u32 hint, const CompileContext &cc); - + /** * \brief Build a reverse NFA (with model type hint) from the graph given, - * which should have already been reversed. - * - * Used for reverse NFAs used in SOM mode. - */ + * which should have already been reversed. + * + * Used for reverse NFAs used in SOM mode. + */ bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, u32 hint, const CompileContext &cc); - -#endif // RELEASE_BUILD - -} // namespace ue2 - -#endif // NG_METEOR_H + +#endif // RELEASE_BUILD + +} // namespace ue2 + +#endif // NG_METEOR_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp index f1f829f2c1..ca393131bc 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp @@ -1,141 +1,141 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA acceleration analysis code. - */ -#include "ng_limex_accel.h" - -#include "ng_holder.h" -#include "ng_misc_opt.h" -#include "ng_util.h" -#include "ue2common.h" - -#include "nfa/accel.h" - -#include "util/bitutils.h" // for CASE_CLEAR -#include "util/charreach.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA acceleration analysis code. 
+ */ +#include "ng_limex_accel.h" + +#include "ng_holder.h" +#include "ng_misc_opt.h" +#include "ng_util.h" +#include "ue2common.h" + +#include "nfa/accel.h" + +#include "util/bitutils.h" // for CASE_CLEAR +#include "util/charreach.h" #include "util/compile_context.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" #include "util/small_vector.h" #include "util/target_info.h" - -#include <algorithm> -#include <map> - + +#include <algorithm> +#include <map> + #include <boost/range/adaptor/map.hpp> -using namespace std; +using namespace std; using boost::adaptors::map_keys; - -namespace ue2 { - -#define WIDE_FRIEND_MIN 200 - -static -void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, - const flat_set<NFAVertex> &cands, - const flat_set<NFAVertex> &preds, - flat_set<NFAVertex> *next_cands, - flat_set<NFAVertex> *next_preds, - flat_set<NFAVertex> *friends) { - for (auto v : cands) { - if (contains(preds, v)) { - continue; - } - - const CharReach &acr = g[v].char_reach; + +namespace ue2 { + +#define WIDE_FRIEND_MIN 200 + +static +void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, + const flat_set<NFAVertex> &cands, + const flat_set<NFAVertex> &preds, + flat_set<NFAVertex> *next_cands, + flat_set<NFAVertex> *next_preds, + flat_set<NFAVertex> *friends) { + for (auto v : cands) { + if (contains(preds, v)) { + continue; + } + + const CharReach &acr = g[v].char_reach; DEBUG_PRINTF("checking %zu\n", g[v].index); - - if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) { - DEBUG_PRINTF("bad reach %zu\n", acr.count()); - continue; - } - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!contains(preds, u)) { - DEBUG_PRINTF("bad pred\n"); - goto next_cand; - } - } - - next_preds->insert(v); - insert(next_cands, adjacent_vertices(v, g)); - + + if (acr.count() < WIDE_FRIEND_MIN || 
!acr.isSubsetOf(cr)) { + DEBUG_PRINTF("bad reach %zu\n", acr.count()); + continue; + } + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!contains(preds, u)) { + DEBUG_PRINTF("bad pred\n"); + goto next_cand; + } + } + + next_preds->insert(v); + insert(next_cands, adjacent_vertices(v, g)); + DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index); - friends->insert(v); - next_cand:; - } -} - -void findAccelFriends(const NGHolder &g, NFAVertex v, - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - u32 offset, flat_set<NFAVertex> *friends) { - /* A friend of an accel state is a successor state which can only be on when - * the accel is on. This requires that it has a subset of the accel state's - * preds and a charreach which is a subset of the accel state. - * - * A friend can be safely ignored when accelerating provided there is - * sufficient back-off. A friend is useful if it has a wide reach. - */ - - /* BR cyclic states which may go stale cannot have friends as they may - * suddenly turn off leading their so-called friends stranded and alone. 
- * TODO: restrict to only stale going BR cyclics - */ - if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) { - return; - } - - u32 friend_depth = offset + 1; - - flat_set<NFAVertex> preds; - insert(&preds, inv_adjacent_vertices(v, g)); - const CharReach &cr = g[v].char_reach; - - flat_set<NFAVertex> cands; - insert(&cands, adjacent_vertices(v, g)); - - flat_set<NFAVertex> next_preds; - flat_set<NFAVertex> next_cands; - for (u32 i = 0; i < friend_depth; i++) { - findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds, - friends); - preds.insert(next_preds.begin(), next_preds.end()); - next_preds.clear(); - cands.swap(next_cands); - next_cands.clear(); - } -} - -static + friends->insert(v); + next_cand:; + } +} + +void findAccelFriends(const NGHolder &g, NFAVertex v, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, + u32 offset, flat_set<NFAVertex> *friends) { + /* A friend of an accel state is a successor state which can only be on when + * the accel is on. This requires that it has a subset of the accel state's + * preds and a charreach which is a subset of the accel state. + * + * A friend can be safely ignored when accelerating provided there is + * sufficient back-off. A friend is useful if it has a wide reach. + */ + + /* BR cyclic states which may go stale cannot have friends as they may + * suddenly turn off leading their so-called friends stranded and alone. 
+ * TODO: restrict to only stale going BR cyclics + */ + if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) { + return; + } + + u32 friend_depth = offset + 1; + + flat_set<NFAVertex> preds; + insert(&preds, inv_adjacent_vertices(v, g)); + const CharReach &cr = g[v].char_reach; + + flat_set<NFAVertex> cands; + insert(&cands, adjacent_vertices(v, g)); + + flat_set<NFAVertex> next_preds; + flat_set<NFAVertex> next_cands; + for (u32 i = 0; i < friend_depth; i++) { + findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds, + friends); + preds.insert(next_preds.begin(), next_preds.end()); + next_preds.clear(); + cands.swap(next_cands); + next_cands.clear(); + } +} + +static void findPaths(const NGHolder &g, NFAVertex v, const vector<CharReach> &refined_cr, vector<vector<CharReach>> *paths, @@ -149,30 +149,30 @@ void findPaths(const NGHolder &g, NFAVertex v, paths->push_back({}); if (!generates_callbacks(g) || v == g.acceptEod) { paths->back().push_back(CharReach()); /* red tape options */ - } + } return; - } - + } + /* for the escape 'literals' we want to use the minimal cr so we * can be more selective */ const CharReach &cr = refined_cr[g[v].index]; - + if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER || hasSelfLoop(v, g)) { /* give up on pushing past this point */ paths->push_back({cr}); return; - } - + } + vector<vector<CharReach>> curr; - for (auto w : adjacent_vertices_range(v, g)) { + for (auto w : adjacent_vertices_range(v, g)) { if (contains(forbidden, w)) { /* path has looped back to one of the active+boring acceleration * states. We can ignore this path if we have sufficient back- * off. 
*/ paths->push_back({cr}); - continue; - } + continue; + } u32 new_depth = depth - 1; do { @@ -183,55 +183,55 @@ void findPaths(const NGHolder &g, NFAVertex v, for (auto &c : curr) { c.push_back(cr); paths->push_back(std::move(c)); - } - } -} - + } + } +} + namespace { struct SAccelScheme { SAccelScheme(CharReach cr_in, u32 offset_in) : cr(std::move(cr_in)), offset(offset_in) { assert(offset <= MAX_ACCEL_DEPTH); - } - + } + SAccelScheme() {} - + bool operator<(const SAccelScheme &b) const { const SAccelScheme &a = *this; const size_t a_count = cr.count(), b_count = b.cr.count(); if (a_count != b_count) { return a_count < b_count; - } - + } + /* TODO: give bonus if one is a 'caseless' character */ ORDER_CHECK(offset); ORDER_CHECK(cr); - return false; - } - + return false; + } + CharReach cr = CharReach::dot(); u32 offset = MAX_ACCEL_DEPTH + 1; }; -} - +} + /** * \brief Limit on the number of (recursive) calls to findBestInternal(). */ static constexpr size_t MAX_FINDBEST_CALLS = 1000000; -static +static void findBestInternal(vector<vector<CharReach>>::const_iterator pb, vector<vector<CharReach>>::const_iterator pe, size_t *num_calls, const SAccelScheme &curr, SAccelScheme *best) { assert(curr.offset <= MAX_ACCEL_DEPTH); - + if (++(*num_calls) > MAX_FINDBEST_CALLS) { DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls); return; - } - + } + DEBUG_PRINTF("paths left %zu\n", pe - pb); if (pb == pe) { if (curr < *best) { @@ -241,10 +241,10 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb, best->offset); } return; - } - + } + DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - + small_vector<SAccelScheme, 10> priority_path; priority_path.reserve(pb->size()); u32 i = 0; @@ -255,8 +255,8 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb, continue; } priority_path.push_back(move(as)); - } - + } + sort(priority_path.begin(), priority_path.end()); for (auto it = priority_path.begin(); it != priority_path.end(); ++it) { auto jt = 
next(it); @@ -267,9 +267,9 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb, } priority_path.erase(next(it), jt); DEBUG_PRINTF("||%zu\n", it->cr.count()); - } + } DEBUG_PRINTF("---\n"); - + for (const SAccelScheme &in : priority_path) { DEBUG_PRINTF("in: count %zu\n", in.cr.count()); if (*best < in) { @@ -277,14 +277,14 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb, continue; } findBestInternal(pb + 1, pe, num_calls, in, best); - + if (curr.cr == best->cr) { return; /* could only get better by offset */ } - } -} - -static + } +} + +static SAccelScheme findBest(const vector<vector<CharReach>> &paths, const CharReach &terminating) { SAccelScheme curr(terminating, 0U); @@ -296,52 +296,52 @@ SAccelScheme findBest(const vector<vector<CharReach>> &paths, best.cr.count(), describeClass(best.cr).c_str(), best.offset); return best; } - + namespace { struct DAccelScheme { DAccelScheme(CharReach cr_in, u32 offset_in) : double_cr(std::move(cr_in)), double_offset(offset_in) { assert(double_offset <= MAX_ACCEL_DEPTH); } - + bool operator<(const DAccelScheme &b) const { const DAccelScheme &a = *this; - + size_t a_dcount = a.double_cr.count(); size_t b_dcount = b.double_cr.count(); - + assert(!a.double_byte.empty() || a_dcount || a.double_offset); assert(!b.double_byte.empty() || b_dcount || b.double_offset); - + if (a_dcount != b_dcount) { return a_dcount < b_dcount; } - + if (!a_dcount) { bool cd_a = buildDvermMask(a.double_byte); bool cd_b = buildDvermMask(b.double_byte); if (cd_a != cd_b) { return cd_a > cd_b; - } - } - + } + } + ORDER_CHECK(double_byte.size()); ORDER_CHECK(double_offset); - + /* TODO: give bonus if one is a 'caseless' character */ ORDER_CHECK(double_byte); ORDER_CHECK(double_cr); - + return false; - } - + } + flat_set<pair<u8, u8>> double_byte; CharReach double_cr; u32 double_offset = 0; }; -} - -static +} + +static DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, const CharReach &cr_2_in, u32 
offset_in) { cr_1 &= ~as.double_cr; @@ -352,29 +352,29 @@ DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, DEBUG_PRINTF("empty first element\n"); ENSURE_AT_LEAST(&as.double_offset, offset); return as; - } + } if (cr_2_in != cr_2 || cr_2.none()) { offset = offset_in + 1; - } - + } + size_t two_count = cr_1.count() * cr_2.count(); - + DEBUG_PRINTF("will generate raw %zu pairs\n", two_count); if (!two_count) { DEBUG_PRINTF("empty element\n"); ENSURE_AT_LEAST(&as.double_offset, offset); return as; - } - + } + if (two_count > DOUBLE_SHUFTI_LIMIT) { if (cr_2.count() < cr_1.count()) { as.double_cr |= cr_2; offset = offset_in + 1; } else { as.double_cr |= cr_1; - } + } } else { for (auto i = cr_1.find_first(); i != CharReach::npos; i = cr_1.find_next(i)) { @@ -382,145 +382,145 @@ DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, j = cr_2.find_next(j)) { as.double_byte.emplace(i, j); } - } - } - + } + } + ENSURE_AT_LEAST(&as.double_offset, offset); DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", as.double_byte.size(), as.double_cr.count(), as.double_offset); return as; -} - -static +} + +static void findDoubleBest(vector<vector<CharReach> >::const_iterator pb, - vector<vector<CharReach> >::const_iterator pe, + vector<vector<CharReach> >::const_iterator pe, const DAccelScheme &curr, DAccelScheme *best) { assert(curr.double_offset <= MAX_ACCEL_DEPTH); - DEBUG_PRINTF("paths left %zu\n", pe - pb); + DEBUG_PRINTF("paths left %zu\n", pe - pb); DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n", curr.double_byte.size(), curr.double_cr.count(), curr.double_offset); - if (pb == pe) { + if (pb == pe) { if (curr < *best) { *best = curr; DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", best->double_byte.size(), best->double_cr.count(), best->double_offset); } - return; - } - - DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - + return; + } + + DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); + 
small_vector<DAccelScheme, 10> priority_path; priority_path.reserve(pb->size()); - u32 i = 0; + u32 i = 0; for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end(); - ++p, i++) { + ++p, i++) { DAccelScheme as = make_double_accel(curr, *p, *next(p), i); if (*best < as) { DEBUG_PRINTF("worse\n"); continue; } priority_path.push_back(move(as)); - } - - sort(priority_path.begin(), priority_path.end()); + } + + sort(priority_path.begin(), priority_path.end()); DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size()); DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", best->double_byte.size(), best->double_cr.count(), best->double_offset); - + for (const DAccelScheme &in : priority_path) { DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", in.double_byte.size(), in.double_cr.count(), in.double_offset); if (*best < in) { - DEBUG_PRINTF("worse\n"); - continue; - } + DEBUG_PRINTF("worse\n"); + continue; + } findDoubleBest(pb + 1, pe, in, best); - } -} - -#ifdef DEBUG -static + } +} + +#ifdef DEBUG +static void dumpPaths(const vector<vector<CharReach>> &paths) { for (const auto &path : paths) { - DEBUG_PRINTF("path: ["); + DEBUG_PRINTF("path: ["); for (const auto &cr : path) { - printf(" ["); + printf(" ["); describeClass(stdout, cr, 20, CC_OUT_TEXT); - printf("]"); - } - printf(" ]\n"); - } -} -#endif - -static + printf("]"); + } + printf(" ]\n"); + } +} +#endif + +static void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) { - /* paths segments which are a superset of an earlier segment should never be - * picked as an acceleration segment -> to improve processing just replace - * with dot */ + /* paths segments which are a superset of an earlier segment should never be + * picked as an acceleration segment -> to improve processing just replace + * with dot */ for (auto &p : paths) { for (auto it = p.begin(); it != p.end(); ++it) { for (auto jt = next(it); jt != p.end(); ++jt) { - if (it->isSubsetOf(*jt)) { - *jt = 
CharReach::dot(); - } - } - } - } -} - -static + if (it->isSubsetOf(*jt)) { + *jt = CharReach::dot(); + } + } + } + } +} + +static void unifyPathsLastSegment(vector<vector<CharReach> > &paths) { - /* try to unify paths which only differ in the last segment */ + /* try to unify paths which only differ in the last segment */ for (vector<vector<CharReach> >::iterator p = paths.begin(); p != paths.end() && p + 1 != paths.end();) { - vector<CharReach> &a = *p; - vector<CharReach> &b = *(p + 1); - - if (a.size() != b.size()) { - ++p; - continue; - } - - u32 i = 0; - for (; i < a.size() - 1; i++) { - if (a[i] != b[i]) { - break; - } - } - if (i == a.size() - 1) { - /* we can unify these paths */ - a[i] |= b[i]; + vector<CharReach> &a = *p; + vector<CharReach> &b = *(p + 1); + + if (a.size() != b.size()) { + ++p; + continue; + } + + u32 i = 0; + for (; i < a.size() - 1; i++) { + if (a[i] != b[i]) { + break; + } + } + if (i == a.size() - 1) { + /* we can unify these paths */ + a[i] |= b[i]; paths.erase(p + 1); - } else { - ++p; - } - } -} - -static + } else { + ++p; + } + } +} + +static void improvePaths(vector<vector<CharReach> > &paths) { -#ifdef DEBUG - DEBUG_PRINTF("orig paths\n"); +#ifdef DEBUG + DEBUG_PRINTF("orig paths\n"); dumpPaths(paths); -#endif - blowoutPathsLessStrictSegment(paths); - +#endif + blowoutPathsLessStrictSegment(paths); + sort(paths.begin(), paths.end()); - - unifyPathsLastSegment(paths); - -#ifdef DEBUG - DEBUG_PRINTF("opt paths\n"); + + unifyPathsLastSegment(paths); + +#ifdef DEBUG + DEBUG_PRINTF("opt paths\n"); dumpPaths(paths); -#endif -} - +#endif +} + #define MAX_DOUBLE_ACCEL_PATHS 10 static @@ -611,227 +611,227 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths, return rv; } -AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts, - const vector<CharReach> &refined_cr, - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, +AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts, + 
const vector<CharReach> &refined_cr, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, bool allow_wide, bool look_for_double_byte) { - CharReach terminating; - for (auto v : verts) { - if (!hasSelfLoop(v, g)) { - DEBUG_PRINTF("no self loop\n"); - return AccelScheme(); /* invalid scheme */ - } - - // check that this state is reachable on most characters - terminating |= ~g[v].char_reach; - } - - DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count()); - size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR - : ACCEL_MAX_STOP_CHAR; - if (terminating.count() > limit) { - return AccelScheme(); /* invalid scheme */ - } - + CharReach terminating; + for (auto v : verts) { + if (!hasSelfLoop(v, g)) { + DEBUG_PRINTF("no self loop\n"); + return AccelScheme(); /* invalid scheme */ + } + + // check that this state is reachable on most characters + terminating |= ~g[v].char_reach; + } + + DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count()); + size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR + : ACCEL_MAX_STOP_CHAR; + if (terminating.count() > limit) { + return AccelScheme(); /* invalid scheme */ + } + vector<vector<CharReach>> paths; - flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end()); - - /* Note: we can not in general (TODO: ignore when possible) ignore entries - * into the bounded repeat cyclic states as that is when the magic happens - */ + flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end()); + + /* Note: we can not in general (TODO: ignore when possible) ignore entries + * into the bounded repeat cyclic states as that is when the magic happens + */ for (auto v : br_cyclic | map_keys) { - /* TODO: can allow if repeatMin <= 1 ? */ + /* TODO: can allow if repeatMin <= 1 ? 
*/ ignore_vert_set.erase(v); - } - - for (auto v : verts) { - for (auto w : adjacent_vertices_range(v, g)) { - if (w != v) { - findPaths(g, w, refined_cr, &paths, ignore_vert_set, - MAX_ACCEL_DEPTH); - } - } - } - - /* paths built wrong: reverse them */ + } + + for (auto v : verts) { + for (auto w : adjacent_vertices_range(v, g)) { + if (w != v) { + findPaths(g, w, refined_cr, &paths, ignore_vert_set, + MAX_ACCEL_DEPTH); + } + } + } + + /* paths built wrong: reverse them */ for (auto &path : paths) { reverse(path.begin(), path.end()); - } - + } + return findBestAccelScheme(std::move(paths), terminating, look_for_double_byte); -} - -NFAVertex get_sds_or_proxy(const NGHolder &g) { - DEBUG_PRINTF("looking for sds proxy\n"); - if (proper_out_degree(g.startDs, g)) { - return g.startDs; - } - +} + +NFAVertex get_sds_or_proxy(const NGHolder &g) { + DEBUG_PRINTF("looking for sds proxy\n"); + if (proper_out_degree(g.startDs, g)) { + return g.startDs; + } + NFAVertex v = NGHolder::null_vertex(); - for (auto w : adjacent_vertices_range(g.start, g)) { - if (w != g.startDs) { - if (!v) { - v = w; - } else { - return g.startDs; - } - } - } - - if (!v) { - return g.startDs; - } - - while (true) { - if (hasSelfLoop(v, g)) { + for (auto w : adjacent_vertices_range(g.start, g)) { + if (w != g.startDs) { + if (!v) { + v = w; + } else { + return g.startDs; + } + } + } + + if (!v) { + return g.startDs; + } + + while (true) { + if (hasSelfLoop(v, g)) { DEBUG_PRINTF("woot %zu\n", g[v].index); - return v; - } - if (out_degree(v, g) != 1) { - break; - } - NFAVertex u = getSoleDestVertex(g, v); - if (!g[u].char_reach.all()) { - break; - } - v = u; - } - - return g.startDs; -} - -/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). 
*/ -bool nfaCheckAccel(const NGHolder &g, NFAVertex v, - const vector<CharReach> &refined_cr, - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - AccelScheme *as, bool allow_wide) { - // For a state to be accelerable, our current criterion is that it be a - // large character class with a self-loop and narrow set of possible other - // successors (i.e. no special successors, union of successor reachability - // is small). - if (!hasSelfLoop(v, g)) { - return false; - } - - // check that this state is reachable on most characters - /* we want to use the maximal reach here (in the graph) */ - CharReach terminating = g[v].char_reach; - terminating.flip(); - + return v; + } + if (out_degree(v, g) != 1) { + break; + } + NFAVertex u = getSoleDestVertex(g, v); + if (!g[u].char_reach.all()) { + break; + } + v = u; + } + + return g.startDs; +} + +/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */ +bool nfaCheckAccel(const NGHolder &g, NFAVertex v, + const vector<CharReach> &refined_cr, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, + AccelScheme *as, bool allow_wide) { + // For a state to be accelerable, our current criterion is that it be a + // large character class with a self-loop and narrow set of possible other + // successors (i.e. no special successors, union of successor reachability + // is small). + if (!hasSelfLoop(v, g)) { + return false; + } + + // check that this state is reachable on most characters + /* we want to use the maximal reach here (in the graph) */ + CharReach terminating = g[v].char_reach; + terminating.flip(); + DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n", - g[v].index, terminating.count(), - allow_wide ? " (w)" : ""); - - size_t limit = allow_wide ? 
ACCEL_MAX_FLOATING_STOP_CHAR - : ACCEL_MAX_STOP_CHAR; - if (terminating.count() > limit) { - DEBUG_PRINTF("too leaky\n"); - return false; - } - - flat_set<NFAVertex> curr, next; - - insert(&curr, adjacent_vertices(v, g)); - curr.erase(v); // erase self-loop - - // We consider offsets of zero through three; this is fairly arbitrary at - // present and could probably be increased (FIXME) - /* WARNING: would/could do horrible things to compile time */ - bool stop = false; - vector<CharReach> depthReach(MAX_ACCEL_DEPTH); - unsigned int depth; - for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) { - CharReach &cr = depthReach[depth]; - for (auto t : curr) { - if (is_special(t, g)) { - // We've bumped into the edge of the graph, so we should stop - // searching. - // Exception: iff our cyclic state is not a dot, than we can - // safely accelerate towards an EOD accept. - - /* Exception: nfas that don't generate callbacks so accepts are - * fine too */ - if (t == g.accept && !generates_callbacks(g)) { - stop = true; // don't search beyond this depth - continue; - } else if (t == g.accept) { - goto depth_done; - } - - assert(t == g.acceptEod); - stop = true; // don't search beyond this depth - } else { - // Non-special vertex - insert(&next, adjacent_vertices(t, g)); - /* for the escape 'literals' we want to use the minimal cr so we - * can be more selective */ - cr |= refined_cr[g[t].index]; - } - } - - cr |= terminating; - DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count()); - - curr.swap(next); - next.clear(); - } - -depth_done: - - if (depth == 0) { - return false; - } - - DEBUG_PRINTF("selecting from depth 0..%u\n", depth); - - /* Look for the most awesome acceleration evar */ - for (unsigned int i = 0; i < depth; i++) { - if (depthReach[i].none()) { - DEBUG_PRINTF("red tape acceleration engine depth %u\n", i); + g[v].index, terminating.count(), + allow_wide ? " (w)" : ""); + + size_t limit = allow_wide ? 
ACCEL_MAX_FLOATING_STOP_CHAR + : ACCEL_MAX_STOP_CHAR; + if (terminating.count() > limit) { + DEBUG_PRINTF("too leaky\n"); + return false; + } + + flat_set<NFAVertex> curr, next; + + insert(&curr, adjacent_vertices(v, g)); + curr.erase(v); // erase self-loop + + // We consider offsets of zero through three; this is fairly arbitrary at + // present and could probably be increased (FIXME) + /* WARNING: would/could do horrible things to compile time */ + bool stop = false; + vector<CharReach> depthReach(MAX_ACCEL_DEPTH); + unsigned int depth; + for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) { + CharReach &cr = depthReach[depth]; + for (auto t : curr) { + if (is_special(t, g)) { + // We've bumped into the edge of the graph, so we should stop + // searching. + // Exception: iff our cyclic state is not a dot, than we can + // safely accelerate towards an EOD accept. + + /* Exception: nfas that don't generate callbacks so accepts are + * fine too */ + if (t == g.accept && !generates_callbacks(g)) { + stop = true; // don't search beyond this depth + continue; + } else if (t == g.accept) { + goto depth_done; + } + + assert(t == g.acceptEod); + stop = true; // don't search beyond this depth + } else { + // Non-special vertex + insert(&next, adjacent_vertices(t, g)); + /* for the escape 'literals' we want to use the minimal cr so we + * can be more selective */ + cr |= refined_cr[g[t].index]; + } + } + + cr |= terminating; + DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count()); + + curr.swap(next); + next.clear(); + } + +depth_done: + + if (depth == 0) { + return false; + } + + DEBUG_PRINTF("selecting from depth 0..%u\n", depth); + + /* Look for the most awesome acceleration evar */ + for (unsigned int i = 0; i < depth; i++) { + if (depthReach[i].none()) { + DEBUG_PRINTF("red tape acceleration engine depth %u\n", i); *as = AccelScheme(); as->offset = i; as->cr = CharReach(); - return true; - } - } - - // First, loop over our depths and see if we have 
a suitable 2-byte - // caseful vermicelli option: this is the (second) fastest accel we have - if (depth > 1) { - for (unsigned int i = 0; i < (depth - 1); i++) { - const CharReach &cra = depthReach[i]; - const CharReach &crb = depthReach[i + 1]; - if ((cra.count() == 1 && crb.count() == 1) - || (cra.count() == 2 && crb.count() == 2 - && cra.isBit5Insensitive() && crb.isBit5Insensitive())) { - DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i); + return true; + } + } + + // First, loop over our depths and see if we have a suitable 2-byte + // caseful vermicelli option: this is the (second) fastest accel we have + if (depth > 1) { + for (unsigned int i = 0; i < (depth - 1); i++) { + const CharReach &cra = depthReach[i]; + const CharReach &crb = depthReach[i + 1]; + if ((cra.count() == 1 && crb.count() == 1) + || (cra.count() == 2 && crb.count() == 2 + && cra.isBit5Insensitive() && crb.isBit5Insensitive())) { + DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i); *as = AccelScheme(); as->offset = i; - return true; - } - } - } - - // Second option: a two-byte shufti (i.e. less than eight 2-byte - // literals) - if (depth > 1) { - for (unsigned int i = 0; i < (depth - 1); i++) { + return true; + } + } + } + + // Second option: a two-byte shufti (i.e. 
less than eight 2-byte + // literals) + if (depth > 1) { + for (unsigned int i = 0; i < (depth - 1); i++) { if (depthReach[i].count() * depthReach[i+1].count() <= DOUBLE_SHUFTI_LIMIT) { - DEBUG_PRINTF("two-byte shufti, depth %u\n", i); + DEBUG_PRINTF("two-byte shufti, depth %u\n", i); *as = AccelScheme(); as->offset = i; - return true; - } - } - } - + return true; + } + } + } + // Look for offset accel schemes verm/shufti; - vector<NFAVertex> verts(1, v); + vector<NFAVertex> verts(1, v); *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true); - DEBUG_PRINTF("as width %zu\n", as->cr.count()); - return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide; -} - -} // namespace ue2 + DEBUG_PRINTF("as width %zu\n", as->cr.count()); + return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h index f6f7f1b3cb..766cfabbe6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h @@ -1,73 +1,73 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA acceleration analysis code. - */ - -#ifndef NG_LIMEX_ACCEL_H -#define NG_LIMEX_ACCEL_H - -#include "ng_holder.h" -#include "ng_misc_opt.h" -#include "ue2common.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA acceleration analysis code. + */ + +#ifndef NG_LIMEX_ACCEL_H +#define NG_LIMEX_ACCEL_H + +#include "ng_holder.h" +#include "ng_misc_opt.h" +#include "ue2common.h" #include "nfa/accelcompile.h" #include "util/accel_scheme.h" -#include "util/charreach.h" +#include "util/charreach.h" #include "util/flat_containers.h" -#include "util/order_check.h" - -#include <map> -#include <vector> - -namespace ue2 { - -/* compile time accel defs */ -#define MAX_MERGED_ACCEL_STOPS 200 -#define ACCEL_MAX_STOP_CHAR 24 -#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ - +#include "util/order_check.h" + +#include <map> +#include <vector> + +namespace ue2 { + +/* compile time accel defs */ +#define MAX_MERGED_ACCEL_STOPS 200 +#define ACCEL_MAX_STOP_CHAR 24 +#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ + // forward-declaration of CompileContext struct CompileContext; -void findAccelFriends(const NGHolder &g, NFAVertex v, - const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, +void findAccelFriends(const NGHolder &g, NFAVertex v, + const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, u32 offset, flat_set<NFAVertex> *friends); - + #define DOUBLE_SHUFTI_LIMIT 20 - -NFAVertex get_sds_or_proxy(const NGHolder &g); - -AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts, - const std::vector<CharReach> &refined_cr, - const 
std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, + +NFAVertex get_sds_or_proxy(const NGHolder &g); + +AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts, + const std::vector<CharReach> &refined_cr, + const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, bool allow_wide, bool look_for_double_byte = false); - + AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths, const CharReach &terminating, bool look_for_double_byte = false); @@ -75,12 +75,12 @@ AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths, /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). If a * single byte accel scheme is found it is placed into *as */ -bool nfaCheckAccel(const NGHolder &g, NFAVertex v, - const std::vector<CharReach> &refined_cr, - const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - AccelScheme *as, bool allow_wide); - - -} // namespace ue2 - -#endif +bool nfaCheckAccel(const NGHolder &g, NFAVertex v, + const std::vector<CharReach> &refined_cr, + const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, + AccelScheme *as, bool allow_wide); + + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp index d25ac43e87..3b8c17eaf9 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp @@ -1,87 +1,87 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Literal analysis and scoring. - */ -#include "ng_literal_analysis.h" - -#include "ng_holder.h" -#include "ng_split.h" -#include "ng_util.h" -#include "ue2common.h" -#include "rose/rose_common.h" -#include "util/compare.h" -#include "util/depth.h" -#include "util/graph.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Literal analysis and scoring. + */ +#include "ng_literal_analysis.h" + +#include "ng_holder.h" +#include "ng_split.h" +#include "ng_util.h" +#include "ue2common.h" +#include "rose/rose_common.h" +#include "util/compare.h" +#include "util/depth.h" +#include "util/graph.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" #include "util/ue2_graph.h" -#include "util/ue2string.h" - -#include <algorithm> -#include <fstream> -#include <queue> - -#include <boost/graph/boykov_kolmogorov_max_flow.hpp> - -using namespace std; - -namespace ue2 { - -/** Maximum number of paths to generate. */ -static const u32 MAX_WIDTH = 11; - -/** Scoring adjustment for 'uniqueness' in literal. 
*/ -static const u64a WEIGHT_OF_UNIQUENESS = 250; - -namespace { - -/* Small literal graph type used for the suffix tree used in - * compressAndScore. */ - -struct LitGraphVertexProps { +#include "util/ue2string.h" + +#include <algorithm> +#include <fstream> +#include <queue> + +#include <boost/graph/boykov_kolmogorov_max_flow.hpp> + +using namespace std; + +namespace ue2 { + +/** Maximum number of paths to generate. */ +static const u32 MAX_WIDTH = 11; + +/** Scoring adjustment for 'uniqueness' in literal. */ +static const u64a WEIGHT_OF_UNIQUENESS = 250; + +namespace { + +/* Small literal graph type used for the suffix tree used in + * compressAndScore. */ + +struct LitGraphVertexProps { LitGraphVertexProps() = default; explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} - ue2_literal::elem c; // string element (char + bool) + ue2_literal::elem c; // string element (char + bool) size_t index = 0; // managed by ue2_graph -}; - -struct LitGraphEdgeProps { +}; + +struct LitGraphEdgeProps { LitGraphEdgeProps() = default; - explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} - u64a score = NO_LITERAL_AT_EDGE_SCORE; + explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} + u64a score = NO_LITERAL_AT_EDGE_SCORE; size_t index = 0; // managed by ue2_graph -}; - +}; + struct LitGraph : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> { - + LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} const vertex_descriptor root; @@ -91,399 +91,399 @@ struct LitGraph typedef LitGraph::vertex_descriptor LitVertex; typedef LitGraph::edge_descriptor LitEdge; -typedef pair<LitVertex, NFAVertex> VertexPair; -typedef std::queue<VertexPair> LitVertexQ; - -} // namespace - -#ifdef DUMP_SUPPORT - -/** \brief Dump the literal graph in Graphviz format. 
*/ -static UNUSED +typedef pair<LitVertex, NFAVertex> VertexPair; +typedef std::queue<VertexPair> LitVertexQ; + +} // namespace + +#ifdef DUMP_SUPPORT + +/** \brief Dump the literal graph in Graphviz format. */ +static UNUSED void dumpGraph(const char *filename, const LitGraph &lg) { - ofstream fout(filename); - - fout << "digraph G {" << endl; - - for (auto v : vertices_range(lg)) { + ofstream fout(filename); + + fout << "digraph G {" << endl; + + for (auto v : vertices_range(lg)) { fout << lg[v].index; if (v == lg.root) { - fout << "[label=\"ROOT\"];"; + fout << "[label=\"ROOT\"];"; } else if (v == lg.sink) { - fout << "[label=\"SINK\"];"; - } else { - ue2_literal s; - s.push_back(lg[v].c); - fout << "[label=\"" << dumpString(s) << "\"];"; - } - fout << endl; - } - - for (const auto &e : edges_range(lg)) { - LitVertex u = source(e, lg), v = target(e, lg); + fout << "[label=\"SINK\"];"; + } else { + ue2_literal s; + s.push_back(lg[v].c); + fout << "[label=\"" << dumpString(s) << "\"];"; + } + fout << endl; + } + + for (const auto &e : edges_range(lg)) { + LitVertex u = source(e, lg), v = target(e, lg); fout << lg[u].index << " -> " << lg[v].index << "[label=\"" << lg[e].score << "\"]" << ";" << endl; - } - - fout << "}" << endl; -} - -#endif // DUMP_SUPPORT - -static -bool allowExpand(size_t numItems, size_t totalPathsSoFar) { - if (numItems == 0) { - return false; - } - - if (numItems + totalPathsSoFar > MAX_WIDTH) { - return false; - } - - return true; -} - -static + } + + fout << "}" << endl; +} + +#endif // DUMP_SUPPORT + +static +bool allowExpand(size_t numItems, size_t totalPathsSoFar) { + if (numItems == 0) { + return false; + } + + if (numItems + totalPathsSoFar > MAX_WIDTH) { + return false; + } + + return true; +} + +static LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, const ue2_literal::elem &c) { - // Check if we already have this in the graph. - for (auto v : adjacent_vertices_range(pred, lg)) { + // Check if we already have this in the graph. 
+ for (auto v : adjacent_vertices_range(pred, lg)) { if (v == lg.sink) { - continue; - } - if (lg[v].c == c) { - return v; - } - } - - LitVertex lv = add_vertex(LitGraphVertexProps(c), lg); - add_edge(pred, lv, lg); - return lv; -} - -static + continue; + } + if (lg[v].c == c) { + return v; + } + } + + LitVertex lv = add_vertex(LitGraphVertexProps(c), lg); + add_edge(pred, lv, lg); + return lv; +} + +static void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, const CharReach &cr, NFAVertex v) { for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { - if (myisupper(i) && cr.test(mytolower(i))) { - // ignore upper half of a nocase pair - continue; - } - - bool nocase = myislower(i) && cr.test(mytoupper(i)); - ue2_literal::elem c((char)i, nocase); + if (myisupper(i) && cr.test(mytolower(i))) { + // ignore upper half of a nocase pair + continue; + } + + bool nocase = myislower(i) && cr.test(mytoupper(i)); + ue2_literal::elem c((char)i, nocase); LitVertex lv = addToLitGraph(lg, pred, c); - workQ.push(VertexPair(lv, v)); - } -} - -static + workQ.push(VertexPair(lv, v)); + } +} + +static void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, const NFAEdge &e) { - NFAVertex u = source(e, g); - NFAVertex v = target(e, g); - const CharReach &cr = g[v].char_reach; - - if (!allowExpand(cr.count(), 0)) { - return; - } - + NFAVertex u = source(e, g); + NFAVertex v = target(e, g); + const CharReach &cr = g[v].char_reach; + + if (!allowExpand(cr.count(), 0)) { + return; + } + addToQueue(workQ, lg, lg.root, cr, u); -} - -static -u32 crCardinality(const CharReach &cr) { - // Special-case for handling dots, much faster than running the find_next - // loop below. - if (cr.all()) { - return 230; // [^A-Z] - } - - u32 rv = 0; +} + +static +u32 crCardinality(const CharReach &cr) { + // Special-case for handling dots, much faster than running the find_next + // loop below. 
+ if (cr.all()) { + return 230; // [^A-Z] + } + + u32 rv = 0; for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { - if (myisupper(i) && cr.test(mytolower(i))) { - // ignore upper half of a nocase pair - continue; - } - rv++; - } - - return rv; -} - -/** Filter out literals that include other literals as suffixes. We do this by - * identifying vertices connected to the sink and removing their other - * out-edges. */ -static + if (myisupper(i) && cr.test(mytolower(i))) { + // ignore upper half of a nocase pair + continue; + } + rv++; + } + + return rv; +} + +/** Filter out literals that include other literals as suffixes. We do this by + * identifying vertices connected to the sink and removing their other + * out-edges. */ +static void filterLitGraph(LitGraph &lg) { for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { remove_out_edge_if(v, [&lg](const LitEdge &e) { return target(e, lg) != lg.sink; - }, lg); - } - - // We could do a DFS-and-prune here, if we wanted. Right now, we just - // handle it in extractLiterals by throwing away paths that don't run all - // the way from sink to root. -} - -/** Extracts all the literals from the given literal graph. Walks the graph - * from each predecessor of the sink (note: it's a suffix tree except for this - * convenience) towards the source, storing each string as we go. */ -static + }, lg); + } + + // We could do a DFS-and-prune here, if we wanted. Right now, we just + // handle it in extractLiterals by throwing away paths that don't run all + // the way from sink to root. +} + +/** Extracts all the literals from the given literal graph. Walks the graph + * from each predecessor of the sink (note: it's a suffix tree except for this + * convenience) towards the source, storing each string as we go. 
*/ +static void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) { - ue2_literal lit; - + ue2_literal lit; + for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { - lit.clear(); + lit.clear(); while (u != lg.root) { - lit.push_back(lg[u].c); - assert(in_degree(u, lg) <= 1); - LitGraph::inv_adjacency_iterator ai2, ae2; - tie(ai2, ae2) = inv_adjacent_vertices(u, lg); - if (ai2 == ae2) { - // Path has been cut, time for the next literal. - goto next_literal; - } - u = *ai2; - } - s.insert(lit); -next_literal: - ; - } -} - -#ifndef NDEBUG -static -bool hasSuffixLiterals(const set<ue2_literal> &s) { - for (auto it = s.begin(), ite = s.end(); it != ite; ++it) { - for (auto jt = std::next(it); jt != ite; ++jt) { - if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) { - DEBUG_PRINTF("'%s' and '%s' have suffix issues\n", - dumpString(*it).c_str(), - dumpString(*jt).c_str()); - return true; - } - } - } - return false; -} -#endif - -static -void processWorkQueue(const NGHolder &g, const NFAEdge &e, - set<ue2_literal> &s) { - if (is_special(target(e, g), g)) { - return; - } - - LitGraph lg; - - LitVertexQ workQ; + lit.push_back(lg[u].c); + assert(in_degree(u, lg) <= 1); + LitGraph::inv_adjacency_iterator ai2, ae2; + tie(ai2, ae2) = inv_adjacent_vertices(u, lg); + if (ai2 == ae2) { + // Path has been cut, time for the next literal. 
+ goto next_literal; + } + u = *ai2; + } + s.insert(lit); +next_literal: + ; + } +} + +#ifndef NDEBUG +static +bool hasSuffixLiterals(const set<ue2_literal> &s) { + for (auto it = s.begin(), ite = s.end(); it != ite; ++it) { + for (auto jt = std::next(it); jt != ite; ++jt) { + if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) { + DEBUG_PRINTF("'%s' and '%s' have suffix issues\n", + dumpString(*it).c_str(), + dumpString(*jt).c_str()); + return true; + } + } + } + return false; +} +#endif + +static +void processWorkQueue(const NGHolder &g, const NFAEdge &e, + set<ue2_literal> &s) { + if (is_special(target(e, g), g)) { + return; + } + + LitGraph lg; + + LitVertexQ workQ; initWorkQueue(workQ, lg, g, e); - - while (!workQ.empty()) { - const LitVertex lv = workQ.front().first; - const NFAVertex &t = workQ.front().second; - const CharReach &cr = g[t].char_reach; - - u32 cr_card = crCardinality(cr); - size_t numItems = cr_card * in_degree(t, g); + + while (!workQ.empty()) { + const LitVertex lv = workQ.front().first; + const NFAVertex &t = workQ.front().second; + const CharReach &cr = g[t].char_reach; + + u32 cr_card = crCardinality(cr); + size_t numItems = cr_card * in_degree(t, g); size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; - - if (g[t].index == NODE_START) { - // reached start, add to literal set + + if (g[t].index == NODE_START) { + // reached start, add to literal set add_edge_if_not_present(lv, lg.sink, lg); - goto next_work_elem; - } - - // Expand next vertex - if (allowExpand(numItems, committed_count)) { - for (auto u : inv_adjacent_vertices_range(t, g)) { + goto next_work_elem; + } + + // Expand next vertex + if (allowExpand(numItems, committed_count)) { + for (auto u : inv_adjacent_vertices_range(t, g)) { addToQueue(workQ, lg, lv, cr, u); - } - goto next_work_elem; - } - - // Expand this vertex - if (allowExpand(cr_card, committed_count)) { - for (size_t i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { - if (myisupper(i) 
&& cr.test(mytolower(i))) { - // ignore upper half of a nocase pair - continue; - } - - bool nocase = myislower(i) && cr.test(mytoupper(i)); - ue2_literal::elem c((char)i, nocase); + } + goto next_work_elem; + } + + // Expand this vertex + if (allowExpand(cr_card, committed_count)) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { + if (myisupper(i) && cr.test(mytolower(i))) { + // ignore upper half of a nocase pair + continue; + } + + bool nocase = myislower(i) && cr.test(mytoupper(i)); + ue2_literal::elem c((char)i, nocase); LitVertex lt = addToLitGraph(lg, lv, c); add_edge_if_not_present(lt, lg.sink, lg); - } - goto next_work_elem; - } - - // add to literal set + } + goto next_work_elem; + } + + // add to literal set add_edge_if_not_present(lv, lg.sink, lg); - next_work_elem: - workQ.pop(); - } - + next_work_elem: + workQ.pop(); + } + filterLitGraph(lg); //dumpGraph("litgraph.dot", lg); extractLiterals(lg, s); - - // Our literal set should contain no literal that is a suffix of another. - assert(!hasSuffixLiterals(s)); - + + // Our literal set should contain no literal that is a suffix of another. + assert(!hasSuffixLiterals(s)); + DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, - g[source(e, g)].index, g[target(e, g)].index, s.size()); -} - + g[source(e, g)].index, g[target(e, g)].index, s.size()); +} + bool bad_mixed_sensitivity(const ue2_literal &s) { /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end, * we should be able to handle it */ return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH; } -static -u64a litUniqueness(const string &s) { - CharReach seen(s); - return seen.count(); -} - -/** Count the significant bits of this literal (i.e. seven for nocase alpha, - * eight for everything else). */ -static -u64a litCountBits(const ue2_literal &lit) { - u64a n = 0; - for (const auto &c : lit) { - n += c.nocase ? 
7 : 8; - } - return n; -} - -/** Returns a fairly arbitrary score for the given literal, used to compare the - * suitability of different candidates. */ -static -u64a scoreLiteral(const ue2_literal &s) { - // old scoring scheme: SUM(s in S: 1/s.len()^2) - // now weight (currently 75/25) with number of unique chars - // in the string - u64a len = litCountBits(s); - u64a lenUnique = litUniqueness(s.get_string()) * 8; - - u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len + - WEIGHT_OF_UNIQUENESS * lenUnique; - weightedLen /= 8; - - DEBUG_PRINTF("scored literal '%s' %llu\n", - escapeString(s.get_string()).c_str(), weightedLen); - - return weightedLen; -} - - -/** - * calculateScore has the following properties: - * - score of literal is the same as the score of the reversed literal; - * - score of substring of literal is worse than the original literal's score; - * - score of any literal should be non-zero. - */ -static -u64a calculateScore(const ue2_literal &s) { - if (s.empty()) { - return NO_LITERAL_AT_EDGE_SCORE; - } - - u64a weightedLen = scoreLiteral(s); - - DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen); - u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen); - - if (!rv) { - rv = 1; - } - DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv); - return rv; -} - -/** Adds a literal in reverse order, building up a suffix tree. */ -static +static +u64a litUniqueness(const string &s) { + CharReach seen(s); + return seen.count(); +} + +/** Count the significant bits of this literal (i.e. seven for nocase alpha, + * eight for everything else). */ +static +u64a litCountBits(const ue2_literal &lit) { + u64a n = 0; + for (const auto &c : lit) { + n += c.nocase ? 7 : 8; + } + return n; +} + +/** Returns a fairly arbitrary score for the given literal, used to compare the + * suitability of different candidates. 
*/ +static +u64a scoreLiteral(const ue2_literal &s) { + // old scoring scheme: SUM(s in S: 1/s.len()^2) + // now weight (currently 75/25) with number of unique chars + // in the string + u64a len = litCountBits(s); + u64a lenUnique = litUniqueness(s.get_string()) * 8; + + u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len + + WEIGHT_OF_UNIQUENESS * lenUnique; + weightedLen /= 8; + + DEBUG_PRINTF("scored literal '%s' %llu\n", + escapeString(s.get_string()).c_str(), weightedLen); + + return weightedLen; +} + + +/** + * calculateScore has the following properties: + * - score of literal is the same as the score of the reversed literal; + * - score of substring of literal is worse than the original literal's score; + * - score of any literal should be non-zero. + */ +static +u64a calculateScore(const ue2_literal &s) { + if (s.empty()) { + return NO_LITERAL_AT_EDGE_SCORE; + } + + u64a weightedLen = scoreLiteral(s); + + DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen); + u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen); + + if (!rv) { + rv = 1; + } + DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv); + return rv; +} + +/** Adds a literal in reverse order, building up a suffix tree. 
*/ +static void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { - DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); - ue2_literal suffix; + DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); + ue2_literal suffix; LitVertex v = lg.root; - for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { - suffix.push_back(*it); - LitVertex w; - for (auto v2 : adjacent_vertices_range(v, lg)) { + for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { + suffix.push_back(*it); + LitVertex w; + for (auto v2 : adjacent_vertices_range(v, lg)) { if (v2 != lg.sink && lg[v2].c == *it) { - w = v2; - goto next_char; - } - } - w = add_vertex(LitGraphVertexProps(*it), lg); - add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg); -next_char: - v = w; - } - - // Wire the last vertex to the sink. + w = v2; + goto next_char; + } + } + w = add_vertex(LitGraphVertexProps(*it), lg); + add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg); +next_char: + v = w; + } + + // Wire the last vertex to the sink. add_edge(v, lg.sink, lg); -} - -static -void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg, +} + +static +void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg, set<ue2_literal> &s) { - for (const auto &e : cutset) { + for (const auto &e : cutset) { LitVertex u = source(e, lg); LitVertex v = target(e, lg); - ue2_literal lit; - lit.push_back(lg[v].c); + ue2_literal lit; + lit.push_back(lg[v].c); while (u != lg.root) { - lit.push_back(lg[u].c); - assert(in_degree(u, lg) == 1); - LitGraph::inv_adjacency_iterator ai, ae; - tie(ai, ae) = inv_adjacent_vertices(u, lg); - if (ai == ae) { - // Path has been cut, time for the next literal. 
- goto next_literal; - } - u = *ai; - } - DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str()); - s.insert(lit); -next_literal: - ; - } -} - -#ifdef DEBUG -static UNUSED + lit.push_back(lg[u].c); + assert(in_degree(u, lg) == 1); + LitGraph::inv_adjacency_iterator ai, ae; + tie(ai, ae) = inv_adjacent_vertices(u, lg); + if (ai == ae) { + // Path has been cut, time for the next literal. + goto next_literal; + } + u = *ai; + } + DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str()); + s.insert(lit); +next_literal: + ; + } +} + +#ifdef DEBUG +static UNUSED const char *describeColor(small_color c) { - switch (c) { + switch (c) { case small_color::white: - return "white"; + return "white"; case small_color::gray: - return "gray"; + return "gray"; case small_color::black: - return "black"; - default: - return "unknown"; - } -} -#endif - -/** - * The BGL's boykov_kolmogorov_max_flow requires that all edges have their + return "black"; + default: + return "unknown"; + } +} +#endif + +/** + * The BGL's boykov_kolmogorov_max_flow requires that all edges have their * reverse edge in the graph. This function adds them, returning a vector * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there * should be no existing reverse_edges. 
- */ -static + */ +static vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) { const size_t edge_count = num_edges(lg); vector<LitEdge> fwd_edges; @@ -491,137 +491,137 @@ vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) { for (const auto &e : edges_range(lg)) { fwd_edges.push_back(e); } - + vector<LitEdge> rev_map(2 * edge_count); - + for (const auto &e : fwd_edges) { LitVertex u = source(e, lg); LitVertex v = target(e, lg); - + assert(!edge(v, u, lg).second); - + LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; rev_map[lg[e].index] = rev; rev_map[lg[rev].index] = e; - } - + } + return rev_map; -} - -static +} + +static void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) { - cutset.clear(); - + cutset.clear(); + //dumpGraph("litgraph.dot", lg); - + assert(!in_degree(lg.root, lg)); assert(!out_degree(lg.sink, lg)); size_t num_real_edges = num_edges(lg); - - // Add reverse edges for the convenience of the BGL's max flow algorithm. + + // Add reverse edges for the convenience of the BGL's max flow algorithm. 
vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg); - + const auto v_index_map = get(&LitGraphVertexProps::index, lg); const auto e_index_map = get(&LitGraphEdgeProps::index, lg); - const size_t num_verts = num_vertices(lg); + const size_t num_verts = num_vertices(lg); auto colors = make_small_color_map(lg); - vector<s32> distances(num_verts); - vector<LitEdge> predecessors(num_verts); + vector<s32> distances(num_verts); + vector<LitEdge> predecessors(num_verts); vector<u64a> residuals(num_edges(lg)); - - UNUSED u64a flow = boykov_kolmogorov_max_flow(lg, - get(&LitGraphEdgeProps::score, lg), + + UNUSED u64a flow = boykov_kolmogorov_max_flow(lg, + get(&LitGraphEdgeProps::score, lg), make_iterator_property_map(residuals.begin(), e_index_map), make_iterator_property_map(rev_edges.begin(), e_index_map), - make_iterator_property_map(predecessors.begin(), v_index_map), + make_iterator_property_map(predecessors.begin(), v_index_map), colors, - make_iterator_property_map(distances.begin(), v_index_map), + make_iterator_property_map(distances.begin(), v_index_map), v_index_map, lg.root, lg.sink); - DEBUG_PRINTF("done, flow = %llu\n", flow); - + DEBUG_PRINTF("done, flow = %llu\n", flow); + /* remove reverse edges */ remove_edge_if([&](const LitEdge &e) { return lg[e].index >= num_real_edges; }, lg); - - vector<LitEdge> white_cut, black_cut; - u64a white_flow = 0, black_flow = 0; - - for (const auto &e : edges_range(lg)) { - const LitVertex u = source(e, lg), v = target(e, lg); + + vector<LitEdge> white_cut, black_cut; + u64a white_flow = 0, black_flow = 0; + + for (const auto &e : edges_range(lg)) { + const LitVertex u = source(e, lg), v = target(e, lg); const auto ucolor = get(colors, u); const auto vcolor = get(colors, v); - + DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, describeColor(ucolor), lg[v].index, describeColor(vcolor), - lg[e].score); - + lg[e].score); + if (ucolor != small_color::white && vcolor == small_color::white) { assert(v != 
lg.sink); - white_cut.push_back(e); - white_flow += lg[e].score; - } + white_cut.push_back(e); + white_flow += lg[e].score; + } if (ucolor == small_color::black && vcolor != small_color::black) { assert(v != lg.sink); - black_cut.push_back(e); - black_flow += lg[e].score; - } - } - - DEBUG_PRINTF("white flow = %llu, black flow = %llu\n", - white_flow, black_flow); - assert(white_flow && black_flow); - - if (white_flow <= black_flow) { - DEBUG_PRINTF("selected white cut\n"); - cutset.swap(white_cut); - } else { - DEBUG_PRINTF("selected black cut\n"); - cutset.swap(black_cut); - } - - DEBUG_PRINTF("min cut has %zu edges\n", cutset.size()); - assert(!cutset.empty()); -} - -/** Takes a set of literals and derives a better one from them, returning its - * score. Literals with a common suffix S will be replaced with S. (for - * example, {foobar, fooobar} -> {oobar}). - */ -u64a compressAndScore(set<ue2_literal> &s) { - if (s.empty()) { - return NO_LITERAL_AT_EDGE_SCORE; - } - - if (s.size() == 1) { - return calculateScore(*s.begin()); - } - - UNUSED u64a initialScore = scoreSet(s); - DEBUG_PRINTF("begin, initial literals have score %llu\n", - initialScore); - - LitGraph lg; - - for (const auto &lit : s) { + black_cut.push_back(e); + black_flow += lg[e].score; + } + } + + DEBUG_PRINTF("white flow = %llu, black flow = %llu\n", + white_flow, black_flow); + assert(white_flow && black_flow); + + if (white_flow <= black_flow) { + DEBUG_PRINTF("selected white cut\n"); + cutset.swap(white_cut); + } else { + DEBUG_PRINTF("selected black cut\n"); + cutset.swap(black_cut); + } + + DEBUG_PRINTF("min cut has %zu edges\n", cutset.size()); + assert(!cutset.empty()); +} + +/** Takes a set of literals and derives a better one from them, returning its + * score. Literals with a common suffix S will be replaced with S. (for + * example, {foobar, fooobar} -> {oobar}). 
+ */ +u64a compressAndScore(set<ue2_literal> &s) { + if (s.empty()) { + return NO_LITERAL_AT_EDGE_SCORE; + } + + if (s.size() == 1) { + return calculateScore(*s.begin()); + } + + UNUSED u64a initialScore = scoreSet(s); + DEBUG_PRINTF("begin, initial literals have score %llu\n", + initialScore); + + LitGraph lg; + + for (const auto &lit : s) { addReversedLiteral(lit, lg); - } - - DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", - num_vertices(lg), num_edges(lg)); - - vector<LitEdge> cutset; + } + + DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", + num_vertices(lg), num_edges(lg)); + + vector<LitEdge> cutset; findMinCut(lg, cutset); - - s.clear(); + + s.clear(); extractLiterals(cutset, lg, s); - - u64a score = scoreSet(s); - DEBUG_PRINTF("compressed score is %llu\n", score); - assert(score <= initialScore); - return score; -} - + + u64a score = scoreSet(s); + DEBUG_PRINTF("compressed score is %llu\n", score); + assert(score <= initialScore); + return score; +} + /* like compressAndScore, but replaces long mixed sensitivity literals with * something weaker. 
*/ u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) { @@ -664,191 +664,191 @@ u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) { return compressAndScore(lits); } -u64a scoreSet(const set<ue2_literal> &s) { - if (s.empty()) { - return NO_LITERAL_AT_EDGE_SCORE; - } - - u64a score = 1ULL; - - for (const auto &lit : s) { - score += calculateScore(lit); - } - - return score; -} - -set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) { - set<ue2_literal> s; - processWorkQueue(g, e, s); - return s; -} - -set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, - bool only_first_encounter) { - set<ue2_literal> s; - - if (is_special(v, g)) { - return s; - } - - set<ue2_literal> ls; - - for (const auto &e : in_edges_range(v, g)) { - if (source(e, g) == v && only_first_encounter) { - continue; /* ignore self loop on root vertex as we are interested in - * the first time we visit the vertex on the way to - * accept. In fact, we can ignore any back edges - but - * they would require a bit of effort to discover. */ - } - - ls = getLiteralSet(g, e); - if (ls.empty()) { - s.clear(); - return s; - } else { - s.insert(ls.begin(), ls.end()); - } - } - - return s; -} - +u64a scoreSet(const set<ue2_literal> &s) { + if (s.empty()) { + return NO_LITERAL_AT_EDGE_SCORE; + } + + u64a score = 1ULL; + + for (const auto &lit : s) { + score += calculateScore(lit); + } + + return score; +} + +set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) { + set<ue2_literal> s; + processWorkQueue(g, e, s); + return s; +} + +set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, + bool only_first_encounter) { + set<ue2_literal> s; + + if (is_special(v, g)) { + return s; + } + + set<ue2_literal> ls; + + for (const auto &e : in_edges_range(v, g)) { + if (source(e, g) == v && only_first_encounter) { + continue; /* ignore self loop on root vertex as we are interested in + * the first time we visit the vertex on the way to + * accept. 
In fact, we can ignore any back edges - but + * they would require a bit of effort to discover. */ + } + + ls = getLiteralSet(g, e); + if (ls.empty()) { + s.clear(); + return s; + } else { + s.insert(ls.begin(), ls.end()); + } + } + + return s; +} + vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { - assert(hasCorrectlyNumberedEdges(g)); - - vector<u64a> scores(num_edges(g)); - - for (const auto &e : edges_range(g)) { - u32 eidx = g[e].index; - assert(eidx < scores.size()); + assert(hasCorrectlyNumberedEdges(g)); + + vector<u64a> scores(num_edges(g)); + + for (const auto &e : edges_range(g)) { + u32 eidx = g[e].index; + assert(eidx < scores.size()); if (contains(known_bad, e)) { scores[eidx] = NO_LITERAL_AT_EDGE_SCORE; } else { set<ue2_literal> ls = getLiteralSet(g, e); scores[eidx] = compressAndScore(ls); } - } - - return scores; -} - + } + + return scores; +} + bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, NGHolder *rhs) { DEBUG_PRINTF("looking for leading floating literal\n"); set<NFAVertex> s_succ; insert(&s_succ, adjacent_vertices(g.start, g)); - + set<NFAVertex> sds_succ; insert(&sds_succ, adjacent_vertices(g.startDs, g)); - + bool floating = is_subset_of(s_succ, sds_succ); if (!floating) { DEBUG_PRINTF("not floating\n"); return false; } - + sds_succ.erase(g.startDs); if (sds_succ.size() != 1) { DEBUG_PRINTF("branchy root\n"); return false; } - + NFAVertex u = g.startDs; NFAVertex v = *sds_succ.begin(); - - while (true) { + + while (true) { DEBUG_PRINTF("validating vertex %zu\n", g[v].index); - - assert(v != g.acceptEod && v != g.accept); - - const CharReach &cr = g[v].char_reach; - if (cr.count() != 1 && !cr.isCaselessChar()) { - break; - } - - // Rose can only handle mixed-sensitivity literals up to the max mask - // length. 
- if (lit_out->length() >= MAX_MASK2_WIDTH) { - if (mixed_sensitivity(*lit_out)) { - DEBUG_PRINTF("long and mixed sensitivity\n"); - break; - } - if (ourisalpha((char)cr.find_first())) { - if (cr.isCaselessChar() != lit_out->any_nocase()) { - DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n", - (char)cr.find_first()); - break; - } - } - } - - if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("connection to accept\n"); - break; - } - - lit_out->push_back(cr.find_first(), cr.isCaselessChar()); - u = v; - - if (out_degree(v, g) != 1) { - DEBUG_PRINTF("out_degree != 1\n"); - break; - } - - v = *adjacent_vertices(v, g).first; - - if (in_degree(v, g) != 1) { - DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path - * to case accept (large cycles), - * ensures term */ - break; - } - } - - if (lit_out->empty()) { - return false; - } - assert(u != g.startDs); - + + assert(v != g.acceptEod && v != g.accept); + + const CharReach &cr = g[v].char_reach; + if (cr.count() != 1 && !cr.isCaselessChar()) { + break; + } + + // Rose can only handle mixed-sensitivity literals up to the max mask + // length. 
+ if (lit_out->length() >= MAX_MASK2_WIDTH) { + if (mixed_sensitivity(*lit_out)) { + DEBUG_PRINTF("long and mixed sensitivity\n"); + break; + } + if (ourisalpha((char)cr.find_first())) { + if (cr.isCaselessChar() != lit_out->any_nocase()) { + DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n", + (char)cr.find_first()); + break; + } + } + } + + if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { + DEBUG_PRINTF("connection to accept\n"); + break; + } + + lit_out->push_back(cr.find_first(), cr.isCaselessChar()); + u = v; + + if (out_degree(v, g) != 1) { + DEBUG_PRINTF("out_degree != 1\n"); + break; + } + + v = *adjacent_vertices(v, g).first; + + if (in_degree(v, g) != 1) { + DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path + * to case accept (large cycles), + * ensures term */ + break; + } + } + + if (lit_out->empty()) { + return false; + } + assert(u != g.startDs); + unordered_map<NFAVertex, NFAVertex> rhs_map; vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g)); - splitRHS(g, pivots, rhs, &rhs_map); - - DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(), - lit_out->length()); - assert(is_triggered(*rhs)); - return true; -} - -bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { - if (in_degree(g.acceptEod, g) != 1) { - return false; - } - - NFAVertex v = getSoleSourceVertex(g, g.accept); - - if (!v) { - return false; - } - - set<ue2_literal> s = getLiteralSet(g, v, false); - - if (s.size() != 1) { - return false; - } - - const ue2_literal &lit = *s.begin(); - - if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) { - DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n"); - return false; - } - - *lit_out = lit; - return true; -} - + splitRHS(g, pivots, rhs, &rhs_map); + + DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(), + lit_out->length()); + assert(is_triggered(*rhs)); + return true; +} + +bool getTrailingLiteral(const NGHolder &g, 
ue2_literal *lit_out) { + if (in_degree(g.acceptEod, g) != 1) { + return false; + } + + NFAVertex v = getSoleSourceVertex(g, g.accept); + + if (!v) { + return false; + } + + set<ue2_literal> s = getLiteralSet(g, v, false); + + if (s.size() != 1) { + return false; + } + + const ue2_literal &lit = *s.begin(); + + if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) { + DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n"); + return false; + } + + *lit_out = lit; + return true; +} + bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { NFAVertex v = g.accept; @@ -894,4 +894,4 @@ bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { return true; } -} // namespace ue2 +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h index 6bb8755610..943a6d33c9 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h @@ -1,62 +1,62 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Literal analysis and scoring. - */ - -#ifndef NG_LITERAL_ANALYSIS_H -#define NG_LITERAL_ANALYSIS_H - -#include <set> -#include <vector> - -#include "ng_holder.h" -#include "util/ue2string.h" - -namespace ue2 { - -#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Literal analysis and scoring. + */ + +#ifndef NG_LITERAL_ANALYSIS_H +#define NG_LITERAL_ANALYSIS_H + +#include <set> +#include <vector> + +#include "ng_holder.h" +#include "util/ue2string.h" + +namespace ue2 { + +#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL #define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */ - -class NGHolder; - -/** - * Fetch the literal set for a given vertex, returning it in \p s. Note: does - * NOT take into account any constraints due to streaming mode requirements. - * - * if only_first_encounter is requested, the output set may drop literals - * generated by revisiting the destination vertex. - */ -std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, - bool only_first_encounter = true); -std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e); - + +class NGHolder; + +/** + * Fetch the literal set for a given vertex, returning it in \p s. Note: does + * NOT take into account any constraints due to streaming mode requirements. + * + * if only_first_encounter is requested, the output set may drop literals + * generated by revisiting the destination vertex. 
+ */ +std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, + bool only_first_encounter = true); +std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e); + /** * Returns true if we are unable to use a mixed sensitivity literal in rose (as * our literal matchers are generally either case sensitive or not). @@ -68,31 +68,31 @@ bool bad_mixed_sensitivity(const ue2_literal &s); /** * Score all the edges in the given graph, returning them in \p scores indexed - * by edge_index. */ + * by edge_index. */ std::vector<u64a> scoreEdges(const NGHolder &h, const flat_set<NFAEdge> &known_bad = {}); - -/** Returns a score for a literal set. Lower scores are better. */ -u64a scoreSet(const std::set<ue2_literal> &s); - -/** Compress a literal set to fewer literals. */ -u64a compressAndScore(std::set<ue2_literal> &s); - + +/** Returns a score for a literal set. Lower scores are better. */ +u64a scoreSet(const std::set<ue2_literal> &s); + +/** Compress a literal set to fewer literals. */ +u64a compressAndScore(std::set<ue2_literal> &s); + /** * Compress a literal set to fewer literals and replace any long mixed * sensitivity literals with supported literals. */ u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s); -bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs); - -bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); - +bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, + NGHolder *rhs); + +bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); + /** \brief Returns true if the given literal is the only thing in the graph, * from (start or startDs) to accept. 
*/ bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit); -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp index 4d3965dfe2..4e085d9913 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp @@ -1,227 +1,227 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Literal Component Splitting. Identifies literals that span the - * graph and moves them into Rose. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Literal Component Splitting. Identifies literals that span the + * graph and moves them into Rose. + */ #include "ng_literal_component.h" -#include "grey.h" -#include "ng.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "ue2common.h" +#include "grey.h" +#include "ng.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "ue2common.h" #include "compiler/compiler.h" -#include "rose/rose_build.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/ue2string.h" - +#include "rose/rose_build.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/ue2string.h" + #include <unordered_set> -using namespace std; - -namespace ue2 { - -static +using namespace std; + +namespace ue2 { + +static bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase, bool &casefixed) { - const CharReach &cr = g[v].char_reach; - const size_t num = cr.count(); - if (num > 2) { - return false; // char class - } - - if (!casefixed) { - if (num == 2 && cr.isCaselessChar()) { - nocase = true; - casefixed = true; - return true; - } else if (num == 1) { - if (cr.isAlpha()) { - nocase = false; - casefixed = true; - } - // otherwise, still acceptable but we can't fix caselessness yet - return true; - } - } else { - // nocase property is fixed - if (nocase) { - if ((num == 2 && cr.isCaselessChar()) || - (num == 1 && !cr.isAlpha())) 
{ - return true; - } - } else { - return (num == 1); - } - } - - return false; -} - -static -void addToString(string &s, const NGHolder &g, NFAVertex v) { - const CharReach &cr = g[v].char_reach; - assert(cr.count() == 1 || cr.isCaselessChar()); - - char c = (char)cr.find_first(); - s.push_back(c); -} - -static + const CharReach &cr = g[v].char_reach; + const size_t num = cr.count(); + if (num > 2) { + return false; // char class + } + + if (!casefixed) { + if (num == 2 && cr.isCaselessChar()) { + nocase = true; + casefixed = true; + return true; + } else if (num == 1) { + if (cr.isAlpha()) { + nocase = false; + casefixed = true; + } + // otherwise, still acceptable but we can't fix caselessness yet + return true; + } + } else { + // nocase property is fixed + if (nocase) { + if ((num == 2 && cr.isCaselessChar()) || + (num == 1 && !cr.isAlpha())) { + return true; + } + } else { + return (num == 1); + } + } + + return false; +} + +static +void addToString(string &s, const NGHolder &g, NFAVertex v) { + const CharReach &cr = g[v].char_reach; + assert(cr.count() == 1 || cr.isCaselessChar()); + + char c = (char)cr.find_first(); + s.push_back(c); +} + +static bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, - set<NFAVertex> &dead) { + set<NFAVertex> &dead) { DEBUG_PRINTF("examine vertex %zu\n", g[v].index); - bool nocase = false, casefixed = false; - - assert(!is_special(v, g)); - - size_t reqInDegree; - if (anchored) { - reqInDegree = 1; - assert(edge(g.start, v, g).second); - } else { - reqInDegree = 2; - assert(edge(g.start, v, g).second); - assert(edge(g.startDs, v, g).second); - } + bool nocase = false, casefixed = false; + + assert(!is_special(v, g)); + + size_t reqInDegree; + if (anchored) { + reqInDegree = 1; + assert(edge(g.start, v, g).second); + } else { + reqInDegree = 2; + assert(edge(g.start, v, g).second); + assert(edge(g.startDs, v, g).second); + } if (in_degree(v, g) > reqInDegree) { - DEBUG_PRINTF("extra in-edges\n"); - return 
false; - } - - if (!isLiteralChar(g, v, nocase, casefixed)) { - DEBUG_PRINTF("not literal\n"); - return false; - } - - string literal; - addToString(literal, g, v); - - // Remaining vertices must come in a chain, each with one in-edge and one - // out-edge only. - NFAVertex u; - while (1) { - if (out_degree(v, g) != 1) { - DEBUG_PRINTF("branches, not literal\n"); - return false; - } - - u = v; // previous vertex - v = *(adjacent_vertices(v, g).first); - + DEBUG_PRINTF("extra in-edges\n"); + return false; + } + + if (!isLiteralChar(g, v, nocase, casefixed)) { + DEBUG_PRINTF("not literal\n"); + return false; + } + + string literal; + addToString(literal, g, v); + + // Remaining vertices must come in a chain, each with one in-edge and one + // out-edge only. + NFAVertex u; + while (1) { + if (out_degree(v, g) != 1) { + DEBUG_PRINTF("branches, not literal\n"); + return false; + } + + u = v; // previous vertex + v = *(adjacent_vertices(v, g).first); + DEBUG_PRINTF("loop, v=%zu\n", g[v].index); - - if (is_special(v, g)) { - if (v == g.accept || v == g.acceptEod) { - break; // OK - } else { - assert(0); // start? - return false; - } - } else { - // Ordinary, must be literal - if (!isLiteralChar(g, v, nocase, casefixed)) { - DEBUG_PRINTF("not literal\n"); - return false; - } - if (in_degree(v, g) != 1) { - DEBUG_PRINTF("branches, not literal\n"); - return false; - } - } - - addToString(literal, g, v); - } - - // Successfully found a literal; there might be multiple report IDs, in - // which case we add all the reports. - assert(!is_special(u, g)); - bool eod = v == g.acceptEod; - assert(eod || v == g.accept); - - DEBUG_PRINTF("success: found %s literal '%s'\n", - anchored ? "anchored" : "unanchored", - escapeString(literal).c_str()); - - // Literals of length 1 are better served going through later optimisation - // passes, where they might be combined together into a character class. 
- if (literal.length() == 1) { - DEBUG_PRINTF("skipping literal of length 1\n"); - return false; - } - - ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports); - - // Remove the terminal vertex. Later, we rely on pruneUseless to remove the - // other vertices in this chain, since they'll no longer lead to an accept. - dead.insert(u); - - return true; -} - -/** \brief Split off literals. True if any changes were made to the graph. */ + + if (is_special(v, g)) { + if (v == g.accept || v == g.acceptEod) { + break; // OK + } else { + assert(0); // start? + return false; + } + } else { + // Ordinary, must be literal + if (!isLiteralChar(g, v, nocase, casefixed)) { + DEBUG_PRINTF("not literal\n"); + return false; + } + if (in_degree(v, g) != 1) { + DEBUG_PRINTF("branches, not literal\n"); + return false; + } + } + + addToString(literal, g, v); + } + + // Successfully found a literal; there might be multiple report IDs, in + // which case we add all the reports. + assert(!is_special(u, g)); + bool eod = v == g.acceptEod; + assert(eod || v == g.accept); + + DEBUG_PRINTF("success: found %s literal '%s'\n", + anchored ? "anchored" : "unanchored", + escapeString(literal).c_str()); + + // Literals of length 1 are better served going through later optimisation + // passes, where they might be combined together into a character class. + if (literal.length() == 1) { + DEBUG_PRINTF("skipping literal of length 1\n"); + return false; + } + + ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports); + + // Remove the terminal vertex. Later, we rely on pruneUseless to remove the + // other vertices in this chain, since they'll no longer lead to an accept. + dead.insert(u); + + return true; +} + +/** \brief Split off literals. True if any changes were made to the graph. 
*/ bool splitOffLiterals(NG &ng, NGHolder &g) { if (!ng.cc.grey.allowLiteral) { - return false; - } - - bool changed = false; - set<NFAVertex> dead; - + return false; + } + + bool changed = false; + set<NFAVertex> dead; + unordered_set<NFAVertex> unanchored; // for faster lookup. - insert(&unanchored, adjacent_vertices(g.startDs, g)); - - // Anchored literals. - for (auto v : adjacent_vertices_range(g.start, g)) { - if (!is_special(v, g) && !contains(unanchored, v)) { - changed |= splitOffLiteral(ng, g, v, true, dead); - } - } - - // Unanchored literals. - for (auto v : adjacent_vertices_range(g.startDs, g)) { - if (!is_special(v, g)) { - changed |= splitOffLiteral(ng, g, v, false, dead); - } - } - - if (changed) { - remove_vertices(dead, g); - pruneUseless(g); - return true; - } - - return false; -} - -} // namespace ue2 + insert(&unanchored, adjacent_vertices(g.startDs, g)); + + // Anchored literals. + for (auto v : adjacent_vertices_range(g.start, g)) { + if (!is_special(v, g) && !contains(unanchored, v)) { + changed |= splitOffLiteral(ng, g, v, true, dead); + } + } + + // Unanchored literals. 
+ for (auto v : adjacent_vertices_range(g.startDs, g)) { + if (!is_special(v, g)) { + changed |= splitOffLiteral(ng, g, v, false, dead); + } + } + + if (changed) { + remove_vertices(dead, g); + pruneUseless(g); + return true; + } + + return false; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h index 1f284ce367..0cd8422ae7 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h @@ -1,47 +1,47 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Literal Component Splitting. Identifies literals that span the - * graph and moves them into Rose. - */ - -#ifndef NG_LITERAL_COMPONENT_H -#define NG_LITERAL_COMPONENT_H - -namespace ue2 { - -class NG; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Literal Component Splitting. Identifies literals that span the + * graph and moves them into Rose. + */ + +#ifndef NG_LITERAL_COMPONENT_H +#define NG_LITERAL_COMPONENT_H + +namespace ue2 { + +class NG; class NGHolder; - -/** \brief Split off literals. True if any changes were made to the graph. */ + +/** \brief Split off literals. True if any changes were made to the graph. */ bool splitOffLiterals(NG &ng, NGHolder &g); - -} // namespace ue2 - -#endif // NG_LITERAL_COMPONENT_H + +} // namespace ue2 + +#endif // NG_LITERAL_COMPONENT_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp index 61a31dbf34..5d2f4ca5df 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp @@ -1,252 +1,252 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Analysis for literals decorated by leading/trailing assertions or - * character classes. - */ -#include "ng_literal_decorated.h" - -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_util.h" -#include "rose/rose_build.h" -#include "rose/rose_in_graph.h" -#include "rose/rose_in_util.h" -#include "util/compile_context.h" -#include "util/dump_charclass.h" -#include "util/make_unique.h" - -#include <algorithm> -#include <memory> -#include <sstream> - -using namespace std; - -namespace ue2 { - -namespace { - -/** \brief Max fixed-width paths to generate from a graph. */ -static constexpr size_t MAX_PATHS = 10; - -/** \brief Max degree for any non-special vertex in the graph. 
*/ -static constexpr size_t MAX_VERTEX_DEGREE = 6; - -using Path = vector<NFAVertex>; - -} // namespace - -static -bool findPaths(const NGHolder &g, vector<Path> &paths) { - vector<NFAVertex> order = getTopoOrdering(g); - - vector<size_t> read_count(num_vertices(g)); - vector<vector<Path>> built(num_vertices(g)); - - for (auto it = order.rbegin(); it != order.rend(); ++it) { - NFAVertex v = *it; - auto &out = built[g[v].index]; - assert(out.empty()); - - read_count[g[v].index] = out_degree(v, g); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Analysis for literals decorated by leading/trailing assertions or + * character classes. + */ +#include "ng_literal_decorated.h" + +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_util.h" +#include "rose/rose_build.h" +#include "rose/rose_in_graph.h" +#include "rose/rose_in_util.h" +#include "util/compile_context.h" +#include "util/dump_charclass.h" +#include "util/make_unique.h" + +#include <algorithm> +#include <memory> +#include <sstream> + +using namespace std; + +namespace ue2 { + +namespace { + +/** \brief Max fixed-width paths to generate from a graph. */ +static constexpr size_t MAX_PATHS = 10; + +/** \brief Max degree for any non-special vertex in the graph. 
*/ +static constexpr size_t MAX_VERTEX_DEGREE = 6; + +using Path = vector<NFAVertex>; + +} // namespace + +static +bool findPaths(const NGHolder &g, vector<Path> &paths) { + vector<NFAVertex> order = getTopoOrdering(g); + + vector<size_t> read_count(num_vertices(g)); + vector<vector<Path>> built(num_vertices(g)); + + for (auto it = order.rbegin(); it != order.rend(); ++it) { + NFAVertex v = *it; + auto &out = built[g[v].index]; + assert(out.empty()); + + read_count[g[v].index] = out_degree(v, g); + DEBUG_PRINTF("setting read_count to %zu for %zu\n", - read_count[g[v].index], g[v].index); - - if (v == g.start || v == g.startDs) { - out.push_back({v}); - continue; - } - - // The paths to v are the paths to v's predecessors, with v added to - // the end of each. - for (auto u : inv_adjacent_vertices_range(v, g)) { - // We have a stylized connection from start -> startDs, but we - // don't need anchored and unanchored versions of the same path. - if (u == g.start && edge(g.startDs, v, g).second) { - continue; - } - - // Similarly, avoid the accept->acceptEod edge. - if (u == g.accept) { - assert(v == g.acceptEod); - continue; - } - - assert(!built[g[u].index].empty()); - assert(read_count[g[u].index]); - - for (const auto &p : built[g[u].index]) { - out.push_back(p); - out.back().push_back(v); - - if (out.size() > MAX_PATHS) { - // All these paths should eventually end up at a sink, so - // we've blown past our limit. - DEBUG_PRINTF("path limit exceeded\n"); - return false; - } - } - - read_count[g[u].index]--; - if (!read_count[g[u].index]) { + read_count[g[v].index], g[v].index); + + if (v == g.start || v == g.startDs) { + out.push_back({v}); + continue; + } + + // The paths to v are the paths to v's predecessors, with v added to + // the end of each. + for (auto u : inv_adjacent_vertices_range(v, g)) { + // We have a stylized connection from start -> startDs, but we + // don't need anchored and unanchored versions of the same path. 
+ if (u == g.start && edge(g.startDs, v, g).second) { + continue; + } + + // Similarly, avoid the accept->acceptEod edge. + if (u == g.accept) { + assert(v == g.acceptEod); + continue; + } + + assert(!built[g[u].index].empty()); + assert(read_count[g[u].index]); + + for (const auto &p : built[g[u].index]) { + out.push_back(p); + out.back().push_back(v); + + if (out.size() > MAX_PATHS) { + // All these paths should eventually end up at a sink, so + // we've blown past our limit. + DEBUG_PRINTF("path limit exceeded\n"); + return false; + } + } + + read_count[g[u].index]--; + if (!read_count[g[u].index]) { DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); - built[g[u].index].clear(); - built[g[u].index].shrink_to_fit(); - } - } - } - - insert(&paths, paths.end(), built[NODE_ACCEPT]); - insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]); - - DEBUG_PRINTF("%zu paths generated\n", paths.size()); - - return paths.size() <= MAX_PATHS; -} - -static -bool hasLargeDegreeVertex(const NGHolder &g) { - for (const auto &v : vertices_range(g)) { - if (is_special(v, g)) { // specials can have large degree - continue; - } + built[g[u].index].clear(); + built[g[u].index].shrink_to_fit(); + } + } + } + + insert(&paths, paths.end(), built[NODE_ACCEPT]); + insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]); + + DEBUG_PRINTF("%zu paths generated\n", paths.size()); + + return paths.size() <= MAX_PATHS; +} + +static +bool hasLargeDegreeVertex(const NGHolder &g) { + for (const auto &v : vertices_range(g)) { + if (is_special(v, g)) { // specials can have large degree + continue; + } if (degree(v, g) > MAX_VERTEX_DEGREE) { DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index, degree(v, g)); - return true; - } - } - return false; -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dumpPath(const NGHolder &g, const Path &path) { - ostringstream oss; - for (const auto &v : path) { - switch (g[v].index) { - case NODE_START: - oss << "<start>"; - break; - 
case NODE_START_DOTSTAR: - oss << "<startDs>"; - break; - case NODE_ACCEPT: - oss << "<accept>"; - break; - case NODE_ACCEPT_EOD: - oss << "<acceptEod>"; - break; - default: - oss << describeClass(g[v].char_reach); - break; - } - } - return oss.str(); -} -#endif - -struct PathMask { - PathMask(const NGHolder &g, const Path &path) - : is_anchored(path.front() == g.start), - is_eod(path.back() == g.acceptEod) { - assert(path.size() >= 2); - mask.reserve(path.size() - 2); - for (const auto &v : path) { - if (is_special(v, g)) { - continue; - } - mask.push_back(g[v].char_reach); - } - - // Reports are attached to the second-to-last vertex. + return true; + } + } + return false; +} + +#if defined(DEBUG) || defined(DUMP_SUPPORT) +static UNUSED +string dumpPath(const NGHolder &g, const Path &path) { + ostringstream oss; + for (const auto &v : path) { + switch (g[v].index) { + case NODE_START: + oss << "<start>"; + break; + case NODE_START_DOTSTAR: + oss << "<startDs>"; + break; + case NODE_ACCEPT: + oss << "<accept>"; + break; + case NODE_ACCEPT_EOD: + oss << "<acceptEod>"; + break; + default: + oss << describeClass(g[v].char_reach); + break; + } + } + return oss.str(); +} +#endif + +struct PathMask { + PathMask(const NGHolder &g, const Path &path) + : is_anchored(path.front() == g.start), + is_eod(path.back() == g.acceptEod) { + assert(path.size() >= 2); + mask.reserve(path.size() - 2); + for (const auto &v : path) { + if (is_special(v, g)) { + continue; + } + mask.push_back(g[v].char_reach); + } + + // Reports are attached to the second-to-last vertex. 
NFAVertex u = *std::next(path.rbegin()); reports = g[u].reports; - assert(!reports.empty()); - } - - vector<CharReach> mask; + assert(!reports.empty()); + } + + vector<CharReach> mask; flat_set<ReportID> reports; - bool is_anchored; - bool is_eod; -}; - -bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, - const CompileContext &cc) { - if (!cc.grey.allowDecoratedLiteral) { - return false; - } - - if (!isAcyclic(g)) { - DEBUG_PRINTF("not acyclic\n"); - return false; - } - + bool is_anchored; + bool is_eod; +}; + +bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, + const CompileContext &cc) { + if (!cc.grey.allowDecoratedLiteral) { + return false; + } + + if (!isAcyclic(g)) { + DEBUG_PRINTF("not acyclic\n"); + return false; + } + if (!hasNarrowReachVertex(g)) { DEBUG_PRINTF("no narrow reach vertices\n"); return false; } - if (hasLargeDegreeVertex(g)) { - DEBUG_PRINTF("large degree\n"); - return false; - } - - vector<Path> paths; - if (!findPaths(g, paths)) { - DEBUG_PRINTF("couldn't split into a small number of paths\n"); - return false; - } - - assert(!paths.empty()); - assert(paths.size() <= MAX_PATHS); - - vector<PathMask> masks; - masks.reserve(paths.size()); - - for (const auto &path : paths) { - DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str()); - PathMask pm(g, path); - if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored, - pm.is_eod)) { - DEBUG_PRINTF("failed validation\n"); - return false; - } - masks.push_back(move(pm)); - } - - for (const auto &pm : masks) { - rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod); - } - - DEBUG_PRINTF("all ok, %zu masks added\n", masks.size()); - return true; -} - -} // namespace ue2 + if (hasLargeDegreeVertex(g)) { + DEBUG_PRINTF("large degree\n"); + return false; + } + + vector<Path> paths; + if (!findPaths(g, paths)) { + DEBUG_PRINTF("couldn't split into a small number of paths\n"); + return false; + } + + assert(!paths.empty()); + assert(paths.size() <= MAX_PATHS); + + 
vector<PathMask> masks; + masks.reserve(paths.size()); + + for (const auto &path : paths) { + DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str()); + PathMask pm(g, path); + if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored, + pm.is_eod)) { + DEBUG_PRINTF("failed validation\n"); + return false; + } + masks.push_back(move(pm)); + } + + for (const auto &pm : masks) { + rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod); + } + + DEBUG_PRINTF("all ok, %zu masks added\n", masks.size()); + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h index ff18c7d746..603679e809 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h @@ -1,52 +1,52 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Analysis for literals decorated by leading/trailing assertions or - * character classes. - */ - -#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H -#define NFAGRAPH_NG_LITERAL_DECORATED_H - -namespace ue2 { - -class RoseBuild; -class NGHolder; -struct CompileContext; - -/** - * \brief If the graph contains only a decorated literal, feed it to the Rose - * builder. Returns true on success. - */ -bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, - const CompileContext &cc); - -} // namespace ue2 - -#endif // NFAGRAPH_NG_LITERAL_DECORATED_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Analysis for literals decorated by leading/trailing assertions or + * character classes. + */ + +#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H +#define NFAGRAPH_NG_LITERAL_DECORATED_H + +namespace ue2 { + +class RoseBuild; +class NGHolder; +struct CompileContext; + +/** + * \brief If the graph contains only a decorated literal, feed it to the Rose + * builder. Returns true on success. 
+ */ +bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, + const CompileContext &cc); + +} // namespace ue2 + +#endif // NFAGRAPH_NG_LITERAL_DECORATED_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp index 4ce5dc153b..7d84aabe30 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp @@ -1,352 +1,352 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Build code for McClellan DFA. - */ -#include "ng_mcclellan.h" - -#include "grey.h" -#include "nfa/dfa_min.h" -#include "nfa/rdfa.h" -#include "ng_holder.h" -#include "ng_mcclellan_internal.h" -#include "ng_squash.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/bitfield.h" -#include "util/determinise.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Build code for McClellan DFA. + */ +#include "ng_mcclellan.h" + +#include "grey.h" +#include "nfa/dfa_min.h" +#include "nfa/rdfa.h" +#include "ng_holder.h" +#include "ng_mcclellan_internal.h" +#include "ng_squash.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/bitfield.h" +#include "util/determinise.h" #include "util/flat_containers.h" -#include "util/graph_range.h" +#include "util/graph_range.h" #include "util/hash.h" #include "util/hash_dynamic_bitset.h" -#include "util/make_unique.h" -#include "util/report_manager.h" - -#include <algorithm> -#include <functional> -#include <map> -#include <set> +#include "util/make_unique.h" +#include "util/report_manager.h" + +#include <algorithm> +#include <functional> +#include <map> +#include <set> #include <unordered_map> -#include <vector> - -#include <boost/dynamic_bitset.hpp> - -using namespace std; -using boost::dynamic_bitset; - -namespace ue2 { - -#define FINAL_DFA_STATE_LIMIT 16383 -#define DFA_STATE_LIMIT 1024 -#define NFA_STATE_LIMIT 256 - -u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets, - array<u16, ALPHABET_SIZE> &alpha, - array<u16, ALPHABET_SIZE> &unalpha) { - u16 i = 0; - for (; i < esets.size(); i++) { - const CharReach &cr = esets[i]; - -#ifdef DEBUG - DEBUG_PRINTF("eq set: "); - for (size_t s = cr.find_first(); s != CharReach::npos; - s = cr.find_next(s)) { - printf("%02hhx ", (u8)s); - } - printf("-> %u\n", 
i); -#endif - u16 leader = cr.find_first(); - for (size_t s = cr.find_first(); s != CharReach::npos; - s = cr.find_next(s)) { - alpha[s] = i; - } - unalpha[i] = leader; - } - - for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) { - alpha[j] = i; - unalpha[i] = j; - } - - return i; // alphabet size -} - -void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha, - array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) { - vector<CharReach> esets(1, CharReach::dot()); - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - const CharReach &cr = g[v].char_reach; - - for (size_t i = 0; i < esets.size(); i++) { - if (esets[i].count() == 1) { - continue; - } - - CharReach t = cr & esets[i]; - if (t.any() && t != esets[i]) { - esets[i] &= ~t; - esets.push_back(t); - } - } - } - // for deterministic compiles - sort(esets.begin(), esets.end()); - - assert(alphasize); - *alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); -} - -static -bool allExternalReports(const ReportManager &rm, - const flat_set<ReportID> &reports) { - for (auto report_id : reports) { - if (!isExternalReport(rm.getReport(report_id))) { - return false; - } - } - - return true; -} - -static -dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c, - const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) { - return dstates[c].next[alpha[s]]; -} - -void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state, - dstate_id_t *out_table) { - for (u32 i = 0; i < ALPHABET_SIZE; i++) { - out_table[i] = successor(n.states, state, n.alpha_remap, i); - } -} - -template<typename stateset> -static +#include <vector> + +#include <boost/dynamic_bitset.hpp> + +using namespace std; +using boost::dynamic_bitset; + +namespace ue2 { + +#define FINAL_DFA_STATE_LIMIT 16383 +#define DFA_STATE_LIMIT 1024 +#define NFA_STATE_LIMIT 256 + +u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets, + array<u16, ALPHABET_SIZE> &alpha, + array<u16, 
ALPHABET_SIZE> &unalpha) { + u16 i = 0; + for (; i < esets.size(); i++) { + const CharReach &cr = esets[i]; + +#ifdef DEBUG + DEBUG_PRINTF("eq set: "); + for (size_t s = cr.find_first(); s != CharReach::npos; + s = cr.find_next(s)) { + printf("%02hhx ", (u8)s); + } + printf("-> %u\n", i); +#endif + u16 leader = cr.find_first(); + for (size_t s = cr.find_first(); s != CharReach::npos; + s = cr.find_next(s)) { + alpha[s] = i; + } + unalpha[i] = leader; + } + + for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) { + alpha[j] = i; + unalpha[i] = j; + } + + return i; // alphabet size +} + +void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha, + array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) { + vector<CharReach> esets(1, CharReach::dot()); + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + const CharReach &cr = g[v].char_reach; + + for (size_t i = 0; i < esets.size(); i++) { + if (esets[i].count() == 1) { + continue; + } + + CharReach t = cr & esets[i]; + if (t.any() && t != esets[i]) { + esets[i] &= ~t; + esets.push_back(t); + } + } + } + // for deterministic compiles + sort(esets.begin(), esets.end()); + + assert(alphasize); + *alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); +} + +static +bool allExternalReports(const ReportManager &rm, + const flat_set<ReportID> &reports) { + for (auto report_id : reports) { + if (!isExternalReport(rm.getReport(report_id))) { + return false; + } + } + + return true; +} + +static +dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c, + const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) { + return dstates[c].next[alpha[s]]; +} + +void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state, + dstate_id_t *out_table) { + for (u32 i = 0; i < ALPHABET_SIZE; i++) { + out_table[i] = successor(n.states, state, n.alpha_remap, i); + } +} + +template<typename stateset> +static void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, - 
stateset *init, stateset *init_deep, - vector<NFAVertex> *v_by_index) { - for (auto v : vertices_range(g)) { + stateset *init, stateset *init_deep, + vector<NFAVertex> *v_by_index) { + for (auto v : vertices_range(g)) { if (contains(unused, v)) { - continue; - } - - u32 vert_id = g[v].index; - assert(vert_id < init->size()); - - if (is_any_start(v, g)) { - init->set(vert_id); - if (hasSelfLoop(v, g) || is_triggered(g)) { - DEBUG_PRINTF("setting %u\n", vert_id); - init_deep->set(vert_id); - } - } - } - - v_by_index->clear(); + continue; + } + + u32 vert_id = g[v].index; + assert(vert_id < init->size()); + + if (is_any_start(v, g)) { + init->set(vert_id); + if (hasSelfLoop(v, g) || is_triggered(g)) { + DEBUG_PRINTF("setting %u\n", vert_id); + init_deep->set(vert_id); + } + } + } + + v_by_index->clear(); v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); - - for (auto v : vertices_range(g)) { - u32 vert_id = g[v].index; + + for (auto v : vertices_range(g)) { + u32 vert_id = g[v].index; assert((*v_by_index)[vert_id] == NGHolder::null_vertex()); - (*v_by_index)[vert_id] = v; - } - - if (is_triggered(g)) { - *init_deep = *init; - } -} - -template<typename StateSet> + (*v_by_index)[vert_id] = v; + } + + if (is_triggered(g)) { + *init_deep = *init; + } +} + +template<typename StateSet> void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused, - StateSet *accept, StateSet *acceptEod) { - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + StateSet *accept, StateSet *acceptEod) { + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (contains(unused, v)) { continue; - } + } accept->set(g[v].index); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } if (contains(unused, v)) { continue; - } + } acceptEod->set(g[v].index); - } -} - -static -bool canPruneEdgesFromAccept(const 
ReportManager &rm, const NGHolder &g) { - bool seen = false; - u32 ekey = 0; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (is_special(v, g)) { - continue; - } - - for (auto report_id : g[v].reports) { - const Report &ir = rm.getReport(report_id); - - if (!isSimpleExhaustible(ir)) { - return false; - } - - if (!seen) { - seen = true; - ekey = ir.ekey; - } else if (ekey != ir.ekey) { - return false; - } - } - } - - /* need to check accept eod does not have any unseen reports as well */ - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (is_special(v, g)) { - continue; - } - - for (auto report_id : g[v].reports) { - const Report &ir = rm.getReport(report_id); - - if (!isSimpleExhaustible(ir)) { - return false; - } - - if (!seen) { - seen = true; - ekey = ir.ekey; - } else if (ekey != ir.ekey) { - return false; - } - } - } - - return true; -} - -static -bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers, - vector<CharReach>::const_reverse_iterator itb, - vector<CharReach>::const_reverse_iterator ite) { - for (const auto &trigger : all_triggers) { - vector<CharReach>::const_reverse_iterator it = itb; - vector<CharReach>::const_reverse_iterator kt = trigger.rbegin(); - for (; it != ite && kt != trigger.rend(); ++it, ++kt) { - if ((*it & *kt).none()) { - /* this trigger does not match the overhang, try next */ - goto try_next_trigger; - } - } - - return true; - try_next_trigger:; - } - - return false; /* no trigger matches the over hang */ -} - -static -bool triggerAllowed(const NGHolder &g, const NFAVertex v, - const vector<vector<CharReach> > &all_triggers, - const vector<CharReach> &trigger) { + } +} + +static +bool canPruneEdgesFromAccept(const ReportManager &rm, const NGHolder &g) { + bool seen = false; + u32 ekey = 0; + + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (is_special(v, g)) { + continue; + } + + for (auto report_id : g[v].reports) { + const Report &ir = 
rm.getReport(report_id); + + if (!isSimpleExhaustible(ir)) { + return false; + } + + if (!seen) { + seen = true; + ekey = ir.ekey; + } else if (ekey != ir.ekey) { + return false; + } + } + } + + /* need to check accept eod does not have any unseen reports as well */ + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (is_special(v, g)) { + continue; + } + + for (auto report_id : g[v].reports) { + const Report &ir = rm.getReport(report_id); + + if (!isSimpleExhaustible(ir)) { + return false; + } + + if (!seen) { + seen = true; + ekey = ir.ekey; + } else if (ekey != ir.ekey) { + return false; + } + } + } + + return true; +} + +static +bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers, + vector<CharReach>::const_reverse_iterator itb, + vector<CharReach>::const_reverse_iterator ite) { + for (const auto &trigger : all_triggers) { + vector<CharReach>::const_reverse_iterator it = itb; + vector<CharReach>::const_reverse_iterator kt = trigger.rbegin(); + for (; it != ite && kt != trigger.rend(); ++it, ++kt) { + if ((*it & *kt).none()) { + /* this trigger does not match the overhang, try next */ + goto try_next_trigger; + } + } + + return true; + try_next_trigger:; + } + + return false; /* no trigger matches the over hang */ +} + +static +bool triggerAllowed(const NGHolder &g, const NFAVertex v, + const vector<vector<CharReach> > &all_triggers, + const vector<CharReach> &trigger) { flat_set<NFAVertex> curr({v}); flat_set<NFAVertex> next; - - for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) { - next.clear(); - - for (auto u : curr) { - assert(u != g.startDs); /* triggered graphs should not use sds */ - if (u == g.start) { - if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) { - return true; - } - continue; - } - - if ((g[u].char_reach & *it).none()) { - continue; - } - insert(&next, inv_adjacent_vertices(u, g)); - } - - if (next.empty()) { - return false; - } - - next.swap(curr); - } - - return true; -} - + + for 
(auto it = trigger.rbegin(); it != trigger.rend(); ++it) { + next.clear(); + + for (auto u : curr) { + assert(u != g.startDs); /* triggered graphs should not use sds */ + if (u == g.start) { + if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) { + return true; + } + continue; + } + + if ((g[u].char_reach & *it).none()) { + continue; + } + insert(&next, inv_adjacent_vertices(u, g)); + } + + if (next.empty()) { + return false; + } + + next.swap(curr); + } + + return true; +} + void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, - bool single_trigger, - const vector<vector<CharReach>> &triggers, - dynamic_bitset<> *out) { - if (single_trigger) { - return; /* no live states can lead to new states */ - } - - for (auto v : vertices_range(g)) { + bool single_trigger, + const vector<vector<CharReach>> &triggers, + dynamic_bitset<> *out) { + if (single_trigger) { + return; /* no live states can lead to new states */ + } + + for (auto v : vertices_range(g)) { if (contains(unused, v)) { - continue; - } - for (const auto &trigger : triggers) { - if (triggerAllowed(g, v, triggers, trigger)) { + continue; + } + for (const auto &trigger : triggers) { + if (triggerAllowed(g, v, triggers, trigger)) { DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index); out->set(g[v].index); - break; - } - } - } - - assert(out->test(g[g.start].index)); -} - -namespace { - + break; + } + } + } + + assert(out->test(g[g.start].index)); +} + +namespace { + template<typename Automaton_Traits> class Automaton_Base { -public: +public: using StateSet = typename Automaton_Traits::StateSet; using StateMap = typename Automaton_Traits::StateMap; - + Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, bool single_trigger, const vector<vector<CharReach>> &triggers, bool prunable_in) @@ -362,117 +362,117 @@ public: prunable(prunable_in) { populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, unused, &accept, &acceptEod); - 
- start_anchored = DEAD_STATE + 1; - if (initDS == init) { - start_floating = start_anchored; - } else if (initDS.any()) { - start_floating = start_anchored + 1; - } else { - start_floating = DEAD_STATE; - } - - calculateAlphabet(graph, alpha, unalpha, &alphasize); - - for (const auto &sq : findSquashers(graph)) { - NFAVertex v = sq.first; - u32 vert_id = graph[v].index; - squash.set(vert_id); + + start_anchored = DEAD_STATE + 1; + if (initDS == init) { + start_floating = start_anchored; + } else if (initDS.any()) { + start_floating = start_anchored + 1; + } else { + start_floating = DEAD_STATE; + } + + calculateAlphabet(graph, alpha, unalpha, &alphasize); + + for (const auto &sq : findSquashers(graph)) { + NFAVertex v = sq.first; + u32 vert_id = graph[v].index; + squash.set(vert_id); squash_mask[vert_id] = Automaton_Traits::copy_states(std::move(sq.second), numStates); - } - - cr_by_index = populateCR(graph, v_by_index, alpha); - if (is_triggered(graph)) { + } + + cr_by_index = populateCR(graph, v_by_index, alpha); + if (is_triggered(graph)) { dynamic_bitset<> temp(numStates); markToppableStarts(graph, unused, single_trigger, triggers, &temp); toppable = Automaton_Traits::copy_states(std::move(temp), numStates); - } - } - -public: - void transition(const StateSet &in, StateSet *next) { - transition_graph(*this, v_by_index, in, next); - } - - const vector<StateSet> initial() { + } + } + +public: + void transition(const StateSet &in, StateSet *next) { + transition_graph(*this, v_by_index, in, next); + } + + const vector<StateSet> initial() { vector<StateSet> rv = {init}; - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(initDS); - } - return rv; - } - -private: - void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) { - StateSet acc = in & (eod ? 
acceptEod : accept); - for (size_t i = acc.find_first(); i != StateSet::npos; - i = acc.find_next(i)) { - NFAVertex v = v_by_index[i]; - DEBUG_PRINTF("marking report\n"); - const auto &my_reports = graph[v].reports; - rv.insert(my_reports.begin(), my_reports.end()); - } - } - -public: - void reports(const StateSet &in, flat_set<ReportID> &rv) { - reports_i(in, false, rv); - } - void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { - reports_i(in, true, rv); - } - - bool canPrune(const flat_set<ReportID> &test_reports) const { - if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) { - return false; - } - return allExternalReports(*rm, test_reports); - } - -private: - const ReportManager *rm; -public: - const NGHolder &graph; - u32 numStates; + if (start_floating != DEAD_STATE && start_floating != start_anchored) { + rv.push_back(initDS); + } + return rv; + } + +private: + void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) { + StateSet acc = in & (eod ? acceptEod : accept); + for (size_t i = acc.find_first(); i != StateSet::npos; + i = acc.find_next(i)) { + NFAVertex v = v_by_index[i]; + DEBUG_PRINTF("marking report\n"); + const auto &my_reports = graph[v].reports; + rv.insert(my_reports.begin(), my_reports.end()); + } + } + +public: + void reports(const StateSet &in, flat_set<ReportID> &rv) { + reports_i(in, false, rv); + } + void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { + reports_i(in, true, rv); + } + + bool canPrune(const flat_set<ReportID> &test_reports) const { + if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) { + return false; + } + return allExternalReports(*rm, test_reports); + } + +private: + const ReportManager *rm; +public: + const NGHolder &graph; + u32 numStates; const flat_set<NFAVertex> unused; - vector<NFAVertex> v_by_index; - vector<CharReach> cr_by_index; /* pre alpha'ed */ - StateSet init; - StateSet initDS; - StateSet squash; /* states which allow us to mask out other states */ - 
StateSet accept; - StateSet acceptEod; - StateSet toppable; /* states which are allowed to be on when a top arrives, - * triggered dfas only */ + vector<NFAVertex> v_by_index; + vector<CharReach> cr_by_index; /* pre alpha'ed */ + StateSet init; + StateSet initDS; + StateSet squash; /* states which allow us to mask out other states */ + StateSet accept; + StateSet acceptEod; + StateSet toppable; /* states which are allowed to be on when a top arrives, + * triggered dfas only */ StateSet dead; - map<u32, StateSet> squash_mask; - bool prunable; - array<u16, ALPHABET_SIZE> alpha; - array<u16, ALPHABET_SIZE> unalpha; - u16 alphasize; - - u16 start_anchored; - u16 start_floating; -}; - + map<u32, StateSet> squash_mask; + bool prunable; + array<u16, ALPHABET_SIZE> alpha; + array<u16, ALPHABET_SIZE> unalpha; + u16 alphasize; + + u16 start_anchored; + u16 start_floating; +}; + struct Big_Traits { using StateSet = dynamic_bitset<>; using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; - + static StateSet init_states(u32 num) { return StateSet(num); } - + static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) { assert(in.size() == num); return in; } }; - + class Automaton_Big : public Automaton_Base<Big_Traits> { public: Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, @@ -481,42 +481,42 @@ public: : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; - + struct Graph_Traits { using StateSet = bitfield<NFA_STATE_LIMIT>; using StateMap = unordered_map<StateSet, dstate_id_t>; - + static StateSet init_states(UNUSED u32 num) { assert(num <= NFA_STATE_LIMIT); return StateSet(); - } - + } + static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { StateSet out = init_states(num); - for (size_t i = in.find_first(); i != in.npos && i < out.size(); - i = in.find_next(i)) { - out.set(i); - } - return out; - } + for (size_t i = in.find_first(); i != in.npos && i < out.size(); + i = in.find_next(i)) { + 
out.set(i); + } + return out; + } }; - + class Automaton_Graph : public Automaton_Base<Graph_Traits> { -public: +public: Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, bool single_trigger, const vector<vector<CharReach>> &triggers, bool prunable_in) : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; - + } // namespace - + static bool startIsRedundant(const NGHolder &g) { set<NFAVertex> start; set<NFAVertex> startDs; - + insert(&start, adjacent_vertices(g.start, g)); insert(&startDs, adjacent_vertices(g.startDs, g)); @@ -527,42 +527,42 @@ flat_set<NFAVertex> getRedundantStarts(const NGHolder &g) { flat_set<NFAVertex> dead; if (startIsRedundant(g)) { dead.insert(g.start); - } + } if (proper_out_degree(g.startDs, g) == 0) { dead.insert(g.startDs); - } + } return dead; } - + unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, const ReportManager *rm, bool single_trigger, - const vector<vector<CharReach>> &triggers, - const Grey &grey, bool finalChance) { - if (!grey.allowMcClellan) { - return nullptr; - } - + const vector<vector<CharReach>> &triggers, + const Grey &grey, bool finalChance) { + if (!grey.allowMcClellan) { + return nullptr; + } + DEBUG_PRINTF("attempting to build %s mcclellan\n", to_string(graph.kind).c_str()); - assert(allMatchStatesHaveReports(graph)); - + assert(allMatchStatesHaveReports(graph)); + bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph); assert(rm || !has_managed_reports(graph)); if (!has_managed_reports(graph)) { - rm = nullptr; - } - - assert(triggers.empty() == !is_triggered(graph)); - - /* We must be getting desperate if it is an outfix, so use the final chance - * state limit logic */ - u32 state_limit - = (graph.kind == NFA_OUTFIX || finalChance) ? 
FINAL_DFA_STATE_LIMIT - : DFA_STATE_LIMIT; - - const u32 numStates = num_vertices(graph); - DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); - + rm = nullptr; + } + + assert(triggers.empty() == !is_triggered(graph)); + + /* We must be getting desperate if it is an outfix, so use the final chance + * state limit logic */ + u32 state_limit + = (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT + : DFA_STATE_LIMIT; + + const u32 numStates = num_vertices(graph); + DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); + if (numStates > FINAL_DFA_STATE_LIMIT) { DEBUG_PRINTF("rejecting nfa as too many vertices\n"); return nullptr; @@ -570,47 +570,47 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, auto rdfa = ue2::make_unique<raw_dfa>(graph.kind); - if (numStates <= NFA_STATE_LIMIT) { - /* Fast path. Automaton_Graph uses a bitfield internally to represent - * states and is quicker than Automaton_Big. */ + if (numStates <= NFA_STATE_LIMIT) { + /* Fast path. Automaton_Graph uses a bitfield internally to represent + * states and is quicker than Automaton_Big. */ Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); if (!determinise(n, rdfa->states, state_limit)) { - DEBUG_PRINTF("state limit exceeded\n"); - return nullptr; /* over state limit */ - } - - rdfa->start_anchored = n.start_anchored; - rdfa->start_floating = n.start_floating; - rdfa->alpha_size = n.alphasize; - rdfa->alpha_remap = n.alpha; - } else { - /* Slow path. Too many states to use Automaton_Graph. */ + DEBUG_PRINTF("state limit exceeded\n"); + return nullptr; /* over state limit */ + } + + rdfa->start_anchored = n.start_anchored; + rdfa->start_floating = n.start_floating; + rdfa->alpha_size = n.alphasize; + rdfa->alpha_remap = n.alpha; + } else { + /* Slow path. Too many states to use Automaton_Graph. 
*/ Automaton_Big n(rm, graph, single_trigger, triggers, prunable); if (!determinise(n, rdfa->states, state_limit)) { - DEBUG_PRINTF("state limit exceeded\n"); - return nullptr; /* over state limit */ - } - - rdfa->start_anchored = n.start_anchored; - rdfa->start_floating = n.start_floating; - rdfa->alpha_size = n.alphasize; - rdfa->alpha_remap = n.alpha; - } - - minimize_hopcroft(*rdfa, grey); - - DEBUG_PRINTF("after determinised into %zu states, building impl dfa " - "(a,f) = (%hu,%hu)\n", rdfa->states.size(), - rdfa->start_anchored, rdfa->start_floating); - - return rdfa; -} - -unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm, - const Grey &grey) { - assert(!is_triggered(g)); - vector<vector<CharReach>> triggers; - return buildMcClellan(g, rm, false, triggers, grey); -} - -} // namespace ue2 + DEBUG_PRINTF("state limit exceeded\n"); + return nullptr; /* over state limit */ + } + + rdfa->start_anchored = n.start_anchored; + rdfa->start_floating = n.start_floating; + rdfa->alpha_size = n.alphasize; + rdfa->alpha_remap = n.alpha; + } + + minimize_hopcroft(*rdfa, grey); + + DEBUG_PRINTF("after determinised into %zu states, building impl dfa " + "(a,f) = (%hu,%hu)\n", rdfa->states.size(), + rdfa->start_anchored, rdfa->start_floating); + + return rdfa; +} + +unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm, + const Grey &grey) { + assert(!is_triggered(g)); + vector<vector<CharReach>> triggers; + return buildMcClellan(g, rm, false, triggers, grey); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h index 1a4042ce66..8183a0d2e7 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h @@ -1,81 +1,81 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided 
that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Build code for McClellan DFA. - */ - -#ifndef NG_MCCLELLAN_H -#define NG_MCCLELLAN_H - -#include "ue2common.h" - -#include <memory> -#include <vector> - -namespace ue2 { - -class CharReach; -class NGHolder; -class ReportManager; -struct Grey; -struct raw_dfa; - -/** - * \brief Determinises an NFA Graph into a raw_dfa. - * - * \param g - * The NGHolder. - * \param rm - * A pointer to the ReportManager, if managed reports are used (e.g. - * for outfixes/suffixes). Otherwise nullptr. 
- * \param single_trigger - * True if it is known that the nfa will only ever be trigger once. - * \param triggers - * Representing when tops may arrive. Only used by NFA_INFIX and - * NFA_SUFFIX, should be empty for other types. - * \param grey - * Grey box object. - * \param finalChance - * Allows us to build bigger DFAs as the only alternative is an outfix. - * - * \return A raw_dfa, or nullptr on failure (state limit blown). - */ -std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, - const ReportManager *rm, bool single_trigger, - const std::vector<std::vector<CharReach>> &triggers, - const Grey &grey, bool finalChance = false); - -/** Convenience wrapper for non-triggered engines */ -std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, - const ReportManager *rm, - const Grey &grey); - -} // namespace ue2 - -#endif // NG_MCCLELLAN_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Build code for McClellan DFA. + */ + +#ifndef NG_MCCLELLAN_H +#define NG_MCCLELLAN_H + +#include "ue2common.h" + +#include <memory> +#include <vector> + +namespace ue2 { + +class CharReach; +class NGHolder; +class ReportManager; +struct Grey; +struct raw_dfa; + +/** + * \brief Determinises an NFA Graph into a raw_dfa. + * + * \param g + * The NGHolder. + * \param rm + * A pointer to the ReportManager, if managed reports are used (e.g. + * for outfixes/suffixes). Otherwise nullptr. + * \param single_trigger + * True if it is known that the nfa will only ever be trigger once. + * \param triggers + * Representing when tops may arrive. Only used by NFA_INFIX and + * NFA_SUFFIX, should be empty for other types. + * \param grey + * Grey box object. + * \param finalChance + * Allows us to build bigger DFAs as the only alternative is an outfix. + * + * \return A raw_dfa, or nullptr on failure (state limit blown). 
+ */ +std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, + const ReportManager *rm, bool single_trigger, + const std::vector<std::vector<CharReach>> &triggers, + const Grey &grey, bool finalChance = false); + +/** Convenience wrapper for non-triggered engines */ +std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, + const ReportManager *rm, + const Grey &grey); + +} // namespace ue2 + +#endif // NG_MCCLELLAN_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h index f069d7336f..bfe030b0aa 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h @@ -1,73 +1,73 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shared build code for DFAs (McClellan, Haig). - */ - -#ifndef NG_MCCLELLAN_INTERNAL_H -#define NG_MCCLELLAN_INTERNAL_H - -#include "ue2common.h" -#include "nfa/mcclellancompile.h" -#include "nfagraph/ng_holder.h" -#include "util/charreach.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shared build code for DFAs (McClellan, Haig). + */ + +#ifndef NG_MCCLELLAN_INTERNAL_H +#define NG_MCCLELLAN_INTERNAL_H + +#include "ue2common.h" +#include "nfa/mcclellancompile.h" +#include "nfagraph/ng_holder.h" +#include "util/charreach.h" +#include "util/graph_range.h" #include "util/flat_containers.h" - -#include <boost/dynamic_bitset.hpp> - -#include <map> -#include <vector> - -namespace ue2 { - -struct raw_dfa; - -/** Fills alpha, unalpha and returns alphabet size. */ -u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets, - std::array<u16, ALPHABET_SIZE> &alpha, - std::array<u16, ALPHABET_SIZE> &unalpha); - -/** \brief Calculates an alphabet remapping based on the symbols which the - * graph discriminates on. Throws in some special DFA symbols as well. */ -void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha, - std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize); - -void getFullTransitionFromState(const raw_dfa &n, u16 state, - u16 *out_table); - -/** produce a map of states on which it is valid to receive tops */ + +#include <boost/dynamic_bitset.hpp> + +#include <map> +#include <vector> + +namespace ue2 { + +struct raw_dfa; + +/** Fills alpha, unalpha and returns alphabet size. 
*/ +u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets, + std::array<u16, ALPHABET_SIZE> &alpha, + std::array<u16, ALPHABET_SIZE> &unalpha); + +/** \brief Calculates an alphabet remapping based on the symbols which the + * graph discriminates on. Throws in some special DFA symbols as well. */ +void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha, + std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize); + +void getFullTransitionFromState(const raw_dfa &n, u16 state, + u16 *out_table); + +/** produce a map of states on which it is valid to receive tops */ void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, - bool single_trigger, - const std::vector<std::vector<CharReach>> &triggers, - boost::dynamic_bitset<> *out); - + bool single_trigger, + const std::vector<std::vector<CharReach>> &triggers, + boost::dynamic_bitset<> *out); + /** * \brief Returns a set of start vertices that will not participate in an * implementation of this graph. 
These are either starts with no successors or @@ -75,75 +75,75 @@ void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, */ flat_set<NFAVertex> getRedundantStarts(const NGHolder &g); -template<typename autom> -void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId, - const typename autom::StateSet &in, - typename autom::StateSet *next) { - typedef typename autom::StateSet StateSet; - const NGHolder &graph = nfa.graph; +template<typename autom> +void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId, + const typename autom::StateSet &in, + typename autom::StateSet *next) { + typedef typename autom::StateSet StateSet; + const NGHolder &graph = nfa.graph; const auto &unused = nfa.unused; - const auto &alpha = nfa.alpha; - const StateSet &squash = nfa.squash; - const std::map<u32, StateSet> &squash_mask = nfa.squash_mask; - const std::vector<CharReach> &cr_by_index = nfa.cr_by_index; - - for (symbol_t s = 0; s < nfa.alphasize; s++) { - next[s].reset(); - } - - /* generate top transitions, false -> top = selfloop */ - bool top_allowed = is_triggered(graph); - - StateSet succ = nfa.dead; - for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { - NFAVertex u = vByStateId[i]; - - for (const auto &v : adjacent_vertices_range(u, graph)) { + const auto &alpha = nfa.alpha; + const StateSet &squash = nfa.squash; + const std::map<u32, StateSet> &squash_mask = nfa.squash_mask; + const std::vector<CharReach> &cr_by_index = nfa.cr_by_index; + + for (symbol_t s = 0; s < nfa.alphasize; s++) { + next[s].reset(); + } + + /* generate top transitions, false -> top = selfloop */ + bool top_allowed = is_triggered(graph); + + StateSet succ = nfa.dead; + for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { + NFAVertex u = vByStateId[i]; + + for (const auto &v : adjacent_vertices_range(u, graph)) { if (contains(unused, v)) { - continue; - } - succ.set(graph[v].index); - } - - if (top_allowed && 
!nfa.toppable.test(i)) { - /* we don't need to generate a top at this location as we are in - * an nfa state which cannot be on when a trigger arrives. */ - top_allowed = false; - } - } - - StateSet active_squash = succ & squash; - if (active_squash.any()) { - for (size_t j = active_squash.find_first(); j != active_squash.npos; - j = active_squash.find_next(j)) { - succ &= squash_mask.find(j)->second; - } - } - - for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) { - const CharReach &cr = cr_by_index[j]; - for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { - next[s].set(j); /* already alpha'ed */ - } - } - - next[alpha[TOP]] = in; - - if (top_allowed) { - /* we don't add in the anchored starts as the only case as the only - * time it is appropriate is if no characters have been consumed.*/ - next[alpha[TOP]] |= nfa.initDS; - - active_squash = next[alpha[TOP]] & squash; - if (active_squash.any()) { - for (size_t j = active_squash.find_first(); j != active_squash.npos; - j = active_squash.find_next(j)) { - next[alpha[TOP]] &= squash_mask.find(j)->second; - } - } - } -} - -} // namespace ue2 - -#endif + continue; + } + succ.set(graph[v].index); + } + + if (top_allowed && !nfa.toppable.test(i)) { + /* we don't need to generate a top at this location as we are in + * an nfa state which cannot be on when a trigger arrives. 
*/ + top_allowed = false; + } + } + + StateSet active_squash = succ & squash; + if (active_squash.any()) { + for (size_t j = active_squash.find_first(); j != active_squash.npos; + j = active_squash.find_next(j)) { + succ &= squash_mask.find(j)->second; + } + } + + for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) { + const CharReach &cr = cr_by_index[j]; + for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { + next[s].set(j); /* already alpha'ed */ + } + } + + next[alpha[TOP]] = in; + + if (top_allowed) { + /* we don't add in the anchored starts as the only case as the only + * time it is appropriate is if no characters have been consumed.*/ + next[alpha[TOP]] |= nfa.initDS; + + active_squash = next[alpha[TOP]] & squash; + if (active_squash.any()) { + for (size_t j = active_squash.find_first(); j != active_squash.npos; + j = active_squash.find_next(j)) { + next[alpha[TOP]] &= squash_mask.find(j)->second; + } + } + } +} + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp index 8aaaf99fde..b1c8b9b001 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp @@ -1,556 +1,556 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Miscellaneous optimisations. - * - * We sometimes see patterns of the form: - * - * /^.*<[^<]*foobaz/s - * - * This is bad for Rose as the escapes from the cyclic state are the same as - * the trigger. However, we can transform this into: - * - * /^.*<.*foobaz/s - * - * ... as the first dot star can eat all but the last '<'. - * - * Slightly more formally: - * - * Given a cyclic state v with character reachability v_cr and proper preds - * {p1 .. pn} with character reachability {p1_cr .. pn_cr}. - * - * let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr) - * - * v_cr can be replaced with v_cr' without changing the behaviour of the system - * if: - * - * for any given proper pred pi: if pi is set in the nfa then after consuming - * any symbol in v_cr', pi will still be set in the nfa and every successor of - * v is a successor of pi. 
- * - * The easiest way for this condition to be satisfied is for each proper pred - * pi to have all its preds all have an edge to a pred of pi with a character - * reachability containing v_cr'. There are, however, other ways to establish - * the condition holds. - * - * Note: a similar transformation can be applied in reverse, details left as an - * exercise for the interested reader. */ -#include "ng_misc_opt.h" - -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "util/charreach.h" -#include "util/container.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Miscellaneous optimisations. + * + * We sometimes see patterns of the form: + * + * /^.*<[^<]*foobaz/s + * + * This is bad for Rose as the escapes from the cyclic state are the same as + * the trigger. However, we can transform this into: + * + * /^.*<.*foobaz/s + * + * ... as the first dot star can eat all but the last '<'. + * + * Slightly more formally: + * + * Given a cyclic state v with character reachability v_cr and proper preds + * {p1 .. pn} with character reachability {p1_cr .. pn_cr}. + * + * let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr) + * + * v_cr can be replaced with v_cr' without changing the behaviour of the system + * if: + * + * for any given proper pred pi: if pi is set in the nfa then after consuming + * any symbol in v_cr', pi will still be set in the nfa and every successor of + * v is a successor of pi. + * + * The easiest way for this condition to be satisfied is for each proper pred + * pi to have all its preds all have an edge to a pred of pi with a character + * reachability containing v_cr'. There are, however, other ways to establish + * the condition holds. + * + * Note: a similar transformation can be applied in reverse, details left as an + * exercise for the interested reader. 
*/ +#include "ng_misc_opt.h" + +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" #include "util/flat_containers.h" -#include "ue2common.h" - +#include "ue2common.h" + #include <boost/dynamic_bitset.hpp> #include <boost/graph/depth_first_search.hpp> #include <boost/graph/filtered_graph.hpp> -#include <map> -#include <set> -#include <vector> - -using namespace std; +#include <map> +#include <set> +#include <vector> + +using namespace std; using boost::make_filtered_graph; - -namespace ue2 { - -static -void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering, - vector<NFAVertex> *cand) { - for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) { - NFAVertex v = *it; - - if (is_special(v, g) - || !hasSelfLoop(v, g) - || g[v].char_reach.all()) { - continue; - } - - // For `v' to be a candidate, its predecessors must all have the same - // successor set as `v'. - + +namespace ue2 { + +static +void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering, + vector<NFAVertex> *cand) { + for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) { + NFAVertex v = *it; + + if (is_special(v, g) + || !hasSelfLoop(v, g) + || g[v].char_reach.all()) { + continue; + } + + // For `v' to be a candidate, its predecessors must all have the same + // successor set as `v'. 
+ auto succ_v = succs(v, g); flat_set<NFAVertex> succ_u; - - for (auto u : inv_adjacent_vertices_range(v, g)) { - succ_u.clear(); - succ(g, u, &succ_u); - if (succ_v != succ_u) { - goto next_cand; - } - } + + for (auto u : inv_adjacent_vertices_range(v, g)) { + succ_u.clear(); + succ(g, u, &succ_u); + if (succ_v != succ_u) { + goto next_cand; + } + } DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); - cand->push_back(v); - next_cand:; - } -} - -static -void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering, - vector<NFAVertex> *cand) { - for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) { - NFAVertex v = *it; - - if (is_special(v, g) - || !hasSelfLoop(v, g) - || g[v].char_reach.all()) { - continue; - } - - // For `v' to be a candidate, its predecessors must all have the same - // successor set as `v'. - + cand->push_back(v); + next_cand:; + } +} + +static +void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering, + vector<NFAVertex> *cand) { + for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) { + NFAVertex v = *it; + + if (is_special(v, g) + || !hasSelfLoop(v, g) + || g[v].char_reach.all()) { + continue; + } + + // For `v' to be a candidate, its predecessors must all have the same + // successor set as `v'. + auto pred_v = preds(v, g); flat_set<NFAVertex> pred_u; - - for (auto u : adjacent_vertices_range(v, g)) { - pred_u.clear(); - pred(g, u, &pred_u); - if (pred_v != pred_u) { - goto next_cand; - } - } + + for (auto u : adjacent_vertices_range(v, g)) { + pred_u.clear(); + pred(g, u, &pred_u); + if (pred_v != pred_u) { + goto next_cand; + } + } DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); - cand->push_back(v); - next_cand:; - } -} - -/** Find the intersection of the reachability of the predecessors of \p v. 
*/ -static -void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { - add.setall(); - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u != v) { - add &= g[u].char_reach; - } - } -} - -/** Find the intersection of the reachability of the successors of \p v. */ -static -void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { - add.setall(); - for (auto u : adjacent_vertices_range(v, g)) { - if (u != v) { - add &= g[u].char_reach; - } - } -} - -/** The sustain set is used to show that once vertex p is on it stays on given - * the alphabet new_cr. Every vertex pp in the sustain set has the following - * properties: - * -# an edge to p - * -# enough edges to vertices in the sustain set to ensure that a vertex in - * the sustain set will be on after consuming a character. */ -static -set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p, - bool ignore_starts, const CharReach &new_cr) { + cand->push_back(v); + next_cand:; + } +} + +/** Find the intersection of the reachability of the predecessors of \p v. */ +static +void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { + add.setall(); + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u != v) { + add &= g[u].char_reach; + } + } +} + +/** Find the intersection of the reachability of the successors of \p v. */ +static +void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { + add.setall(); + for (auto u : adjacent_vertices_range(v, g)) { + if (u != v) { + add &= g[u].char_reach; + } + } +} + +/** The sustain set is used to show that once vertex p is on it stays on given + * the alphabet new_cr. Every vertex pp in the sustain set has the following + * properties: + * -# an edge to p + * -# enough edges to vertices in the sustain set to ensure that a vertex in + * the sustain set will be on after consuming a character. 
*/ +static +set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p, + bool ignore_starts, const CharReach &new_cr) { auto cand = preds<set<NFAVertex>>(p, g); - if (ignore_starts) { - cand.erase(g.startDs); - } - /* remove elements from cand until the sustain set property holds */ - bool changed; - do { - DEBUG_PRINTF("|cand| %zu\n", cand.size()); - changed = false; - set<NFAVertex>::const_iterator it = cand.begin(); - while (it != cand.end()) { - NFAVertex u = *it; - ++it; - CharReach sus_cr; - for (auto v : adjacent_vertices_range(u, g)) { - if (contains(cand, v)) { - sus_cr |= g[v].char_reach; - } - } - - if (!new_cr.isSubsetOf(sus_cr)) { - cand.erase(u); - changed = true; - } - } - } while (changed); - - /* Note: it may be possible to find a (larger) sustain set for a smaller - * new_cr */ - return cand; -} - -/** Finds the reverse version of the sustain set.. whatever that means. */ -static -set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p, - const CharReach &new_cr) { + if (ignore_starts) { + cand.erase(g.startDs); + } + /* remove elements from cand until the sustain set property holds */ + bool changed; + do { + DEBUG_PRINTF("|cand| %zu\n", cand.size()); + changed = false; + set<NFAVertex>::const_iterator it = cand.begin(); + while (it != cand.end()) { + NFAVertex u = *it; + ++it; + CharReach sus_cr; + for (auto v : adjacent_vertices_range(u, g)) { + if (contains(cand, v)) { + sus_cr |= g[v].char_reach; + } + } + + if (!new_cr.isSubsetOf(sus_cr)) { + cand.erase(u); + changed = true; + } + } + } while (changed); + + /* Note: it may be possible to find a (larger) sustain set for a smaller + * new_cr */ + return cand; +} + +/** Finds the reverse version of the sustain set.. whatever that means. 
*/ +static +set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p, + const CharReach &new_cr) { auto cand = succs<set<NFAVertex>>(p, g); - /* remove elements from cand until the sustain set property holds */ - bool changed; - do { - changed = false; - set<NFAVertex>::const_iterator it = cand.begin(); - while (it != cand.end()) { - NFAVertex u = *it; - ++it; - CharReach sus_cr; - for (auto v : inv_adjacent_vertices_range(u, g)) { - if (contains(cand, v)) { - sus_cr |= g[v].char_reach; - } - } - - if (!new_cr.isSubsetOf(sus_cr)) { - cand.erase(u); - changed = true; - } - } - } while (changed); - - /* Note: it may be possible to find a (larger) sustain set for a smaller - * new_cr */ - return cand; -} - -static -bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { + /* remove elements from cand until the sustain set property holds */ + bool changed; + do { + changed = false; + set<NFAVertex>::const_iterator it = cand.begin(); + while (it != cand.end()) { + NFAVertex u = *it; + ++it; + CharReach sus_cr; + for (auto v : inv_adjacent_vertices_range(u, g)) { + if (contains(cand, v)) { + sus_cr |= g[v].char_reach; + } + } + + if (!new_cr.isSubsetOf(sus_cr)) { + cand.erase(u); + changed = true; + } + } + } while (changed); + + /* Note: it may be possible to find a (larger) sustain set for a smaller + * new_cr */ + return cand; +} + +static +bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { DEBUG_PRINTF("considering vertex %zu\n", g[v].index); - const CharReach &v_cr = g[v].char_reach; - - CharReach add; - predCRIntersection(g, v, add); - - add |= v_cr; - - if (add == v_cr) { - DEBUG_PRINTF("no benefit\n"); - return false; - } - - DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count()); - - for (auto p : inv_adjacent_vertices_range(v, g)) { - if (p == v) { - continue; - } + const CharReach &v_cr = g[v].char_reach; + + CharReach add; + predCRIntersection(g, v, add); + + add |= v_cr; + + if (add == v_cr) { + 
DEBUG_PRINTF("no benefit\n"); + return false; + } + + DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count()); + + for (auto p : inv_adjacent_vertices_range(v, g)) { + if (p == v) { + continue; + } DEBUG_PRINTF("looking at pred %zu\n", g[p].index); - - bool ignore_sds = som; /* if we are tracking som, entries into a state - from sds are significant. */ - - set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add); - DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); - if (sustain.empty()) { - DEBUG_PRINTF("yawn\n"); - } - - for (auto pp : inv_adjacent_vertices_range(p, g)) { - /* we need to ensure that whenever pp sets p, that a member of the - sustain set is set. Note: p's cr may be not be a subset of - new_cr */ - CharReach sustain_cr; - for (auto pv : adjacent_vertices_range(pp, g)) { - if (contains(sustain, pv)) { - sustain_cr |= g[pv].char_reach; - } - } - if (!g[p].char_reach.isSubsetOf(sustain_cr)) { - DEBUG_PRINTF("unable to establish that preds are forced on\n"); - return false; - } - } - } - - /* the cr can be increased */ - g[v].char_reach = add; + + bool ignore_sds = som; /* if we are tracking som, entries into a state + from sds are significant. */ + + set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add); + DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); + if (sustain.empty()) { + DEBUG_PRINTF("yawn\n"); + } + + for (auto pp : inv_adjacent_vertices_range(p, g)) { + /* we need to ensure that whenever pp sets p, that a member of the + sustain set is set. 
Note: p's cr may be not be a subset of + new_cr */ + CharReach sustain_cr; + for (auto pv : adjacent_vertices_range(pp, g)) { + if (contains(sustain, pv)) { + sustain_cr |= g[pv].char_reach; + } + } + if (!g[p].char_reach.isSubsetOf(sustain_cr)) { + DEBUG_PRINTF("unable to establish that preds are forced on\n"); + return false; + } + } + } + + /* the cr can be increased */ + g[v].char_reach = add; DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); - return true; -} - -static -bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { + return true; +} + +static +bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { DEBUG_PRINTF("considering vertex %zu\n", g[v].index); - const CharReach &v_cr = g[v].char_reach; - - CharReach add; - succCRIntersection(g, v, add); - - add |= v_cr; - - if (add == v_cr) { - DEBUG_PRINTF("no benefit\n"); - return false; - } - - DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count()); - - for (auto p : adjacent_vertices_range(v, g)) { - if (p == v) { - continue; - } + const CharReach &v_cr = g[v].char_reach; + + CharReach add; + succCRIntersection(g, v, add); + + add |= v_cr; + + if (add == v_cr) { + DEBUG_PRINTF("no benefit\n"); + return false; + } + + DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count()); + + for (auto p : adjacent_vertices_range(v, g)) { + if (p == v) { + continue; + } DEBUG_PRINTF("looking at succ %zu\n", g[p].index); - - set<NFAVertex> sustain = findSustainSet_rev(g, p, add); - DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); - if (sustain.empty()) { - DEBUG_PRINTF("yawn\n"); - } - - for (auto pp : adjacent_vertices_range(p, g)) { - /* we need to ensure something - see fwd ver */ - CharReach sustain_cr; - for (auto pv : inv_adjacent_vertices_range(pp, g)) { - if (contains(sustain, pv)) { - sustain_cr |= g[pv].char_reach; - } - } - if (!g[p].char_reach.isSubsetOf(sustain_cr)) { - DEBUG_PRINTF("unable to establish that succs are thingy\n"); - return false; - } - } - } - 
- /* the cr can be increased */ - g[v].char_reach = add; + + set<NFAVertex> sustain = findSustainSet_rev(g, p, add); + DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); + if (sustain.empty()) { + DEBUG_PRINTF("yawn\n"); + } + + for (auto pp : adjacent_vertices_range(p, g)) { + /* we need to ensure something - see fwd ver */ + CharReach sustain_cr; + for (auto pv : inv_adjacent_vertices_range(pp, g)) { + if (contains(sustain, pv)) { + sustain_cr |= g[pv].char_reach; + } + } + if (!g[p].char_reach.isSubsetOf(sustain_cr)) { + DEBUG_PRINTF("unable to establish that succs are thingy\n"); + return false; + } + } + } + + /* the cr can be increased */ + g[v].char_reach = add; DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); - return true; -} - -static -bool enlargeCyclicCR(NGHolder &g, som_type som, - const vector<NFAVertex> &ordering) { - DEBUG_PRINTF("hello\n"); - - vector<NFAVertex> candidates; - findCandidates(g, ordering, &candidates); - - bool rv = false; - for (auto v : candidates) { - rv |= enlargeCyclicVertex(g, som, v); - } - - return rv; -} - -static -bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) { - DEBUG_PRINTF("olleh\n"); - - vector<NFAVertex> candidates; - findCandidates_rev(g, ordering, &candidates); - - bool rv = false; - for (auto v : candidates) { - rv |= enlargeCyclicVertex_rev(g, v); - } - - return rv; -} - -bool improveGraph(NGHolder &g, som_type som) { - /* use a topo ordering so that we can get chains of cyclic states - * done in one sweep */ - - const vector<NFAVertex> ordering = getTopoOrdering(g); - - return enlargeCyclicCR(g, som, ordering) - | enlargeCyclicCR_rev(g, ordering); -} - -/** finds a smaller reachability for a state by the reverse transformation of - * enlargeCyclicCR. 
*/ -CharReach reduced_cr(NFAVertex v, const NGHolder &g, - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) { + return true; +} + +static +bool enlargeCyclicCR(NGHolder &g, som_type som, + const vector<NFAVertex> &ordering) { + DEBUG_PRINTF("hello\n"); + + vector<NFAVertex> candidates; + findCandidates(g, ordering, &candidates); + + bool rv = false; + for (auto v : candidates) { + rv |= enlargeCyclicVertex(g, som, v); + } + + return rv; +} + +static +bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) { + DEBUG_PRINTF("olleh\n"); + + vector<NFAVertex> candidates; + findCandidates_rev(g, ordering, &candidates); + + bool rv = false; + for (auto v : candidates) { + rv |= enlargeCyclicVertex_rev(g, v); + } + + return rv; +} + +bool improveGraph(NGHolder &g, som_type som) { + /* use a topo ordering so that we can get chains of cyclic states + * done in one sweep */ + + const vector<NFAVertex> ordering = getTopoOrdering(g); + + return enlargeCyclicCR(g, som, ordering) + | enlargeCyclicCR_rev(g, ordering); +} + +/** finds a smaller reachability for a state by the reverse transformation of + * enlargeCyclicCR. 
*/ +CharReach reduced_cr(NFAVertex v, const NGHolder &g, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) { DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index); - CharReach v_cr = g[v].char_reach; - if (proper_in_degree(v, g) != 1) { - return v_cr; - } - - NFAVertex pred = getSoleSourceVertex(g, v); - assert(pred); - - /* require pred to be fed by one vertex OR (start + startDS) */ - NFAVertex predpred; - size_t idp = in_degree(pred, g); - if (hasSelfLoop(pred, g)) { - return v_cr; /* not cliche */ - } else if (idp == 1) { - predpred = getSoleSourceVertex(g, pred); - } else if (idp == 2 - && edge(g.start, pred, g).second - && edge(g.startDs, pred, g).second) { - predpred = g.startDs; - } else { - return v_cr; /* not cliche */ - } - - assert(predpred); - - /* require predpred to be cyclic and its cr to be a superset of - pred and v */ - if (!hasSelfLoop(predpred, g)) { - return v_cr; /* not cliche */ - } - - if (contains(br_cyclic, predpred) - && !br_cyclic.at(predpred).unbounded()) { - return v_cr; /* fake cyclic */ - } - - const CharReach &p_cr = g[pred].char_reach; - const CharReach &pp_cr = g[predpred].char_reach; - if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) { - return v_cr; /* not cliche */ - } - - DEBUG_PRINTF("confirming [x]* prop\n"); - /* we require all of v succs to be succ of p */ - set<NFAVertex> v_succ; - insert(&v_succ, adjacent_vertices(v, g)); - set<NFAVertex> p_succ; - insert(&p_succ, adjacent_vertices(pred, g)); - - if (!is_subset_of(v_succ, p_succ)) { - DEBUG_PRINTF("fail\n"); - return v_cr; /* not cliche */ - } - - if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) { - /* need to check that reports of v are a subset of p's */ - if (!is_subset_of(g[v].reports, - g[pred].reports)) { - DEBUG_PRINTF("fail - reports not subset\n"); - return v_cr; /* not cliche */ - } - } - - DEBUG_PRINTF("woot success\n"); - v_cr &= ~p_cr; - return v_cr; -} - -vector<CharReach> reduced_cr(const NGHolder &g, - const map<NFAVertex, 
BoundedRepeatSummary> &br_cyclic) { - assert(hasCorrectlyNumberedVertices(g)); - vector<CharReach> refined_cr(num_vertices(g), CharReach()); - - for (auto v : vertices_range(g)) { - u32 v_idx = g[v].index; - refined_cr[v_idx] = reduced_cr(v, g, br_cyclic); - } - - return refined_cr; -} - -static -bool anyOutSpecial(NFAVertex v, const NGHolder &g) { - for (auto w : adjacent_vertices_range(v, g)) { - if (is_special(w, g) && w != v) { - return true; - } - } - return false; -} - -bool mergeCyclicDotStars(NGHolder &g) { - set<NFAVertex> verticesToRemove; - set<NFAEdge> edgesToRemove; - - // avoid graphs where startDs is not a free spirit - if (out_degree(g.startDs, g) > 1) { - return false; - } - - // check if any of the connected vertices are dots - for (auto v : adjacent_vertices_range(g.start, g)) { - if (is_special(v, g)) { - continue; - } - const CharReach &cr = g[v].char_reach; - - // if this is a cyclic dot - if (cr.all() && edge(v, v, g).second) { - // prevent insane graphs - if (anyOutSpecial(v, g)) { - continue; - } - // we don't know if we're going to remove this vertex yet - vector<NFAEdge> deadEdges; - - // check if all adjacent vertices have edges from start - for (const auto &e : out_edges_range(v, g)) { - NFAVertex t = target(e, g); - // skip self - if (t == v) { - continue; - } - // skip vertices that don't have edges from start - if (!edge(g.start, t, g).second) { - continue; - } - // add an edge from startDs to this vertex - add_edge_if_not_present(g.startDs, t, g); - - // mark this edge for removal - deadEdges.push_back(e); - } - // if the number of edges to be removed equals out degree, vertex - // needs to be removed; else, only remove the edges - if (deadEdges.size() == proper_out_degree(v, g)) { - verticesToRemove.insert(v); - } else { - edgesToRemove.insert(deadEdges.begin(), deadEdges.end()); - } - } - } - - if (verticesToRemove.empty() && edgesToRemove.empty()) { - return false; - } - - DEBUG_PRINTF("removing %zu edges and %zu vertices\n", 
edgesToRemove.size(), - verticesToRemove.size()); - remove_edges(edgesToRemove, g); - remove_vertices(verticesToRemove, g); - /* some predecessors to the cyclic vertices may no longer be useful (no out - * edges), so we can remove them */ - pruneUseless(g); - return true; -} - + CharReach v_cr = g[v].char_reach; + if (proper_in_degree(v, g) != 1) { + return v_cr; + } + + NFAVertex pred = getSoleSourceVertex(g, v); + assert(pred); + + /* require pred to be fed by one vertex OR (start + startDS) */ + NFAVertex predpred; + size_t idp = in_degree(pred, g); + if (hasSelfLoop(pred, g)) { + return v_cr; /* not cliche */ + } else if (idp == 1) { + predpred = getSoleSourceVertex(g, pred); + } else if (idp == 2 + && edge(g.start, pred, g).second + && edge(g.startDs, pred, g).second) { + predpred = g.startDs; + } else { + return v_cr; /* not cliche */ + } + + assert(predpred); + + /* require predpred to be cyclic and its cr to be a superset of + pred and v */ + if (!hasSelfLoop(predpred, g)) { + return v_cr; /* not cliche */ + } + + if (contains(br_cyclic, predpred) + && !br_cyclic.at(predpred).unbounded()) { + return v_cr; /* fake cyclic */ + } + + const CharReach &p_cr = g[pred].char_reach; + const CharReach &pp_cr = g[predpred].char_reach; + if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) { + return v_cr; /* not cliche */ + } + + DEBUG_PRINTF("confirming [x]* prop\n"); + /* we require all of v succs to be succ of p */ + set<NFAVertex> v_succ; + insert(&v_succ, adjacent_vertices(v, g)); + set<NFAVertex> p_succ; + insert(&p_succ, adjacent_vertices(pred, g)); + + if (!is_subset_of(v_succ, p_succ)) { + DEBUG_PRINTF("fail\n"); + return v_cr; /* not cliche */ + } + + if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) { + /* need to check that reports of v are a subset of p's */ + if (!is_subset_of(g[v].reports, + g[pred].reports)) { + DEBUG_PRINTF("fail - reports not subset\n"); + return v_cr; /* not cliche */ + } + } + + DEBUG_PRINTF("woot success\n"); + 
v_cr &= ~p_cr; + return v_cr; +} + +vector<CharReach> reduced_cr(const NGHolder &g, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) { + assert(hasCorrectlyNumberedVertices(g)); + vector<CharReach> refined_cr(num_vertices(g), CharReach()); + + for (auto v : vertices_range(g)) { + u32 v_idx = g[v].index; + refined_cr[v_idx] = reduced_cr(v, g, br_cyclic); + } + + return refined_cr; +} + +static +bool anyOutSpecial(NFAVertex v, const NGHolder &g) { + for (auto w : adjacent_vertices_range(v, g)) { + if (is_special(w, g) && w != v) { + return true; + } + } + return false; +} + +bool mergeCyclicDotStars(NGHolder &g) { + set<NFAVertex> verticesToRemove; + set<NFAEdge> edgesToRemove; + + // avoid graphs where startDs is not a free spirit + if (out_degree(g.startDs, g) > 1) { + return false; + } + + // check if any of the connected vertices are dots + for (auto v : adjacent_vertices_range(g.start, g)) { + if (is_special(v, g)) { + continue; + } + const CharReach &cr = g[v].char_reach; + + // if this is a cyclic dot + if (cr.all() && edge(v, v, g).second) { + // prevent insane graphs + if (anyOutSpecial(v, g)) { + continue; + } + // we don't know if we're going to remove this vertex yet + vector<NFAEdge> deadEdges; + + // check if all adjacent vertices have edges from start + for (const auto &e : out_edges_range(v, g)) { + NFAVertex t = target(e, g); + // skip self + if (t == v) { + continue; + } + // skip vertices that don't have edges from start + if (!edge(g.start, t, g).second) { + continue; + } + // add an edge from startDs to this vertex + add_edge_if_not_present(g.startDs, t, g); + + // mark this edge for removal + deadEdges.push_back(e); + } + // if the number of edges to be removed equals out degree, vertex + // needs to be removed; else, only remove the edges + if (deadEdges.size() == proper_out_degree(v, g)) { + verticesToRemove.insert(v); + } else { + edgesToRemove.insert(deadEdges.begin(), deadEdges.end()); + } + } + } + + if (verticesToRemove.empty() && 
edgesToRemove.empty()) { + return false; + } + + DEBUG_PRINTF("removing %zu edges and %zu vertices\n", edgesToRemove.size(), + verticesToRemove.size()); + remove_edges(edgesToRemove, g); + remove_vertices(verticesToRemove, g); + /* some predecessors to the cyclic vertices may no longer be useful (no out + * edges), so we can remove them */ + pruneUseless(g); + return true; +} + struct PrunePathsInfo { explicit PrunePathsInfo(const NGHolder &g) : color_map(make_small_color_map(g)), bad(num_vertices(g)) {} @@ -725,4 +725,4 @@ bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { return changed; } -} // namespace ue2 +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h index 5ed089dc05..70bc7741cb 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h @@ -1,77 +1,77 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Miscellaneous optimisations. - */ - -#ifndef NG_MISC_OPT_H -#define NG_MISC_OPT_H - -#include <map> -#include <vector> - -#include "ng_holder.h" -#include "som/som.h" -#include "util/depth.h" - -namespace ue2 { - -/** Small structure describing the bounds on a repeat. */ -struct BoundedRepeatSummary { - BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {} - BoundedRepeatSummary(const depth &min_in, const depth &max_in) - : repeatMin(min_in), repeatMax(max_in) { - assert(repeatMin <= repeatMax); - assert(repeatMax.is_reachable()); - } - bool unbounded(void) const { return repeatMax.is_infinite(); } - - depth repeatMin; //!< minimum repeat bound. - depth repeatMax; //!< maximum repeat bound. -}; - -/* returns true if anything changed */ -bool improveGraph(NGHolder &g, som_type som); - -/** Sometimes the reach of a vertex is greater than it needs to be to reduce + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Miscellaneous optimisations. + */ + +#ifndef NG_MISC_OPT_H +#define NG_MISC_OPT_H + +#include <map> +#include <vector> + +#include "ng_holder.h" +#include "som/som.h" +#include "util/depth.h" + +namespace ue2 { + +/** Small structure describing the bounds on a repeat. */ +struct BoundedRepeatSummary { + BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {} + BoundedRepeatSummary(const depth &min_in, const depth &max_in) + : repeatMin(min_in), repeatMax(max_in) { + assert(repeatMin <= repeatMax); + assert(repeatMax.is_reachable()); + } + bool unbounded(void) const { return repeatMax.is_infinite(); } + + depth repeatMin; //!< minimum repeat bound. + depth repeatMax; //!< maximum repeat bound. +}; + +/* returns true if anything changed */ +bool improveGraph(NGHolder &g, som_type som); + +/** Sometimes the reach of a vertex is greater than it needs to be to reduce * stop chars for the benefit of the rest of our code base (accel, etc). 
In * these circumstances, we can treat the reach as the smaller one as - * the graphs are equivalent. */ -CharReach reduced_cr(NFAVertex v, const NGHolder &g, - const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic); - -std::vector<CharReach> reduced_cr(const NGHolder &g, - const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic); - -/** Remove cyclic stars connected to start */ -bool mergeCyclicDotStars(NGHolder &g); - + * the graphs are equivalent. */ +CharReach reduced_cr(NFAVertex v, const NGHolder &g, + const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic); + +std::vector<CharReach> reduced_cr(const NGHolder &g, + const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic); + +/** Remove cyclic stars connected to start */ +bool mergeCyclicDotStars(NGHolder &g); + /** * Given a cyclic state 'c' with a broad reach and a later state 'v' that is * only reachable if c is still on, then any edges to a successor of a direct @@ -79,6 +79,6 @@ bool mergeCyclicDotStars(NGHolder &g); */ bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som); -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp index 780a319f5d..b81b397bd2 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp @@ -1,220 +1,220 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Network flow (min flow, max cut) algorithms. - */ -#include "ng_netflow.h" - -#include "ng_holder.h" -#include "ng_literal_analysis.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Network flow (min flow, max cut) algorithms. + */ +#include "ng_netflow.h" + +#include "ng_holder.h" +#include "ng_literal_analysis.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" - -#include <algorithm> -#include <boost/graph/boykov_kolmogorov_max_flow.hpp> - -using namespace std; -using boost::default_color_type; - -namespace ue2 { - -static -void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge, - NFAEdge fwd, NFAEdge rev) { - u32 fwdIndex = g[fwd].index; - u32 revIndex = g[rev].index; - - // Make sure our vector is big enough. - size_t sz = max(fwdIndex, revIndex) + 1; - if (reverseEdge.size() < sz) { - reverseEdge.resize(sz); - } - - // Add entries to list. - reverseEdge[fwdIndex] = rev; - reverseEdge[revIndex] = fwd; -} - -/** Add temporary reverse edges to the graph \p g, as they are required by the - * BGL's boykov_kolmogorov_max_flow algorithm. */ -static -void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge, - vector<u64a> &capacityMap) { - // We're probably going to need space for 2x edge count. 
- const size_t numEdges = num_edges(g); - reverseEdge.reserve(numEdges * 2); - capacityMap.reserve(numEdges * 2); - - // To avoid walking the graph for _ages_, we build a temporary map of all - // edges indexed by vertex pair for existence checks. - map<pair<size_t, size_t>, NFAEdge> allEdges; - for (const auto &e : edges_range(g)) { - NFAVertex u = source(e, g), v = target(e, g); - size_t uidx = g[u].index, vidx = g[v].index; - allEdges[make_pair(uidx, vidx)] = e; - } - - // Now we walk over all edges and add their reverse edges to the reverseEdge - // vector, also adding them to the graph when they don't already exist. - for (const auto &m : allEdges) { - const NFAEdge &fwd = m.second; - const size_t uidx = m.first.first, vidx = m.first.second; - - auto it = allEdges.find(make_pair(vidx, uidx)); - if (it == allEdges.end()) { - // No reverse edge, add one. - NFAVertex u = source(fwd, g), v = target(fwd, g); + +#include <algorithm> +#include <boost/graph/boykov_kolmogorov_max_flow.hpp> + +using namespace std; +using boost::default_color_type; + +namespace ue2 { + +static +void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge, + NFAEdge fwd, NFAEdge rev) { + u32 fwdIndex = g[fwd].index; + u32 revIndex = g[rev].index; + + // Make sure our vector is big enough. + size_t sz = max(fwdIndex, revIndex) + 1; + if (reverseEdge.size() < sz) { + reverseEdge.resize(sz); + } + + // Add entries to list. + reverseEdge[fwdIndex] = rev; + reverseEdge[revIndex] = fwd; +} + +/** Add temporary reverse edges to the graph \p g, as they are required by the + * BGL's boykov_kolmogorov_max_flow algorithm. */ +static +void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge, + vector<u64a> &capacityMap) { + // We're probably going to need space for 2x edge count. 
+ const size_t numEdges = num_edges(g); + reverseEdge.reserve(numEdges * 2); + capacityMap.reserve(numEdges * 2); + + // To avoid walking the graph for _ages_, we build a temporary map of all + // edges indexed by vertex pair for existence checks. + map<pair<size_t, size_t>, NFAEdge> allEdges; + for (const auto &e : edges_range(g)) { + NFAVertex u = source(e, g), v = target(e, g); + size_t uidx = g[u].index, vidx = g[v].index; + allEdges[make_pair(uidx, vidx)] = e; + } + + // Now we walk over all edges and add their reverse edges to the reverseEdge + // vector, also adding them to the graph when they don't already exist. + for (const auto &m : allEdges) { + const NFAEdge &fwd = m.second; + const size_t uidx = m.first.first, vidx = m.first.second; + + auto it = allEdges.find(make_pair(vidx, uidx)); + if (it == allEdges.end()) { + // No reverse edge, add one. + NFAVertex u = source(fwd, g), v = target(fwd, g); NFAEdge rev = add_edge(v, u, g); - it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; - // Add to capacity map. - u32 revIndex = g[rev].index; - if (capacityMap.size() < revIndex + 1) { - capacityMap.resize(revIndex + 1); - } - capacityMap[revIndex] = 0; - } - - addReverseEdge(g, reverseEdge, fwd, it->second); - } -} - -/** Remove all edges with indices >= \p idx. */ -static -void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) { - remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g); - capacityMap.resize(idx); + it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; + // Add to capacity map. + u32 revIndex = g[rev].index; + if (capacityMap.size() < revIndex + 1) { + capacityMap.resize(revIndex + 1); + } + capacityMap[revIndex] = 0; + } + + addReverseEdge(g, reverseEdge, fwd, it->second); + } +} + +/** Remove all edges with indices >= \p idx. 
*/ +static +void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) { + remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g); + capacityMap.resize(idx); renumber_edges(g); -} - -/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and - * colour map (from which we can find the min cut). */ -static -u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in, +} + +/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and + * colour map (from which we can find the min cut). */ +static +u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in, decltype(make_small_color_map(NGHolder())) &colorMap) { - vector<u64a> capacityMap = capacityMap_in; - NFAVertex src = h.start; - NFAVertex sink = h.acceptEod; - - // netflow relies on these stylised edges, as all starts should be covered - // by our source and all accepts by our sink. - assert(edge(h.start, h.startDs, h).second); - assert(edge(h.accept, h.acceptEod, h).second); - - // The boykov_kolmogorov_max_flow algorithm requires us to have reverse - // edges for all edges in the graph, so we create them here (and remove - // them after the call). - const unsigned int numRealEdges = num_edges(h); - vector<NFAEdge> reverseEdges; - addReverseEdges(h, reverseEdges, capacityMap); - - const unsigned int numTotalEdges = num_edges(h); - const unsigned int numVertices = num_vertices(h); - - vector<u64a> edgeResiduals(numTotalEdges); - vector<NFAEdge> predecessors(numVertices); - vector<s32> distances(numVertices); - + vector<u64a> capacityMap = capacityMap_in; + NFAVertex src = h.start; + NFAVertex sink = h.acceptEod; + + // netflow relies on these stylised edges, as all starts should be covered + // by our source and all accepts by our sink. 
+ assert(edge(h.start, h.startDs, h).second); + assert(edge(h.accept, h.acceptEod, h).second); + + // The boykov_kolmogorov_max_flow algorithm requires us to have reverse + // edges for all edges in the graph, so we create them here (and remove + // them after the call). + const unsigned int numRealEdges = num_edges(h); + vector<NFAEdge> reverseEdges; + addReverseEdges(h, reverseEdges, capacityMap); + + const unsigned int numTotalEdges = num_edges(h); + const unsigned int numVertices = num_vertices(h); + + vector<u64a> edgeResiduals(numTotalEdges); + vector<NFAEdge> predecessors(numVertices); + vector<s32> distances(numVertices); + auto v_index_map = get(vertex_index, h); auto e_index_map = get(edge_index, h); - + u64a flow = boykov_kolmogorov_max_flow(h, - make_iterator_property_map(capacityMap.begin(), e_index_map), - make_iterator_property_map(edgeResiduals.begin(), e_index_map), - make_iterator_property_map(reverseEdges.begin(), e_index_map), - make_iterator_property_map(predecessors.begin(), v_index_map), + make_iterator_property_map(capacityMap.begin(), e_index_map), + make_iterator_property_map(edgeResiduals.begin(), e_index_map), + make_iterator_property_map(reverseEdges.begin(), e_index_map), + make_iterator_property_map(predecessors.begin(), v_index_map), colorMap, - make_iterator_property_map(distances.begin(), v_index_map), - v_index_map, - src, sink); - - // Remove reverse edges from graph. - removeEdgesFromIndex(h, capacityMap, numRealEdges); + make_iterator_property_map(distances.begin(), v_index_map), + v_index_map, + src, sink); + + // Remove reverse edges from graph. + removeEdgesFromIndex(h, capacityMap, numRealEdges); assert(num_edges(h) == numRealEdges); - - DEBUG_PRINTF("flow = %llu\n", flow); - return flow; -} - -/** Returns a min cut (in \p cutset) for the graph in \p h. 
*/ -vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) { - assert(hasCorrectlyNumberedEdges(h)); - assert(hasCorrectlyNumberedVertices(h)); - + + DEBUG_PRINTF("flow = %llu\n", flow); + return flow; +} + +/** Returns a min cut (in \p cutset) for the graph in \p h. */ +vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) { + assert(hasCorrectlyNumberedEdges(h)); + assert(hasCorrectlyNumberedVertices(h)); + auto colors = make_small_color_map(h); u64a flow = getMaxFlow(h, scores, colors); - - vector<NFAEdge> picked_white; - vector<NFAEdge> picked_black; - u64a observed_black_flow = 0; - u64a observed_white_flow = 0; - - for (const auto &e : edges_range(h)) { - NFAVertex from = source(e, h); - NFAVertex to = target(e, h); - u64a ec = scores[h[e].index]; - if (ec == 0) { - continue; // skips, among other things, reverse edges - } - + + vector<NFAEdge> picked_white; + vector<NFAEdge> picked_black; + u64a observed_black_flow = 0; + u64a observed_white_flow = 0; + + for (const auto &e : edges_range(h)) { + NFAVertex from = source(e, h); + NFAVertex to = target(e, h); + u64a ec = scores[h[e].index]; + if (ec == 0) { + continue; // skips, among other things, reverse edges + } + auto fromColor = get(colors, from); auto toColor = get(colors, to); - + if (fromColor != small_color::white && toColor == small_color::white) { - assert(ec <= INVALID_EDGE_CAP); + assert(ec <= INVALID_EDGE_CAP); DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n", - h[from].index, h[to].index, ec); - observed_white_flow += ec; - picked_white.push_back(e); - } + h[from].index, h[to].index, ec); + observed_white_flow += ec; + picked_white.push_back(e); + } if (fromColor == small_color::black && toColor != small_color::black) { - assert(ec <= INVALID_EDGE_CAP); + assert(ec <= INVALID_EDGE_CAP); DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n", - h[from].index, h[to].index, ec); - observed_black_flow += ec; - picked_black.push_back(e); - } - } - - 
DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow, - observed_black_flow, observed_white_flow); + h[from].index, h[to].index, ec); + observed_black_flow += ec; + picked_black.push_back(e); + } + } + + DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow, + observed_black_flow, observed_white_flow); if (min(observed_white_flow, observed_black_flow) != flow) { - DEBUG_PRINTF("bad cut\n"); - } - - if (observed_white_flow < observed_black_flow) { - return picked_white; - } else { - return picked_black; - } -} - -} // namespace ue2 + DEBUG_PRINTF("bad cut\n"); + } + + if (observed_white_flow < observed_black_flow) { + return picked_white; + } else { + return picked_black; + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h index d8e00b8e17..9e9b32e2b3 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h @@ -1,49 +1,49 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Network flow (min flow, max cut) algorithms. - */ -#ifndef NG_NETFLOW_H -#define NG_NETFLOW_H - -#include "ng_holder.h" -#include "ue2common.h" - -#include <vector> - -namespace ue2 { - -class NGHolder; - -/** Returns a min cut (in \p cutset) for the graph in \p h. */ -std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Network flow (min flow, max cut) algorithms. + */ +#ifndef NG_NETFLOW_H +#define NG_NETFLOW_H + +#include "ng_holder.h" +#include "ue2common.h" + +#include <vector> + +namespace ue2 { + +class NGHolder; + +/** Returns a min cut (in \p cutset) for the graph in \p h. */ +std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp index 04611872a4..9ad642ad09 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp @@ -1,240 +1,240 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Prefilter Reductions. - * - * This file contains routines for reducing the size of an NFA graph that we - * know will be used as a prefilter. - * - * The approach used is to consider the graph as a chain of region subgraphs, - * and to reduce the size of the graph by replacing regions with constructs - * that can be implemented in fewer states. - * - * Right now, the approach used is to replace a region with a bounded repeat of - * vertices (with bounds derived from the min/max width of the region - * subgraph). These vertices are given the union of the region's character - * reachability. - * - * For regions with bounded max width, this strategy is quite dependent on the - * LimEx NFA's bounded repeat functionality. - */ -#include "ng_prefilter.h" - -#include "ng_holder.h" -#include "ng_region.h" -#include "ng_util.h" -#include "ng_width.h" -#include "ue2common.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" - -#include <queue> + * \brief Prefilter Reductions. 
+ * + * This file contains routines for reducing the size of an NFA graph that we + * know will be used as a prefilter. + * + * The approach used is to consider the graph as a chain of region subgraphs, + * and to reduce the size of the graph by replacing regions with constructs + * that can be implemented in fewer states. + * + * Right now, the approach used is to replace a region with a bounded repeat of + * vertices (with bounds derived from the min/max width of the region + * subgraph). These vertices are given the union of the region's character + * reachability. + * + * For regions with bounded max width, this strategy is quite dependent on the + * LimEx NFA's bounded repeat functionality. + */ +#include "ng_prefilter.h" + +#include "ng_holder.h" +#include "ng_region.h" +#include "ng_util.h" +#include "ng_width.h" +#include "ue2common.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" + +#include <queue> #include <unordered_map> #include <unordered_set> - -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_values; - -namespace ue2 { - -/** Keep attempting to reduce the size of the graph until the number of - * vertices falls below this value. */ -static const size_t MAX_COMPONENT_VERTICES = 128; - -/** Only replace a region with at least this many vertices. */ -static const size_t MIN_REPLACE_VERTICES = 2; - -/** Estimate of how many vertices are required to represent a bounded repeat in - * the implementation NFA. */ -static const size_t BOUNDED_REPEAT_COUNT = 4; - -/** Scoring penalty for boundary regions. */ -static const size_t PENALTY_BOUNDARY = 32; - + +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +/** Keep attempting to reduce the size of the graph until the number of + * vertices falls below this value. 
*/ +static const size_t MAX_COMPONENT_VERTICES = 128; + +/** Only replace a region with at least this many vertices. */ +static const size_t MIN_REPLACE_VERTICES = 2; + +/** Estimate of how many vertices are required to represent a bounded repeat in + * the implementation NFA. */ +static const size_t BOUNDED_REPEAT_COUNT = 4; + +/** Scoring penalty for boundary regions. */ +static const size_t PENALTY_BOUNDARY = 32; + /** Regions with max bounds greater than this value will have their max bound * replaced with inf. */ static const size_t MAX_REPLACE_BOUND = 10000; -namespace { - -/** Information describing a region. */ -struct RegionInfo { - explicit RegionInfo(u32 id_in) : id(id_in) {} - u32 id; //!< region id - deque<NFAVertex> vertices; //!< vertices in the region - CharReach reach; //!< union of region reach +namespace { + +/** Information describing a region. */ +struct RegionInfo { + explicit RegionInfo(u32 id_in) : id(id_in) {} + u32 id; //!< region id + deque<NFAVertex> vertices; //!< vertices in the region + CharReach reach; //!< union of region reach depth minWidth{0}; //!< min width of region subgraph depth maxWidth{depth::infinity()}; //!< max width of region subgraph - bool atBoundary = false; //!< region is next to an accept - - // Bigger score is better. - size_t score() const { + bool atBoundary = false; //!< region is next to an accept + + // Bigger score is better. + size_t score() const { // TODO: charreach should be a signal? - size_t numVertices = vertices.size(); - if (atBoundary) { - return numVertices - min(PENALTY_BOUNDARY, numVertices); - } else { - return numVertices; - } - } -}; - -/** Comparator used to order regions for consideration in a priority queue. 
*/ -struct RegionInfoQueueComp { - bool operator()(const RegionInfo &r1, const RegionInfo &r2) const { - size_t score1 = r1.score(), score2 = r2.score(); - if (score1 != score2) { - return score1 < score2; - } - if (r1.reach.count() != r2.reach.count()) { - return r1.reach.count() < r2.reach.count(); - } - return r1.id < r2.id; - } -}; - -} // namespace - -static -void findWidths(const NGHolder &g, + size_t numVertices = vertices.size(); + if (atBoundary) { + return numVertices - min(PENALTY_BOUNDARY, numVertices); + } else { + return numVertices; + } + } +}; + +/** Comparator used to order regions for consideration in a priority queue. */ +struct RegionInfoQueueComp { + bool operator()(const RegionInfo &r1, const RegionInfo &r2) const { + size_t score1 = r1.score(), score2 = r2.score(); + if (score1 != score2) { + return score1 < score2; + } + if (r1.reach.count() != r2.reach.count()) { + return r1.reach.count() < r2.reach.count(); + } + return r1.id < r2.id; + } +}; + +} // namespace + +static +void findWidths(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ion_map, - RegionInfo &ri) { - NGHolder rg; + RegionInfo &ri) { + NGHolder rg; unordered_map<NFAVertex, NFAVertex> mapping; - fillHolder(&rg, g, ri.vertices, &mapping); - - // Wire our entries to start and our exits to accept. - for (auto v : ri.vertices) { - NFAVertex v_new = mapping[v]; + fillHolder(&rg, g, ri.vertices, &mapping); + + // Wire our entries to start and our exits to accept. + for (auto v : ri.vertices) { + NFAVertex v_new = mapping[v]; assert(v_new != NGHolder::null_vertex()); - - if (isRegionEntry(g, v, region_map) && - !edge(rg.start, v_new, rg).second) { - add_edge(rg.start, v_new, rg); - } - if (isRegionExit(g, v, region_map) && - !edge(v_new, rg.accept, rg).second) { - add_edge(v_new, rg.accept, rg); - } - } - - ri.minWidth = findMinWidth(rg); - ri.maxWidth = findMaxWidth(rg); -} - -// acc can be either h.accept or h.acceptEod. 
-static -void markBoundaryRegions(const NGHolder &h, + + if (isRegionEntry(g, v, region_map) && + !edge(rg.start, v_new, rg).second) { + add_edge(rg.start, v_new, rg); + } + if (isRegionExit(g, v, region_map) && + !edge(v_new, rg.accept, rg).second) { + add_edge(v_new, rg.accept, rg); + } + } + + ri.minWidth = findMinWidth(rg); + ri.maxWidth = findMaxWidth(rg); +} + +// acc can be either h.accept or h.acceptEod. +static +void markBoundaryRegions(const NGHolder &h, const unordered_map<NFAVertex, u32> ®ion_map, - map<u32, RegionInfo> ®ions, NFAVertex acc) { - for (auto v : inv_adjacent_vertices_range(acc, h)) { - if (is_special(v, h)) { - continue; - } - u32 id = region_map.at(v); - + map<u32, RegionInfo> ®ions, NFAVertex acc) { + for (auto v : inv_adjacent_vertices_range(acc, h)) { + if (is_special(v, h)) { + continue; + } + u32 id = region_map.at(v); + auto ri = regions.find(id); - if (ri == regions.end()) { - continue; // Not tracking this region as it's too small. - } - - ri->second.atBoundary = true; - } -} - -static -map<u32, RegionInfo> findRegionInfo(const NGHolder &h, + if (ri == regions.end()) { + continue; // Not tracking this region as it's too small. + } + + ri->second.atBoundary = true; + } +} + +static +map<u32, RegionInfo> findRegionInfo(const NGHolder &h, const unordered_map<NFAVertex, u32> ®ion_map) { - map<u32, RegionInfo> regions; - for (auto v : vertices_range(h)) { - if (is_special(v, h)) { - continue; - } - u32 id = region_map.at(v); + map<u32, RegionInfo> regions; + for (auto v : vertices_range(h)) { + if (is_special(v, h)) { + continue; + } + u32 id = region_map.at(v); RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second; - ri.vertices.push_back(v); - ri.reach |= h[v].char_reach; - } - - // There's no point tracking more information about regions that we won't - // consider replacing, so we remove them from the region map. 
+ ri.vertices.push_back(v); + ri.reach |= h[v].char_reach; + } + + // There's no point tracking more information about regions that we won't + // consider replacing, so we remove them from the region map. for (auto it = regions.begin(); it != regions.end();) { - if (it->second.vertices.size() < MIN_REPLACE_VERTICES) { - regions.erase(it++); - } else { - ++it; - } - } - - DEBUG_PRINTF("%zu regions\n", regions.size()); - - markBoundaryRegions(h, region_map, regions, h.accept); - markBoundaryRegions(h, region_map, regions, h.acceptEod); - - // Determine min/max widths. - for (RegionInfo &ri : regions | map_values) { - findWidths(h, region_map, ri); - DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id, - ri.atBoundary ? "(boundary) " : "", - ri.minWidth.str().c_str(), ri.maxWidth.str().c_str()); - } - - return regions; -} - -static + if (it->second.vertices.size() < MIN_REPLACE_VERTICES) { + regions.erase(it++); + } else { + ++it; + } + } + + DEBUG_PRINTF("%zu regions\n", regions.size()); + + markBoundaryRegions(h, region_map, regions, h.accept); + markBoundaryRegions(h, region_map, regions, h.acceptEod); + + // Determine min/max widths. + for (RegionInfo &ri : regions | map_values) { + findWidths(h, region_map, ri); + DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id, + ri.atBoundary ? 
"(boundary) " : "", + ri.minWidth.str().c_str(), ri.maxWidth.str().c_str()); + } + + return regions; +} + +static void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to) { - for (const auto &e : in_edges_range(from, g)) { - NFAVertex u = source(e, g); + for (const auto &e : in_edges_range(from, g)) { + NFAVertex u = source(e, g); add_edge_if_not_present(u, to, g[e], g); - } -} - -static + } +} + +static void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) { - for (const auto &e : out_edges_range(from, g)) { - NFAVertex t = target(e, g); - add_edge_if_not_present(to, t, g[e], g); - - if (is_any_accept(t, g)) { - const auto &reports = g[from].reports; - g[to].reports.insert(reports.begin(), reports.end()); - } - } -} - -static + for (const auto &e : out_edges_range(from, g)) { + NFAVertex t = target(e, g); + add_edge_if_not_present(to, t, g[e], g); + + if (is_any_accept(t, g)) { + const auto &reports = g[from].reports; + g[to].reports.insert(reports.begin(), reports.end()); + } + } +} + +static void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) { // Set of vertices in region, for quick lookups. const unordered_set<NFAVertex> rverts(ri.vertices.begin(), @@ -250,12 +250,12 @@ void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) { } static -void replaceRegion(NGHolder &g, const RegionInfo &ri, - size_t *verticesAdded, size_t *verticesRemoved) { - // TODO: more complex replacements. - assert(ri.vertices.size() >= MIN_REPLACE_VERTICES); - assert(ri.minWidth.is_finite()); - +void replaceRegion(NGHolder &g, const RegionInfo &ri, + size_t *verticesAdded, size_t *verticesRemoved) { + // TODO: more complex replacements. 
+ assert(ri.vertices.size() >= MIN_REPLACE_VERTICES); + assert(ri.minWidth.is_finite()); + depth minWidth = ri.minWidth; depth maxWidth = ri.maxWidth; @@ -265,129 +265,129 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri, maxWidth = depth::infinity(); } - size_t replacementSize; + size_t replacementSize; if (minWidth == maxWidth || maxWidth.is_infinite()) { replacementSize = minWidth; // {N} or {N,} - } else { + } else { replacementSize = maxWidth; // {N,M} case - } - - DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(), - replacementSize); - + } + + DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(), + replacementSize); + vector<NFAVertex> verts; verts.reserve(replacementSize); - for (size_t i = 0; i < replacementSize; i++) { - NFAVertex v = add_vertex(g); - g[v].char_reach = ri.reach; - if (i > 0) { - add_edge(verts.back(), v, g); - } - verts.push_back(v); - } - + for (size_t i = 0; i < replacementSize; i++) { + NFAVertex v = add_vertex(g); + g[v].char_reach = ri.reach; + if (i > 0) { + add_edge(verts.back(), v, g); + } + verts.push_back(v); + } + if (maxWidth.is_infinite()) { - add_edge(verts.back(), verts.back(), g); - } - + add_edge(verts.back(), verts.back(), g); + } + removeInteriorEdges(g, ri); - - for (size_t i = 0; i < replacementSize; i++) { - NFAVertex v_new = verts[i]; - - for (auto v_old : ri.vertices) { - if (i == 0) { + + for (size_t i = 0; i < replacementSize; i++) { + NFAVertex v_new = verts[i]; + + for (auto v_old : ri.vertices) { + if (i == 0) { copyInEdges(g, v_old, v_new); - } - if (i + 1 >= ri.minWidth) { + } + if (i + 1 >= ri.minWidth) { copyOutEdges(g, v_old, v_new); - } - } - } - - remove_vertices(ri.vertices, g, false); - - *verticesAdded = verts.size(); - *verticesRemoved = ri.vertices.size(); -} - -namespace { -struct SourceHasEdgeToAccept { - explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {} - bool operator()(const NFAEdge &e) const { - return edge(source(e, g), g.accept, 
g).second; - } - const NGHolder &g; -}; -} - -static -void reduceRegions(NGHolder &h) { - map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h)); - - RegionInfoQueueComp cmp; - priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp); - - size_t numVertices = 0; - for (const RegionInfo &ri : regions | map_values) { - numVertices += ri.vertices.size(); - pq.push(ri); - } - - while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) { - const RegionInfo &ri = pq.top(); - DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, " - "widths=[%s,%s]\n", - ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(), - ri.score(), ri.minWidth.str().c_str(), - ri.maxWidth.str().c_str()); - - size_t verticesAdded = 0; - size_t verticesRemoved = 0; - replaceRegion(h, ri, &verticesAdded, &verticesRemoved); - DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n", - verticesRemoved, verticesAdded); - - // We are trusting that implementation NFAs will be able to use the - // LimEx bounded repeat code here. - numVertices -= verticesRemoved; - numVertices += BOUNDED_REPEAT_COUNT; - - DEBUG_PRINTF("numVertices is now %zu\n", numVertices); - pq.pop(); - } - - // We may have vertices that have edges to both accept and acceptEod: in - // this case, we can optimize for performance by removing the acceptEod - // edges. 
+ } + } + } + + remove_vertices(ri.vertices, g, false); + + *verticesAdded = verts.size(); + *verticesRemoved = ri.vertices.size(); +} + +namespace { +struct SourceHasEdgeToAccept { + explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {} + bool operator()(const NFAEdge &e) const { + return edge(source(e, g), g.accept, g).second; + } + const NGHolder &g; +}; +} + +static +void reduceRegions(NGHolder &h) { + map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h)); + + RegionInfoQueueComp cmp; + priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp); + + size_t numVertices = 0; + for (const RegionInfo &ri : regions | map_values) { + numVertices += ri.vertices.size(); + pq.push(ri); + } + + while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) { + const RegionInfo &ri = pq.top(); + DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, " + "widths=[%s,%s]\n", + ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(), + ri.score(), ri.minWidth.str().c_str(), + ri.maxWidth.str().c_str()); + + size_t verticesAdded = 0; + size_t verticesRemoved = 0; + replaceRegion(h, ri, &verticesAdded, &verticesRemoved); + DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n", + verticesRemoved, verticesAdded); + + // We are trusting that implementation NFAs will be able to use the + // LimEx bounded repeat code here. + numVertices -= verticesRemoved; + numVertices += BOUNDED_REPEAT_COUNT; + + DEBUG_PRINTF("numVertices is now %zu\n", numVertices); + pq.pop(); + } + + // We may have vertices that have edges to both accept and acceptEod: in + // this case, we can optimize for performance by removing the acceptEod + // edges. 
remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h); -} - -void prefilterReductions(NGHolder &h, const CompileContext &cc) { - if (!cc.grey.prefilterReductions) { - return; - } - - if (num_vertices(h) <= MAX_COMPONENT_VERTICES) { - DEBUG_PRINTF("graph is already small enough (%zu vertices)\n", - num_vertices(h)); - return; - } - +} + +void prefilterReductions(NGHolder &h, const CompileContext &cc) { + if (!cc.grey.prefilterReductions) { + return; + } + + if (num_vertices(h) <= MAX_COMPONENT_VERTICES) { + DEBUG_PRINTF("graph is already small enough (%zu vertices)\n", + num_vertices(h)); + return; + } + DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); - + renumber_vertices(h); renumber_edges(h); - - reduceRegions(h); - + + reduceRegions(h); + renumber_vertices(h); renumber_edges(h); DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h index 88cbefd2de..e1f5c13f37 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h @@ -1,45 +1,45 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Prefilter Reductions. - */ - -#ifndef NG_PREFILTER_H -#define NG_PREFILTER_H - -namespace ue2 { - -class NGHolder; -struct CompileContext; - -void prefilterReductions(NGHolder &h, const CompileContext &cc); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Prefilter Reductions. + */ + +#ifndef NG_PREFILTER_H +#define NG_PREFILTER_H + +namespace ue2 { + +class NGHolder; +struct CompileContext; + +void prefilterReductions(NGHolder &h, const CompileContext &cc); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp index adda70312f..997f652d0d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp @@ -1,434 +1,434 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Functions for pruning unreachable vertices or reports from the graph. - */ -#include "ng_prune.h" - -#include "ng_dominators.h" -#include "ng_holder.h" -#include "ng_reports.h" -#include "ng_util.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for pruning unreachable vertices or reports from the graph. + */ +#include "ng_prune.h" + +#include "ng_dominators.h" +#include "ng_holder.h" +#include "ng_reports.h" +#include "ng_util.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" -#include "util/report_manager.h" - -#include <deque> -#include <map> - -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/reverse_graph.hpp> - -using namespace std; -using boost::default_color_type; -using boost::reverse_graph; - -namespace ue2 { - -/** Remove any vertices that can't be reached by traversing the graph in - * reverse from acceptEod. 
*/ -void pruneUnreachable(NGHolder &g) { - deque<NFAVertex> dead; - +#include "util/report_manager.h" + +#include <deque> +#include <map> + +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/reverse_graph.hpp> + +using namespace std; +using boost::default_color_type; +using boost::reverse_graph; + +namespace ue2 { + +/** Remove any vertices that can't be reached by traversing the graph in + * reverse from acceptEod. */ +void pruneUnreachable(NGHolder &g) { + deque<NFAVertex> dead; + if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g) && edge(g.accept, g.acceptEod, g).second) { - // Trivial case: there are no in-edges to our accepts (other than - // accept->acceptEod), so all non-specials are unreachable. - for (auto v : vertices_range(g)) { - if (!is_special(v, g)) { - dead.push_back(v); - } - } - } else { - // Walk a reverse graph from acceptEod with Boost's depth_first_visit - // call. + // Trivial case: there are no in-edges to our accepts (other than + // accept->acceptEod), so all non-specials are unreachable. + for (auto v : vertices_range(g)) { + if (!is_special(v, g)) { + dead.push_back(v); + } + } + } else { + // Walk a reverse graph from acceptEod with Boost's depth_first_visit + // call. typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph; RevNFAGraph revg(g); - + map<RevNFAGraph::vertex_descriptor, default_color_type> colours; - - depth_first_visit(revg, g.acceptEod, - make_dfs_visitor(boost::null_visitor()), - make_assoc_property_map(colours)); - - DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size()); - - // All non-special vertices that aren't in the colour map (because they - // weren't reached) can be removed. 
- for (auto v : vertices_range(revg)) { - if (is_special(v, revg)) { - continue; - } - if (!contains(colours, v)) { - dead.push_back(v); - } - } - } - - if (dead.empty()) { - DEBUG_PRINTF("no unreachable vertices\n"); - return; - } - - remove_vertices(dead, g, false); - DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size()); -} - -template<class nfag_t> -static + + depth_first_visit(revg, g.acceptEod, + make_dfs_visitor(boost::null_visitor()), + make_assoc_property_map(colours)); + + DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size()); + + // All non-special vertices that aren't in the colour map (because they + // weren't reached) can be removed. + for (auto v : vertices_range(revg)) { + if (is_special(v, revg)) { + continue; + } + if (!contains(colours, v)) { + dead.push_back(v); + } + } + } + + if (dead.empty()) { + DEBUG_PRINTF("no unreachable vertices\n"); + return; + } + + remove_vertices(dead, g, false); + DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size()); +} + +template<class nfag_t> +static bool pruneForwardUseless(NGHolder &h, const nfag_t &g, typename nfag_t::vertex_descriptor s, decltype(make_small_color_map(NGHolder())) &colors) { - // Begin with all vertices set to white, as DFV only marks visited - // vertices. + // Begin with all vertices set to white, as DFV only marks visited + // vertices. colors.fill(small_color::white); - + depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors); - - vector<NFAVertex> dead; - - // All non-special vertices that are still white can be removed. - for (auto v : vertices_range(g)) { + + vector<NFAVertex> dead; + + // All non-special vertices that are still white can be removed. 
+ for (auto v : vertices_range(g)) { if (!is_special(v, g) && get(colors, v) == small_color::white) { DEBUG_PRINTF("vertex %zu is unreachable from %zu\n", - g[v].index, g[s].index); + g[v].index, g[s].index); dead.push_back(NFAVertex(v)); - } - } - - if (dead.empty()) { - return false; - } - - DEBUG_PRINTF("removing %zu vertices\n", dead.size()); - remove_vertices(dead, h, false); - return true; -} - -/** Remove any vertices which can't be reached by traversing the graph forward - * from start or in reverse from acceptEod. If \p renumber is false, no - * vertex/edge renumbering is done. */ -void pruneUseless(NGHolder &g, bool renumber) { - DEBUG_PRINTF("pruning useless vertices\n"); - assert(hasCorrectlyNumberedVertices(g)); + } + } + + if (dead.empty()) { + return false; + } + + DEBUG_PRINTF("removing %zu vertices\n", dead.size()); + remove_vertices(dead, h, false); + return true; +} + +/** Remove any vertices which can't be reached by traversing the graph forward + * from start or in reverse from acceptEod. If \p renumber is false, no + * vertex/edge renumbering is done. */ +void pruneUseless(NGHolder &g, bool renumber) { + DEBUG_PRINTF("pruning useless vertices\n"); + assert(hasCorrectlyNumberedVertices(g)); auto colors = make_small_color_map(g); - + bool work_done = pruneForwardUseless(g, g, g.start, colors); work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g), g.acceptEod, colors); - - if (!work_done) { - return; - } - - if (renumber) { + + if (!work_done) { + return; + } + + if (renumber) { renumber_edges(g); renumber_vertices(g); - } -} - -/** This code removes any vertices which do not accept any symbols. Any - * vertices which no longer lie on a path from a start to an accept are also - * pruned. 
*/ -void pruneEmptyVertices(NGHolder &g) { - DEBUG_PRINTF("pruning empty vertices\n"); - vector<NFAVertex> dead; - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - const CharReach &cr = g[v].char_reach; - if (cr.none()) { + } +} + +/** This code removes any vertices which do not accept any symbols. Any + * vertices which no longer lie on a path from a start to an accept are also + * pruned. */ +void pruneEmptyVertices(NGHolder &g) { + DEBUG_PRINTF("pruning empty vertices\n"); + vector<NFAVertex> dead; + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + const CharReach &cr = g[v].char_reach; + if (cr.none()) { DEBUG_PRINTF("empty: %zu\n", g[v].index); - dead.push_back(v); - } - } - - if (dead.empty()) { - return; - } - - remove_vertices(dead, g); - pruneUseless(g); -} - -/** Remove any edges from vertices that generate accepts (for Highlander - * graphs). */ -void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) { - // Safety check: all reports must be simple exhaustible reports, or this is - // not safe. This optimisation should be called early enough that no - // internal reports have been added. 
- for (auto report_id : all_reports(g)) { - const Report &ir = rm.getReport(report_id); - - if (ir.ekey == INVALID_EKEY || ir.hasBounds() || - !isExternalReport(ir)) { - DEBUG_PRINTF("report %u is not external highlander with " - "no bounds\n", report_id); - return; - } - } - - vector<NFAEdge> dead; - for (auto u : inv_adjacent_vertices_range(g.accept, g)) { - if (is_special(u, g)) { - continue; - } - - // We can prune any out-edges that aren't accepts - for (const auto &e : out_edges_range(u, g)) { - if (!is_any_accept(target(e, g), g)) { - dead.push_back(e); - } - } - } - - if (dead.empty()) { - return; - } - - DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size()); - remove_edges(dead, g); - pruneUseless(g); -} - -static -bool isDominatedByReporter(const NGHolder &g, + dead.push_back(v); + } + } + + if (dead.empty()) { + return; + } + + remove_vertices(dead, g); + pruneUseless(g); +} + +/** Remove any edges from vertices that generate accepts (for Highlander + * graphs). */ +void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) { + // Safety check: all reports must be simple exhaustible reports, or this is + // not safe. This optimisation should be called early enough that no + // internal reports have been added. 
+ for (auto report_id : all_reports(g)) { + const Report &ir = rm.getReport(report_id); + + if (ir.ekey == INVALID_EKEY || ir.hasBounds() || + !isExternalReport(ir)) { + DEBUG_PRINTF("report %u is not external highlander with " + "no bounds\n", report_id); + return; + } + } + + vector<NFAEdge> dead; + for (auto u : inv_adjacent_vertices_range(g.accept, g)) { + if (is_special(u, g)) { + continue; + } + + // We can prune any out-edges that aren't accepts + for (const auto &e : out_edges_range(u, g)) { + if (!is_any_accept(target(e, g), g)) { + dead.push_back(e); + } + } + } + + if (dead.empty()) { + return; + } + + DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size()); + remove_edges(dead, g); + pruneUseless(g); +} + +static +bool isDominatedByReporter(const NGHolder &g, const unordered_map<NFAVertex, NFAVertex> &dom, - NFAVertex v, ReportID report_id) { - for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) { - NFAVertex u = it->second; - // Note: reporters with edges only to acceptEod are not considered to - // dominate. - if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) { + NFAVertex v, ReportID report_id) { + for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) { + NFAVertex u = it->second; + // Note: reporters with edges only to acceptEod are not considered to + // dominate. + if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) { DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n", - g[v].index, g[u].index, report_id); - return true; - } - v = u; - } - return false; -} - -/** - * True if the vertex has (a) a self-loop, (b) only out-edges to accept and - * itself and (c) only simple exhaustible reports. 
- */ -static -bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g, - const ReportManager &rm, - NFAVertex v) { - if (!edge(v, v, g).second) { - return false; - } - - for (auto w : adjacent_vertices_range(v, g)) { - if (w != v && w != g.accept) { - return false; - } - } - - for (const auto &report_id : g[v].reports) { - if (!isSimpleExhaustible(rm.getReport(report_id))) { - return false; - } - } - - return true; -} - -void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { - vector<NFAVertex> reporters; - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - for (const auto &report_id : g[v].reports) { - const Report &r = rm.getReport(report_id); - if (isSimpleExhaustible(r)) { - reporters.push_back(v); - break; - } - } - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - for (const auto &report_id : g[v].reports) { - const Report &r = rm.getReport(report_id); - if (isSimpleExhaustible(r)) { - reporters.push_back(v); - break; - } - } - } - - if (reporters.empty()) { - return; - } - - + g[v].index, g[u].index, report_id); + return true; + } + v = u; + } + return false; +} + +/** + * True if the vertex has (a) a self-loop, (b) only out-edges to accept and + * itself and (c) only simple exhaustible reports. 
+ */ +static +bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g, + const ReportManager &rm, + NFAVertex v) { + if (!edge(v, v, g).second) { + return false; + } + + for (auto w : adjacent_vertices_range(v, g)) { + if (w != v && w != g.accept) { + return false; + } + } + + for (const auto &report_id : g[v].reports) { + if (!isSimpleExhaustible(rm.getReport(report_id))) { + return false; + } + } + + return true; +} + +void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { + vector<NFAVertex> reporters; + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + for (const auto &report_id : g[v].reports) { + const Report &r = rm.getReport(report_id); + if (isSimpleExhaustible(r)) { + reporters.push_back(v); + break; + } + } + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + for (const auto &report_id : g[v].reports) { + const Report &r = rm.getReport(report_id); + if (isSimpleExhaustible(r)) { + reporters.push_back(v); + break; + } + } + } + + if (reporters.empty()) { + return; + } + + sort(begin(reporters), end(reporters)); - reporters.erase(unique(begin(reporters), end(reporters)), end(reporters)); - - DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n", - reporters.size()); - - const auto &dom = findDominators(g); - bool modified = false; - - // If a reporter vertex is dominated by another with the same report, we - // can remove that report; if all reports are removed, we can remove the - // vertex entirely. 
- for (const auto v : reporters) { - const auto reports = g[v].reports; // copy, as we're going to mutate - for (const auto &report_id : reports) { - if (!isSimpleExhaustible(rm.getReport(report_id))) { - continue; - } - if (isDominatedByReporter(g, dom, v, report_id)) { + reporters.erase(unique(begin(reporters), end(reporters)), end(reporters)); + + DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n", + reporters.size()); + + const auto &dom = findDominators(g); + bool modified = false; + + // If a reporter vertex is dominated by another with the same report, we + // can remove that report; if all reports are removed, we can remove the + // vertex entirely. + for (const auto v : reporters) { + const auto reports = g[v].reports; // copy, as we're going to mutate + for (const auto &report_id : reports) { + if (!isSimpleExhaustible(rm.getReport(report_id))) { + continue; + } + if (isDominatedByReporter(g, dom, v, report_id)) { DEBUG_PRINTF("removed dominated report %u from vertex %zu\n", - report_id, g[v].index); - g[v].reports.erase(report_id); - } - } - - if (g[v].reports.empty()) { + report_id, g[v].index); + g[v].reports.erase(report_id); + } + } + + if (g[v].reports.empty()) { DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n", - g[v].index); - remove_edge(v, g.accept, g); - remove_edge(v, g.acceptEod, g); - modified = true; - } - } - - // If a reporter vertex has a self-loop, but otherwise only leads to accept - // (note: NOT acceptEod) and has simple exhaustible reports, we can delete - // the self-loop. - for (const auto v : reporters) { - if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) { - remove_edge(v, v, g); - modified = true; + g[v].index); + remove_edge(v, g.accept, g); + remove_edge(v, g.acceptEod, g); + modified = true; + } + } + + // If a reporter vertex has a self-loop, but otherwise only leads to accept + // (note: NOT acceptEod) and has simple exhaustible reports, we can delete + // the self-loop. 
+ for (const auto v : reporters) { + if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) { + remove_edge(v, v, g); + modified = true; DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index); - } - } - - if (!modified) { - return; - } - - pruneUseless(g); - - // We may have only removed self-loops, in which case pruneUseless wouldn't - // renumber, so we do edge renumbering explicitly here. + } + } + + if (!modified) { + return; + } + + pruneUseless(g); + + // We may have only removed self-loops, in which case pruneUseless wouldn't + // renumber, so we do edge renumbering explicitly here. renumber_edges(g); -} - -/** Removes the given Report ID from vertices connected to accept, and then - * prunes useless vertices that have had their report sets reduced to empty. */ -void pruneReport(NGHolder &g, ReportID report) { - set<NFAEdge> dead; - - for (const auto &e : in_edges_range(g.accept, g)) { - NFAVertex u = source(e, g); - auto &reports = g[u].reports; - if (contains(reports, report)) { - reports.erase(report); - if (reports.empty()) { - dead.insert(e); - } - } - } - - for (const auto &e : in_edges_range(g.acceptEod, g)) { - NFAVertex u = source(e, g); - if (u == g.accept) { - continue; - } - auto &reports = g[u].reports; - if (contains(reports, report)) { - reports.erase(report); - if (reports.empty()) { - dead.insert(e); - } - } - } - - if (dead.empty()) { - return; - } - - remove_edges(dead, g); - pruneUnreachable(g); +} + +/** Removes the given Report ID from vertices connected to accept, and then + * prunes useless vertices that have had their report sets reduced to empty. 
*/ +void pruneReport(NGHolder &g, ReportID report) { + set<NFAEdge> dead; + + for (const auto &e : in_edges_range(g.accept, g)) { + NFAVertex u = source(e, g); + auto &reports = g[u].reports; + if (contains(reports, report)) { + reports.erase(report); + if (reports.empty()) { + dead.insert(e); + } + } + } + + for (const auto &e : in_edges_range(g.acceptEod, g)) { + NFAVertex u = source(e, g); + if (u == g.accept) { + continue; + } + auto &reports = g[u].reports; + if (contains(reports, report)) { + reports.erase(report); + if (reports.empty()) { + dead.insert(e); + } + } + } + + if (dead.empty()) { + return; + } + + remove_edges(dead, g); + pruneUnreachable(g); renumber_vertices(g); renumber_edges(g); -} - -/** Removes all Report IDs bar the given one from vertices connected to accept, - * and then prunes useless vertices that have had their report sets reduced to - * empty. */ -void pruneAllOtherReports(NGHolder &g, ReportID report) { - set<NFAEdge> dead; - - for (const auto &e : in_edges_range(g.accept, g)) { - NFAVertex u = source(e, g); - auto &reports = g[u].reports; - if (contains(reports, report)) { - reports.clear(); - reports.insert(report); - } else { - reports.clear(); - dead.insert(e); - } - } - - for (const auto &e : in_edges_range(g.acceptEod, g)) { - NFAVertex u = source(e, g); - if (u == g.accept) { - continue; - } - auto &reports = g[u].reports; - if (contains(reports, report)) { - reports.clear(); - reports.insert(report); - } else { - reports.clear(); - dead.insert(e); - } - } - - if (dead.empty()) { - return; - } - - remove_edges(dead, g); - pruneUnreachable(g); +} + +/** Removes all Report IDs bar the given one from vertices connected to accept, + * and then prunes useless vertices that have had their report sets reduced to + * empty. 
*/ +void pruneAllOtherReports(NGHolder &g, ReportID report) { + set<NFAEdge> dead; + + for (const auto &e : in_edges_range(g.accept, g)) { + NFAVertex u = source(e, g); + auto &reports = g[u].reports; + if (contains(reports, report)) { + reports.clear(); + reports.insert(report); + } else { + reports.clear(); + dead.insert(e); + } + } + + for (const auto &e : in_edges_range(g.acceptEod, g)) { + NFAVertex u = source(e, g); + if (u == g.accept) { + continue; + } + auto &reports = g[u].reports; + if (contains(reports, report)) { + reports.clear(); + reports.insert(report); + } else { + reports.clear(); + dead.insert(e); + } + } + + if (dead.empty()) { + return; + } + + remove_edges(dead, g); + pruneUnreachable(g); renumber_vertices(g); renumber_edges(g); -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.h b/contrib/libs/hyperscan/src/nfagraph/ng_prune.h index 475953be3c..0dcef7c8d5 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.h @@ -1,75 +1,75 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Functions for pruning unreachable vertices or reports from the graph. - */ - -#ifndef NG_PRUNE_H -#define NG_PRUNE_H - -#include "ue2common.h" - -namespace ue2 { - -class NGHolder; -class ReportManager; - -/** Remove any vertices that can't be reached by traversing the graph in - * reverse from acceptEod. */ -void pruneUnreachable(NGHolder &g); - -/** Remove any vertices which can't be reached by traversing the graph forward - * from start or in reverse from acceptEod. If \p renumber is false, no - * vertex/edge renumbering is done. */ -void pruneUseless(NGHolder &g, bool renumber = true); - -/** Remove any vertices with empty reachability. */ -void pruneEmptyVertices(NGHolder &g); - -/** Remove any edges from vertices that generate accepts (for Highlander - * graphs). */ -void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm); - -/** - * Prune highlander reports that are dominated by earlier ones in the graph. - */ -void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm); - -/** Removes the given Report ID from vertices connected to accept, and then - * prunes useless vertices that have had their report sets reduced to empty. 
*/ -void pruneReport(NGHolder &g, ReportID report); - -/** Removes all Report IDs bar the given one from vertices connected to accept, - * and then prunes useless vertices that have had their report sets reduced to - * empty. */ -void pruneAllOtherReports(NGHolder &g, ReportID report); - -} // namespace ue2 - -#endif // NG_PRUNE_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for pruning unreachable vertices or reports from the graph. 
+ */ + +#ifndef NG_PRUNE_H +#define NG_PRUNE_H + +#include "ue2common.h" + +namespace ue2 { + +class NGHolder; +class ReportManager; + +/** Remove any vertices that can't be reached by traversing the graph in + * reverse from acceptEod. */ +void pruneUnreachable(NGHolder &g); + +/** Remove any vertices which can't be reached by traversing the graph forward + * from start or in reverse from acceptEod. If \p renumber is false, no + * vertex/edge renumbering is done. */ +void pruneUseless(NGHolder &g, bool renumber = true); + +/** Remove any vertices with empty reachability. */ +void pruneEmptyVertices(NGHolder &g); + +/** Remove any edges from vertices that generate accepts (for Highlander + * graphs). */ +void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm); + +/** + * Prune highlander reports that are dominated by earlier ones in the graph. + */ +void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm); + +/** Removes the given Report ID from vertices connected to accept, and then + * prunes useless vertices that have had their report sets reduced to empty. */ +void pruneReport(NGHolder &g, ReportID report); + +/** Removes all Report IDs bar the given one from vertices connected to accept, + * and then prunes useless vertices that have had their report sets reduced to + * empty. 
*/ +void pruneAllOtherReports(NGHolder &g, ReportID report); + +} // namespace ue2 + +#endif // NG_PRUNE_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp index 984518b0fc..eb1f7114f6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp @@ -1,578 +1,578 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file - * \brief Puff construction from NGHolder. - */ -#include "ng_puff.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_repeat.h" -#include "ng_reports.h" -#include "ng_util.h" -#include "ue2common.h" -#include "nfa/nfa_api_queue.h" -#include "nfa/mpvcompile.h" -#include "rose/rose_build.h" -#include "util/compile_context.h" -#include "util/graph_range.h" -#include "util/report_manager.h" - -#include <vector> - -using namespace std; - -namespace ue2 { - -static const unsigned MIN_PUFF_LENGTH = 16; -static const unsigned HEAD_BACKOFF = 16; - -static -size_t countChain(const NGHolder &g, NFAVertex v) { - size_t count = 0; - while (v) { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Puff construction from NGHolder. + */ +#include "ng_puff.h" + +#include "grey.h" +#include "ng_depth.h" +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_repeat.h" +#include "ng_reports.h" +#include "ng_util.h" +#include "ue2common.h" +#include "nfa/nfa_api_queue.h" +#include "nfa/mpvcompile.h" +#include "rose/rose_build.h" +#include "util/compile_context.h" +#include "util/graph_range.h" +#include "util/report_manager.h" + +#include <vector> + +using namespace std; + +namespace ue2 { + +static const unsigned MIN_PUFF_LENGTH = 16; +static const unsigned HEAD_BACKOFF = 16; + +static +size_t countChain(const NGHolder &g, NFAVertex v) { + size_t count = 0; + while (v) { DEBUG_PRINTF("counting vertex %zu\n", g[v].index); - if (is_special(v, g)) { - break; - } - - count++; - v = getSoleDestVertex(g, v); - } - DEBUG_PRINTF("done %zu\n", count); - return count; -} - -static -void wireNewAccepts(NGHolder &g, NFAVertex head, - const flat_set<ReportID> &chain_reports) { - for (auto u : inv_adjacent_vertices_range(head, g)) { - if (is_special(u, g)) { - continue; - } - + if (is_special(v, g)) { + break; + } + + count++; + v = getSoleDestVertex(g, v); + } + DEBUG_PRINTF("done %zu\n", count); + return count; +} + +static +void wireNewAccepts(NGHolder &g, NFAVertex head, + const flat_set<ReportID> &chain_reports) { + for (auto u : inv_adjacent_vertices_range(head, g)) { + if (is_special(u, g)) { + continue; 
+ } + DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index); - assert(!edge(u, g.accept, g).second); - assert(!edge(u, g.acceptEod, g).second); - add_edge(u, g.accept, g); - - // Replace reports with our chain reports. - auto &u_reports = g[u].reports; - u_reports.clear(); - u_reports.insert(chain_reports.begin(), chain_reports.end()); - } -} - -static -bool isFixedDepth(const NGHolder &g, NFAVertex v) { - // If the vertex is reachable from startDs, it can't be fixed depth. + assert(!edge(u, g.accept, g).second); + assert(!edge(u, g.acceptEod, g).second); + add_edge(u, g.accept, g); + + // Replace reports with our chain reports. + auto &u_reports = g[u].reports; + u_reports.clear(); + u_reports.insert(chain_reports.begin(), chain_reports.end()); + } +} + +static +bool isFixedDepth(const NGHolder &g, NFAVertex v) { + // If the vertex is reachable from startDs, it can't be fixed depth. auto depthFromStartDs = calcDepthsFrom(g, g.startDs); - - u32 idx = g[v].index; - const DepthMinMax &ds = depthFromStartDs.at(idx); - if (ds.min.is_reachable()) { - DEBUG_PRINTF("vertex reachable from startDs\n"); - return false; - } - + + u32 idx = g[v].index; + const DepthMinMax &ds = depthFromStartDs.at(idx); + if (ds.min.is_reachable()) { + DEBUG_PRINTF("vertex reachable from startDs\n"); + return false; + } + auto depthFromStart = calcDepthsFrom(g, g.start); - - /* we can still consider the head of a puff chain as at fixed depth if - * it has a self-loop: so we look at all the preds of v (other than v - * itself) */ - - assert(v && !is_special(v, g)); - - u32 count = 0; - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; // self-loop - } - count++; - - idx = g[u].index; - const DepthMinMax &d = depthFromStart.at(idx); - if (d.min != d.max) { - return false; - } - } - - return count != 0; // at least one fixed-depth pred -} - -static -bool singleStart(const NGHolder &g) { - set<NFAVertex> seen; - - for (auto v : adjacent_vertices_range(g.start, g)) { 
- if (!is_special(v, g)) { + + /* we can still consider the head of a puff chain as at fixed depth if + * it has a self-loop: so we look at all the preds of v (other than v + * itself) */ + + assert(v && !is_special(v, g)); + + u32 count = 0; + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; // self-loop + } + count++; + + idx = g[u].index; + const DepthMinMax &d = depthFromStart.at(idx); + if (d.min != d.max) { + return false; + } + } + + return count != 0; // at least one fixed-depth pred +} + +static +bool singleStart(const NGHolder &g) { + set<NFAVertex> seen; + + for (auto v : adjacent_vertices_range(g.start, g)) { + if (!is_special(v, g)) { DEBUG_PRINTF("saw %zu\n", g[v].index); - seen.insert(v); - } - } - for (auto v : adjacent_vertices_range(g.startDs, g)) { - if (!is_special(v, g)) { + seen.insert(v); + } + } + for (auto v : adjacent_vertices_range(g.startDs, g)) { + if (!is_special(v, g)) { DEBUG_PRINTF("saw %zu\n", g[v].index); - seen.insert(v); - } - } - - DEBUG_PRINTF("comp has %zu starts\n", seen.size()); - - return seen.size() == 1; -} - -static -bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { - const CharReach puff_escapes = ~g[head].char_reach; - - for (auto u : inv_adjacent_vertices_range(head, g)) { - if (!g[u].char_reach.isSubsetOf(puff_escapes)) { + seen.insert(v); + } + } + + DEBUG_PRINTF("comp has %zu starts\n", seen.size()); + + return seen.size() == 1; +} + +static +bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { + const CharReach puff_escapes = ~g[head].char_reach; + + for (auto u : inv_adjacent_vertices_range(head, g)) { + if (!g[u].char_reach.isSubsetOf(puff_escapes)) { DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index, - g[head].index); - return false; - } - } - - DEBUG_PRINTF("reset on trigger\n"); - return true; -} - -/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform) - * */ -static -bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { + 
g[head].index); + return false; + } + } + + DEBUG_PRINTF("reset on trigger\n"); + return true; +} + +/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform) + * */ +static +bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { DEBUG_PRINTF("head = %zu\n", g[head].index); - - const CharReach &puff_cr = g[head].char_reach; - - /* we can use the pred of the head as the base of our check if it the cr - * matches as if - * head cr subsetof pred cr: if head is being pushed on then puff must - * still being pushed on - * pred cr subsetof head cr: if the puff matches then head must be also - * always be on if the is connected to a wide enough cyclic - */ - if (proper_in_degree(head, g) == 1 - && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) { - head = getSoleSourceVertex(g, head); + + const CharReach &puff_cr = g[head].char_reach; + + /* we can use the pred of the head as the base of our check if it the cr + * matches as if + * head cr subsetof pred cr: if head is being pushed on then puff must + * still being pushed on + * pred cr subsetof head cr: if the puff matches then head must be also + * always be on if the is connected to a wide enough cyclic + */ + if (proper_in_degree(head, g) == 1 + && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) { + head = getSoleSourceVertex(g, head); DEBUG_PRINTF("temp new head = %zu\n", g[head].index); - } - - for (auto s : inv_adjacent_vertices_range(head, g)) { + } + + for (auto s : inv_adjacent_vertices_range(head, g)) { DEBUG_PRINTF("s = %zu\n", g[s].index); - if (!puff_cr.isSubsetOf(g[s].char_reach)) { + if (!puff_cr.isSubsetOf(g[s].char_reach)) { DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index, g[head].index); - return false; - } - - if (!hasSelfLoop(s, g) && s != g.start) { - DEBUG_PRINTF("no self loop\n"); - return false; - } - - if (s == g.start && !edge(g.startDs, head, g).second) { - DEBUG_PRINTF("not float\n"); - return false; - } - } - - DEBUG_PRINTF("reset on 
trigger\n"); - return true; -} - -static -u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g, - NFAVertex pv, bool prefilter) { - CharReach accept_cr; - DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first()); - - if (prefilter) { - /* a later prefilter stage make weaken the lead up so we can't be sure - * that all the triggers will be squashing the puffette. */ - return 0; - } - - /* TODO: inspect further back in the pattern */ - for (auto u : inv_adjacent_vertices_range(pv, g)) { - accept_cr |= g[u].char_reach; - } - - DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count()); - - if ((accept_cr & cr).any()) { - return 0; /* the accept byte doesn't always kill the puffette. TODO: - * maybe if we look further back we could find something that - * would kill the puffette... */ - } - DEBUG_PRINTF("returning squash distance of %u\n", min_width); - return min_width; -} - -/** Gives a stronger puff trigger when the trigger is connected to a wide - * cyclic state (aside from sds) */ -static -void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) { - DEBUG_PRINTF("attempting to improve puff trigger\n"); - assert(!nodes->empty()); - const CharReach &puff_cr = g[nodes->back()].char_reach; - if (puff_cr.all()) { - return; /* we can't really do much with this one */ - } - - /* add the runway */ - DEBUG_PRINTF("backing off - allowing a decent header\n"); - assert(nodes->size() > HEAD_BACKOFF); - for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) { - nodes->pop_back(); - } - *a = nodes->back(); - nodes->pop_back(); -} - -static -void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, - const CharReach &cr, const ReportID report, u32 width, - bool fixed_depth, bool unbounded, bool auto_restart, - RoseBuild &rose, ReportManager &rm, - flat_set<ReportID> &chain_reports, bool prefilter) { - DEBUG_PRINTF("constructing Puff for report %u\n", report); + return false; + } + + if (!hasSelfLoop(s, g) && s != g.start) { + 
DEBUG_PRINTF("no self loop\n"); + return false; + } + + if (s == g.start && !edge(g.startDs, head, g).second) { + DEBUG_PRINTF("not float\n"); + return false; + } + } + + DEBUG_PRINTF("reset on trigger\n"); + return true; +} + +static +u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g, + NFAVertex pv, bool prefilter) { + CharReach accept_cr; + DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first()); + + if (prefilter) { + /* a later prefilter stage make weaken the lead up so we can't be sure + * that all the triggers will be squashing the puffette. */ + return 0; + } + + /* TODO: inspect further back in the pattern */ + for (auto u : inv_adjacent_vertices_range(pv, g)) { + accept_cr |= g[u].char_reach; + } + + DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count()); + + if ((accept_cr & cr).any()) { + return 0; /* the accept byte doesn't always kill the puffette. TODO: + * maybe if we look further back we could find something that + * would kill the puffette... 
*/ + } + DEBUG_PRINTF("returning squash distance of %u\n", min_width); + return min_width; +} + +/** Gives a stronger puff trigger when the trigger is connected to a wide + * cyclic state (aside from sds) */ +static +void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) { + DEBUG_PRINTF("attempting to improve puff trigger\n"); + assert(!nodes->empty()); + const CharReach &puff_cr = g[nodes->back()].char_reach; + if (puff_cr.all()) { + return; /* we can't really do much with this one */ + } + + /* add the runway */ + DEBUG_PRINTF("backing off - allowing a decent header\n"); + assert(nodes->size() > HEAD_BACKOFF); + for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) { + nodes->pop_back(); + } + *a = nodes->back(); + nodes->pop_back(); +} + +static +void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, + const CharReach &cr, const ReportID report, u32 width, + bool fixed_depth, bool unbounded, bool auto_restart, + RoseBuild &rose, ReportManager &rm, + flat_set<ReportID> &chain_reports, bool prefilter) { + DEBUG_PRINTF("constructing Puff for report %u\n", report); DEBUG_PRINTF("a = %zu\n", g[a].index); - + const Report &puff_report = rm.getReport(report); const bool simple_exhaust = isSimpleExhaustible(puff_report); - const bool pureAnchored = a == g.start && singleStart(g); - if (!pureAnchored) { - if (a == g.startDs || a == g.start) { - DEBUG_PRINTF("add outfix ar(false)\n"); - + const bool pureAnchored = a == g.start && singleStart(g); + if (!pureAnchored) { + if (a == g.startDs || a == g.start) { + DEBUG_PRINTF("add outfix ar(false)\n"); + raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); - rose.addOutfix(rp); - return; - } - - DEBUG_PRINTF("add chain tail\n"); - u32 qi = ~0U; - u32 event = MQE_TOP; - raw_puff rp(width, unbounded, report, cr); - rose.addChainTail(rp, &qi, &event); - assert(qi != ~0U); - u32 squashDistance = allowedSquashDistance(cr, width, g, puffv, - prefilter); - + rose.addOutfix(rp); + return; + 
} + + DEBUG_PRINTF("add chain tail\n"); + u32 qi = ~0U; + u32 event = MQE_TOP; + raw_puff rp(width, unbounded, report, cr); + rose.addChainTail(rp, &qi, &event); + assert(qi != ~0U); + u32 squashDistance = allowedSquashDistance(cr, width, g, puffv, + prefilter); + Report ir = makeMpvTrigger(event, squashDistance); - /* only need to trigger once if floatingUnboundedDot */ - bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth; - if (floatingUnboundedDot) { - ir.ekey = rm.getUnassociatedExhaustibleKey(); - } - ReportID id = rm.getInternalId(ir); - chain_reports.insert(id); - } else { - DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart); - assert(!auto_restart || unbounded); + /* only need to trigger once if floatingUnboundedDot */ + bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth; + if (floatingUnboundedDot) { + ir.ekey = rm.getUnassociatedExhaustibleKey(); + } + ReportID id = rm.getInternalId(ir); + chain_reports.insert(id); + } else { + DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart); + assert(!auto_restart || unbounded); raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); - rose.addOutfix(rp); - } -} - -static -bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, - set<NFAVertex> &dead, const CompileContext &cc, - bool prefilter) { - DEBUG_PRINTF("hello\n"); - vector<NFAVertex> nodes; - const CharReach &cr = g[a].char_reach; - bool isDot = cr.all(); - bool unbounded = false; - bool exhaustible = can_exhaust(g, rm); - - while (true) { - if (is_special(a, g)) { - DEBUG_PRINTF("stopped puffing due to special vertex\n"); - break; - } - - if (g[a].char_reach != cr) { - DEBUG_PRINTF("stopped puffing due to change in character " - "reachability\n"); - break; - } - - if (proper_in_degree(a, g) != 1) { - DEBUG_PRINTF("stopped puffing due to in degree != 1\n"); - break; - } - - size_t outDegree = out_degree(a, g); - if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) { - 
DEBUG_PRINTF("stopping puffing due to out degree\n"); - break; - } - - if (hasSelfLoop(a, g)) { - DEBUG_PRINTF("has self-loop, marking unbounded\n"); - unbounded = true; - } - - nodes.push_back(a); + rose.addOutfix(rp); + } +} + +static +bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, + set<NFAVertex> &dead, const CompileContext &cc, + bool prefilter) { + DEBUG_PRINTF("hello\n"); + vector<NFAVertex> nodes; + const CharReach &cr = g[a].char_reach; + bool isDot = cr.all(); + bool unbounded = false; + bool exhaustible = can_exhaust(g, rm); + + while (true) { + if (is_special(a, g)) { + DEBUG_PRINTF("stopped puffing due to special vertex\n"); + break; + } + + if (g[a].char_reach != cr) { + DEBUG_PRINTF("stopped puffing due to change in character " + "reachability\n"); + break; + } + + if (proper_in_degree(a, g) != 1) { + DEBUG_PRINTF("stopped puffing due to in degree != 1\n"); + break; + } + + size_t outDegree = out_degree(a, g); + if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) { + DEBUG_PRINTF("stopping puffing due to out degree\n"); + break; + } + + if (hasSelfLoop(a, g)) { + DEBUG_PRINTF("has self-loop, marking unbounded\n"); + unbounded = true; + } + + nodes.push_back(a); DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index, - in_degree(a, g)); - - a = getSoleSourceVertex(g, a); - - assert(a); /* already checked that old a had a proper in degree of 1 */ - - // Snark: we can't handle this case, because we can only handle a - // single report ID on a vertex - if (is_match_vertex(a, g)) { - DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n"); - if (!nodes.empty()) { - nodes.pop_back(); - } - break; - } - } - - if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) { - for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) { - if (is_special(u, g)) { - DEBUG_PRINTF("pop\n"); - a = nodes.back(); - nodes.pop_back(); - break; - } - } - } - - if (a != g.startDs && edge(g.startDs, a, g).second - && 
proper_out_degree(a, g) == 1 - && g[a].char_reach == cr) { - nodes.push_back(a); - a = g.startDs; - } - - bool auto_restart = false; - + in_degree(a, g)); + + a = getSoleSourceVertex(g, a); + + assert(a); /* already checked that old a had a proper in degree of 1 */ + + // Snark: we can't handle this case, because we can only handle a + // single report ID on a vertex + if (is_match_vertex(a, g)) { + DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n"); + if (!nodes.empty()) { + nodes.pop_back(); + } + break; + } + } + + if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) { + for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) { + if (is_special(u, g)) { + DEBUG_PRINTF("pop\n"); + a = nodes.back(); + nodes.pop_back(); + break; + } + } + } + + if (a != g.startDs && edge(g.startDs, a, g).second + && proper_out_degree(a, g) == 1 + && g[a].char_reach == cr) { + nodes.push_back(a); + a = g.startDs; + } + + bool auto_restart = false; + DEBUG_PRINTF("a = %zu\n", g[a].index); - - if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) { + + if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) { DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index); - if (nodes.size() < MIN_PUFF_LENGTH) { - return false; - } else { - DEBUG_PRINTF("mark unbounded\n"); - unbounded = true; - a = g.start; - auto_restart = !isDot; - } - } - - bool supported = false; - bool fixed_depth = isFixedDepth(g, nodes.back()); - - if (exhaustible) { - supported = true; - } else if (fixed_depth) { - supported = true; - } else if (unbounded) { - /* any C{n, } can be supported as all ranges will be squashed together - * only need to track the first */ - supported = true; - } else if (triggerResetsPuff(g, nodes.back())) { - supported = true; - } else if (triggerFloodsPuff(g, nodes.back())) { - DEBUG_PRINTF("trigger floods puff\n"); - supported = true; - unbounded = true; - } - - if (!supported) { - DEBUG_PRINTF("not supported\n"); - return false; - } - - if 
(cc.grey.puffImproveHead && a != g.start) { - if (edge(g.startDs, a, g).second) { - goto skip_improve; /* direct sds cases are better handled by auto - * restarting puffettes */ - } - - if (fixed_depth) { - goto skip_improve; /* no danger of trigger floods */ - } - - /* if we come after something literalish don't bother */ - if (g[a].char_reach.count() <= 2 - && in_degree(a, g) == 1 - && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) { - goto skip_improve; - } - - if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) { - return false; /* not enough of the puff left to worth bothering - about */ - } - - improveHead(g, &a, &nodes); - skip_improve:; - } - - assert(!nodes.empty()); - const auto &reports = g[nodes[0]].reports; - assert(!reports.empty()); - - for (auto report : reports) { - const Report &ir = rm.getReport(report); - const bool highlander = ir.ekey != INVALID_EKEY; - if (!unbounded && highlander && !isSimpleExhaustible(ir)) { - DEBUG_PRINTF("report %u is bounded highlander but not simple " - "exhaustible\n", - report); - return false; - } - - if (ir.type == INTERNAL_ROSE_CHAIN) { - DEBUG_PRINTF("puffettes cannot be chained together\n"); - return false; - } - } - - NFAVertex puffv = nodes.back(); + if (nodes.size() < MIN_PUFF_LENGTH) { + return false; + } else { + DEBUG_PRINTF("mark unbounded\n"); + unbounded = true; + a = g.start; + auto_restart = !isDot; + } + } + + bool supported = false; + bool fixed_depth = isFixedDepth(g, nodes.back()); + + if (exhaustible) { + supported = true; + } else if (fixed_depth) { + supported = true; + } else if (unbounded) { + /* any C{n, } can be supported as all ranges will be squashed together + * only need to track the first */ + supported = true; + } else if (triggerResetsPuff(g, nodes.back())) { + supported = true; + } else if (triggerFloodsPuff(g, nodes.back())) { + DEBUG_PRINTF("trigger floods puff\n"); + supported = true; + unbounded = true; + } + + if (!supported) { + DEBUG_PRINTF("not supported\n"); + return 
false; + } + + if (cc.grey.puffImproveHead && a != g.start) { + if (edge(g.startDs, a, g).second) { + goto skip_improve; /* direct sds cases are better handled by auto + * restarting puffettes */ + } + + if (fixed_depth) { + goto skip_improve; /* no danger of trigger floods */ + } + + /* if we come after something literalish don't bother */ + if (g[a].char_reach.count() <= 2 + && in_degree(a, g) == 1 + && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) { + goto skip_improve; + } + + if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) { + return false; /* not enough of the puff left to worth bothering + about */ + } + + improveHead(g, &a, &nodes); + skip_improve:; + } + + assert(!nodes.empty()); + const auto &reports = g[nodes[0]].reports; + assert(!reports.empty()); + + for (auto report : reports) { + const Report &ir = rm.getReport(report); + const bool highlander = ir.ekey != INVALID_EKEY; + if (!unbounded && highlander && !isSimpleExhaustible(ir)) { + DEBUG_PRINTF("report %u is bounded highlander but not simple " + "exhaustible\n", + report); + return false; + } + + if (ir.type == INTERNAL_ROSE_CHAIN) { + DEBUG_PRINTF("puffettes cannot be chained together\n"); + return false; + } + } + + NFAVertex puffv = nodes.back(); assert(puffv != NGHolder::null_vertex()); - u32 width = countChain(g, nodes.back()); - - flat_set<ReportID> chain_reports; - - for (auto report : reports) { - constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded, - auto_restart, rose, rm, chain_reports, prefilter); - } - - if (!chain_reports.empty()) { - wireNewAccepts(g, puffv, chain_reports); - } - - dead.insert(nodes.begin(), nodes.end()); - return true; -} - -bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g, - bool prefilter, const CompileContext &cc) { - if (!cc.grey.allowPuff) { - return false; - } - - size_t count = 0; - set<NFAVertex> dead; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (doComponent(rose, rm, g, v, dead, cc, 
prefilter)) { - count++; - } - } - - if (!dead.empty()) { - remove_vertices(dead, g); - pruneUseless(g); - } - - DEBUG_PRINTF("puffs: %zu\n", count); - return num_vertices(g) <= N_SPECIALS; -} - -bool isPuffable(const NGHolder &g, bool fixed_depth, - const ReportManager &rm, const Grey &grey) { - if (!grey.allowPuff) { - return false; - } - - if (!onlyOneTop(g)) { - DEBUG_PRINTF("more than one top\n"); - return false; - } - - const set<ReportID> reports = all_reports(g); - if (reports.size() != 1) { - DEBUG_PRINTF("too many reports\n"); - return false; - } - - const Report &ir = rm.getReport(*reports.begin()); - - if (ir.type == INTERNAL_ROSE_CHAIN) { - DEBUG_PRINTF("puffettes cannot be chained together\n"); - return false; - } - - PureRepeat repeat; - if (!isPureRepeat(g, repeat)) { - DEBUG_PRINTF("not pure bounded repeat\n"); - return false; - } - - if (repeat.bounds.min == depth(0)) { - DEBUG_PRINTF("repeat min bound is zero\n"); - return false; - } - - // We can puff if: - // (a) repeat is {N,}; or - // (b) repeat is {N} and fixed-depth, or highlander (and will accept the - // first match) - - DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str()); - - if (repeat.bounds.max.is_infinite()) { - return true; - } - - if (repeat.bounds.min == repeat.bounds.max) { - if (fixed_depth) { - DEBUG_PRINTF("fixed depth\n"); - return true; - } - - const bool highlander = ir.ekey != INVALID_EKEY; - - // If we're highlander, we must be simple-exhaustible as well. 
- if (highlander && isSimpleExhaustible(ir)) { - return true; - } - } - - return false; -} - -} // namespace ue2 + u32 width = countChain(g, nodes.back()); + + flat_set<ReportID> chain_reports; + + for (auto report : reports) { + constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded, + auto_restart, rose, rm, chain_reports, prefilter); + } + + if (!chain_reports.empty()) { + wireNewAccepts(g, puffv, chain_reports); + } + + dead.insert(nodes.begin(), nodes.end()); + return true; +} + +bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g, + bool prefilter, const CompileContext &cc) { + if (!cc.grey.allowPuff) { + return false; + } + + size_t count = 0; + set<NFAVertex> dead; + + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (doComponent(rose, rm, g, v, dead, cc, prefilter)) { + count++; + } + } + + if (!dead.empty()) { + remove_vertices(dead, g); + pruneUseless(g); + } + + DEBUG_PRINTF("puffs: %zu\n", count); + return num_vertices(g) <= N_SPECIALS; +} + +bool isPuffable(const NGHolder &g, bool fixed_depth, + const ReportManager &rm, const Grey &grey) { + if (!grey.allowPuff) { + return false; + } + + if (!onlyOneTop(g)) { + DEBUG_PRINTF("more than one top\n"); + return false; + } + + const set<ReportID> reports = all_reports(g); + if (reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); + return false; + } + + const Report &ir = rm.getReport(*reports.begin()); + + if (ir.type == INTERNAL_ROSE_CHAIN) { + DEBUG_PRINTF("puffettes cannot be chained together\n"); + return false; + } + + PureRepeat repeat; + if (!isPureRepeat(g, repeat)) { + DEBUG_PRINTF("not pure bounded repeat\n"); + return false; + } + + if (repeat.bounds.min == depth(0)) { + DEBUG_PRINTF("repeat min bound is zero\n"); + return false; + } + + // We can puff if: + // (a) repeat is {N,}; or + // (b) repeat is {N} and fixed-depth, or highlander (and will accept the + // first match) + + DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str()); + + if 
(repeat.bounds.max.is_infinite()) { + return true; + } + + if (repeat.bounds.min == repeat.bounds.max) { + if (fixed_depth) { + DEBUG_PRINTF("fixed depth\n"); + return true; + } + + const bool highlander = ir.ekey != INVALID_EKEY; + + // If we're highlander, we must be simple-exhaustible as well. + if (highlander && isSimpleExhaustible(ir)) { + return true; + } + } + + return false; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.h b/contrib/libs/hyperscan/src/nfagraph/ng_puff.h index c31e7540ba..af0237a594 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.h @@ -1,56 +1,56 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Puff construction from NGHolder. - */ - -#ifndef NG_PUFF_H -#define NG_PUFF_H - -namespace ue2 { - -struct CompileContext; -struct Grey; -class RoseBuild; -class NGHolder; -class ReportManager; - -/** \brief Split off portions of the graph that are implementable as Puff - * engines. Returns true if the entire graph is consumed. */ -bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g, - bool prefilter, const CompileContext &cc); - -/** \brief True if the entire graph in \a g could be constructed as a Puff - * engine. */ -bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm, - const Grey &grey); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Puff construction from NGHolder. + */ + +#ifndef NG_PUFF_H +#define NG_PUFF_H + +namespace ue2 { + +struct CompileContext; +struct Grey; +class RoseBuild; +class NGHolder; +class ReportManager; + +/** \brief Split off portions of the graph that are implementable as Puff + * engines. Returns true if the entire graph is consumed. */ +bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g, + bool prefilter, const CompileContext &cc); + +/** \brief True if the entire graph in \a g could be constructed as a Puff + * engine. 
*/ +bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm, + const Grey &grey); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp index 06b9daeeca..fc46907024 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp @@ -1,899 +1,899 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file - * \brief NFA graph reductions. - * - * This code attempts to make the NFA graph smaller by performing a number of - * local transformations: - * - * ### (1) removal of redundant vertices: - * - * v is redundant wrt to u if succ(v) is a subset of succ(u) - * AND pred(v) is a subset of pred(u) - * AND cr(v) is a subset of cr(u) - * - * ### (2) 'diamond' transformation: - * - * given succ(v) == succ(u) and pred(v) == pred(u), - * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v), - * and cr(w) = union(cr(v), cr(u)) - * - * ### (3) locally identifiable left equivalence: - * - * given pred(v) == pred(u) (**) and cr(v) == cr(u), - * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v), - * and succ(w) = union(succ(v), succ(u)) - * - * ### (4) locally identifiable right equivalence: - * - * given succ(v) == succ(u) (**) and cr(v) == cr(u), - * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v), - * and pred(w) = union(pred(v), pred(u)) - * - * NOTE (**): for left and right equivalence, we can also do the transform if - * set(u) contains u, set(v) contains v and the sets are otherwise equal. This - * enables equivalent vertices with self-loops to be merged. - * - * If v and u raise accepts, they can only be merged if they raise the same - * report IDs. - * - * Transformations are applied repeatedly until the graph stops changing. - * - * Note that the final graph may depend on the order in which these - * transformations are applied. In order to reduce the non-determinism the - * following order is imposed: (1); (2); (3) + (4). 
- */ -#include "ng_redundancy.h" - -#include "ng_holder.h" -#include "ng_calc_components.h" -#include "ng_dominators.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA graph reductions. 
+ * + * This code attempts to make the NFA graph smaller by performing a number of + * local transformations: + * + * ### (1) removal of redundant vertices: + * + * v is redundant wrt to u if succ(v) is a subset of succ(u) + * AND pred(v) is a subset of pred(u) + * AND cr(v) is a subset of cr(u) + * + * ### (2) 'diamond' transformation: + * + * given succ(v) == succ(u) and pred(v) == pred(u), + * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v), + * and cr(w) = union(cr(v), cr(u)) + * + * ### (3) locally identifiable left equivalence: + * + * given pred(v) == pred(u) (**) and cr(v) == cr(u), + * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v), + * and succ(w) = union(succ(v), succ(u)) + * + * ### (4) locally identifiable right equivalence: + * + * given succ(v) == succ(u) (**) and cr(v) == cr(u), + * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v), + * and pred(w) = union(pred(v), pred(u)) + * + * NOTE (**): for left and right equivalence, we can also do the transform if + * set(u) contains u, set(v) contains v and the sets are otherwise equal. This + * enables equivalent vertices with self-loops to be merged. + * + * If v and u raise accepts, they can only be merged if they raise the same + * report IDs. + * + * Transformations are applied repeatedly until the graph stops changing. + * + * Note that the final graph may depend on the order in which these + * transformations are applied. In order to reduce the non-determinism the + * following order is imposed: (1); (2); (3) + (4). 
+ */ +#include "ng_redundancy.h" + +#include "ng_holder.h" +#include "ng_calc_components.h" +#include "ng_dominators.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph_range.h" - -#include <algorithm> -#include <cassert> -#include <map> -#include <set> -#include <vector> - -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/reverse_graph.hpp> - -using namespace std; - -namespace ue2 { - -namespace { - -/** Precalculated (and maintained) information about a vertex. */ -class VertexInfo { -public: - flat_set<NFAVertex> pred; //!< predecessors of this vertex - flat_set<NFAVertex> succ; //!< successors of this vertex - bool isAccept = false; //!< does this vertex lead to accept? - bool isRemoved = false; //!< have we already removed this vertex? - - size_t inDegree() const { return pred.size(); } - size_t outDegree() const { return succ.size(); } -}; - -class VertexInfoMap { -public: - explicit VertexInfoMap(const NGHolder &gg) - : g(gg), infos(num_vertices(gg)) {} - VertexInfo &operator[](NFAVertex v) { - u32 i = g[v].index; - assert(i < infos.size()); - return infos[i]; - } - - const VertexInfo &operator[](NFAVertex v) const { - u32 i = g[v].index; - assert(i < infos.size()); - return infos[i]; - } - -private: - const NGHolder &g; - vector<VertexInfo> infos; -}; - -} // namespace - -/** Populates the info map with their predecessor and successor states, and - * whether they are accept states. 
*/ -static -void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) { - for (auto v : vertices_range(g)) { - VertexInfo &info = infoMap[v]; - assert(info.pred.empty() && info.succ.empty()); - - // Build successor and predecessor sets - insert(&info.pred, inv_adjacent_vertices(v, g)); - insert(&info.succ, adjacent_vertices(v, g)); - - // Note whether the vertex is an accept state - if (!is_special(v, g)) { - if (contains(info.succ, g.accept) - || contains(info.succ, g.acceptEod)) { - info.isAccept = true; - } - } - } -} - -/** Helper function to take the intersection of two sorted vertex sets - * in-place. */ -static -void inplaceIntersection(vector<NFAVertex> &vset1, - const flat_set<NFAVertex> &vset2) { +#include "util/graph_range.h" + +#include <algorithm> +#include <cassert> +#include <map> +#include <set> +#include <vector> + +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/reverse_graph.hpp> + +using namespace std; + +namespace ue2 { + +namespace { + +/** Precalculated (and maintained) information about a vertex. */ +class VertexInfo { +public: + flat_set<NFAVertex> pred; //!< predecessors of this vertex + flat_set<NFAVertex> succ; //!< successors of this vertex + bool isAccept = false; //!< does this vertex lead to accept? + bool isRemoved = false; //!< have we already removed this vertex? 
+ + size_t inDegree() const { return pred.size(); } + size_t outDegree() const { return succ.size(); } +}; + +class VertexInfoMap { +public: + explicit VertexInfoMap(const NGHolder &gg) + : g(gg), infos(num_vertices(gg)) {} + VertexInfo &operator[](NFAVertex v) { + u32 i = g[v].index; + assert(i < infos.size()); + return infos[i]; + } + + const VertexInfo &operator[](NFAVertex v) const { + u32 i = g[v].index; + assert(i < infos.size()); + return infos[i]; + } + +private: + const NGHolder &g; + vector<VertexInfo> infos; +}; + +} // namespace + +/** Populates the info map with their predecessor and successor states, and + * whether they are accept states. */ +static +void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) { + for (auto v : vertices_range(g)) { + VertexInfo &info = infoMap[v]; + assert(info.pred.empty() && info.succ.empty()); + + // Build successor and predecessor sets + insert(&info.pred, inv_adjacent_vertices(v, g)); + insert(&info.succ, adjacent_vertices(v, g)); + + // Note whether the vertex is an accept state + if (!is_special(v, g)) { + if (contains(info.succ, g.accept) + || contains(info.succ, g.acceptEod)) { + info.isAccept = true; + } + } + } +} + +/** Helper function to take the intersection of two sorted vertex sets + * in-place. */ +static +void inplaceIntersection(vector<NFAVertex> &vset1, + const flat_set<NFAVertex> &vset2) { const NFAVertex GONE = NGHolder::null_vertex(); - - vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end(); - flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end(); - - while ((it != ite) && (jt != jte)) { - assert(*it != GONE); - - if (*it < *jt) { - // present in vset1 but not in vset2. Set to null, remove in a - // second pass. - *it = GONE; - ++it; - } else if (*jt < *it) { - // present in vset2 but not in vset1, skip. - ++jt; - } else { - // present in both sets. - ++it; ++jt; - } - } - - // Left overs are only in that set. 
- vset1.erase(it, ite); - - // Remove nulls created above. - vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end()); -} - -/** Find the intersection of the successors of our predecessors. */ -static -void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet, - const VertexInfoMap &infoMap, - vector<NFAVertex> &intersection, - bool considerSelf = true /* follow self loops */) { - /* find a good seed for the intersection */ - const flat_set<NFAVertex> *best = nullptr; - for (auto u : predSet) { - if (!considerSelf && u == v) { - continue; - } - - const flat_set<NFAVertex> &succSet = infoMap[u].succ; - if (!best || succSet.size() <= best->size()) { - best = &succSet; - - // Break out if we've reduced our intersection to [v] - if (best->size() == 1) { - assert(*(best->begin()) == v); - intersection.push_back(v); - return; - } - } - } - - if (best) { - insert(&intersection, intersection.end(), *best); - } - - for (auto u : predSet) { - if (!considerSelf && u == v) { - continue; - } - - inplaceIntersection(intersection, infoMap[u].succ); - - // Check: intersection should always be at least size 1 - assert(!intersection.empty()); - - // Break out if we've reduced our intersection to [v] - if (intersection.size() == 1) { - assert(*intersection.begin() == v); - return; - } - } -} - -/** Find the intersection of the predecessors of our successors. 
*/ -static -void predSuccIntersection(const NFAVertex v, - const flat_set<NFAVertex> &succSet, - const VertexInfoMap &infoMap, - vector<NFAVertex> &intersection, - bool considerSelf = true /* follow self loops */) { - /* find a good seed for the intersection */ - const flat_set<NFAVertex> *best = nullptr; - for (auto w : succSet) { - if (!considerSelf && w == v) { - continue; - } - - const flat_set<NFAVertex> &predSet = infoMap[w].pred; - if (!best || predSet.size() <= best->size()) { - best = &predSet; - - // Break out if we've reduced our intersection to [v] - if (best->size() == 1) { - assert(*(best->begin()) == v); - intersection.push_back(v); - return; - } - } - } - - if (best) { - insert(&intersection, intersection.end(), *best); - } - - for (auto w : succSet) { - if (!considerSelf && w == v) { - continue; - } - - inplaceIntersection(intersection, infoMap[w].pred); - - // Check: intersection should always be at least size 1 - assert(!intersection.empty()); - - // Break out if we've reduced our intersection to [v] - if (intersection.size() == 1) { - assert(*intersection.begin() == v); - return; - } - } -} - -/** Update containers to take into account the removal of vertex v. 
*/ -static -void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, - set<NFAVertex> &removable) { - VertexInfo &info = infoMap[v]; - assert(!info.isRemoved); - assert(!contains(removable, v)); - info.isRemoved = true; - removable.insert(v); - - // remove v from its predecessors' successors - for (auto u : info.pred) { - infoMap[u].succ.erase(v); - } - - // remove v from its successors' predecessors - for (auto w : info.succ) { - infoMap[w].pred.erase(v); - } -} - -static -bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { + + vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end(); + flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end(); + + while ((it != ite) && (jt != jte)) { + assert(*it != GONE); + + if (*it < *jt) { + // present in vset1 but not in vset2. Set to null, remove in a + // second pass. + *it = GONE; + ++it; + } else if (*jt < *it) { + // present in vset2 but not in vset1, skip. + ++jt; + } else { + // present in both sets. + ++it; ++jt; + } + } + + // Left overs are only in that set. + vset1.erase(it, ite); + + // Remove nulls created above. + vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end()); +} + +/** Find the intersection of the successors of our predecessors. 
*/ +static +void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet, + const VertexInfoMap &infoMap, + vector<NFAVertex> &intersection, + bool considerSelf = true /* follow self loops */) { + /* find a good seed for the intersection */ + const flat_set<NFAVertex> *best = nullptr; + for (auto u : predSet) { + if (!considerSelf && u == v) { + continue; + } + + const flat_set<NFAVertex> &succSet = infoMap[u].succ; + if (!best || succSet.size() <= best->size()) { + best = &succSet; + + // Break out if we've reduced our intersection to [v] + if (best->size() == 1) { + assert(*(best->begin()) == v); + intersection.push_back(v); + return; + } + } + } + + if (best) { + insert(&intersection, intersection.end(), *best); + } + + for (auto u : predSet) { + if (!considerSelf && u == v) { + continue; + } + + inplaceIntersection(intersection, infoMap[u].succ); + + // Check: intersection should always be at least size 1 + assert(!intersection.empty()); + + // Break out if we've reduced our intersection to [v] + if (intersection.size() == 1) { + assert(*intersection.begin() == v); + return; + } + } +} + +/** Find the intersection of the predecessors of our successors. 
*/ +static +void predSuccIntersection(const NFAVertex v, + const flat_set<NFAVertex> &succSet, + const VertexInfoMap &infoMap, + vector<NFAVertex> &intersection, + bool considerSelf = true /* follow self loops */) { + /* find a good seed for the intersection */ + const flat_set<NFAVertex> *best = nullptr; + for (auto w : succSet) { + if (!considerSelf && w == v) { + continue; + } + + const flat_set<NFAVertex> &predSet = infoMap[w].pred; + if (!best || predSet.size() <= best->size()) { + best = &predSet; + + // Break out if we've reduced our intersection to [v] + if (best->size() == 1) { + assert(*(best->begin()) == v); + intersection.push_back(v); + return; + } + } + } + + if (best) { + insert(&intersection, intersection.end(), *best); + } + + for (auto w : succSet) { + if (!considerSelf && w == v) { + continue; + } + + inplaceIntersection(intersection, infoMap[w].pred); + + // Check: intersection should always be at least size 1 + assert(!intersection.empty()); + + // Break out if we've reduced our intersection to [v] + if (intersection.size() == 1) { + assert(*intersection.begin() == v); + return; + } + } +} + +/** Update containers to take into account the removal of vertex v. */ +static +void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, + set<NFAVertex> &removable) { + VertexInfo &info = infoMap[v]; + assert(!info.isRemoved); + assert(!contains(removable, v)); + info.isRemoved = true; + removable.insert(v); + + // remove v from its predecessors' successors + for (auto u : info.pred) { + infoMap[u].succ.erase(v); + } + + // remove v from its successors' predecessors + for (auto w : info.succ) { + infoMap[w].pred.erase(v); + } +} + +static +bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { NFAEdge e = edge(g.start, v, g); return e && !g[e].tops.empty(); -} - -/** Transform (1), removal of redundant vertices. 
*/ -static -bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, - set<NFAVertex> &removable) { - /* useless merges can be done in any order, no need to take any care with - * ordering */ - - // Temporary vectors used for intersections below - vector<NFAVertex> succPredSet, predSuccSet, intersection; - - bool changed = false; - for (auto v : vertices_range(g)) { - VertexInfo &info = infoMap[v]; - - if (info.isRemoved) { - continue; - } - - assert(!contains(removable, v)); - - if (is_special(v, g)) { - continue; - } - - /* we do not need to check for out edge tops - as only specials (start) - * can have tops and they are already disqualified. */ - if (hasInEdgeTops(g, v)) { - continue; // Conservatively skip anything with nonzero tops. - } - - if (info.pred.empty() || info.succ.empty()) { +} + +/** Transform (1), removal of redundant vertices. */ +static +bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, + set<NFAVertex> &removable) { + /* useless merges can be done in any order, no need to take any care with + * ordering */ + + // Temporary vectors used for intersections below + vector<NFAVertex> succPredSet, predSuccSet, intersection; + + bool changed = false; + for (auto v : vertices_range(g)) { + VertexInfo &info = infoMap[v]; + + if (info.isRemoved) { + continue; + } + + assert(!contains(removable, v)); + + if (is_special(v, g)) { + continue; + } + + /* we do not need to check for out edge tops - as only specials (start) + * can have tops and they are already disqualified. */ + if (hasInEdgeTops(g, v)) { + continue; // Conservatively skip anything with nonzero tops. 
+ } + + if (info.pred.empty() || info.succ.empty()) { DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index); - assert(0); // non-special states should always have succ/pred lists - continue; - } - - // The following cases are more complex and rely on the intersection of - // Succ(Pred(v)) and Pred(Succ(v)) - - // Compute intersections, operating on the smaller set first - // Note that we use vectors here, as set_intersection underneath - // guarantees sorted output, and vectors were quite a bit - // faster than sets or lists. - - succPredSet.clear(); - predSuccSet.clear(); - - if (info.pred.size() <= info.succ.size()) { - succPredIntersection(v, info.pred, infoMap, succPredSet); - if (succPredSet.size() == 1) { - // nobody in here but us chickens - assert(*succPredSet.begin() == v); - continue; - } - predSuccIntersection(v, info.succ, infoMap, predSuccSet); - if (predSuccSet.size() == 1) { - assert(*predSuccSet.begin() == v); - continue; - } - } else { - predSuccIntersection(v, info.succ, infoMap, predSuccSet); - if (predSuccSet.size() == 1) { - assert(*predSuccSet.begin() == v); - continue; - } - succPredIntersection(v, info.pred, infoMap, succPredSet); - if (succPredSet.size() == 1) { - assert(*succPredSet.begin() == v); - continue; - } - } - - // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) - intersection.clear(); - set_intersection(succPredSet.begin(), succPredSet.end(), - predSuccSet.begin(), predSuccSet.end(), - back_inserter(intersection)); - - /* Boring if it is just us in the intersection */ - if (intersection.size() < 2) { - continue; - } - - // Compare char_reach, mark v for removal if any members of - // the intersection have an equal or greater reach - const CharReach &currReach = g[v].char_reach; - const auto &currReports = g[v].reports; - for (auto t : intersection) { - const VertexInfo &info2 = infoMap[t]; - - /* start is never a succ of a state, so will never be in the - * predsucc/succpred intersection */ - assert(t != 
g.start); - - if (t == v || info2.isRemoved) { - continue; - } - - // For each candidate C to make V redundant, check: - // if V is an accept state, C must be an accept state for - // the same pattern - // pred(C) is a superset of pred(V) - // succ(C) is a superset of succ(V) - // reach(C) is a superset of reach(V) - // - // Note: pred/sec tests are covered by the intersections - // calculated above. - - /* note: links to accepts are also tracked in succs */ - if (info.isAccept && currReports != g[t].reports) { - continue; - } - - if (som) { - if (t == g.startDs) { - continue; - } - if (is_virtual_start(t, g) != is_virtual_start(v, g)) { - continue; - } - } - - /* we do not need to check for out edge tops - as only start - * can have tops and it has already been ruled out. */ - if (hasInEdgeTops(g, t)) { - continue; // Conservatively skip anything with nonzero tops. - } - - CharReach &otherReach = g[t].char_reach; - if (currReach.isSubsetOf(otherReach)) { + assert(0); // non-special states should always have succ/pred lists + continue; + } + + // The following cases are more complex and rely on the intersection of + // Succ(Pred(v)) and Pred(Succ(v)) + + // Compute intersections, operating on the smaller set first + // Note that we use vectors here, as set_intersection underneath + // guarantees sorted output, and vectors were quite a bit + // faster than sets or lists. 
+ + succPredSet.clear(); + predSuccSet.clear(); + + if (info.pred.size() <= info.succ.size()) { + succPredIntersection(v, info.pred, infoMap, succPredSet); + if (succPredSet.size() == 1) { + // nobody in here but us chickens + assert(*succPredSet.begin() == v); + continue; + } + predSuccIntersection(v, info.succ, infoMap, predSuccSet); + if (predSuccSet.size() == 1) { + assert(*predSuccSet.begin() == v); + continue; + } + } else { + predSuccIntersection(v, info.succ, infoMap, predSuccSet); + if (predSuccSet.size() == 1) { + assert(*predSuccSet.begin() == v); + continue; + } + succPredIntersection(v, info.pred, infoMap, succPredSet); + if (succPredSet.size() == 1) { + assert(*succPredSet.begin() == v); + continue; + } + } + + // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) + intersection.clear(); + set_intersection(succPredSet.begin(), succPredSet.end(), + predSuccSet.begin(), predSuccSet.end(), + back_inserter(intersection)); + + /* Boring if it is just us in the intersection */ + if (intersection.size() < 2) { + continue; + } + + // Compare char_reach, mark v for removal if any members of + // the intersection have an equal or greater reach + const CharReach &currReach = g[v].char_reach; + const auto &currReports = g[v].reports; + for (auto t : intersection) { + const VertexInfo &info2 = infoMap[t]; + + /* start is never a succ of a state, so will never be in the + * predsucc/succpred intersection */ + assert(t != g.start); + + if (t == v || info2.isRemoved) { + continue; + } + + // For each candidate C to make V redundant, check: + // if V is an accept state, C must be an accept state for + // the same pattern + // pred(C) is a superset of pred(V) + // succ(C) is a superset of succ(V) + // reach(C) is a superset of reach(V) + // + // Note: pred/sec tests are covered by the intersections + // calculated above. 
+ + /* note: links to accepts are also tracked in succs */ + if (info.isAccept && currReports != g[t].reports) { + continue; + } + + if (som) { + if (t == g.startDs) { + continue; + } + if (is_virtual_start(t, g) != is_virtual_start(v, g)) { + continue; + } + } + + /* we do not need to check for out edge tops - as only start + * can have tops and it has already been ruled out. */ + if (hasInEdgeTops(g, t)) { + continue; // Conservatively skip anything with nonzero tops. + } + + CharReach &otherReach = g[t].char_reach; + if (currReach.isSubsetOf(otherReach)) { DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", - g[v].index, g[t].index); - markForRemoval(v, infoMap, removable); - changed = true; - break; - } - } - } - - return changed; -} - -/** Transform (2), diamond merge pass. */ -static -bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, - set<NFAVertex> &removable) { - // Temporary vectors used for intersections below - vector<NFAVertex> succPredSet, predSuccSet, intersection; - - bool changed = false; - for (auto v : vertices_range(g)) { - VertexInfo &info = infoMap[v]; - - if (info.isRemoved) { - continue; - } - - assert(!contains(removable, v)); - - if (is_special(v, g)) { - continue; - } - - /* we do not need to check for out edge tops - as only specials (start) - * can have tops and they are already disqualified. */ - if (hasInEdgeTops(g, v)) { - continue; // Conservatively skip anything with nonzero tops. - } - - if (info.pred.empty() || info.succ.empty()) { - assert(0); // non-special states should always have succ/pred lists - continue; - } - - // The following cases are more complex and rely on the intersection of - // Succ(Pred(v)) and Pred(Succ(v)) - - // Compute intersections, operating on the smaller set first - // Note that we use vectors here, as set_intersection underneath - // guarantees sorted output, and vectors were quite a bit faster than - // sets or lists. 
- - succPredSet.clear(); - predSuccSet.clear(); - - if (info.pred.size() <= info.succ.size()) { - succPredIntersection(v, info.pred, infoMap, succPredSet); - if (succPredSet.size() == 1) { - // nobody in here but us chickens - assert(*succPredSet.begin() == v); - continue; - } - predSuccIntersection(v, info.succ, infoMap, predSuccSet); - if (predSuccSet.size() == 1) { - assert(*predSuccSet.begin() == v); - continue; - } - } else { - predSuccIntersection(v, info.succ, infoMap, predSuccSet); - if (predSuccSet.size() == 1) { - assert(*predSuccSet.begin() == v); - continue; - } - succPredIntersection(v, info.pred, infoMap, succPredSet); - if (succPredSet.size() == 1) { - assert(*succPredSet.begin() == v); - continue; - } - } - - // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) - intersection.clear(); - set_intersection(succPredSet.begin(), succPredSet.end(), - predSuccSet.begin(), predSuccSet.end(), - back_inserter(intersection)); - - /* Boring if it is just us in the intersection */ - if (intersection.size() < 2) { - continue; - } - - const CharReach &currReach = g[v].char_reach; - const auto &currReports = g[v].reports; - for (auto t : intersection) { - const VertexInfo &info2 = infoMap[t]; - - if (t == v || info2.isRemoved || is_special(t, g)) { - continue; - } - - /* note: links to accepts are also tracked in succs */ - if (info.isAccept && currReports != g[t].reports) { - continue; - } - - /* we do not need to check for out edge tops - as only specials - * (start) can have tops and they are already disqualified. */ - if (hasInEdgeTops(g, t)) { - continue; // Conservatively skip anything with nonzero tops. - } - - if (som) { - if (is_virtual_start(v, g) != is_virtual_start(t, g)) { - continue; // can only merge like with like. 
- } - } - - // If in-degree of v == in-degree of target - // and out-degree of v == out-degree of target - // (because pred and succ are supersets) - // then combine charreach of v into target and remove v - if (info.inDegree() == info2.inDegree() - && info.outDegree() == info2.outDegree()) { - // add character reachability of v into target - CharReach &otherReach = g[t].char_reach; - otherReach |= currReach; - // v can be removed + g[v].index, g[t].index); + markForRemoval(v, infoMap, removable); + changed = true; + break; + } + } + } + + return changed; +} + +/** Transform (2), diamond merge pass. */ +static +bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, + set<NFAVertex> &removable) { + // Temporary vectors used for intersections below + vector<NFAVertex> succPredSet, predSuccSet, intersection; + + bool changed = false; + for (auto v : vertices_range(g)) { + VertexInfo &info = infoMap[v]; + + if (info.isRemoved) { + continue; + } + + assert(!contains(removable, v)); + + if (is_special(v, g)) { + continue; + } + + /* we do not need to check for out edge tops - as only specials (start) + * can have tops and they are already disqualified. */ + if (hasInEdgeTops(g, v)) { + continue; // Conservatively skip anything with nonzero tops. + } + + if (info.pred.empty() || info.succ.empty()) { + assert(0); // non-special states should always have succ/pred lists + continue; + } + + // The following cases are more complex and rely on the intersection of + // Succ(Pred(v)) and Pred(Succ(v)) + + // Compute intersections, operating on the smaller set first + // Note that we use vectors here, as set_intersection underneath + // guarantees sorted output, and vectors were quite a bit faster than + // sets or lists. 
+ + succPredSet.clear(); + predSuccSet.clear(); + + if (info.pred.size() <= info.succ.size()) { + succPredIntersection(v, info.pred, infoMap, succPredSet); + if (succPredSet.size() == 1) { + // nobody in here but us chickens + assert(*succPredSet.begin() == v); + continue; + } + predSuccIntersection(v, info.succ, infoMap, predSuccSet); + if (predSuccSet.size() == 1) { + assert(*predSuccSet.begin() == v); + continue; + } + } else { + predSuccIntersection(v, info.succ, infoMap, predSuccSet); + if (predSuccSet.size() == 1) { + assert(*predSuccSet.begin() == v); + continue; + } + succPredIntersection(v, info.pred, infoMap, succPredSet); + if (succPredSet.size() == 1) { + assert(*succPredSet.begin() == v); + continue; + } + } + + // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) + intersection.clear(); + set_intersection(succPredSet.begin(), succPredSet.end(), + predSuccSet.begin(), predSuccSet.end(), + back_inserter(intersection)); + + /* Boring if it is just us in the intersection */ + if (intersection.size() < 2) { + continue; + } + + const CharReach &currReach = g[v].char_reach; + const auto &currReports = g[v].reports; + for (auto t : intersection) { + const VertexInfo &info2 = infoMap[t]; + + if (t == v || info2.isRemoved || is_special(t, g)) { + continue; + } + + /* note: links to accepts are also tracked in succs */ + if (info.isAccept && currReports != g[t].reports) { + continue; + } + + /* we do not need to check for out edge tops - as only specials + * (start) can have tops and they are already disqualified. */ + if (hasInEdgeTops(g, t)) { + continue; // Conservatively skip anything with nonzero tops. + } + + if (som) { + if (is_virtual_start(v, g) != is_virtual_start(t, g)) { + continue; // can only merge like with like. 
+ } + } + + // If in-degree of v == in-degree of target + // and out-degree of v == out-degree of target + // (because pred and succ are supersets) + // then combine charreach of v into target and remove v + if (info.inDegree() == info2.inDegree() + && info.outDegree() == info2.outDegree()) { + // add character reachability of v into target + CharReach &otherReach = g[t].char_reach; + otherReach |= currReach; + // v can be removed DEBUG_PRINTF("removing redundant vertex %zu and merging " "reachability with vertex %zu\n", - g[v].index, g[t].index); - markForRemoval(v, infoMap, removable); - changed = true; - break; - } - } - } - - return changed; -} - -namespace { - -struct ReachMismatch {}; - -class ReachSubsetVisitor : public boost::default_dfs_visitor { -public: - explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {} - - template <class Graph, class Vertex> - void discover_vertex(const Vertex &v, const Graph &g) const { - if (is_any_start(v, g)) { - return; // start vertices are OK - } else if (is_special(v, g)) { - assert(0); - throw ReachMismatch(); // other special nodes?? - } - - const CharReach &vcr = g[v].char_reach; - DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(), - cr.count()); - if (vcr != (vcr & cr)) { - throw ReachMismatch(); - } - } - -private: - const CharReach &cr; -}; - -/** Terminator function for DFS used in pathReachSubset. 
*/ -template <class Graph, class Vertex> class VertexIs { -public: - explicit VertexIs(const Vertex &v) : vertex(v) {} - bool operator()(const Vertex &v, const Graph &) const { - return v == vertex; - } - -private: - Vertex vertex; -}; - -} // namespace - -/** Returns true if every vertex on paths leading to edge \p e has reachability - * which is a subset of the reachability of \p dom */ -static -bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, - const NGHolder &g) { - const CharReach &domReach = g[dom].char_reach; - if (domReach.all()) { - return true; - } - - NFAVertex start = source(e, g); + g[v].index, g[t].index); + markForRemoval(v, infoMap, removable); + changed = true; + break; + } + } + } + + return changed; +} + +namespace { + +struct ReachMismatch {}; + +class ReachSubsetVisitor : public boost::default_dfs_visitor { +public: + explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {} + + template <class Graph, class Vertex> + void discover_vertex(const Vertex &v, const Graph &g) const { + if (is_any_start(v, g)) { + return; // start vertices are OK + } else if (is_special(v, g)) { + assert(0); + throw ReachMismatch(); // other special nodes?? + } + + const CharReach &vcr = g[v].char_reach; + DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(), + cr.count()); + if (vcr != (vcr & cr)) { + throw ReachMismatch(); + } + } + +private: + const CharReach &cr; +}; + +/** Terminator function for DFS used in pathReachSubset. 
*/ +template <class Graph, class Vertex> class VertexIs { +public: + explicit VertexIs(const Vertex &v) : vertex(v) {} + bool operator()(const Vertex &v, const Graph &) const { + return v == vertex; + } + +private: + Vertex vertex; +}; + +} // namespace + +/** Returns true if every vertex on paths leading to edge \p e has reachability + * which is a subset of the reachability of \p dom */ +static +bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, + const NGHolder &g) { + const CharReach &domReach = g[dom].char_reach; + if (domReach.all()) { + return true; + } + + NFAVertex start = source(e, g); using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>; - map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor; - - // Walk the graph backwards from v, examining each node. We fail (return - // false) if we encounter a node with reach NOT a subset of domReach, and - // we stop searching at dom. - try { + map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor; + + // Walk the graph backwards from v, examining each node. We fail (return + // false) if we encounter a node with reach NOT a subset of domReach, and + // we stop searching at dom. 
+ try { depth_first_visit(RevGraph(g), start, - ReachSubsetVisitor(domReach), - make_assoc_property_map(vertexColor), - VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom)); - } catch(ReachMismatch&) { - return false; - } - - return true; -} - -/** Returns true if every vertex on paths leading from edge \p e has - * reachability which is a subset of the reachability of \p dom */ -static -bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, - const NGHolder &g) { - const CharReach &domReach = g[dom].char_reach; - if (domReach.all()) { - return true; - } - - NFAVertex start = target(e, g); + ReachSubsetVisitor(domReach), + make_assoc_property_map(vertexColor), + VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom)); + } catch(ReachMismatch&) { + return false; + } + + return true; +} + +/** Returns true if every vertex on paths leading from edge \p e has + * reachability which is a subset of the reachability of \p dom */ +static +bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, + const NGHolder &g) { + const CharReach &domReach = g[dom].char_reach; + if (domReach.all()) { + return true; + } + + NFAVertex start = target(e, g); map<NFAVertex, boost::default_color_type> vertexColor; - - // Walk the graph forward from v, examining each node. We fail (return - // false) if we encounter a node with reach NOT a subset of domReach, and - // we stop searching at dom. - try { + + // Walk the graph forward from v, examining each node. We fail (return + // false) if we encounter a node with reach NOT a subset of domReach, and + // we stop searching at dom. 
+ try { depth_first_visit(g, start, ReachSubsetVisitor(domReach), - make_assoc_property_map(vertexColor), + make_assoc_property_map(vertexColor), VertexIs<NGHolder, NFAVertex>(dom)); - } catch(ReachMismatch&) { - return false; - } - - return true; -} - -static -bool allOutsSpecial(NFAVertex v, const NGHolder &g) { - for (auto w : adjacent_vertices_range(v, g)) { - if (!is_special(w, g)) { - return false; - } - } - return true; -} - -static -bool allInsSpecial(NFAVertex v, const NGHolder &g) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_special(u, g)) { - return false; - } - } - return true; -} - -/** Cheaply check whether this graph can't be reduced at all, because it is - * just a chain of vertices with no other edges. */ -static -bool isIrreducible(const NGHolder &g) { - for (auto v : vertices_range(g)) { - // skip specials - if (is_special(v, g)) { - continue; - } - - if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) { - return false; - } - if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) { - return false; - } - } - - /* if calcComponents got sleepy and went home, the above checks don't hold - * as it assumes there is only one connected component. 
*/ - if (isAlternationOfClasses(g)) { - return false; - } - - return true; -} - -static -u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) { - u32 count = 0; - - cyclic.resize(num_vertices(g)); - - for (auto v : vertices_range(g)) { - assert(g[v].index < cyclic.size()); + } catch(ReachMismatch&) { + return false; + } + + return true; +} + +static +bool allOutsSpecial(NFAVertex v, const NGHolder &g) { + for (auto w : adjacent_vertices_range(v, g)) { + if (!is_special(w, g)) { + return false; + } + } + return true; +} + +static +bool allInsSpecial(NFAVertex v, const NGHolder &g) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_special(u, g)) { + return false; + } + } + return true; +} + +/** Cheaply check whether this graph can't be reduced at all, because it is + * just a chain of vertices with no other edges. */ +static +bool isIrreducible(const NGHolder &g) { + for (auto v : vertices_range(g)) { + // skip specials + if (is_special(v, g)) { + continue; + } + + if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) { + return false; + } + if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) { + return false; + } + } + + /* if calcComponents got sleepy and went home, the above checks don't hold + * as it assumes there is only one connected component. 
*/ + if (isAlternationOfClasses(g)) { + return false; + } + + return true; +} + +static +u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) { + u32 count = 0; + + cyclic.resize(num_vertices(g)); + + for (auto v : vertices_range(g)) { + assert(g[v].index < cyclic.size()); if (hasSelfLoop(v, g)) { - count++; + count++; cyclic[g[v].index] = true; - } - } - - return count; -} - -static -void findCyclicDom(NGHolder &g, vector<bool> &cyclic, - set<NFAEdge> &dead, som_type som) { + } + } + + return count; +} + +static +void findCyclicDom(NGHolder &g, vector<bool> &cyclic, + set<NFAEdge> &dead, som_type som) { auto dominators = findDominators(g); - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - // Path in through a dominator (e.g. '.+a?foobar') - NFAVertex dom = dominators[v]; - if (dom && cyclic[g[dom].index] - && edge(dom, v, g).second) { - - if (som && dom == g.startDs) { - continue; - } - + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + // Path in through a dominator (e.g. '.+a?foobar') + NFAVertex dom = dominators[v]; + if (dom && cyclic[g[dom].index] + && edge(dom, v, g).second) { + + if (som && dom == g.startDs) { + continue; + } + DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic " "vertex %zu\n", g[v].index, g[dom].index); - - // iff all paths through in-edge e of v involve vertices whose - // reachability is a subset of reach(dom), we can delete edge e. - for (const auto &e : in_edges_range(v, g)) { - if (source(e, g) == dom) { - continue; - } - - if (reversePathReachSubset(e, dom, g)) { + + // iff all paths through in-edge e of v involve vertices whose + // reachability is a subset of reach(dom), we can delete edge e. 
+ for (const auto &e : in_edges_range(v, g)) { + if (source(e, g) == dom) { + continue; + } + + if (reversePathReachSubset(e, dom, g)) { DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " "paths share dom reach\n", - g[source(e, g)].index, g[target(e, g)].index); - dead.insert(e); - if (source(e, g) == v) { - cyclic[g[v].index] = false; - } - continue; - } - } - } - } -} - -static -void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, - set<NFAEdge> &dead) { + g[source(e, g)].index, g[target(e, g)].index); + dead.insert(e); + if (source(e, g) == v) { + cyclic[g[v].index] = false; + } + continue; + } + } + } + } +} + +static +void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, + set<NFAEdge> &dead) { auto postdominators = findPostDominators(g); - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - // Path out through a post-dominator (e.g. a?.+foobar') - NFAVertex postdom = postdominators[v]; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + // Path out through a post-dominator (e.g. a?.+foobar') + NFAVertex postdom = postdominators[v]; if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " "cyclic vertex %zu\n", g[v].index, g[postdom].index); - - // iff all paths through in-edge e of v involve vertices whose - // reachability is a subset of reach(dom), we can delete edge e. - for (const auto &e : out_edges_range(v, g)) { - if (target(e, g) == postdom) { - continue; - } - - if (forwardPathReachSubset(e, postdom, g)) { + + // iff all paths through in-edge e of v involve vertices whose + // reachability is a subset of reach(dom), we can delete edge e. 
+ for (const auto &e : out_edges_range(v, g)) { + if (target(e, g) == postdom) { + continue; + } + + if (forwardPathReachSubset(e, postdom, g)) { DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " "paths share postdom reach\n", - g[source(e, g)].index, g[target(e, g)].index); - if (target(e, g) == v) { - cyclic[g[v].index] = false; - } - dead.insert(e); - continue; - } - } - } - } -} - -bool removeRedundancy(NGHolder &g, som_type som) { - DEBUG_PRINTF("rr som = %d\n", (int)som); + g[source(e, g)].index, g[target(e, g)].index); + if (target(e, g) == v) { + cyclic[g[v].index] = false; + } + dead.insert(e); + continue; + } + } + } + } +} + +bool removeRedundancy(NGHolder &g, som_type som) { + DEBUG_PRINTF("rr som = %d\n", (int)som); renumber_vertices(g); - - // Cheap check: if all the non-special vertices have in-degree one and - // out-degree one, there's no redundancy in this here graph and we can - // vamoose. - if (isIrreducible(g)) { - return false; - } - - VertexInfoMap infoMap(g); - - // Populate maps of successors and predecessors, and accept status - populateContainers(g, infoMap); - - /* Run multiple passes: terminate when a full pass doesn't remove - * any vertices */ - bool doUseless = true; - bool doDiamond = true; - set<NFAVertex> removable; - while (doUseless || doDiamond) { - if (doUseless - && doUselessMergePass(g, som, infoMap, removable)) { - doDiamond = true; - } - doUseless = false; - - if (doDiamond - && doDiamondMergePass(g, som, infoMap, removable)) { - doUseless = true; - } - doDiamond = false; - } - DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size()); - remove_vertices(removable, g); - - return !removable.empty(); -} - -/** UE-524: remove edges into nodes that are dominated by cyclic nodes with - * reachability that is a superset of all paths feeding into that edge. 
*/ -bool removeCyclicDominated(NGHolder &g, som_type som) { - set<NFAEdge> dead; - vector<bool> cyclic; - bool changed = false; - - findCyclic(g, cyclic); - - findCyclicDom(g, cyclic, dead, som); - if (!dead.empty()) { - remove_edges(dead, g); - pruneUseless(g); - dead.clear(); - cyclic.clear(); // need to recalculate cyclic as ids have changed - findCyclic(g, cyclic); - changed = true; - } - - findCyclicPostDom(g, cyclic, dead); - if (!dead.empty()) { - remove_edges(dead, g); - pruneUseless(g); - dead.clear(); - changed = true; - } - - return changed; -} - -} // namespace ue2 + + // Cheap check: if all the non-special vertices have in-degree one and + // out-degree one, there's no redundancy in this here graph and we can + // vamoose. + if (isIrreducible(g)) { + return false; + } + + VertexInfoMap infoMap(g); + + // Populate maps of successors and predecessors, and accept status + populateContainers(g, infoMap); + + /* Run multiple passes: terminate when a full pass doesn't remove + * any vertices */ + bool doUseless = true; + bool doDiamond = true; + set<NFAVertex> removable; + while (doUseless || doDiamond) { + if (doUseless + && doUselessMergePass(g, som, infoMap, removable)) { + doDiamond = true; + } + doUseless = false; + + if (doDiamond + && doDiamondMergePass(g, som, infoMap, removable)) { + doUseless = true; + } + doDiamond = false; + } + DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size()); + remove_vertices(removable, g); + + return !removable.empty(); +} + +/** UE-524: remove edges into nodes that are dominated by cyclic nodes with + * reachability that is a superset of all paths feeding into that edge. 
*/ +bool removeCyclicDominated(NGHolder &g, som_type som) { + set<NFAEdge> dead; + vector<bool> cyclic; + bool changed = false; + + findCyclic(g, cyclic); + + findCyclicDom(g, cyclic, dead, som); + if (!dead.empty()) { + remove_edges(dead, g); + pruneUseless(g); + dead.clear(); + cyclic.clear(); // need to recalculate cyclic as ids have changed + findCyclic(g, cyclic); + changed = true; + } + + findCyclicPostDom(g, cyclic, dead); + if (!dead.empty()) { + remove_edges(dead, g); + pruneUseless(g); + dead.clear(); + changed = true; + } + + return changed; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h index 941844d061..617aed6b37 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h @@ -1,54 +1,54 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA graph reductions. - */ - -#ifndef NG_REDUNDANCY_H -#define NG_REDUNDANCY_H - -#include "som/som.h" - -namespace ue2 { - -class NGHolder; -struct CompileContext; - -/** Attempt to make the NFA graph \p g smaller by performing a number of local - * transformations. */ -bool removeRedundancy(NGHolder &g, som_type som); - -/** UE-524: remove edges into nodes that are dominated by cyclic nodes with - * reachability that is a superset of all paths feeding into that edge. Returns - * true if any edges/vertices were removed. */ -bool removeCyclicDominated(NGHolder &g, som_type som); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA graph reductions. + */ + +#ifndef NG_REDUNDANCY_H +#define NG_REDUNDANCY_H + +#include "som/som.h" + +namespace ue2 { + +class NGHolder; +struct CompileContext; + +/** Attempt to make the NFA graph \p g smaller by performing a number of local + * transformations. */ +bool removeRedundancy(NGHolder &g, som_type som); + +/** UE-524: remove edges into nodes that are dominated by cyclic nodes with + * reachability that is a superset of all paths feeding into that edge. Returns + * true if any edges/vertices were removed. 
*/ +bool removeCyclicDominated(NGHolder &g, som_type som); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp index 2675be643f..a879e34695 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp @@ -1,476 +1,476 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Region analysis. 
- * - * Definition: a \a region is a subset of vertices in a graph such that: - * - the edges entering the region are a cutset of the graph - * - for every in-edge (u, v) to the region there exist edges (u, w) for all - * w in {w : w in region and w has an in-edge} - * - the regions in a graph partition the graph - * - * Note: - * - we partition a graph into the maximal number of regions - * - similar properties for exit edges should hold as a consequence - * - graph == sequence of regions - * - a region is considered to have an epsilon vertex to allow jumps - * - vertices which only lead to back edges need to be floated up in the topo - * order - * - * Algorithm overview: - * -# topo-order over the DAG skeleton; - * -# incrementally add vertices to the current region until the boundary edges - * form a valid cut-set; - * -# for each back-edge, if the source and target are in different regions, - * merge the regions (and all intervening regions) into a common region. - */ -#include "ng_region.h" - -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Region analysis. + * + * Definition: a \a region is a subset of vertices in a graph such that: + * - the edges entering the region are a cutset of the graph + * - for every in-edge (u, v) to the region there exist edges (u, w) for all + * w in {w : w in region and w has an in-edge} + * - the regions in a graph partition the graph + * + * Note: + * - we partition a graph into the maximal number of regions + * - similar properties for exit edges should hold as a consequence + * - graph == sequence of regions + * - a region is considered to have an epsilon vertex to allow jumps + * - vertices which only lead to back edges need to be floated up in the topo + * order + * + * Algorithm overview: + * -# topo-order over the DAG skeleton; + * -# incrementally add vertices to the current region until the boundary edges + * form a valid cut-set; + * -# for each back-edge, if the source and target are in different regions, + * merge the regions (and all intervening regions) into a common region. 
+ */ +#include "ng_region.h" + +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph_range.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" - -#include <set> -#include <utility> -#include <vector> - -#include <boost/graph/filtered_graph.hpp> -#include <boost/graph/topological_sort.hpp> - -using namespace std; - -namespace ue2 { - + +#include <set> +#include <utility> +#include <vector> + +#include <boost/graph/filtered_graph.hpp> +#include <boost/graph/topological_sort.hpp> + +using namespace std; + +namespace ue2 { + using BackEdgeSet = unordered_set<NFAEdge>; using AcyclicGraph = boost::filtered_graph<NGHolder, bad_edge_filter<BackEdgeSet>>; - -namespace { -struct exit_info { - explicit exit_info(NFAVertex v) : exit(v) {} - - NFAVertex exit; + +namespace { +struct exit_info { + explicit exit_info(NFAVertex v) : exit(v) {} + + NFAVertex exit; flat_set<NFAVertex> open; -}; -} - -static -void checkAndAddExitCandidate(const AcyclicGraph &g, +}; +} + +static +void checkAndAddExitCandidate(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, NFAVertex v, vector<exit_info> &exits) { exit_info v_exit(v); auto &open = v_exit.open; - - /* find the set of vertices reachable from v which are not in r */ - for (auto w : adjacent_vertices_range(v, g)) { - if (!contains(r, w)) { + + /* find the set of vertices reachable from v which are not in r */ + for (auto w : adjacent_vertices_range(v, g)) { + if (!contains(r, w)) { open.insert(w); - } - } - + } + } + if (!open.empty()) { DEBUG_PRINTF("exit %zu\n", g[v].index); exits.push_back(move(v_exit)); - } -} - -static + } +} + +static void findExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, vector<exit_info> &exits) { exits.clear(); - for (auto v : r) { - checkAndAddExitCandidate(g, r, v, exits); - } -} - -static + for (auto v : r) { + checkAndAddExitCandidate(g, r, v, exits); + } +} + 
+static void refineExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, NFAVertex new_v, vector<exit_info> &exits) { /* new_v is no long an open edge */ for (auto &exit : exits) { exit.open.erase(new_v); - } - + } + /* no open edges: no longer an exit */ exits.erase(remove_if(exits.begin(), exits.end(), [&](const exit_info &exit) { return exit.open.empty(); }), exits.end()); - checkAndAddExitCandidate(g, r, new_v, exits); -} - -/** the set of exits from a candidate region are valid if: FIXME: document - */ -static -bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits, + checkAndAddExitCandidate(g, r, new_v, exits); +} + +/** the set of exits from a candidate region are valid if: FIXME: document + */ +static +bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits, const flat_set<NFAVertex> &open_jumps) { - if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) { - return true; - } - if (exits.size() == 1 && open_jumps.size() == 1) { + if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) { + return true; + } + if (exits.size() == 1 && open_jumps.size() == 1) { DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index, - g[exits[0].exit].index); - if (*open_jumps.begin() == exits[0].exit) { - return true; - } - } - - assert(!exits.empty()); - const auto &enters = exits.front().open; - - if (!open_jumps.empty() && enters != open_jumps) { - return false; - } - - for (auto it = begin(exits) + 1; it != end(exits); ++it) { - if (it->open != enters) { - return false; - } - } - - return true; -} - -static + g[exits[0].exit].index); + if (*open_jumps.begin() == exits[0].exit) { + return true; + } + } + + assert(!exits.empty()); + const auto &enters = exits.front().open; + + if (!open_jumps.empty() && enters != open_jumps) { + return false; + } + + for (auto it = begin(exits) + 1; it != end(exits); ++it) { + if (it->open != enters) { + return false; + } + } + + return true; +} + +static void 
setRegion(const unordered_set<NFAVertex> &r, u32 rid, unordered_map<NFAVertex, u32> &regions) { - for (auto v : r) { - regions[v] = rid; - } -} - -static -void buildInitialCandidate(const AcyclicGraph &g, - vector<NFAVertex>::const_reverse_iterator &it, - const vector<NFAVertex>::const_reverse_iterator &ite, + for (auto v : r) { + regions[v] = rid; + } +} + +static +void buildInitialCandidate(const AcyclicGraph &g, + vector<NFAVertex>::const_reverse_iterator &it, + const vector<NFAVertex>::const_reverse_iterator &ite, unordered_set<NFAVertex> &candidate, - /* in exits of prev region; - * out exits from candidate */ + /* in exits of prev region; + * out exits from candidate */ vector<exit_info> &exits, flat_set<NFAVertex> &open_jumps) { - if (it == ite) { + if (it == ite) { candidate.clear(); exits.clear(); - return; - } - + return; + } + if (exits.empty()) { - DEBUG_PRINTF("odd\n"); + DEBUG_PRINTF("odd\n"); candidate.clear(); DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate.insert(*it); open_jumps.erase(*it); checkAndAddExitCandidate(g, candidate, *it, exits); - ++it; - return; - } - + ++it; + return; + } + // Note: findExits() will clear exits, so it's safe to mutate/move its // elements here. 
auto &enters = exits.front().open; candidate.clear(); - - for (; it != ite; ++it) { + + for (; it != ite; ++it) { DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate.insert(*it); - if (contains(enters, *it)) { - break; - } - } - - if (it != ite) { - enters.erase(*it); + if (contains(enters, *it)) { + break; + } + } + + if (it != ite) { + enters.erase(*it); open_jumps = move(enters); DEBUG_PRINTF("oj size = %zu\n", open_jumps.size()); - ++it; - } else { + ++it; + } else { open_jumps.clear(); - } - + } + findExits(g, candidate, exits); -} - -static -void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, - const vector<NFAVertex> &topo, +} + +static +void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, + const vector<NFAVertex> &topo, unordered_map<NFAVertex, u32> ®ions) { - assert(!topo.empty()); - u32 curr_id = 0; + assert(!topo.empty()); + u32 curr_id = 0; auto t_it = topo.rbegin(); unordered_set<NFAVertex> candidate; flat_set<NFAVertex> open_jumps; DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); - assert(t_it != topo.rend()); - candidate.insert(*t_it++); + assert(t_it != topo.rend()); + candidate.insert(*t_it++); DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); - assert(t_it != topo.rend()); - candidate.insert(*t_it++); - + assert(t_it != topo.rend()); + candidate.insert(*t_it++); + vector<exit_info> exits; findExits(g, candidate, exits); - while (t_it != topo.rend()) { - assert(!candidate.empty()); - - if (exitValid(g, exits, open_jumps)) { - if (contains(candidate, h.accept) && !open_jumps.empty()) { - /* we have tried to make an optional region containing accept as - * we have an open jump to eod. This candidate region needs to - * be put in with the previous region. 
*/ - curr_id--; - DEBUG_PRINTF("merging in with region %u\n", curr_id); - } else { - DEBUG_PRINTF("setting region %u\n", curr_id); - } - setRegion(candidate, curr_id++, regions); + while (t_it != topo.rend()) { + assert(!candidate.empty()); + + if (exitValid(g, exits, open_jumps)) { + if (contains(candidate, h.accept) && !open_jumps.empty()) { + /* we have tried to make an optional region containing accept as + * we have an open jump to eod. This candidate region needs to + * be put in with the previous region. */ + curr_id--; + DEBUG_PRINTF("merging in with region %u\n", curr_id); + } else { + DEBUG_PRINTF("setting region %u\n", curr_id); + } + setRegion(candidate, curr_id++, regions); buildInitialCandidate(g, t_it, topo.rend(), candidate, exits, open_jumps); - } else { - NFAVertex curr = *t_it; + } else { + NFAVertex curr = *t_it; DEBUG_PRINTF("adding %zu to current\n", g[curr].index); - candidate.insert(curr); - open_jumps.erase(curr); + candidate.insert(curr); + open_jumps.erase(curr); refineExits(g, candidate, *t_it, exits); - DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(), - exits.size()); - ++t_it; - } - } - /* assert exits valid */ - setRegion(candidate, curr_id, regions); -} - -static -void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo, - const BackEdgeSet &backEdges, + DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(), + exits.size()); + ++t_it; + } + } + /* assert exits valid */ + setRegion(candidate, curr_id, regions); +} + +static +void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo, + const BackEdgeSet &backEdges, unordered_map<NFAVertex, u32> ®ions) { - for (const auto &e : backEdges) { - NFAVertex u = source(e, g); - NFAVertex v = target(e, g); - - u32 ru = regions[u]; - u32 rv = regions[v]; - if (ru == rv) { - continue; - } - + for (const auto &e : backEdges) { + NFAVertex u = source(e, g); + NFAVertex v = target(e, g); + + u32 ru = regions[u]; + u32 rv = regions[v]; + if (ru 
== rv) { + continue; + } + DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv, - g[u].index, ru); - assert(rv < ru); - - for (auto t : topo) { - u32 r = regions[t]; - if (r <= ru && r > rv) { - regions[t] = rv; - } else if (r > ru) { - regions[t] = rv + r - ru; - } - } - } -} - -static -void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g, - vector<NFAVertex> &topoOrder) { - // Start is last element of reverse topo ordering. - auto it = find(topoOrder.begin(), topoOrder.end(), w.start); - if (it != topoOrder.end() - 1) { - DEBUG_PRINTF("repositioning start\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - topoOrder.insert(topoOrder.end(), w.start); - } - - // StartDs is second-to-last element of reverse topo ordering. - it = find(topoOrder.begin(), topoOrder.end(), w.startDs); - if (it != topoOrder.end() - 2) { - DEBUG_PRINTF("repositioning start ds\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - topoOrder.insert(topoOrder.end() - 1, w.startDs); - } - - // AcceptEOD is first element of reverse topo ordering. - it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod); - if (it != topoOrder.begin()) { - DEBUG_PRINTF("repositioning accept\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - topoOrder.insert(topoOrder.begin(), w.acceptEod); - } - - // Accept is second element of reverse topo ordering, if it's connected. 
- it = find(topoOrder.begin(), topoOrder.end(), w.accept); - if (it != topoOrder.begin() + 1) { - DEBUG_PRINTF("repositioning accept\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - if (in_degree(w.accept, acyclic_g) != 0) { - topoOrder.insert(topoOrder.begin() + 1, w.accept); - } - } -} - -static -void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) { + g[u].index, ru); + assert(rv < ru); + + for (auto t : topo) { + u32 r = regions[t]; + if (r <= ru && r > rv) { + regions[t] = rv; + } else if (r > ru) { + regions[t] = rv + r - ru; + } + } + } +} + +static +void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g, + vector<NFAVertex> &topoOrder) { + // Start is last element of reverse topo ordering. + auto it = find(topoOrder.begin(), topoOrder.end(), w.start); + if (it != topoOrder.end() - 1) { + DEBUG_PRINTF("repositioning start\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end(), w.start); + } + + // StartDs is second-to-last element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), w.startDs); + if (it != topoOrder.end() - 2) { + DEBUG_PRINTF("repositioning start ds\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end() - 1, w.startDs); + } + + // AcceptEOD is first element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod); + if (it != topoOrder.begin()) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.begin(), w.acceptEod); + } + + // Accept is second element of reverse topo ordering, if it's connected. 
+ it = find(topoOrder.begin(), topoOrder.end(), w.accept); + if (it != topoOrder.begin() + 1) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + if (in_degree(w.accept, acyclic_g) != 0) { + topoOrder.insert(topoOrder.begin() + 1, w.accept); + } + } +} + +static +void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) { unordered_set<NFAVertex> sinks; - for (auto v : vertices_range(acyclic_g)) { - if (is_special(v, acyclic_g)) { - continue; - } - - if (isLeafNode(v, acyclic_g)) { + for (auto v : vertices_range(acyclic_g)) { + if (is_special(v, acyclic_g)) { + continue; + } + + if (isLeafNode(v, acyclic_g)) { DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); sinks.insert(NFAVertex(v)); - } - } - - if (sinks.empty()) { - DEBUG_PRINTF("no sinks found\n"); - return; - } - - bool changed; - do { - DEBUG_PRINTF("look\n"); - changed = false; - for (auto v : vertices_range(acyclic_g)) { + } + } + + if (sinks.empty()) { + DEBUG_PRINTF("no sinks found\n"); + return; + } + + bool changed; + do { + DEBUG_PRINTF("look\n"); + changed = false; + for (auto v : vertices_range(acyclic_g)) { if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) { - continue; - } - - for (auto w : adjacent_vertices_range(v, acyclic_g)) { + continue; + } + + for (auto w : adjacent_vertices_range(v, acyclic_g)) { if (!contains(sinks, NFAVertex(w))) { - goto next; - } - } - + goto next; + } + } + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); sinks.insert(NFAVertex(v)); - changed = true; - next:; - } - } while (changed); - - for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) { - if (!contains(sinks, *ri)) { - continue; - } - NFAVertex s = *ri; + changed = true; + next:; + } + } while (changed); + + for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) { + if (!contains(sinks, *ri)) { + continue; + } + NFAVertex s = *ri; DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index); 
unordered_set<NFAVertex> parents; - for (const auto &e : in_edges_range(s, acyclic_g)) { + for (const auto &e : in_edges_range(s, acyclic_g)) { parents.insert(NFAVertex(source(e, acyclic_g))); - } - - /* vertex has no children not reachable on a back edge, bubble the - * vertex up the topo order to be near its parents */ - vector<NFAVertex>::reverse_iterator rj = ri; - --rj; - while (rj != topoOrder.rbegin() && !contains(parents, *rj)) { - /* sink is in rj + 1 */ - assert(*(rj + 1) == s); - DEBUG_PRINTF("lifting\n"); - using std::swap; - swap(*rj, *(rj + 1)); - --rj; - } - } -} - + } + + /* vertex has no children not reachable on a back edge, bubble the + * vertex up the topo order to be near its parents */ + vector<NFAVertex>::reverse_iterator rj = ri; + --rj; + while (rj != topoOrder.rbegin() && !contains(parents, *rj)) { + /* sink is in rj + 1 */ + assert(*(rj + 1) == s); + DEBUG_PRINTF("lifting\n"); + using std::swap; + swap(*rj, *(rj + 1)); + --rj; + } + } +} + using ColorMap = decltype(make_small_color_map(NGHolder())); -/** Build a reverse topo ordering (with only the specials that are in use). We - * also want to ensure vertices which only lead to back edges are placed near - * their parents. */ -static -vector<NFAVertex> buildTopoOrder(const NGHolder &w, - const AcyclicGraph &acyclic_g, +/** Build a reverse topo ordering (with only the specials that are in use). We + * also want to ensure vertices which only lead to back edges are placed near + * their parents. 
*/ +static +vector<NFAVertex> buildTopoOrder(const NGHolder &w, + const AcyclicGraph &acyclic_g, ColorMap &colours) { - vector<NFAVertex> topoOrder; + vector<NFAVertex> topoOrder; topoOrder.reserve(num_vertices(w)); - + topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colours)); - - reorderSpecials(w, acyclic_g, topoOrder); - - if (topoOrder.empty()) { - return topoOrder; - } - - liftSinks(acyclic_g, topoOrder); - - DEBUG_PRINTF("TOPO ORDER\n"); - for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) { + + reorderSpecials(w, acyclic_g, topoOrder); + + if (topoOrder.empty()) { + return topoOrder; + } + + liftSinks(acyclic_g, topoOrder); + + DEBUG_PRINTF("TOPO ORDER\n"); + for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) { DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index); - } - DEBUG_PRINTF("----------\n"); - - return topoOrder; -} - + } + DEBUG_PRINTF("----------\n"); + + return topoOrder; +} + unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - const u32 numVertices = num_vertices(g); - DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices); - + assert(hasCorrectlyNumberedVertices(g)); + const u32 numVertices = num_vertices(g); + DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices); + auto colours = make_small_color_map(g); - - // Build an acyclic graph for this NGHolder. - BackEdgeSet deadEdges; + + // Build an acyclic graph for this NGHolder. + BackEdgeSet deadEdges; depth_first_search(g, visitor(BackEdges<BackEdgeSet>(deadEdges)) .root_vertex(g.start) .color_map(colours)); - + auto af = make_bad_edge_filter(&deadEdges); AcyclicGraph acyclic_g(g, af); - - // Build a (reverse) topological ordering. - vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours); - - // Everybody starts in region 0. + + // Build a (reverse) topological ordering. 
+ vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours); + + // Everybody starts in region 0. unordered_map<NFAVertex, u32> regions; - regions.reserve(numVertices); - for (auto v : vertices_range(g)) { - regions.emplace(v, 0); - } - - findDagLeaders(g, acyclic_g, topoOrder, regions); - mergeUnderBackEdges(g, topoOrder, deadEdges, regions); - - return regions; -} - -} // namespace ue2 + regions.reserve(numVertices); + for (auto v : vertices_range(g)) { + regions.emplace(v, 0); + } + + findDagLeaders(g, acyclic_g, topoOrder, regions); + mergeUnderBackEdges(g, topoOrder, deadEdges, regions); + + return regions; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.h b/contrib/libs/hyperscan/src/nfagraph/ng_region.h index a4708a582e..dec8ea7a04 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.h @@ -1,219 +1,219 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Region analysis and utility functions. - */ - -#ifndef NG_REGION_H -#define NG_REGION_H - -#include "ng_holder.h" -#include "util/container.h" -#include "util/graph_range.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Region analysis and utility functions. + */ + +#ifndef NG_REGION_H +#define NG_REGION_H + +#include "ng_holder.h" +#include "util/container.h" +#include "util/graph_range.h" + #include <unordered_map> -#include <vector> - -namespace ue2 { - -/** \brief Assign a region ID to every vertex in the graph. */ +#include <vector> + +namespace ue2 { + +/** \brief Assign a region ID to every vertex in the graph. */ std::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g); - -/** \brief True if vertices \p a and \p b are in the same region. */ -template <class Graph> -bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b, + +/** \brief True if vertices \p a and \p b are in the same region. */ +template <class Graph> +bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b, const std::unordered_map<NFAVertex, u32> ®ion_map) { - assert(contains(region_map, a) && contains(region_map, b)); - - return region_map.at(a) == region_map.at(b) && - is_special(a, g) == is_special(b, g); -} - -/** \brief True if vertex \p b is in a later region than vertex \p a. */ -template <class Graph> -bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b, + assert(contains(region_map, a) && contains(region_map, b)); + + return region_map.at(a) == region_map.at(b) && + is_special(a, g) == is_special(b, g); +} + +/** \brief True if vertex \p b is in a later region than vertex \p a. 
*/ +template <class Graph> +bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b, const std::unordered_map<NFAVertex, u32> ®ion_map) { - assert(contains(region_map, a) && contains(region_map, b)); - - u32 aa = g[a].index; - u32 bb = g[b].index; - - if (bb == NODE_START || bb == NODE_START_DOTSTAR) { - return false; - } - - if (aa == NODE_START || aa == NODE_START_DOTSTAR) { - return true; - } - - if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) { - return true; - } - if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) { - return false; - } - - return region_map.at(a) < region_map.at(b); -} - -/** \brief True if vertex \p b is in an earlier region than vertex \p a. */ -template <class Graph> -bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b, + assert(contains(region_map, a) && contains(region_map, b)); + + u32 aa = g[a].index; + u32 bb = g[b].index; + + if (bb == NODE_START || bb == NODE_START_DOTSTAR) { + return false; + } + + if (aa == NODE_START || aa == NODE_START_DOTSTAR) { + return true; + } + + if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) { + return true; + } + if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) { + return false; + } + + return region_map.at(a) < region_map.at(b); +} + +/** \brief True if vertex \p b is in an earlier region than vertex \p a. */ +template <class Graph> +bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b, const std::unordered_map<NFAVertex, u32> ®ion_map) { - assert(contains(region_map, a) && contains(region_map, b)); - - u32 aa = g[a].index; - u32 bb = g[b].index; - - if (bb == NODE_START || bb == NODE_START_DOTSTAR) { - return true; - } - - if (aa == NODE_START || aa == NODE_START_DOTSTAR) { - return false; - } - - if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) { - return false; - } - if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) { - return true; - } - - return region_map.at(b) < region_map.at(a); -} - -/** \brief True if vertex \p v is an entry vertex for its region. 
*/ -template <class Graph> -bool isRegionEntry(const Graph &g, NFAVertex v, + assert(contains(region_map, a) && contains(region_map, b)); + + u32 aa = g[a].index; + u32 bb = g[b].index; + + if (bb == NODE_START || bb == NODE_START_DOTSTAR) { + return true; + } + + if (aa == NODE_START || aa == NODE_START_DOTSTAR) { + return false; + } + + if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) { + return false; + } + if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) { + return true; + } + + return region_map.at(b) < region_map.at(a); +} + +/** \brief True if vertex \p v is an entry vertex for its region. */ +template <class Graph> +bool isRegionEntry(const Graph &g, NFAVertex v, const std::unordered_map<NFAVertex, u32> ®ion_map) { - // Note that some graph types do not have inv_adjacent_vertices, so we must - // use in_edges here. - for (const auto &e : in_edges_range(v, g)) { - if (!inSameRegion(g, v, source(e, g), region_map)) { - return true; - } - } - - return false; -} - -/** \brief True if vertex \p v is an exit vertex for its region. */ -template <class Graph> -bool isRegionExit(const Graph &g, NFAVertex v, + // Note that some graph types do not have inv_adjacent_vertices, so we must + // use in_edges here. + for (const auto &e : in_edges_range(v, g)) { + if (!inSameRegion(g, v, source(e, g), region_map)) { + return true; + } + } + + return false; +} + +/** \brief True if vertex \p v is an exit vertex for its region. */ +template <class Graph> +bool isRegionExit(const Graph &g, NFAVertex v, const std::unordered_map<NFAVertex, u32> ®ion_map) { - for (auto w : adjacent_vertices_range(v, g)) { - if (!inSameRegion(g, v, w, region_map)) { - return true; - } - } - - return false; -} - -/** \brief True if vertex \p v is in a region all on its own. 
*/ -template <class Graph> -bool isSingletonRegion(const Graph &g, NFAVertex v, + for (auto w : adjacent_vertices_range(v, g)) { + if (!inSameRegion(g, v, w, region_map)) { + return true; + } + } + + return false; +} + +/** \brief True if vertex \p v is in a region all on its own. */ +template <class Graph> +bool isSingletonRegion(const Graph &g, NFAVertex v, const std::unordered_map<NFAVertex, u32> ®ion_map) { - for (const auto &e : in_edges_range(v, g)) { - auto u = source(e, g); - if (u != v && inSameRegion(g, v, u, region_map)) { - return false; - } - - for (auto w : ue2::adjacent_vertices_range(u, g)) { - if (w != v && inSameRegion(g, v, w, region_map)) { - return false; - } - } - } - - for (auto w : adjacent_vertices_range(v, g)) { - if (w != v && inSameRegion(g, v, w, region_map)) { - return false; - } - - for (const auto &e : in_edges_range(w, g)) { - auto u = source(e, g); - if (u != v && inSameRegion(g, v, u, region_map)) { - return false; - } - } - - return true; - } - - return true; -} - -/** - * \brief True if the region containing vertex \p v is optional. The vertex \p v - * should be a region leader. - */ -template <class Graph> -bool isOptionalRegion(const Graph &g, NFAVertex v, + for (const auto &e : in_edges_range(v, g)) { + auto u = source(e, g); + if (u != v && inSameRegion(g, v, u, region_map)) { + return false; + } + + for (auto w : ue2::adjacent_vertices_range(u, g)) { + if (w != v && inSameRegion(g, v, w, region_map)) { + return false; + } + } + } + + for (auto w : adjacent_vertices_range(v, g)) { + if (w != v && inSameRegion(g, v, w, region_map)) { + return false; + } + + for (const auto &e : in_edges_range(w, g)) { + auto u = source(e, g); + if (u != v && inSameRegion(g, v, u, region_map)) { + return false; + } + } + + return true; + } + + return true; +} + +/** + * \brief True if the region containing vertex \p v is optional. The vertex \p v + * should be a region leader. 
+ */ +template <class Graph> +bool isOptionalRegion(const Graph &g, NFAVertex v, const std::unordered_map<NFAVertex, u32> ®ion_map) { - assert(isRegionEntry(g, v, region_map)); - + assert(isRegionEntry(g, v, region_map)); + DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n", - region_map.at(v), g[v].index); - - // Region zero is never optional. - assert(contains(region_map, v)); - if (region_map.at(v) == 0) { - return false; - } - - // Optional if v has a predecessor in an earlier region that has a - // successor in a later one. - - for (const auto &e : in_edges_range(v, g)) { - auto u = source(e, g); - if (inSameRegion(g, v, u, region_map)) { - continue; - } + region_map.at(v), g[v].index); + + // Region zero is never optional. + assert(contains(region_map, v)); + if (region_map.at(v) == 0) { + return false; + } + + // Optional if v has a predecessor in an earlier region that has a + // successor in a later one. + + for (const auto &e : in_edges_range(v, g)) { + auto u = source(e, g); + if (inSameRegion(g, v, u, region_map)) { + continue; + } DEBUG_PRINTF(" searching from u=%zu\n", g[u].index); - - assert(inEarlierRegion(g, v, u, region_map)); - - for (auto w : adjacent_vertices_range(u, g)) { + + assert(inEarlierRegion(g, v, u, region_map)); + + for (auto w : adjacent_vertices_range(u, g)) { DEBUG_PRINTF(" searching to w=%zu\n", g[w].index); - if (inLaterRegion(g, v, w, region_map)) { - return true; - } - } - return false; - } - - return false; -} - -} // namespace ue2 - -#endif + if (inLaterRegion(g, v, w, region_map)) { + return true; + } + } + return false; + } + + return false; +} + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp index 1126d4d6c9..4eecb1f917 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp @@ -1,270 +1,270 @@ -/* +/* * 
Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Region Redundancy optimisation pass. - * - * Identifies and removes entire regions that are adjacent to a cyclic state - * with a superset of their character reachability. 
- */ -#include "ng_region_redundancy.h" - -#include "ng_holder.h" -#include "ng_region.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" -#include "util/graph_range.h" - -#include <set> - -using namespace std; - -namespace ue2 { - -namespace { - -/** Precalculated information about a region. */ -struct RegionInfo { - NFAVertex entry; //!< arbitrary entry vertex - CharReach cr; //!< union of the reach of all vertices in region -}; - -} // namespace - -static -bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, - const flat_set<ReportID> &expected_reports, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Region Redundancy optimisation pass. + * + * Identifies and removes entire regions that are adjacent to a cyclic state + * with a superset of their character reachability. + */ +#include "ng_region_redundancy.h" + +#include "ng_holder.h" +#include "ng_region.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" +#include "util/graph_range.h" + +#include <set> + +using namespace std; + +namespace ue2 { + +namespace { + +/** Precalculated information about a region. 
*/ +struct RegionInfo { + NFAVertex entry; //!< arbitrary entry vertex + CharReach cr; //!< union of the reach of all vertices in region +}; + +} // namespace + +static +bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, + const flat_set<ReportID> &expected_reports, const unordered_map<NFAVertex, u32> ®ion_map) { - /* TODO: only check vertices connected to accept/acceptEOD */ - for (auto v : vertices_range(g)) { - if (region != region_map.at(v)) { - continue; - } - - if (is_any_accept(v, g)) { - return true; /* encountering an actual special in the region is - * possible but definitely unexpected */ - } - - for (auto w : adjacent_vertices_range(v, g)) { - if (is_any_accept(w, g) && g[v].reports != expected_reports) { - return true; - } - } - } - return false; -} - -static -void processCyclicStateForward(NGHolder &h, NFAVertex cyc, - const map<u32, RegionInfo> &info, + /* TODO: only check vertices connected to accept/acceptEOD */ + for (auto v : vertices_range(g)) { + if (region != region_map.at(v)) { + continue; + } + + if (is_any_accept(v, g)) { + return true; /* encountering an actual special in the region is + * possible but definitely unexpected */ + } + + for (auto w : adjacent_vertices_range(v, g)) { + if (is_any_accept(w, g) && g[v].reports != expected_reports) { + return true; + } + } + } + return false; +} + +static +void processCyclicStateForward(NGHolder &h, NFAVertex cyc, + const map<u32, RegionInfo> &info, const unordered_map<NFAVertex, u32> ®ion_map, - set<u32> &deadRegions) { - u32 region = region_map.at(cyc); - CharReach cr = h[cyc].char_reach; - auto reports = h[cyc].reports; - + set<u32> &deadRegions) { + u32 region = region_map.at(cyc); + CharReach cr = h[cyc].char_reach; + auto reports = h[cyc].reports; + DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index, - region); - - map<u32, RegionInfo>::const_iterator it; - while ((it = info.find(++region)) != info.end()) { - NFAVertex v = it->second.entry; - const CharReach ®ion_cr 
= it->second.cr; - assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); + region); + + map<u32, RegionInfo>::const_iterator it; + while ((it = info.find(++region)) != info.end()) { + NFAVertex v = it->second.entry; + const CharReach ®ion_cr = it->second.cr; + assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); DEBUG_PRINTF("checking %zu\n", h[v].index); - - if (!region_cr.isSubsetOf(cr)) { - DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); - break; - } - - if (isOptionalRegion(h, v, region_map) - && !regionHasUnexpectedAccept(h, region, reports, region_map)) { + + if (!region_cr.isSubsetOf(cr)) { + DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); + break; + } + + if (isOptionalRegion(h, v, region_map) + && !regionHasUnexpectedAccept(h, region, reports, region_map)) { DEBUG_PRINTF("cyclic state %zu leads to optional region leader" " %zu\n", h[cyc].index, h[v].index); - deadRegions.insert(region); - } else if (isSingletonRegion(h, v, region_map)) { - /* we can use this region as straw and suck in optional regions on - * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */ - cr = h[v].char_reach; - reports = h[v].reports; - DEBUG_PRINTF("%u is straw\n", region); - assert(cr.isSubsetOf(h[cyc].char_reach)); - if (hasSelfLoop(v, h)) { - DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region); - remove_edge(v, v, h); - } - } else { - break; - } - } -} - -static -void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, - const map<u32, RegionInfo> &info, + deadRegions.insert(region); + } else if (isSingletonRegion(h, v, region_map)) { + /* we can use this region as straw and suck in optional regions on + * the other side. 
This allows us to transform /a{n,m}/ to /a{n}/ */ + cr = h[v].char_reach; + reports = h[v].reports; + DEBUG_PRINTF("%u is straw\n", region); + assert(cr.isSubsetOf(h[cyc].char_reach)); + if (hasSelfLoop(v, h)) { + DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region); + remove_edge(v, v, h); + } + } else { + break; + } + } +} + +static +void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, + const map<u32, RegionInfo> &info, const unordered_map<NFAVertex, u32> ®ion_map, - set<u32> &deadRegions) { - u32 region = region_map.at(cyc); - CharReach cr = h[cyc].char_reach; - auto reports = h[cyc].reports; - + set<u32> &deadRegions) { + u32 region = region_map.at(cyc); + CharReach cr = h[cyc].char_reach; + auto reports = h[cyc].reports; + DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region); - - map<u32, RegionInfo>::const_iterator it; - while ((it = info.find(--region)) != info.end()) { - NFAVertex v = it->second.entry; - const CharReach ®ion_cr = it->second.cr; - assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); + + map<u32, RegionInfo>::const_iterator it; + while ((it = info.find(--region)) != info.end()) { + NFAVertex v = it->second.entry; + const CharReach ®ion_cr = it->second.cr; + assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); DEBUG_PRINTF("checking %zu\n", h[v].index); - - if (!region_cr.isSubsetOf(cr)) { - DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); - break; - } - - if (isOptionalRegion(h, v, region_map) - && !regionHasUnexpectedAccept(h, region, reports, region_map)) { + + if (!region_cr.isSubsetOf(cr)) { + DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); + break; + } + + if (isOptionalRegion(h, v, region_map) + && !regionHasUnexpectedAccept(h, region, reports, region_map)) { DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n", - h[cyc].index, h[v].index); - deadRegions.insert(region); - } else if (isSingletonRegion(h, v, region_map)) { - /* we can use this region 
as a reverse straw and suck in optional - * regions on the other side. This allows us to transform - * /^a?a{n}.*b/ to /^a{n}.*b/ */ - cr = h[v].char_reach; - reports = h[v].reports; - DEBUG_PRINTF("%u is straw\n", region); - assert(cr.isSubsetOf(h[cyc].char_reach)); - if (hasSelfLoop(v, h)) { - DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region); - remove_edge(v, v, h); - } - } else { - break; - } - - if (!region) { // No wrapping - break; - } - } -} - -static -map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g, + h[cyc].index, h[v].index); + deadRegions.insert(region); + } else if (isSingletonRegion(h, v, region_map)) { + /* we can use this region as a reverse straw and suck in optional + * regions on the other side. This allows us to transform + * /^a?a{n}.*b/ to /^a{n}.*b/ */ + cr = h[v].char_reach; + reports = h[v].reports; + DEBUG_PRINTF("%u is straw\n", region); + assert(cr.isSubsetOf(h[cyc].char_reach)); + if (hasSelfLoop(v, h)) { + DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region); + remove_edge(v, v, h); + } + } else { + break; + } + + if (!region) { // No wrapping + break; + } + } +} + +static +map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ion_map) { - map<u32, RegionInfo> info; - - for (auto v : vertices_range(g)) { - u32 region = region_map.at(v); - if (is_special(v, g) || region == 0) { - continue; - } - - RegionInfo &ri = info[region]; - ri.cr |= g[v].char_reach; - if (isRegionEntry(g, v, region_map)) { - ri.entry = v; - } - } - - return info; -} - -static -bool hasNoStartAnchoring(const NGHolder &h) { - for (auto v : adjacent_vertices_range(h.start, h)) { - if (!edge(h.startDs, v, h).second) { - return false; - } - } - return true; -} - -void removeRegionRedundancy(NGHolder &g, som_type som) { - auto region_map = assignRegions(g); - - map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map); - - set<u32> deadRegions; - - /* if we are not tracking som, we can treat sds 
as a cyclic region if there - * is no anchoring */ - if (!som && hasNoStartAnchoring(g)) { - processCyclicStateForward(g, g.startDs, info, region_map, deadRegions); - } - - // Walk the region mapping, looking for regions that consist of a single - // cyclic node. - - for (const auto &m : info) { - // Must not have already been removed - if (contains(deadRegions, m.first)) { - continue; - } - - NFAVertex v = m.second.entry; - /* require a singleton cyclic region */ - if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) { - continue; - } - - if (som && is_virtual_start(v, g)) { - continue; - } - - processCyclicStateForward(g, v, info, region_map, deadRegions); - processCyclicStateReverse(g, v, info, region_map, deadRegions); - } - - if (deadRegions.empty()) { - return; - } - - vector<NFAVertex> dead; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - u32 region = region_map.at(v); - if (contains(deadRegions, region)) { - dead.push_back(v); - } - } - - if (!dead.empty()) { - DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n", - dead.size(), deadRegions.size()); - remove_vertices(dead, g); - } -} - -} // namespace ue2 + map<u32, RegionInfo> info; + + for (auto v : vertices_range(g)) { + u32 region = region_map.at(v); + if (is_special(v, g) || region == 0) { + continue; + } + + RegionInfo &ri = info[region]; + ri.cr |= g[v].char_reach; + if (isRegionEntry(g, v, region_map)) { + ri.entry = v; + } + } + + return info; +} + +static +bool hasNoStartAnchoring(const NGHolder &h) { + for (auto v : adjacent_vertices_range(h.start, h)) { + if (!edge(h.startDs, v, h).second) { + return false; + } + } + return true; +} + +void removeRegionRedundancy(NGHolder &g, som_type som) { + auto region_map = assignRegions(g); + + map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map); + + set<u32> deadRegions; + + /* if we are not tracking som, we can treat sds as a cyclic region if there + * is no anchoring */ + if (!som && 
hasNoStartAnchoring(g)) { + processCyclicStateForward(g, g.startDs, info, region_map, deadRegions); + } + + // Walk the region mapping, looking for regions that consist of a single + // cyclic node. + + for (const auto &m : info) { + // Must not have already been removed + if (contains(deadRegions, m.first)) { + continue; + } + + NFAVertex v = m.second.entry; + /* require a singleton cyclic region */ + if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) { + continue; + } + + if (som && is_virtual_start(v, g)) { + continue; + } + + processCyclicStateForward(g, v, info, region_map, deadRegions); + processCyclicStateReverse(g, v, info, region_map, deadRegions); + } + + if (deadRegions.empty()) { + return; + } + + vector<NFAVertex> dead; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + u32 region = region_map.at(v); + if (contains(deadRegions, region)) { + dead.push_back(v); + } + } + + if (!dead.empty()) { + DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n", + dead.size(), deadRegions.size()); + remove_vertices(dead, g); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h index f0b396ca43..c4b4fc958c 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h @@ -1,49 +1,49 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Region Redundancy optimisation pass. - * - * Identifies and removes entire regions that are adjacent to a cyclic state - * with a superset of their character reachability. - */ - -#ifndef NG_REGION_REDUNDANCY_H -#define NG_REGION_REDUNDANCY_H - -#include "som/som.h" - -namespace ue2 { - -class NGHolder; - -void removeRegionRedundancy(NGHolder &g, som_type som); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Region Redundancy optimisation pass. + * + * Identifies and removes entire regions that are adjacent to a cyclic state + * with a superset of their character reachability. 
+ */ + +#ifndef NG_REGION_REDUNDANCY_H +#define NG_REGION_REDUNDANCY_H + +#include "som/som.h" + +namespace ue2 { + +class NGHolder; + +void removeRegionRedundancy(NGHolder &g, som_type som); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp index 1f63ad3c6f..72c7eee3f3 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp @@ -1,329 +1,329 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Bounded repeat analysis. - */ -#include "ng_repeat.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_holder.h" -#include "ng_limex_accel.h" -#include "ng_prune.h" -#include "ng_reports.h" -#include "ng_som_util.h" -#include "ng_util.h" -#include "nfa/accel.h" -#include "nfa/limex_limits.h" -#include "nfa/repeat_internal.h" -#include "nfa/repeatcompile.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Bounded repeat analysis. + */ +#include "ng_repeat.h" + +#include "grey.h" +#include "ng_depth.h" +#include "ng_holder.h" +#include "ng_limex_accel.h" +#include "ng_prune.h" +#include "ng_reports.h" +#include "ng_som_util.h" +#include "ng_util.h" +#include "nfa/accel.h" +#include "nfa/limex_limits.h" +#include "nfa/repeat_internal.h" +#include "nfa/repeatcompile.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" #include "util/graph_undirected.h" -#include "util/report_manager.h" +#include "util/report_manager.h" #include "util/unordered.h" - -#include <algorithm> -#include <map> -#include <queue> + +#include <algorithm> +#include <map> +#include <queue> #include <unordered_map> #include <unordered_set> - -#include <boost/graph/connected_components.hpp> -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/filtered_graph.hpp> -#include <boost/graph/reverse_graph.hpp> -#include <boost/graph/topological_sort.hpp> -#include <boost/icl/interval_set.hpp> - -using namespace std; + +#include <boost/graph/connected_components.hpp> +#include 
<boost/graph/depth_first_search.hpp> +#include <boost/graph/filtered_graph.hpp> +#include <boost/graph/reverse_graph.hpp> +#include <boost/graph/topological_sort.hpp> +#include <boost/icl/interval_set.hpp> + +using namespace std; using boost::depth_first_search; using boost::depth_first_visit; using boost::make_assoc_property_map; - -namespace ue2 { - -namespace { - + +namespace ue2 { + +namespace { + /** * \brief Filter that retains only edges between vertices with the same * reachability. Special vertices are dropped. */ -template<class Graph> -struct ReachFilter { +template<class Graph> +struct ReachFilter { ReachFilter() = default; - explicit ReachFilter(const Graph *g_in) : g(g_in) {} - - // Convenience typedefs. + explicit ReachFilter(const Graph *g_in) : g(g_in) {} + + // Convenience typedefs. using Traits = typename boost::graph_traits<Graph>; using VertexDescriptor = typename Traits::vertex_descriptor; using EdgeDescriptor = typename Traits::edge_descriptor; - + bool operator()(const VertexDescriptor &v) const { - assert(g); - // Disallow special vertices, as otherwise we will try to remove them - // later. + assert(g); + // Disallow special vertices, as otherwise we will try to remove them + // later. return !is_special(v, *g); } - + bool operator()(const EdgeDescriptor &e) const { assert(g); - // Vertices must have the same reach. + // Vertices must have the same reach. 
auto u = source(e, *g), v = target(e, *g); - const CharReach &cr_u = (*g)[u].char_reach; - const CharReach &cr_v = (*g)[v].char_reach; - return cr_u == cr_v; - } - - const Graph *g = nullptr; -}; - + const CharReach &cr_u = (*g)[u].char_reach; + const CharReach &cr_v = (*g)[v].char_reach; + return cr_u == cr_v; + } + + const Graph *g = nullptr; +}; + using RepeatGraph = boost::filtered_graph<NGHolder, ReachFilter<NGHolder>, ReachFilter<NGHolder>>; - -struct ReachSubgraph { - vector<NFAVertex> vertices; + +struct ReachSubgraph { + vector<NFAVertex> vertices; depth repeatMin{0}; depth repeatMax{0}; - u32 minPeriod = 1; - bool is_reset = false; - enum RepeatType historyType = REPEAT_RING; - bool bad = false; // if true, ignore this case -}; - -} // namespace - -static -void findInitDepths(const NGHolder &g, + u32 minPeriod = 1; + bool is_reset = false; + enum RepeatType historyType = REPEAT_RING; + bool bad = false; // if true, ignore this case +}; + +} // namespace + +static +void findInitDepths(const NGHolder &g, unordered_map<NFAVertex, NFAVertexDepth> &depths) { auto d = calcDepths(g); - - for (auto v : vertices_range(g)) { + + for (auto v : vertices_range(g)) { size_t idx = g[v].index; - assert(idx < d.size()); + assert(idx < d.size()); depths.emplace(v, d[idx]); - } -} - -static + } +} + +static vector<NFAVertex> buildTopoOrder(const RepeatGraph &g) { /* Note: RepeatGraph is a filtered version of NGHolder and still has * NFAVertex as its vertex descriptor */ typedef unordered_set<NFAEdge> EdgeSet; - EdgeSet deadEdges; - - // We don't have indices spanning [0,N] on our filtered graph, so we - // provide a colour map. + EdgeSet deadEdges; + + // We don't have indices spanning [0,N] on our filtered graph, so we + // provide a colour map. unordered_map<NFAVertex, boost::default_color_type> colours; - - depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)). 
- color_map(make_assoc_property_map(colours))); + + depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)). + color_map(make_assoc_property_map(colours))); auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); - + vector<NFAVertex> topoOrder; - topological_sort(acyclic_g, back_inserter(topoOrder), - color_map(make_assoc_property_map(colours))); - - reverse(topoOrder.begin(), topoOrder.end()); + topological_sort(acyclic_g, back_inserter(topoOrder), + color_map(make_assoc_property_map(colours))); + + reverse(topoOrder.begin(), topoOrder.end()); return topoOrder; -} - -static -void proper_pred(const NGHolder &g, NFAVertex v, +} + +static +void proper_pred(const NGHolder &g, NFAVertex v, unordered_set<NFAVertex> &p) { - pred(g, v, &p); - p.erase(v); // self-loops -} - -static -void proper_succ(const NGHolder &g, NFAVertex v, + pred(g, v, &p); + p.erase(v); // self-loops +} + +static +void proper_succ(const NGHolder &g, NFAVertex v, unordered_set<NFAVertex> &s) { - succ(g, v, &s); - s.erase(v); // self-loops -} - -static -bool roguePredecessor(const NGHolder &g, NFAVertex v, + succ(g, v, &s); + s.erase(v); // self-loops +} + +static +bool roguePredecessor(const NGHolder &g, NFAVertex v, const unordered_set<NFAVertex> &involved, const unordered_set<NFAVertex> &pred) { - u32 seen = 0; - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (contains(involved, u)) { - continue; - } - if (!contains(pred, u)) { + u32 seen = 0; + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (contains(involved, u)) { + continue; + } + if (!contains(pred, u)) { DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index); - return true; - } - - seen++; - } - - // We must have edges from either (a) none of our external predecessors, or - // (b) all of our external predecessors. 
- if (!seen) { - return false; - } - return pred.size() != seen; -} - -static -bool rogueSuccessor(const NGHolder &g, NFAVertex v, + return true; + } + + seen++; + } + + // We must have edges from either (a) none of our external predecessors, or + // (b) all of our external predecessors. + if (!seen) { + return false; + } + return pred.size() != seen; +} + +static +bool rogueSuccessor(const NGHolder &g, NFAVertex v, const unordered_set<NFAVertex> &involved, const unordered_set<NFAVertex> &succ) { - u32 seen = 0; - for (auto w : adjacent_vertices_range(v, g)) { - if (contains(involved, w)) { - continue; - } - - if (!contains(succ, w)) { + u32 seen = 0; + for (auto w : adjacent_vertices_range(v, g)) { + if (contains(involved, w)) { + continue; + } + + if (!contains(succ, w)) { DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index); - return true; - } - - seen++; - } - - // We must have edges to either (a) none of our external successors, or - // (b) all of our external successors. - if (!seen) { - return false; - } - return succ.size() != seen; -} - -static -bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) { + return true; + } + + seen++; + } + + // We must have edges to either (a) none of our external successors, or + // (b) all of our external successors. + if (!seen) { + return false; + } + return succ.size() != seen; +} + +static +bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) { /* TODO: check that we need this now that we allow multiple tops */ const flat_set<u32> *tops = nullptr; - - for (auto v : verts) { - for (const auto &e : in_edges_range(v, g)) { - NFAVertex u = source(e, g); - if (u != g.start && u != g.startDs) { - continue; // Only edges from starts have valid top properties. - } + + for (auto v : verts) { + for (const auto &e : in_edges_range(v, g)) { + NFAVertex u = source(e, g); + if (u != g.start && u != g.startDs) { + continue; // Only edges from starts have valid top properties. 
+ } DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index, g[v].index, g[e].tops.size()); if (!tops) { tops = &g[e].tops; } else if (g[e].tops != *tops) { return true; // More than one set of tops. - } - } - } - - return false; -} - -static -bool vertexIsBad(const NGHolder &g, NFAVertex v, + } + } + } + + return false; +} + +static +bool vertexIsBad(const NGHolder &g, NFAVertex v, const unordered_set<NFAVertex> &involved, const unordered_set<NFAVertex> &tail, const unordered_set<NFAVertex> &pred, const unordered_set<NFAVertex> &succ, - const flat_set<ReportID> &reports) { + const flat_set<ReportID> &reports) { DEBUG_PRINTF("check vertex %zu\n", g[v].index); - - // We must drop any vertex that is the target of a back-edge within - // our subgraph. The tail set contains all vertices that are after v in a - // topo ordering. - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (contains(tail, u)) { + + // We must drop any vertex that is the target of a back-edge within + // our subgraph. The tail set contains all vertices that are after v in a + // topo ordering. + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (contains(tail, u)) { DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n", - g[u].index, g[v].index); - return true; - } - } - - // If this vertex has an entry from outside our subgraph, it must have - // edges from *all* the vertices in pred and no other external entries. - // Similarly for exits. - if (roguePredecessor(g, v, involved, pred)) { + g[u].index, g[v].index); + return true; + } + } + + // If this vertex has an entry from outside our subgraph, it must have + // edges from *all* the vertices in pred and no other external entries. + // Similarly for exits. 
+ if (roguePredecessor(g, v, involved, pred)) { DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index); - return true; - } - - if (rogueSuccessor(g, v, involved, succ)) { + return true; + } + + if (rogueSuccessor(g, v, involved, succ)) { DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index); - return true; - } - - // All reporting vertices should have the same reports. - if (is_match_vertex(v, g) && reports != g[v].reports) { + return true; + } + + // All reporting vertices should have the same reports. + if (is_match_vertex(v, g) && reports != g[v].reports) { DEBUG_PRINTF("report mismatch to %zu\n", g[v].index); - return true; - } - - return false; -} - -static -void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts, - const u32 minNumVertices, queue<ReachSubgraph> &q) { - DEBUG_PRINTF("entry\n"); - - // We construct a copy of the graph using just the vertices we want, rather - // than using a filtered_graph -- this way is faster. - NGHolder verts_g; + return true; + } + + return false; +} + +static +void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts, + const u32 minNumVertices, queue<ReachSubgraph> &q) { + DEBUG_PRINTF("entry\n"); + + // We construct a copy of the graph using just the vertices we want, rather + // than using a filtered_graph -- this way is faster. 
+ NGHolder verts_g; unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g - fillHolder(&verts_g, g, verts, &verts_map); - + fillHolder(&verts_g, g, verts, &verts_map); + const auto ug = make_undirected_graph(verts_g); - + unordered_map<NFAVertex, u32> repeatMap; - - size_t num = connected_components(ug, make_assoc_property_map(repeatMap)); - DEBUG_PRINTF("found %zu connected repeat components\n", num); - assert(num > 0); - - vector<ReachSubgraph> rs(num); - - for (auto v : verts) { + + size_t num = connected_components(ug, make_assoc_property_map(repeatMap)); + DEBUG_PRINTF("found %zu connected repeat components\n", num); + assert(num > 0); + + vector<ReachSubgraph> rs(num); + + for (auto v : verts) { assert(!is_special(v, g)); auto vu = verts_map.at(v); - auto rit = repeatMap.find(vu); - if (rit == repeatMap.end()) { - continue; /* not part of a repeat */ - } - u32 comp_id = rit->second; - assert(comp_id < num); - rs[comp_id].vertices.push_back(v); - } - - for (const auto &rsi : rs) { + auto rit = repeatMap.find(vu); + if (rit == repeatMap.end()) { + continue; /* not part of a repeat */ + } + u32 comp_id = rit->second; + assert(comp_id < num); + rs[comp_id].vertices.push_back(v); + } + + for (const auto &rsi : rs) { if (rsi.vertices.empty()) { // Empty elements can happen when connected_components finds a // subgraph consisting entirely of specials (which aren't added to @@ -331,448 +331,448 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts, // these, so we skip them. 
continue; } - DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size()); - if (rsi.vertices.size() >= minNumVertices) { - DEBUG_PRINTF("enqueuing\n"); - q.push(rsi); - } - } -} - -static -void findFirstReports(const NGHolder &g, const ReachSubgraph &rsi, - flat_set<ReportID> &reports) { - for (auto v : rsi.vertices) { - if (is_match_vertex(v, g)) { - reports = g[v].reports; - return; - } - } -} - -static -void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, - const u32 minNumVertices) { - if (rs.empty()) { - return; - } - - DEBUG_PRINTF("%zu subgraphs\n", rs.size()); - - vector<ReachSubgraph> rs_out; - - queue<ReachSubgraph> q; - for (const auto &rsi : rs) { - if (rsi.vertices.size() < minNumVertices) { - continue; - } - q.push(rsi); - } - - while (!q.empty()) { - const ReachSubgraph &rsi = q.front(); - - if (rsi.vertices.size() < minNumVertices) { - q.pop(); // Too small for consideration as a repeat. - continue; - } - - DEBUG_PRINTF("subgraph with %zu vertices\n", rsi.vertices.size()); - - // Check that all the edges from outside have the same tops. TODO: we - // don't have to throw the whole subgraph out, we could do this check - // on a per vertex basis. 
- if (hasDifferentTops(g, rsi.vertices)) { - DEBUG_PRINTF("different tops!\n"); - q.pop(); - continue; - } - + DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size()); + if (rsi.vertices.size() >= minNumVertices) { + DEBUG_PRINTF("enqueuing\n"); + q.push(rsi); + } + } +} + +static +void findFirstReports(const NGHolder &g, const ReachSubgraph &rsi, + flat_set<ReportID> &reports) { + for (auto v : rsi.vertices) { + if (is_match_vertex(v, g)) { + reports = g[v].reports; + return; + } + } +} + +static +void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, + const u32 minNumVertices) { + if (rs.empty()) { + return; + } + + DEBUG_PRINTF("%zu subgraphs\n", rs.size()); + + vector<ReachSubgraph> rs_out; + + queue<ReachSubgraph> q; + for (const auto &rsi : rs) { + if (rsi.vertices.size() < minNumVertices) { + continue; + } + q.push(rsi); + } + + while (!q.empty()) { + const ReachSubgraph &rsi = q.front(); + + if (rsi.vertices.size() < minNumVertices) { + q.pop(); // Too small for consideration as a repeat. + continue; + } + + DEBUG_PRINTF("subgraph with %zu vertices\n", rsi.vertices.size()); + + // Check that all the edges from outside have the same tops. TODO: we + // don't have to throw the whole subgraph out, we could do this check + // on a per vertex basis. + if (hasDifferentTops(g, rsi.vertices)) { + DEBUG_PRINTF("different tops!\n"); + q.pop(); + continue; + } + unordered_set<NFAVertex> involved(rsi.vertices.begin(), rsi.vertices.end()); unordered_set<NFAVertex> tail(involved); // to look for back-edges. unordered_set<NFAVertex> pred, succ; - proper_pred(g, rsi.vertices.front(), pred); - proper_succ(g, rsi.vertices.back(), succ); - - flat_set<ReportID> reports; - findFirstReports(g, rsi, reports); - - bool recalc = false; - deque<NFAVertex> verts; - - for (auto v : rsi.vertices) { - tail.erase(v); // now contains all vertices _after_ this one. 
- - if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) { - recalc = true; - continue; - } - - verts.push_back(v); - } - - if (recalc) { - if (verts.size() < minNumVertices) { - DEBUG_PRINTF("subgraph got too small\n"); - q.pop(); - continue; - } - splitSubgraph(g, verts, minNumVertices, q); - } else { - DEBUG_PRINTF("subgraph is ok\n"); - rs_out.push_back(rsi); - } - q.pop(); - } - - rs.swap(rs_out); -} - -namespace { -class DistanceSet { -private: - // We use boost::icl to do the heavy lifting. - typedef boost::icl::closed_interval<u32> ClosedInterval; - typedef boost::icl::interval_set<u32, std::less, ClosedInterval> - IntervalSet; - IntervalSet distances; -public: - // Add a distance. - void insert(u32 d) { - distances.insert(d); - } - - void add(const DistanceSet &a) { - distances += a.distances; // union operation - } - - // Increment all the distances by one and add. - void add_incremented(const DistanceSet &a) { - for (const auto &d : a.distances) { - u32 lo = lower(d) + 1; - u32 hi = upper(d) + 1; - distances.insert(boost::icl::construct<ClosedInterval>(lo, hi)); - } - } - -#ifdef DEBUG - void dump() const { - if (distances.empty()) { - printf("<empty>"); - return; - } - - for (const auto &d : distances) { - printf("[%u,%u] ", lower(d), upper(d)); - } - } -#endif - - // True if this distance set is a single contiguous interval. - bool is_contiguous() const { - IntervalSet::const_iterator it = distances.begin(); - if (it == distances.end()) { - return false; - } - ++it; - return (it == distances.end()); - } - - pair<u32, u32> get_range() const { - assert(is_contiguous()); - return make_pair(lower(distances), upper(distances)); - } -}; -} - -/** - * Returns false if the given bounds are too large to be implemented with our - * runtime engines that handle bounded repeats. 
- */ -static -bool tooLargeToImplement(const depth &repeatMin, const depth &repeatMax) { - if (!repeatMin.is_finite()) { - DEBUG_PRINTF("non-finite min bound %s\n", repeatMin.str().c_str()); - assert(0); // this is a surprise! - return true; - } - - if ((u32)repeatMin >= REPEAT_INF) { - DEBUG_PRINTF("min bound %s too large\n", repeatMin.str().c_str()); - return true; - } - - if (repeatMax.is_finite() && (u32)repeatMax >= REPEAT_INF) { - DEBUG_PRINTF("finite max bound %s too large\n", repeatMax.str().c_str()); - return true; - } - - return false; -} - -/** Returns false if the graph is not a supported bounded repeat. */ -static -bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, - u32 minNumVertices) { - DEBUG_PRINTF("reach subgraph has %zu vertices\n", rsi.vertices.size()); - - if (rsi.vertices.size() < minNumVertices) { - DEBUG_PRINTF("too small, min is %u\n", minNumVertices); - return false; - } - - NFAVertex first = rsi.vertices.front(); - NFAVertex last = rsi.vertices.back(); - + proper_pred(g, rsi.vertices.front(), pred); + proper_succ(g, rsi.vertices.back(), succ); + + flat_set<ReportID> reports; + findFirstReports(g, rsi, reports); + + bool recalc = false; + deque<NFAVertex> verts; + + for (auto v : rsi.vertices) { + tail.erase(v); // now contains all vertices _after_ this one. + + if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) { + recalc = true; + continue; + } + + verts.push_back(v); + } + + if (recalc) { + if (verts.size() < minNumVertices) { + DEBUG_PRINTF("subgraph got too small\n"); + q.pop(); + continue; + } + splitSubgraph(g, verts, minNumVertices, q); + } else { + DEBUG_PRINTF("subgraph is ok\n"); + rs_out.push_back(rsi); + } + q.pop(); + } + + rs.swap(rs_out); +} + +namespace { +class DistanceSet { +private: + // We use boost::icl to do the heavy lifting. 
+ typedef boost::icl::closed_interval<u32> ClosedInterval; + typedef boost::icl::interval_set<u32, std::less, ClosedInterval> + IntervalSet; + IntervalSet distances; +public: + // Add a distance. + void insert(u32 d) { + distances.insert(d); + } + + void add(const DistanceSet &a) { + distances += a.distances; // union operation + } + + // Increment all the distances by one and add. + void add_incremented(const DistanceSet &a) { + for (const auto &d : a.distances) { + u32 lo = lower(d) + 1; + u32 hi = upper(d) + 1; + distances.insert(boost::icl::construct<ClosedInterval>(lo, hi)); + } + } + +#ifdef DEBUG + void dump() const { + if (distances.empty()) { + printf("<empty>"); + return; + } + + for (const auto &d : distances) { + printf("[%u,%u] ", lower(d), upper(d)); + } + } +#endif + + // True if this distance set is a single contiguous interval. + bool is_contiguous() const { + IntervalSet::const_iterator it = distances.begin(); + if (it == distances.end()) { + return false; + } + ++it; + return (it == distances.end()); + } + + pair<u32, u32> get_range() const { + assert(is_contiguous()); + return make_pair(lower(distances), upper(distances)); + } +}; +} + +/** + * Returns false if the given bounds are too large to be implemented with our + * runtime engines that handle bounded repeats. + */ +static +bool tooLargeToImplement(const depth &repeatMin, const depth &repeatMax) { + if (!repeatMin.is_finite()) { + DEBUG_PRINTF("non-finite min bound %s\n", repeatMin.str().c_str()); + assert(0); // this is a surprise! + return true; + } + + if ((u32)repeatMin >= REPEAT_INF) { + DEBUG_PRINTF("min bound %s too large\n", repeatMin.str().c_str()); + return true; + } + + if (repeatMax.is_finite() && (u32)repeatMax >= REPEAT_INF) { + DEBUG_PRINTF("finite max bound %s too large\n", repeatMax.str().c_str()); + return true; + } + + return false; +} + +/** Returns false if the graph is not a supported bounded repeat. 
*/ +static +bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, + u32 minNumVertices) { + DEBUG_PRINTF("reach subgraph has %zu vertices\n", rsi.vertices.size()); + + if (rsi.vertices.size() < minNumVertices) { + DEBUG_PRINTF("too small, min is %u\n", minNumVertices); + return false; + } + + NFAVertex first = rsi.vertices.front(); + NFAVertex last = rsi.vertices.back(); + typedef unordered_map<NFAVertex, DistanceSet> DistanceMap; - DistanceMap dist; - - // Initial distance sets. - for (auto u : inv_adjacent_vertices_range(first, g)) { - if (u == first) { - continue; // no self-loops - } + DistanceMap dist; + + // Initial distance sets. + for (auto u : inv_adjacent_vertices_range(first, g)) { + if (u == first) { + continue; // no self-loops + } DEBUG_PRINTF("pred vertex %zu\n", g[u].index); - dist[u].insert(0); - } - - for (auto v : rsi.vertices) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; // no self-loops - } - - auto di = dist.find(u); - if (di == dist.end()) { - assert(0); - return false; - } - - dist[v].add_incremented(di->second); - } - } - - // Remove pred distances from our map. - for (auto u : inv_adjacent_vertices_range(first, g)) { - if (u == first) { - continue; // no self-loops - } - dist.erase(u); - } - - // Calculate final union of distances. 
- DistanceSet final_d; - for (auto v : adjacent_vertices_range(last, g)) { - if (v == last) { - continue; // no self-loops - } - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; // no self-loops - } - auto di = dist.find(u); - if (di == dist.end()) { - continue; - } - final_d.add(di->second); - } - } - -#ifdef DEBUG - DEBUG_PRINTF("final_d dists: "); - final_d.dump(); - printf("\n"); -#endif - - if (!final_d.is_contiguous()) { - // not handled right now - DEBUG_PRINTF("not contiguous!\n"); - return false; - } - - pair<u32, u32> range = final_d.get_range(); - if (range.first > depth::max_value() || range.second > depth::max_value()) { - DEBUG_PRINTF("repeat (%u,%u) not representable with depths\n", - range.first, range.second); - return false; - } + dist[u].insert(0); + } + + for (auto v : rsi.vertices) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; // no self-loops + } + + auto di = dist.find(u); + if (di == dist.end()) { + assert(0); + return false; + } + + dist[v].add_incremented(di->second); + } + } + + // Remove pred distances from our map. + for (auto u : inv_adjacent_vertices_range(first, g)) { + if (u == first) { + continue; // no self-loops + } + dist.erase(u); + } + + // Calculate final union of distances. 
+ DistanceSet final_d; + for (auto v : adjacent_vertices_range(last, g)) { + if (v == last) { + continue; // no self-loops + } + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; // no self-loops + } + auto di = dist.find(u); + if (di == dist.end()) { + continue; + } + final_d.add(di->second); + } + } + +#ifdef DEBUG + DEBUG_PRINTF("final_d dists: "); + final_d.dump(); + printf("\n"); +#endif + + if (!final_d.is_contiguous()) { + // not handled right now + DEBUG_PRINTF("not contiguous!\n"); + return false; + } + + pair<u32, u32> range = final_d.get_range(); + if (range.first > depth::max_value() || range.second > depth::max_value()) { + DEBUG_PRINTF("repeat (%u,%u) not representable with depths\n", + range.first, range.second); + return false; + } rsi.repeatMin = depth(range.first); rsi.repeatMax = depth(range.second); - - // If we've got a self-loop anywhere, we've got inf max. - if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) { - DEBUG_PRINTF("repeat contains self-loop, setting max to INF\n"); - rsi.repeatMax = depth::infinity(); - } - - // If our pattern contains a bounded repeat that we wouldn't be able to - // implement as runtime, then we have no strategy that leads to - // implementation -- it's not like falling back to a DFA or other - // non-repeat engine is going to succeed. - if (tooLargeToImplement(rsi.repeatMin, rsi.repeatMax)) { - throw CompileError("Pattern too large."); - } - - return true; -} - -static -bool allPredsInSubgraph(NFAVertex v, const NGHolder &g, + + // If we've got a self-loop anywhere, we've got inf max. 
+ if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) { + DEBUG_PRINTF("repeat contains self-loop, setting max to INF\n"); + rsi.repeatMax = depth::infinity(); + } + + // If our pattern contains a bounded repeat that we wouldn't be able to + // implement as runtime, then we have no strategy that leads to + // implementation -- it's not like falling back to a DFA or other + // non-repeat engine is going to succeed. + if (tooLargeToImplement(rsi.repeatMin, rsi.repeatMax)) { + throw CompileError("Pattern too large."); + } + + return true; +} + +static +bool allPredsInSubgraph(NFAVertex v, const NGHolder &g, const unordered_set<NFAVertex> &involved) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!contains(involved, u)) { - return false; - } - } - return true; -} - -static -void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!contains(involved, u)) { + return false; + } + } + return true; +} + +static +void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, const unordered_set<NFAVertex> &involved, unordered_map<NFAVertex, NFAVertexDepth> &depths, - vector<NFAVertex> &tugs) { - if (allPredsInSubgraph(v, g, involved)) { - // We can transform this vertex into a tug trigger in-place. + vector<NFAVertex> &tugs) { + if (allPredsInSubgraph(v, g, involved)) { + // We can transform this vertex into a tug trigger in-place. DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n", - g[v].index); - add_edge(cyclic, v, g); - tugs.push_back(v); - return; - } - - // Some predecessors of v are not in the subgraph, so we need to clone v - // and split up its in-edges. - NFAVertex t = clone_vertex(g, v); - depths[t] = depths[v]; - + g[v].index); + add_edge(cyclic, v, g); + tugs.push_back(v); + return; + } + + // Some predecessors of v are not in the subgraph, so we need to clone v + // and split up its in-edges. 
+ NFAVertex t = clone_vertex(g, v); + depths[t] = depths[v]; + DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n", - g[t].index, g[v].index); - - tugs.push_back(t); - add_edge(cyclic, t, g); - - // New vertex gets all of v's successors, including v itself if it's - // cyclic. - clone_out_edges(g, v, t); -} - -static -NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) { - NFAVertex last = rsi.vertices.back(); - NFAVertex cyclic = clone_vertex(g, last); - add_edge(cyclic, cyclic, g); - + g[t].index, g[v].index); + + tugs.push_back(t); + add_edge(cyclic, t, g); + + // New vertex gets all of v's successors, including v itself if it's + // cyclic. + clone_out_edges(g, v, t); +} + +static +NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) { + NFAVertex last = rsi.vertices.back(); + NFAVertex cyclic = clone_vertex(g, last); + add_edge(cyclic, cyclic, g); + DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index); - return cyclic; -} - -static -NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) { - NFAVertex pos = add_vertex(g); - NFAVertex first = rsi.vertices.front(); - - g[pos].char_reach = g[first].char_reach; - + return cyclic; +} + +static +NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) { + NFAVertex pos = add_vertex(g); + NFAVertex first = rsi.vertices.front(); + + g[pos].char_reach = g[first].char_reach; + DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index); - return pos; -} - -// 2 if v is directly connected to an accept, or 1 if one hop away, -// or 0 otherwise. 
-static -u32 isCloseToAccept(const NGHolder &g, NFAVertex v) { - if (is_any_accept(v, g)) { - return 2; - } - - for (auto w : adjacent_vertices_range(v, g)) { - if (is_any_accept(w, g)) { - return 1; - } - } - - return 0; -} - -static -u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) { - const NFAVertex last = rsi.vertices.back(); - u32 rv = 0; - - for (auto v : adjacent_vertices_range(last, g)) { - rv = max(rv, isCloseToAccept(g, v)); - } - - return rv; -} - -static -void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, + return pos; +} + +// 2 if v is directly connected to an accept, or 1 if one hop away, +// or 0 otherwise. +static +u32 isCloseToAccept(const NGHolder &g, NFAVertex v) { + if (is_any_accept(v, g)) { + return 2; + } + + for (auto w : adjacent_vertices_range(v, g)) { + if (is_any_accept(w, g)) { + return 1; + } + } + + return 0; +} + +static +u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) { + const NFAVertex last = rsi.vertices.back(); + u32 rv = 0; + + for (auto v : adjacent_vertices_range(last, g)) { + rv = max(rv, isCloseToAccept(g, v)); + } + + return rv; +} + +static +void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, unordered_map<NFAVertex, NFAVertexDepth> &depths, - vector<NFAVertex> *succs) { - u32 unpeel = unpeelAmount(g, rsi); - DEBUG_PRINTF("unpeeling %u vertices\n", unpeel); - - while (unpeel) { - NFAVertex last = rsi.vertices.back(); - NFAVertex first = rsi.vertices.front(); - - NFAVertex d = clone_vertex(g, last); - depths[d] = depths[last]; + vector<NFAVertex> *succs) { + u32 unpeel = unpeelAmount(g, rsi); + DEBUG_PRINTF("unpeeling %u vertices\n", unpeel); + + while (unpeel) { + NFAVertex last = rsi.vertices.back(); + NFAVertex first = rsi.vertices.front(); + + NFAVertex d = clone_vertex(g, last); + depths[d] = depths[last]; DEBUG_PRINTF("created vertex %zu\n", g[d].index); - - for (auto v : *succs) { - add_edge(d, v, g); - } - - if (rsi.repeatMin > depth(1)) { - rsi.repeatMin -= 1; - } else { - /* Skip 
edge for the cyclic state; note that we must clone their - * edge properties as they may include tops. */ - for (const auto &e : in_edges_range(first, g)) { - add_edge(source(e, g), d, g[e], g); - } - } - - succs->clear(); - succs->push_back(d); - - rsi.repeatMax -= 1; - - assert(rsi.repeatMin > depth(0)); - assert(rsi.repeatMax > depth(0)); - - unpeel--; - } -} - -/** Fetch the set of successor vertices of this subgraph. */ -static -void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi, - vector<NFAVertex> *succs) { - assert(!rsi.vertices.empty()); - // Successors come from successors of last vertex. - NFAVertex last = rsi.vertices.back(); - - for (auto v : adjacent_vertices_range(last, g)) { - if (v == last) { /* ignore self loop */ - continue; - } - succs->push_back(v); - } -} - -/** Disconnect the given subgraph from its predecessors and successors in the - * NFA graph and replace it with a cyclic state. */ -static -void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, + + for (auto v : *succs) { + add_edge(d, v, g); + } + + if (rsi.repeatMin > depth(1)) { + rsi.repeatMin -= 1; + } else { + /* Skip edge for the cyclic state; note that we must clone their + * edge properties as they may include tops. */ + for (const auto &e : in_edges_range(first, g)) { + add_edge(source(e, g), d, g[e], g); + } + } + + succs->clear(); + succs->push_back(d); + + rsi.repeatMax -= 1; + + assert(rsi.repeatMin > depth(0)); + assert(rsi.repeatMax > depth(0)); + + unpeel--; + } +} + +/** Fetch the set of successor vertices of this subgraph. */ +static +void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi, + vector<NFAVertex> *succs) { + assert(!rsi.vertices.empty()); + // Successors come from successors of last vertex. 
+ NFAVertex last = rsi.vertices.back(); + + for (auto v : adjacent_vertices_range(last, g)) { + if (v == last) { /* ignore self loop */ + continue; + } + succs->push_back(v); + } +} + +/** Disconnect the given subgraph from its predecessors and successors in the + * NFA graph and replace it with a cyclic state. */ +static +void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, vector<BoundedRepeatData> *repeats, unordered_map<NFAVertex, NFAVertexDepth> &depths, unordered_set<NFAVertex> &created) { - assert(!rsi.bad); + assert(!rsi.bad); /* As we may need to unpeel 2 vertices, we need the width to be more than 2. * This should only happen if the graph did not have redundancy pass * performed on as vertex count checks would be prevent us reaching here. @@ -780,396 +780,396 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, if (rsi.repeatMax <= depth(2)) { return; } - assert(rsi.repeatMin > depth(0)); - assert(rsi.repeatMax >= rsi.repeatMin); + assert(rsi.repeatMin > depth(0)); + assert(rsi.repeatMax >= rsi.repeatMin); assert(rsi.repeatMax > depth(2)); - - DEBUG_PRINTF("entry\n"); - + + DEBUG_PRINTF("entry\n"); + const unordered_set<NFAVertex> involved(rsi.vertices.begin(), - rsi.vertices.end()); - vector<NFAVertex> succs; - getSuccessors(g, rsi, &succs); - - unpeelNearEnd(g, rsi, depths, &succs); - - // Create our replacement cyclic state with the same reachability and - // report info as the last vertex in our topo-ordered list. - NFAVertex cyclic = createCyclic(g, rsi); - created.insert(cyclic); - - // One more special vertex is necessary: the positive trigger (same - // reach as cyclic). - NFAVertex pos_trigger = createPos(g, rsi); - created.insert(pos_trigger); - add_edge(pos_trigger, cyclic, g); - - // Update depths for our new vertices. 
- NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back(); - depths[pos_trigger] = depths[first]; - depths[cyclic].fromStart = - unionDepthMinMax(depths[first].fromStart, depths[last].fromStart); - depths[cyclic].fromStartDotStar = unionDepthMinMax( - depths[first].fromStartDotStar, depths[last].fromStartDotStar); - - // Wire predecessors to positive trigger. - for (const auto &e : in_edges_range(first, g)) { - add_edge(source(e, g), pos_trigger, g[e], g); - } - - // Wire cyclic state to tug trigger states built from successors. - vector<NFAVertex> tugs; - for (auto v : succs) { - buildTugTrigger(g, cyclic, v, involved, depths, tugs); - } - created.insert(tugs.begin(), tugs.end()); - assert(!tugs.empty()); - - // Wire pos trigger to tugs if min repeat is one -- this deals with cases - // where we can get a pos and tug trigger on the same byte. - if (rsi.repeatMin == depth(1)) { - for (auto v : tugs) { - add_edge(pos_trigger, v, g); - } - } - - // Remove the vertices/edges in the subgraph. - remove_vertices(rsi.vertices, g, false); - erase_all(&depths, rsi.vertices); - - repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin, - rsi.repeatMax, rsi.minPeriod, cyclic, - pos_trigger, tugs)); -} - -/** Variant for Rose-specific graphs that terminate in a sole accept, so we can - * use a "lazy tug". See UE-1636. */ -static -void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi, - vector<BoundedRepeatData> *repeats, + rsi.vertices.end()); + vector<NFAVertex> succs; + getSuccessors(g, rsi, &succs); + + unpeelNearEnd(g, rsi, depths, &succs); + + // Create our replacement cyclic state with the same reachability and + // report info as the last vertex in our topo-ordered list. + NFAVertex cyclic = createCyclic(g, rsi); + created.insert(cyclic); + + // One more special vertex is necessary: the positive trigger (same + // reach as cyclic). 
+ NFAVertex pos_trigger = createPos(g, rsi); + created.insert(pos_trigger); + add_edge(pos_trigger, cyclic, g); + + // Update depths for our new vertices. + NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back(); + depths[pos_trigger] = depths[first]; + depths[cyclic].fromStart = + unionDepthMinMax(depths[first].fromStart, depths[last].fromStart); + depths[cyclic].fromStartDotStar = unionDepthMinMax( + depths[first].fromStartDotStar, depths[last].fromStartDotStar); + + // Wire predecessors to positive trigger. + for (const auto &e : in_edges_range(first, g)) { + add_edge(source(e, g), pos_trigger, g[e], g); + } + + // Wire cyclic state to tug trigger states built from successors. + vector<NFAVertex> tugs; + for (auto v : succs) { + buildTugTrigger(g, cyclic, v, involved, depths, tugs); + } + created.insert(tugs.begin(), tugs.end()); + assert(!tugs.empty()); + + // Wire pos trigger to tugs if min repeat is one -- this deals with cases + // where we can get a pos and tug trigger on the same byte. + if (rsi.repeatMin == depth(1)) { + for (auto v : tugs) { + add_edge(pos_trigger, v, g); + } + } + + // Remove the vertices/edges in the subgraph. + remove_vertices(rsi.vertices, g, false); + erase_all(&depths, rsi.vertices); + + repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin, + rsi.repeatMax, rsi.minPeriod, cyclic, + pos_trigger, tugs)); +} + +/** Variant for Rose-specific graphs that terminate in a sole accept, so we can + * use a "lazy tug". See UE-1636. 
*/ +static +void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi, + vector<BoundedRepeatData> *repeats, unordered_map<NFAVertex, NFAVertexDepth> &depths, unordered_set<NFAVertex> &created) { - assert(!rsi.bad); - assert(rsi.repeatMin); - assert(rsi.repeatMax >= rsi.repeatMin); - - DEBUG_PRINTF("entry\n"); - + assert(!rsi.bad); + assert(rsi.repeatMin); + assert(rsi.repeatMax >= rsi.repeatMin); + + DEBUG_PRINTF("entry\n"); + const unordered_set<NFAVertex> involved(rsi.vertices.begin(), rsi.vertices.end()); - vector<NFAVertex> succs; - getSuccessors(g, rsi, &succs); - - // Create our replacement cyclic state with the same reachability and - // report info as the last vertex in our topo-ordered list. - NFAVertex cyclic = createCyclic(g, rsi); - created.insert(cyclic); - - // One more special vertex is necessary: the positive trigger (same - // reach as cyclic). - NFAVertex pos_trigger = createPos(g, rsi); - created.insert(pos_trigger); - add_edge(pos_trigger, cyclic, g); - - // Update depths for our new vertices. - NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back(); - depths[pos_trigger] = depths[first]; - depths[cyclic].fromStart = - unionDepthMinMax(depths[first].fromStart, depths[last].fromStart); - depths[cyclic].fromStartDotStar = unionDepthMinMax( - depths[first].fromStartDotStar, depths[last].fromStartDotStar); - - // Wire predecessors to positive trigger. - for (const auto &e : in_edges_range(first, g)) { - add_edge(source(e, g), pos_trigger, g[e], g); - } - - // In the rose case, our tug is our cyclic, and it's wired to our - // successors (which should be just the accept). - vector<NFAVertex> tugs; - assert(succs.size() == 1); - for (auto v : succs) { - add_edge(cyclic, v, g); - } - - // Wire pos trigger to accept if min repeat is one -- this deals with cases - // where we can get a pos and tug trigger on the same byte. 
- if (rsi.repeatMin == depth(1)) { - for (auto v : succs) { - add_edge(pos_trigger, v, g); - g[pos_trigger].reports = g[cyclic].reports; - } - } - - // Remove the vertices/edges in the subgraph. - remove_vertices(rsi.vertices, g, false); - erase_all(&depths, rsi.vertices); - - repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin, - rsi.repeatMax, rsi.minPeriod, cyclic, - pos_trigger, tugs)); -} - -static -bool isCompBigEnough(const RepeatGraph &rg, const u32 minRepeat) { - // filtered_graph doesn't filter the num_vertices call. - size_t n = 0; - RepeatGraph::vertex_iterator vi, ve; - for (tie(vi, ve) = vertices(rg); vi != ve; ++vi) { - if (++n >= minRepeat) { - return true; - } - } - return false; -} - -// Marks the subgraph as bad if it can't be handled. -static -void reprocessSubgraph(const NGHolder &h, const Grey &grey, - ReachSubgraph &rsi) { - vector<ReachSubgraph> rs(1, rsi); - checkReachSubgraphs(h, rs, grey.minExtBoundedRepeatSize); - if (rs.size() != 1) { - DEBUG_PRINTF("subgraph split into %zu\n", rs.size()); - rsi.bad = true; - return; - } - - rsi = rs.back(); // Potentially modified. - - if (processSubgraph(h, rsi, grey.minExtBoundedRepeatSize)) { - DEBUG_PRINTF("reprocessed subgraph is {%s,%s} repeat\n", - rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); - } else { - DEBUG_PRINTF("reprocessed subgraph is bad\n"); - rsi.bad = true; - } -} - -/** Remove vertices from the beginning and end of the vertex set that are - * involved in other repeats as a result of earlier repeat transformations. */ -static -bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, + vector<NFAVertex> succs; + getSuccessors(g, rsi, &succs); + + // Create our replacement cyclic state with the same reachability and + // report info as the last vertex in our topo-ordered list. + NFAVertex cyclic = createCyclic(g, rsi); + created.insert(cyclic); + + // One more special vertex is necessary: the positive trigger (same + // reach as cyclic). 
+ NFAVertex pos_trigger = createPos(g, rsi); + created.insert(pos_trigger); + add_edge(pos_trigger, cyclic, g); + + // Update depths for our new vertices. + NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back(); + depths[pos_trigger] = depths[first]; + depths[cyclic].fromStart = + unionDepthMinMax(depths[first].fromStart, depths[last].fromStart); + depths[cyclic].fromStartDotStar = unionDepthMinMax( + depths[first].fromStartDotStar, depths[last].fromStartDotStar); + + // Wire predecessors to positive trigger. + for (const auto &e : in_edges_range(first, g)) { + add_edge(source(e, g), pos_trigger, g[e], g); + } + + // In the rose case, our tug is our cyclic, and it's wired to our + // successors (which should be just the accept). + vector<NFAVertex> tugs; + assert(succs.size() == 1); + for (auto v : succs) { + add_edge(cyclic, v, g); + } + + // Wire pos trigger to accept if min repeat is one -- this deals with cases + // where we can get a pos and tug trigger on the same byte. + if (rsi.repeatMin == depth(1)) { + for (auto v : succs) { + add_edge(pos_trigger, v, g); + g[pos_trigger].reports = g[cyclic].reports; + } + } + + // Remove the vertices/edges in the subgraph. + remove_vertices(rsi.vertices, g, false); + erase_all(&depths, rsi.vertices); + + repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin, + rsi.repeatMax, rsi.minPeriod, cyclic, + pos_trigger, tugs)); +} + +static +bool isCompBigEnough(const RepeatGraph &rg, const u32 minRepeat) { + // filtered_graph doesn't filter the num_vertices call. + size_t n = 0; + RepeatGraph::vertex_iterator vi, ve; + for (tie(vi, ve) = vertices(rg); vi != ve; ++vi) { + if (++n >= minRepeat) { + return true; + } + } + return false; +} + +// Marks the subgraph as bad if it can't be handled. 
+static +void reprocessSubgraph(const NGHolder &h, const Grey &grey, + ReachSubgraph &rsi) { + vector<ReachSubgraph> rs(1, rsi); + checkReachSubgraphs(h, rs, grey.minExtBoundedRepeatSize); + if (rs.size() != 1) { + DEBUG_PRINTF("subgraph split into %zu\n", rs.size()); + rsi.bad = true; + return; + } + + rsi = rs.back(); // Potentially modified. + + if (processSubgraph(h, rsi, grey.minExtBoundedRepeatSize)) { + DEBUG_PRINTF("reprocessed subgraph is {%s,%s} repeat\n", + rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); + } else { + DEBUG_PRINTF("reprocessed subgraph is bad\n"); + rsi.bad = true; + } +} + +/** Remove vertices from the beginning and end of the vertex set that are + * involved in other repeats as a result of earlier repeat transformations. */ +static +bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, const unordered_set<NFAVertex> &created) { - assert(!rsi.bad); - - if (created.empty()) { - return true; - } - - if (rsi.vertices.empty()) { - return false; - } - - // Peel involved vertices from the front. - vector<NFAVertex>::iterator zap = rsi.vertices.end(); - for (auto it = rsi.vertices.begin(), ite = rsi.vertices.end(); it != ite; - ++it) { - if (!contains(created, *it)) { - zap = it; - break; - } else { + assert(!rsi.bad); + + if (created.empty()) { + return true; + } + + if (rsi.vertices.empty()) { + return false; + } + + // Peel involved vertices from the front. 
+ vector<NFAVertex>::iterator zap = rsi.vertices.end(); + for (auto it = rsi.vertices.begin(), ite = rsi.vertices.end(); it != ite; + ++it) { + if (!contains(created, *it)) { + zap = it; + break; + } else { DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); - } - } - DEBUG_PRINTF("peeling %zu vertices from front\n", - distance(rsi.vertices.begin(), zap)); - rsi.vertices.erase(rsi.vertices.begin(), zap); - - // Peel involved vertices and vertices with edges to involved vertices from - // the back; otherwise we may try to transform a POS into a TUG. - zap = rsi.vertices.begin(); - for (auto it = rsi.vertices.rbegin(), ite = rsi.vertices.rend(); it != ite; - ++it) { - if (!contains(created, *it) && - !contains_any_of(created, adjacent_vertices(*it, g))) { - zap = it.base(); // Note: erases everything after it. - break; - } else { + } + } + DEBUG_PRINTF("peeling %zu vertices from front\n", + distance(rsi.vertices.begin(), zap)); + rsi.vertices.erase(rsi.vertices.begin(), zap); + + // Peel involved vertices and vertices with edges to involved vertices from + // the back; otherwise we may try to transform a POS into a TUG. + zap = rsi.vertices.begin(); + for (auto it = rsi.vertices.rbegin(), ite = rsi.vertices.rend(); it != ite; + ++it) { + if (!contains(created, *it) && + !contains_any_of(created, adjacent_vertices(*it, g))) { + zap = it.base(); // Note: erases everything after it. + break; + } else { DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); - } - } - DEBUG_PRINTF("peeling %zu vertices from back\n", - distance(zap, rsi.vertices.end())); - rsi.vertices.erase(zap, rsi.vertices.end()); - - // If vertices in the middle are involved in other repeats, it's a definite - // no-no. 
- for (auto v : rsi.vertices) { - if (contains(created, v)) { + } + } + DEBUG_PRINTF("peeling %zu vertices from back\n", + distance(zap, rsi.vertices.end())); + rsi.vertices.erase(zap, rsi.vertices.end()); + + // If vertices in the middle are involved in other repeats, it's a definite + // no-no. + for (auto v : rsi.vertices) { + if (contains(created, v)) { DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index); - return false; - } - } - - reprocessSubgraph(g, grey, rsi); - return !rsi.bad; -} - -/** For performance reasons, it's nice not to have an exceptional state right - * next to a startDs state: that way we can do double-byte accel, whereas - * otherwise the NEG trigger would limit us to single. This might be a good - * idea to extend to cyclic states, too. */ -static -void peelStartDotStar(const NGHolder &g, + return false; + } + } + + reprocessSubgraph(g, grey, rsi); + return !rsi.bad; +} + +/** For performance reasons, it's nice not to have an exceptional state right + * next to a startDs state: that way we can do double-byte accel, whereas + * otherwise the NEG trigger would limit us to single. This might be a good + * idea to extend to cyclic states, too. 
*/ +static +void peelStartDotStar(const NGHolder &g, const unordered_map<NFAVertex, NFAVertexDepth> &depths, const Grey &grey, ReachSubgraph &rsi) { - if (rsi.vertices.size() < 1) { - return; - } - - NFAVertex first = rsi.vertices.front(); - if (depths.at(first).fromStartDotStar.min == depth(1)) { + if (rsi.vertices.size() < 1) { + return; + } + + NFAVertex first = rsi.vertices.front(); + if (depths.at(first).fromStartDotStar.min == depth(1)) { DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index); - rsi.vertices.erase(rsi.vertices.begin()); - reprocessSubgraph(g, grey, rsi); - } -} - -static -void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, - const u32 minNumVertices) { + rsi.vertices.erase(rsi.vertices.begin()); + reprocessSubgraph(g, grey, rsi); + } +} + +static +void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, + const u32 minNumVertices) { const ReachFilter<NGHolder> fil(&g); const RepeatGraph rg(g, fil, fil); - - if (!isCompBigEnough(rg, minNumVertices)) { - DEBUG_PRINTF("component not big enough, bailing\n"); - return; - } - + + if (!isCompBigEnough(rg, minNumVertices)) { + DEBUG_PRINTF("component not big enough, bailing\n"); + return; + } + const auto ug = make_undirected_graph(rg); - + unordered_map<NFAVertex, u32> repeatMap; - - unsigned int num; - num = connected_components(ug, make_assoc_property_map(repeatMap)); - DEBUG_PRINTF("found %u connected repeat components\n", num); - - // Now, we build a set of topo-ordered ReachSubgraphs. + + unsigned int num; + num = connected_components(ug, make_assoc_property_map(repeatMap)); + DEBUG_PRINTF("found %u connected repeat components\n", num); + + // Now, we build a set of topo-ordered ReachSubgraphs. 
vector<NFAVertex> topoOrder = buildTopoOrder(rg); - - rs.resize(num); - - for (auto v : topoOrder) { + + rs.resize(num); + + for (auto v : topoOrder) { auto rit = repeatMap.find(v); - if (rit == repeatMap.end()) { - continue; /* not part of a repeat */ - } - u32 comp_id = rit->second; - assert(comp_id < num); - rs[comp_id].vertices.push_back(v); - } - -#ifdef DEBUG - for (size_t i = 0; i < rs.size(); i++) { - DEBUG_PRINTF("rs %zu has %zu vertices.\n", i, rs[i].vertices.size()); - } -#endif -} - -static -bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) { - assert(!rsi.vertices.empty()); - - const NFAVertex first = rsi.vertices.front(); - const NFAVertex last = rsi.vertices.back(); - - // All of the preds of first must have edges to all the successors of last. - for (auto u : inv_adjacent_vertices_range(first, g)) { - for (auto v : adjacent_vertices_range(last, g)) { - if (!edge(u, v, g).second) { - return false; - } - } - } - - return true; -} - -/* depth info is valid as calculated at entry */ -static -bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, + if (rit == repeatMap.end()) { + continue; /* not part of a repeat */ + } + u32 comp_id = rit->second; + assert(comp_id < num); + rs[comp_id].vertices.push_back(v); + } + +#ifdef DEBUG + for (size_t i = 0; i < rs.size(); i++) { + DEBUG_PRINTF("rs %zu has %zu vertices.\n", i, rs[i].vertices.size()); + } +#endif +} + +static +bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) { + assert(!rsi.vertices.empty()); + + const NFAVertex first = rsi.vertices.front(); + const NFAVertex last = rsi.vertices.back(); + + // All of the preds of first must have edges to all the successors of last. 
+ for (auto u : inv_adjacent_vertices_range(first, g)) { + for (auto v : adjacent_vertices_range(last, g)) { + if (!edge(u, v, g).second) { + return false; + } + } + } + + return true; +} + +/* depth info is valid as calculated at entry */ +static +bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, const unordered_map<NFAVertex, NFAVertexDepth> &depths, const unordered_set<NFAVertex> &reached_by_fixed_tops) { - DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n", - reached_by_fixed_tops.size()); - if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) { - /* can't do this for infix/suffixes unless we know trigger literals - * can only occur at one offset */ + DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n", + reached_by_fixed_tops.size()); + if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) { + /* can't do this for infix/suffixes unless we know trigger literals + * can only occur at one offset */ DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index); - return false; - } - - if (depths.at(v).fromStartDotStar.min.is_reachable()) { - DEBUG_PRINTF("reachable from startDs\n"); - return false; - } - - /* look at preds as v may be cyclic */ - const depth &first = depths.at(v).fromStart.min; - assert(first.is_reachable()); - if (!first.is_finite()) { - DEBUG_PRINTF("first not finite\n"); - return false; - } - DEBUG_PRINTF("first is at least %s from start\n", first.str().c_str()); - - for (auto u : inv_adjacent_vertices_range(v, g)) { - const depth &u_max_depth = depths.at(u).fromStart.max; + return false; + } + + if (depths.at(v).fromStartDotStar.min.is_reachable()) { + DEBUG_PRINTF("reachable from startDs\n"); + return false; + } + + /* look at preds as v may be cyclic */ + const depth &first = depths.at(v).fromStart.min; + assert(first.is_reachable()); + if (!first.is_finite()) { + DEBUG_PRINTF("first not finite\n"); + return false; + } + DEBUG_PRINTF("first is at least %s from start\n", first.str().c_str()); + + for (auto u : inv_adjacent_vertices_range(v, g)) 
{ + const depth &u_max_depth = depths.at(u).fromStart.max; DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index, u_max_depth.str().c_str()); - if (u_max_depth != first - depth(1)) { - return false; - } - } - - return true; -} - -static -NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger, - u32 top) { - NFAVertex u = g.start; - for (const auto &cr : trigger) { - NFAVertex v = add_vertex(g); - g[v].char_reach = cr; - add_edge(u, v, g); - if (u == g.start) { + if (u_max_depth != first - depth(1)) { + return false; + } + } + + return true; +} + +static +NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger, + u32 top) { + NFAVertex u = g.start; + for (const auto &cr : trigger) { + NFAVertex v = add_vertex(g); + g[v].char_reach = cr; + add_edge(u, v, g); + if (u == g.start) { g[edge(u, v, g)].tops.insert(top); - } - u = v; - } - + } + u = v; + } + DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index); - return u; -} - -/** - * For triggered graphs, replace the "top" edges from start with the triggers - * they represent, for the purposes of determining sole entry. - */ -static -void addTriggers(NGHolder &g, - const map<u32, vector<vector<CharReach>>> &triggers) { - if (!is_triggered(g)) { - assert(triggers.empty()); - return; - } - - vector<NFAEdge> dead; - map<u32, vector<NFAVertex>> starts_by_top; - - for (const auto &e : out_edges_range(g.start, g)) { - const NFAVertex &v = target(e, g); - if (v == g.startDs) { - continue; - } - + return u; +} + +/** + * For triggered graphs, replace the "top" edges from start with the triggers + * they represent, for the purposes of determining sole entry. 
+ */ +static +void addTriggers(NGHolder &g, + const map<u32, vector<vector<CharReach>>> &triggers) { + if (!is_triggered(g)) { + assert(triggers.empty()); + return; + } + + vector<NFAEdge> dead; + map<u32, vector<NFAVertex>> starts_by_top; + + for (const auto &e : out_edges_range(g.start, g)) { + const NFAVertex &v = target(e, g); + if (v == g.startDs) { + continue; + } + const auto &tops = g[e].tops; - - // The caller may not have given us complete trigger information. If we - // don't have any triggers for a particular top, we should just leave - // it alone. + + // The caller may not have given us complete trigger information. If we + // don't have any triggers for a particular top, we should just leave + // it alone. for (u32 top : tops) { if (!contains(triggers, top)) { DEBUG_PRINTF("no triggers for top %u\n", top); @@ -1177,946 +1177,946 @@ void addTriggers(NGHolder &g, } starts_by_top[top].push_back(v); - } - dead.push_back(e); + } + dead.push_back(e); next_edge:; - } - - remove_edges(dead, g); - - for (const auto &m : starts_by_top) { - const auto &top = m.first; - const auto &starts = m.second; - - assert(contains(triggers, top)); - const auto &top_triggers = triggers.at(top); - - for (const auto &trigger : top_triggers) { - NFAVertex u = buildTriggerStates(g, trigger, top); - for (const auto &v : starts) { - add_edge_if_not_present(u, v, g); - } - } - } -} - -static -CharReach predReach(const NGHolder &g, NFAVertex v) { - CharReach cr; - for (auto u : inv_adjacent_vertices_range(v, g)) { - cr |= g[u].char_reach; - } - return cr; -} - -/** - * Filter the given vertex map (which maps from vertices in another graph to - * vertices in subg) so that it only contains vertices that actually exist in - * subg. 
- */ -static -void filterMap(const NGHolder &subg, + } + + remove_edges(dead, g); + + for (const auto &m : starts_by_top) { + const auto &top = m.first; + const auto &starts = m.second; + + assert(contains(triggers, top)); + const auto &top_triggers = triggers.at(top); + + for (const auto &trigger : top_triggers) { + NFAVertex u = buildTriggerStates(g, trigger, top); + for (const auto &v : starts) { + add_edge_if_not_present(u, v, g); + } + } + } +} + +static +CharReach predReach(const NGHolder &g, NFAVertex v) { + CharReach cr; + for (auto u : inv_adjacent_vertices_range(v, g)) { + cr |= g[u].char_reach; + } + return cr; +} + +/** + * Filter the given vertex map (which maps from vertices in another graph to + * vertices in subg) so that it only contains vertices that actually exist in + * subg. + */ +static +void filterMap(const NGHolder &subg, unordered_map<NFAVertex, NFAVertex> &vmap) { NGHolder::vertex_iterator vi, ve; - tie(vi, ve) = vertices(subg); + tie(vi, ve) = vertices(subg); const unordered_set<NFAVertex> remaining_verts(vi, ve); - + unordered_map<NFAVertex, NFAVertex> fmap; // filtered map - - for (const auto &m : vmap) { - if (contains(remaining_verts, m.second)) { - fmap.insert(m); - } - } - - vmap.swap(fmap); -} - -/** Construct a graph for sole entry analysis that only considers paths through - * the bounded repeat. */ -static -void buildRepeatGraph(NGHolder &rg, + + for (const auto &m : vmap) { + if (contains(remaining_verts, m.second)) { + fmap.insert(m); + } + } + + vmap.swap(fmap); +} + +/** Construct a graph for sole entry analysis that only considers paths through + * the bounded repeat. 
*/ +static +void buildRepeatGraph(NGHolder &rg, unordered_map<NFAVertex, NFAVertex> &rg_map, - const NGHolder &g, const ReachSubgraph &rsi, - const map<u32, vector<vector<CharReach>>> &triggers) { - cloneHolder(rg, g, &rg_map); - assert(rg.kind == g.kind); - - clear_in_edges(rg.accept, rg); - clear_in_edges(rg.acceptEod, rg); - add_edge(rg.accept, rg.acceptEod, rg); - - // Find the set of vertices in rg involved in the repeat. + const NGHolder &g, const ReachSubgraph &rsi, + const map<u32, vector<vector<CharReach>>> &triggers) { + cloneHolder(rg, g, &rg_map); + assert(rg.kind == g.kind); + + clear_in_edges(rg.accept, rg); + clear_in_edges(rg.acceptEod, rg); + add_edge(rg.accept, rg.acceptEod, rg); + + // Find the set of vertices in rg involved in the repeat. unordered_set<NFAVertex> rg_involved; - for (const auto &v : rsi.vertices) { - assert(contains(rg_map, v)); - rg_involved.insert(rg_map.at(v)); - } - - // Remove all out-edges from repeat vertices that aren't to other repeat - // vertices, then connect terminal repeat vertices to accept. - for (const auto &v : rsi.vertices) { - NFAVertex rv = rg_map.at(v); - remove_out_edge_if(rv, [&](const NFAEdge &e) { - return !contains(rg_involved, target(e, rg)); - }, rg); - if (!has_successor(rv, rg)) { // no interior out-edges - add_edge(rv, rg.accept, rg); - } - } - - pruneUseless(rg); - - if (is_triggered(rg)) { - // Add vertices for all our triggers - addTriggers(rg, triggers); + for (const auto &v : rsi.vertices) { + assert(contains(rg_map, v)); + rg_involved.insert(rg_map.at(v)); + } + + // Remove all out-edges from repeat vertices that aren't to other repeat + // vertices, then connect terminal repeat vertices to accept. 
+ for (const auto &v : rsi.vertices) { + NFAVertex rv = rg_map.at(v); + remove_out_edge_if(rv, [&](const NFAEdge &e) { + return !contains(rg_involved, target(e, rg)); + }, rg); + if (!has_successor(rv, rg)) { // no interior out-edges + add_edge(rv, rg.accept, rg); + } + } + + pruneUseless(rg); + + if (is_triggered(rg)) { + // Add vertices for all our triggers + addTriggers(rg, triggers); renumber_vertices(rg); - - // We don't know anything about how often this graph is triggered, so we - // make the start vertex cyclic for the purposes of this analysis ONLY. - add_edge(rg.start, rg.start, rg); - } - - filterMap(rg, rg_map); - - // All of our repeat vertices should have vertices in rg. - assert(all_of(begin(rsi.vertices), end(rsi.vertices), - [&](const NFAVertex &v) { return contains(rg_map, v); })); -} - -/** - * Construct an input DAG which accepts on all entries to the repeat. - */ -static -void buildInputGraph(NGHolder &lhs, + + // We don't know anything about how often this graph is triggered, so we + // make the start vertex cyclic for the purposes of this analysis ONLY. + add_edge(rg.start, rg.start, rg); + } + + filterMap(rg, rg_map); + + // All of our repeat vertices should have vertices in rg. + assert(all_of(begin(rsi.vertices), end(rsi.vertices), + [&](const NFAVertex &v) { return contains(rg_map, v); })); +} + +/** + * Construct an input DAG which accepts on all entries to the repeat. 
+ */ +static +void buildInputGraph(NGHolder &lhs, unordered_map<NFAVertex, NFAVertex> &lhs_map, - const NGHolder &g, const NFAVertex first, - const map<u32, vector<vector<CharReach>>> &triggers) { + const NGHolder &g, const NFAVertex first, + const map<u32, vector<vector<CharReach>>> &triggers) { DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index); - cloneHolder(lhs, g, &lhs_map); - assert(g.kind == lhs.kind); - addTriggers(lhs, triggers); + cloneHolder(lhs, g, &lhs_map); + assert(g.kind == lhs.kind); + addTriggers(lhs, triggers); renumber_vertices(lhs); - - // Replace each back-edge (u,v) with an edge (startDs,v), which will - // generate entries at at least the rate of the loop created by that - // back-edge. - set<NFAEdge> dead; - BackEdges<set<NFAEdge> > backEdgeVisitor(dead); + + // Replace each back-edge (u,v) with an edge (startDs,v), which will + // generate entries at at least the rate of the loop created by that + // back-edge. + set<NFAEdge> dead; + BackEdges<set<NFAEdge> > backEdgeVisitor(dead); depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start)); - for (const auto &e : dead) { - const NFAVertex u = source(e, lhs), v = target(e, lhs); - if (u == v) { - continue; // Self-loops are OK. - } - + for (const auto &e : dead) { + const NFAVertex u = source(e, lhs), v = target(e, lhs); + if (u == v) { + continue; // Self-loops are OK. + } + DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n", lhs[u].index, lhs[v].index, lhs[v].index); - - add_edge_if_not_present(lhs.startDs, v, lhs); - remove_edge(e, lhs); - } - - clear_in_edges(lhs.accept, lhs); - clear_in_edges(lhs.acceptEod, lhs); - add_edge(lhs.accept, lhs.acceptEod, lhs); - - // Wire the predecessors of the first repeat vertex to accept, then prune. 
- NFAVertex lhs_first = lhs_map.at(first); - for (auto u : inv_adjacent_vertices_range(lhs_first, lhs)) { - add_edge_if_not_present(u, lhs.accept, lhs); - } - - pruneUseless(lhs); - filterMap(lhs, lhs_map); -} - -/** - * Maximum number of vertices in the input DAG to actually allow sole entry - * calculation (as very large cases make sentClearsTail take a long, long time - * to complete.) - */ -static const size_t MAX_SOLE_ENTRY_VERTICES = 10000; - -/** True if (1) fixed offset or (2) reentries to this subgraph must involve a - * character which escapes the repeat, meaning that we only need to store a - * single offset at runtime. See UE-1361. */ -static -bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, + + add_edge_if_not_present(lhs.startDs, v, lhs); + remove_edge(e, lhs); + } + + clear_in_edges(lhs.accept, lhs); + clear_in_edges(lhs.acceptEod, lhs); + add_edge(lhs.accept, lhs.acceptEod, lhs); + + // Wire the predecessors of the first repeat vertex to accept, then prune. + NFAVertex lhs_first = lhs_map.at(first); + for (auto u : inv_adjacent_vertices_range(lhs_first, lhs)) { + add_edge_if_not_present(u, lhs.accept, lhs); + } + + pruneUseless(lhs); + filterMap(lhs, lhs_map); +} + +/** + * Maximum number of vertices in the input DAG to actually allow sole entry + * calculation (as very large cases make sentClearsTail take a long, long time + * to complete.) + */ +static const size_t MAX_SOLE_ENTRY_VERTICES = 10000; + +/** True if (1) fixed offset or (2) reentries to this subgraph must involve a + * character which escapes the repeat, meaning that we only need to store a + * single offset at runtime. See UE-1361. 
*/ +static +bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, const unordered_map<NFAVertex, NFAVertexDepth> &depths, const unordered_set<NFAVertex> &reached_by_fixed_tops, - const map<u32, vector<vector<CharReach>>> &triggers) { - DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(), - rsi.repeatMax.str().c_str()); - NFAVertex first = rsi.vertices.front(); - const CharReach &repeatReach = g[first].char_reach; - - /* trivial case first is at a fixed depth */ - if (entered_at_fixed_offset(first, g, depths, reached_by_fixed_tops)) { - DEBUG_PRINTF("fixed depth\n"); - return true; - } - - DEBUG_PRINTF("repeat reach is %s\n", describeClass(repeatReach).c_str()); - - // Nothing can escape a dot repeat. - if (repeatReach.all()) { - DEBUG_PRINTF("dot repeat cannot be escaped\n"); - return false; - } - - // Another easy case: if the union of the reach of all entries to the - // repeat will always escape the repeat, we have sole entry. - if (predReach(g, first).isSubsetOf(~repeatReach)) { - DEBUG_PRINTF("pred reach %s, which is subset of repeat escape\n", - describeClass(predReach(g, first)).c_str()); - return true; - } - - NGHolder rg; + const map<u32, vector<vector<CharReach>>> &triggers) { + DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(), + rsi.repeatMax.str().c_str()); + NFAVertex first = rsi.vertices.front(); + const CharReach &repeatReach = g[first].char_reach; + + /* trivial case first is at a fixed depth */ + if (entered_at_fixed_offset(first, g, depths, reached_by_fixed_tops)) { + DEBUG_PRINTF("fixed depth\n"); + return true; + } + + DEBUG_PRINTF("repeat reach is %s\n", describeClass(repeatReach).c_str()); + + // Nothing can escape a dot repeat. + if (repeatReach.all()) { + DEBUG_PRINTF("dot repeat cannot be escaped\n"); + return false; + } + + // Another easy case: if the union of the reach of all entries to the + // repeat will always escape the repeat, we have sole entry. 
+ if (predReach(g, first).isSubsetOf(~repeatReach)) { + DEBUG_PRINTF("pred reach %s, which is subset of repeat escape\n", + describeClass(predReach(g, first)).c_str()); + return true; + } + + NGHolder rg; unordered_map<NFAVertex, NFAVertex> rg_map; - buildRepeatGraph(rg, rg_map, g, rsi, triggers); - assert(rg.kind == g.kind); - - NGHolder lhs; + buildRepeatGraph(rg, rg_map, g, rsi, triggers); + assert(rg.kind == g.kind); + + NGHolder lhs; unordered_map<NFAVertex, NFAVertex> lhs_map; - buildInputGraph(lhs, lhs_map, g, first, triggers); - assert(lhs.kind == g.kind); - - if (num_vertices(lhs) > MAX_SOLE_ENTRY_VERTICES) { - DEBUG_PRINTF("too many vertices (%zu) for sole entry test.\n", - num_vertices(lhs)); - return false; - } - - // Split the repeat graph into two regions: vertices in the LHS input DAG - // are in one region, vertices in the bounded repeat are in another. - const u32 lhs_region = 1; - const u32 repeat_region = 2; + buildInputGraph(lhs, lhs_map, g, first, triggers); + assert(lhs.kind == g.kind); + + if (num_vertices(lhs) > MAX_SOLE_ENTRY_VERTICES) { + DEBUG_PRINTF("too many vertices (%zu) for sole entry test.\n", + num_vertices(lhs)); + return false; + } + + // Split the repeat graph into two regions: vertices in the LHS input DAG + // are in one region, vertices in the bounded repeat are in another. 
+ const u32 lhs_region = 1; + const u32 repeat_region = 2; unordered_map<NFAVertex, u32> region_map; - - for (const auto &v : rsi.vertices) { - assert(!is_special(v, g)); // no specials in repeats - assert(contains(rg_map, v)); + + for (const auto &v : rsi.vertices) { + assert(!is_special(v, g)); // no specials in repeats + assert(contains(rg_map, v)); DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index); - region_map.emplace(rg_map.at(v), repeat_region); - } - - for (const auto &v : vertices_range(rg)) { - if (!contains(region_map, v)) { + region_map.emplace(rg_map.at(v), repeat_region); + } + + for (const auto &v : vertices_range(rg)) { + if (!contains(region_map, v)) { DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index); - region_map.emplace(v, lhs_region); - } - } - - u32 bad_region = 0; - if (sentClearsTail(rg, region_map, lhs, lhs_region, &bad_region)) { - DEBUG_PRINTF("input dag clears repeat: sole entry\n"); - return true; - } - - DEBUG_PRINTF("not sole entry\n"); - return false; -} - -namespace { - -template<class Graph> -struct StrawWalker { - StrawWalker(const NGHolder &h_in, const Graph &g_in, - const vector<BoundedRepeatData> &all_repeats) - : h(h_in), g(g_in), repeats(all_repeats) {} - - /** True if v is a cyclic that belongs to a bounded repeat (one without an - * inf max bound). */ - bool isBoundedRepeatCyclic(NFAVertex v) const { - for (const auto &r : repeats) { - if (r.repeatMax.is_finite() && r.cyclic == v) { - return true; - } - } - return false; - } - - NFAVertex step(NFAVertex v) const { - typename Graph::adjacency_iterator ai, ae; - tie(ai, ae) = adjacent_vertices(v, g); - assert(ai != ae); - NFAVertex next = *ai; - if (next == v) { // Ignore self loop. 
- ++ai; - if (ai == ae) { + region_map.emplace(v, lhs_region); + } + } + + u32 bad_region = 0; + if (sentClearsTail(rg, region_map, lhs, lhs_region, &bad_region)) { + DEBUG_PRINTF("input dag clears repeat: sole entry\n"); + return true; + } + + DEBUG_PRINTF("not sole entry\n"); + return false; +} + +namespace { + +template<class Graph> +struct StrawWalker { + StrawWalker(const NGHolder &h_in, const Graph &g_in, + const vector<BoundedRepeatData> &all_repeats) + : h(h_in), g(g_in), repeats(all_repeats) {} + + /** True if v is a cyclic that belongs to a bounded repeat (one without an + * inf max bound). */ + bool isBoundedRepeatCyclic(NFAVertex v) const { + for (const auto &r : repeats) { + if (r.repeatMax.is_finite() && r.cyclic == v) { + return true; + } + } + return false; + } + + NFAVertex step(NFAVertex v) const { + typename Graph::adjacency_iterator ai, ae; + tie(ai, ae) = adjacent_vertices(v, g); + assert(ai != ae); + NFAVertex next = *ai; + if (next == v) { // Ignore self loop. + ++ai; + if (ai == ae) { return NGHolder::null_vertex(); - } - next = *ai; - } - ++ai; - if (ai != ae && *ai == v) { // Ignore self loop - ++ai; - } - if (ai != ae) { - DEBUG_PRINTF("more than one succ\n"); - set<NFAVertex> succs; - insert(&succs, adjacent_vertices(v, g)); - succs.erase(v); - for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { - next = *ai; + } + next = *ai; + } + ++ai; + if (ai != ae && *ai == v) { // Ignore self loop + ++ai; + } + if (ai != ae) { + DEBUG_PRINTF("more than one succ\n"); + set<NFAVertex> succs; + insert(&succs, adjacent_vertices(v, g)); + succs.erase(v); + for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { + next = *ai; DEBUG_PRINTF("checking %zu\n", g[next].index); - if (next == v) { - continue; - } - set<NFAVertex> lsuccs; - insert(&lsuccs, adjacent_vertices(next, g)); - - if (lsuccs != succs) { - continue; - } - - // Ensure that if v is in connected to accept, the reports - // on `next` much match. 
- if (is_match_vertex(v, h) && g[v].reports != g[next].reports) { - DEBUG_PRINTF("report mismatch\n"); - continue; - } - - return next; - } - DEBUG_PRINTF("bailing\n"); + if (next == v) { + continue; + } + set<NFAVertex> lsuccs; + insert(&lsuccs, adjacent_vertices(next, g)); + + if (lsuccs != succs) { + continue; + } + + // Ensure that if v is in connected to accept, the reports + // on `next` much match. + if (is_match_vertex(v, h) && g[v].reports != g[next].reports) { + DEBUG_PRINTF("report mismatch\n"); + continue; + } + + return next; + } + DEBUG_PRINTF("bailing\n"); return NGHolder::null_vertex(); - } - return next; - } - - NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const { + } + return next; + } + + NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const { DEBUG_PRINTF("walk from %zu\n", g[v].index); unordered_set<NFAVertex> visited; - straw.clear(); - - while (!is_special(v, g)) { + straw.clear(); + + while (!is_special(v, g)) { DEBUG_PRINTF("checking %zu\n", g[v].index); - NFAVertex next = step(v); + NFAVertex next = step(v); if (next == NGHolder::null_vertex()) { - break; - } - if (!visited.insert(next).second) { + break; + } + if (!visited.insert(next).second) { DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index); - break; /* don't want to get stuck in any complicated loops */ - } - - const CharReach &reach_v = g[v].char_reach; - const CharReach &reach_next = g[next].char_reach; - if (!reach_v.isSubsetOf(reach_next)) { + break; /* don't want to get stuck in any complicated loops */ + } + + const CharReach &reach_v = g[v].char_reach; + const CharReach &reach_next = g[next].char_reach; + if (!reach_v.isSubsetOf(reach_next)) { DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n", - g[next].index, g[v].index); - break; - } - - // If this is cyclic with the right reach, we're done. Note that - // startDs fulfils this requirement. 
- if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) { + g[next].index, g[v].index); + break; + } + + // If this is cyclic with the right reach, we're done. Note that + // startDs fulfils this requirement. + if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) { DEBUG_PRINTF("found cyclic %zu\n", g[next].index); - return next; - } - - v = next; - straw.push_back(v); - } - - straw.clear(); + return next; + } + + v = next; + straw.push_back(v); + } + + straw.clear(); return NGHolder::null_vertex(); - } - -private: - const NGHolder &h; // underlying graph - const Graph &g; - const vector<BoundedRepeatData> &repeats; -}; - -} // namespace - -static -NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v, - const vector<BoundedRepeatData> &all_repeats, - vector<NFAVertex> &straw) { + } + +private: + const NGHolder &h; // underlying graph + const Graph &g; + const vector<BoundedRepeatData> &repeats; +}; + +} // namespace + +static +NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v, + const vector<BoundedRepeatData> &all_repeats, + vector<NFAVertex> &straw) { typedef boost::reverse_graph<NGHolder, const NGHolder &> RevGraph; const RevGraph revg(g); - - auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw); - reverse(begin(straw), end(straw)); // path comes from cyclic - return cyclic; -} - -static -NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v, - const vector<BoundedRepeatData> &all_repeats, - vector<NFAVertex> &straw) { + + auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw); + reverse(begin(straw), end(straw)); // path comes from cyclic + return cyclic; +} + +static +NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v, + const vector<BoundedRepeatData> &all_repeats, + vector<NFAVertex> &straw) { return StrawWalker<NGHolder>(g, g, all_repeats).walk(v, straw); -} - -/** True if entries to this subgraph must pass through a cyclic state with - * reachability that is a superset 
of the reach of the repeat, and - * reachabilities along this path "nest" into the reaches of their - * predecessors. - * - * This is what is called a 'straw' in the region code. */ -static -bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi, - const vector<BoundedRepeatData> &all_repeats) { - // Cope with peeling by following a chain of single vertices backwards - // until we encounter our cyclic, all of which must have superset reach. - vector<NFAVertex> straw; - return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) != +} + +/** True if entries to this subgraph must pass through a cyclic state with + * reachability that is a superset of the reach of the repeat, and + * reachabilities along this path "nest" into the reaches of their + * predecessors. + * + * This is what is called a 'straw' in the region code. */ +static +bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi, + const vector<BoundedRepeatData> &all_repeats) { + // Cope with peeling by following a chain of single vertices backwards + // until we encounter our cyclic, all of which must have superset reach. 
+ vector<NFAVertex> straw; + return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) != NGHolder::null_vertex(); -} - -static -bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi, - const vector<BoundedRepeatData> &all_repeats) { - vector<NFAVertex> straw; - return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) != +} + +static +bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi, + const vector<BoundedRepeatData> &all_repeats) { + vector<NFAVertex> straw; + return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) != NGHolder::null_vertex(); -} - -static -bool leadsOnlyToAccept(const NGHolder &g, const ReachSubgraph &rsi) { - const NFAVertex u = rsi.vertices.back(); - for (auto v : adjacent_vertices_range(u, g)) { - if (v != g.accept) { - return false; - } - } - assert(out_degree(u, g)); - return true; -} - -static -bool allSimpleHighlander(const ReportManager &rm, - const flat_set<ReportID> &reports) { - assert(!reports.empty()); - for (auto report : reports) { - if (!isSimpleExhaustible(rm.getReport(report))) { - return false; - } - } - - return true; -} - -// Finds a single, fairly unrefined trigger for the repeat by walking backwards -// and collecting the unioned reach at each step. 
-static -vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) { - const size_t MAX_TRIGGER_STEPS = 32; - - vector<CharReach> trigger; - +} + +static +bool leadsOnlyToAccept(const NGHolder &g, const ReachSubgraph &rsi) { + const NFAVertex u = rsi.vertices.back(); + for (auto v : adjacent_vertices_range(u, g)) { + if (v != g.accept) { + return false; + } + } + assert(out_degree(u, g)); + return true; +} + +static +bool allSimpleHighlander(const ReportManager &rm, + const flat_set<ReportID> &reports) { + assert(!reports.empty()); + for (auto report : reports) { + if (!isSimpleExhaustible(rm.getReport(report))) { + return false; + } + } + + return true; +} + +// Finds a single, fairly unrefined trigger for the repeat by walking backwards +// and collecting the unioned reach at each step. +static +vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) { + const size_t MAX_TRIGGER_STEPS = 32; + + vector<CharReach> trigger; + flat_set<NFAVertex> curr, next; - insert(&curr, inv_adjacent_vertices(v, g)); - - if (contains(curr, g.start)) { - DEBUG_PRINTF("start in repeat's immediate preds\n"); - trigger.push_back(CharReach::dot()); // Trigger could be anything! 
- return trigger; - } - - for (size_t num_steps = 0; num_steps < MAX_TRIGGER_STEPS; num_steps++) { - next.clear(); - trigger.push_back(CharReach()); - CharReach &cr = trigger.back(); - - for (auto v_c : curr) { - cr |= g[v_c].char_reach; - insert(&next, inv_adjacent_vertices(v_c, g)); - } - - DEBUG_PRINTF("cr[%zu]=%s\n", num_steps, describeClass(cr).c_str()); - - if (next.empty() || contains(next, g.start)) { - break; - } - - curr.swap(next); - } - - reverse(trigger.begin(), trigger.end()); - return trigger; -} - -static -vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g, - const NFAVertex sink) { - const size_t MAX_TRIGGER_STEPS = 32; - const size_t UNIONED_FALLBACK_THRESHOLD = 100; - - using Path = deque<NFAVertex>; - - vector<vector<CharReach>> triggers; - - deque<Path> q; // work queue - deque<Path> done; // finished paths - - size_t max_len = MAX_TRIGGER_STEPS; - - // Find a set of paths leading to vertex v by depth first search. - - for (auto u : inv_adjacent_vertices_range(sink, g)) { - if (is_any_start(u, g)) { - triggers.push_back({}); // empty - return triggers; - } - q.push_back(Path(1, u)); - } - - while (!q.empty()) { - Path &path = q.front(); - NFAVertex v = path.back(); - - if (path.size() >= max_len) { - max_len = min(max_len, path.size()); - done.push_back(path); - goto next_path; - } - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (is_any_start(u, g)) { - // Found an accept. There's no point expanding this path any - // further, we're done. - max_len = min(max_len, path.size()); - done.push_back(path); - goto next_path; - } - - if (path.size() + 1 >= max_len) { - done.push_back(path); - done.back().push_back(u); - } else { - q.push_back(path); // copy - q.back().push_back(u); - } - } - - next_path: - q.pop_front(); - - // If our queue or our finished trigger list gets too large, fall back - // to generating a single trigger with union reach. 
- if (q.size() + done.size() > UNIONED_FALLBACK_THRESHOLD) { - DEBUG_PRINTF("search too large, fall back to union trigger\n"); - triggers.clear(); - triggers.push_back(getUnionedTrigger(g, sink)); - return triggers; - } - } - - assert(!done.empty()); - - // Convert our path list into a set of unique triggers. + insert(&curr, inv_adjacent_vertices(v, g)); + + if (contains(curr, g.start)) { + DEBUG_PRINTF("start in repeat's immediate preds\n"); + trigger.push_back(CharReach::dot()); // Trigger could be anything! + return trigger; + } + + for (size_t num_steps = 0; num_steps < MAX_TRIGGER_STEPS; num_steps++) { + next.clear(); + trigger.push_back(CharReach()); + CharReach &cr = trigger.back(); + + for (auto v_c : curr) { + cr |= g[v_c].char_reach; + insert(&next, inv_adjacent_vertices(v_c, g)); + } + + DEBUG_PRINTF("cr[%zu]=%s\n", num_steps, describeClass(cr).c_str()); + + if (next.empty() || contains(next, g.start)) { + break; + } + + curr.swap(next); + } + + reverse(trigger.begin(), trigger.end()); + return trigger; +} + +static +vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g, + const NFAVertex sink) { + const size_t MAX_TRIGGER_STEPS = 32; + const size_t UNIONED_FALLBACK_THRESHOLD = 100; + + using Path = deque<NFAVertex>; + + vector<vector<CharReach>> triggers; + + deque<Path> q; // work queue + deque<Path> done; // finished paths + + size_t max_len = MAX_TRIGGER_STEPS; + + // Find a set of paths leading to vertex v by depth first search. + + for (auto u : inv_adjacent_vertices_range(sink, g)) { + if (is_any_start(u, g)) { + triggers.push_back({}); // empty + return triggers; + } + q.push_back(Path(1, u)); + } + + while (!q.empty()) { + Path &path = q.front(); + NFAVertex v = path.back(); + + if (path.size() >= max_len) { + max_len = min(max_len, path.size()); + done.push_back(path); + goto next_path; + } + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (is_any_start(u, g)) { + // Found an accept. 
There's no point expanding this path any + // further, we're done. + max_len = min(max_len, path.size()); + done.push_back(path); + goto next_path; + } + + if (path.size() + 1 >= max_len) { + done.push_back(path); + done.back().push_back(u); + } else { + q.push_back(path); // copy + q.back().push_back(u); + } + } + + next_path: + q.pop_front(); + + // If our queue or our finished trigger list gets too large, fall back + // to generating a single trigger with union reach. + if (q.size() + done.size() > UNIONED_FALLBACK_THRESHOLD) { + DEBUG_PRINTF("search too large, fall back to union trigger\n"); + triggers.clear(); + triggers.push_back(getUnionedTrigger(g, sink)); + return triggers; + } + } + + assert(!done.empty()); + + // Convert our path list into a set of unique triggers. ue2_unordered_set<vector<CharReach>> unique_triggers; - for (const auto &path : done) { - vector<CharReach> reach_path; - for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) { - reach_path.push_back(g[*jt].char_reach); - } - unique_triggers.insert(reach_path); - } - - insert(&triggers, triggers.end(), unique_triggers); - sort(triggers.begin(), triggers.end()); - DEBUG_PRINTF("built %zu unique triggers, max_len=%zu\n", triggers.size(), - max_len); - return triggers; -} - -static -void findMinPeriod(const NGHolder &g, - const map<u32, vector<vector<CharReach>>> &triggers, - ReachSubgraph &rsi) { - const auto v = rsi.vertices.front(); - const CharReach &cr = g[v].char_reach; - - vector<vector<CharReach>> repeat_triggers; - - if (is_triggered(g)) { - // Construct a temporary copy of the graph that also contains its - // triggers, potentially lengthening the repeat's triggers. - NGHolder tg; - unordered_map<NFAVertex, NFAVertex> tg_map; - cloneHolder(tg, g, &tg_map); - addTriggers(tg, triggers); - assert(contains(tg_map, v)); - repeat_triggers = getRepeatTriggers(tg, tg_map.at(v)); - } else { - // Not triggered, no need to mutate the graph. 
- repeat_triggers = getRepeatTriggers(g, v); - } - - rsi.minPeriod = minPeriod(repeat_triggers, cr, &rsi.is_reset); - DEBUG_PRINTF("%zu triggers, minPeriod=%u, is_reset=%d\n", - repeat_triggers.size(), rsi.minPeriod, (int)rsi.is_reset); -} - -static -void -selectHistoryScheme(const NGHolder &g, const ReportManager *rm, - ReachSubgraph &rsi, + for (const auto &path : done) { + vector<CharReach> reach_path; + for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) { + reach_path.push_back(g[*jt].char_reach); + } + unique_triggers.insert(reach_path); + } + + insert(&triggers, triggers.end(), unique_triggers); + sort(triggers.begin(), triggers.end()); + DEBUG_PRINTF("built %zu unique triggers, max_len=%zu\n", triggers.size(), + max_len); + return triggers; +} + +static +void findMinPeriod(const NGHolder &g, + const map<u32, vector<vector<CharReach>>> &triggers, + ReachSubgraph &rsi) { + const auto v = rsi.vertices.front(); + const CharReach &cr = g[v].char_reach; + + vector<vector<CharReach>> repeat_triggers; + + if (is_triggered(g)) { + // Construct a temporary copy of the graph that also contains its + // triggers, potentially lengthening the repeat's triggers. + NGHolder tg; + unordered_map<NFAVertex, NFAVertex> tg_map; + cloneHolder(tg, g, &tg_map); + addTriggers(tg, triggers); + assert(contains(tg_map, v)); + repeat_triggers = getRepeatTriggers(tg, tg_map.at(v)); + } else { + // Not triggered, no need to mutate the graph. 
+ repeat_triggers = getRepeatTriggers(g, v); + } + + rsi.minPeriod = minPeriod(repeat_triggers, cr, &rsi.is_reset); + DEBUG_PRINTF("%zu triggers, minPeriod=%u, is_reset=%d\n", + repeat_triggers.size(), rsi.minPeriod, (int)rsi.is_reset); +} + +static +void +selectHistoryScheme(const NGHolder &g, const ReportManager *rm, + ReachSubgraph &rsi, const unordered_map<NFAVertex, NFAVertexDepth> &depths, const unordered_set<NFAVertex> &reached_by_fixed_tops, - const map<u32, vector<vector<CharReach>>> &triggers, - const vector<BoundedRepeatData> &all_repeats, - const bool simple_model_selection) { - // {N,} cases use the FIRST history mechanism. - if (rsi.repeatMax.is_infinite()) { - DEBUG_PRINTF("selected FIRST history\n"); - rsi.historyType = REPEAT_FIRST; - return; - } - - /* If we have a repeat which only raises a highlander, only the first match - * matters */ - if (rm && leadsOnlyToAccept(g, rsi) - && allSimpleHighlander(*rm, g[rsi.vertices.back()].reports)) { - DEBUG_PRINTF("selected FIRST history (as highlander)\n"); - rsi.historyType = REPEAT_FIRST; - rsi.repeatMax = depth::infinity(); /* for consistency */ - return; - } - - // {N,M} cases can use the FIRST mechanism if they follow a cyclic which - // includes their reachability via a "straw" path. (see UE-1589) - if (hasCyclicSupersetEntryPath(g, rsi, all_repeats)) { - DEBUG_PRINTF("selected FIRST history due to cyclic pred with " - "superset of reach\n"); - rsi.historyType = REPEAT_FIRST; - rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */ - return; - } - - // Similarly, {N,M} cases can use the FIRST mechanism if they precede a - // cyclic which includes their reachability via a "straw" path. 
- if (hasCyclicSupersetExitPath(g, rsi, all_repeats)) { - DEBUG_PRINTF("selected FIRST history due to cyclic succ with " - "superset of reach\n"); - rsi.historyType = REPEAT_FIRST; - rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */ - return; - } - - // Could have skip edges and therefore be a {0,N} repeat. - if (rsi.repeatMin == depth(1) && hasSkipEdges(g, rsi)) { - DEBUG_PRINTF("selected LAST history\n"); - rsi.historyType = REPEAT_LAST; - return; - } - - // Fill minPeriod, is_reset flags - findMinPeriod(g, triggers, rsi); - - // If we can't re-enter this cyclic state, we have a reset case. - // This check can be very expensive, so we don't do it if we've been asked - // for simple model selection. - if (!simple_model_selection && !rsi.is_reset && - hasSoleEntry(g, rsi, depths, reached_by_fixed_tops, triggers)) { - DEBUG_PRINTF("repeat is sole entry -> reset\n"); - rsi.is_reset = true; - } - - // We can lean on the common selection code for the remainder of our repeat - // models. - rsi.historyType = chooseRepeatType(rsi.repeatMin, rsi.repeatMax, - rsi.minPeriod, rsi.is_reset); -} - -static -void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, + const map<u32, vector<vector<CharReach>>> &triggers, + const vector<BoundedRepeatData> &all_repeats, + const bool simple_model_selection) { + // {N,} cases use the FIRST history mechanism. 
+ if (rsi.repeatMax.is_infinite()) { + DEBUG_PRINTF("selected FIRST history\n"); + rsi.historyType = REPEAT_FIRST; + return; + } + + /* If we have a repeat which only raises a highlander, only the first match + * matters */ + if (rm && leadsOnlyToAccept(g, rsi) + && allSimpleHighlander(*rm, g[rsi.vertices.back()].reports)) { + DEBUG_PRINTF("selected FIRST history (as highlander)\n"); + rsi.historyType = REPEAT_FIRST; + rsi.repeatMax = depth::infinity(); /* for consistency */ + return; + } + + // {N,M} cases can use the FIRST mechanism if they follow a cyclic which + // includes their reachability via a "straw" path. (see UE-1589) + if (hasCyclicSupersetEntryPath(g, rsi, all_repeats)) { + DEBUG_PRINTF("selected FIRST history due to cyclic pred with " + "superset of reach\n"); + rsi.historyType = REPEAT_FIRST; + rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */ + return; + } + + // Similarly, {N,M} cases can use the FIRST mechanism if they precede a + // cyclic which includes their reachability via a "straw" path. + if (hasCyclicSupersetExitPath(g, rsi, all_repeats)) { + DEBUG_PRINTF("selected FIRST history due to cyclic succ with " + "superset of reach\n"); + rsi.historyType = REPEAT_FIRST; + rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */ + return; + } + + // Could have skip edges and therefore be a {0,N} repeat. + if (rsi.repeatMin == depth(1) && hasSkipEdges(g, rsi)) { + DEBUG_PRINTF("selected LAST history\n"); + rsi.historyType = REPEAT_LAST; + return; + } + + // Fill minPeriod, is_reset flags + findMinPeriod(g, triggers, rsi); + + // If we can't re-enter this cyclic state, we have a reset case. + // This check can be very expensive, so we don't do it if we've been asked + // for simple model selection. 
+ if (!simple_model_selection && !rsi.is_reset && + hasSoleEntry(g, rsi, depths, reached_by_fixed_tops, triggers)) { + DEBUG_PRINTF("repeat is sole entry -> reset\n"); + rsi.is_reset = true; + } + + // We can lean on the common selection code for the remainder of our repeat + // models. + rsi.historyType = chooseRepeatType(rsi.repeatMin, rsi.repeatMax, + rsi.minPeriod, rsi.is_reset); +} + +static +void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, unordered_set<NFAVertex> &created, - const vector<NFAVertex> &straw) { - if (!g[rd.cyclic].char_reach.all()) { - // Create another cyclic feeder state with flipped reach. It has an - // edge from the repeat's cyclic state and pos_trigger, an edge to the - // straw, and edges from every vertex along the straw. - NFAVertex feeder = clone_vertex(g, rd.cyclic); - created.insert(feeder); - g[feeder].char_reach.flip(); - add_edge(feeder, feeder, g); - add_edge(rd.pos_trigger, feeder, g); - add_edge(rd.cyclic, feeder, g); - add_edge(feeder, straw.front(), g); - - // An edge from every vertex in the straw. - for (auto v : straw) { - add_edge(v, feeder, g); - } - - // An edge to the feeder from the first vertex in the straw and all of - // its predecessors (other than the feeder itself, we've already - // created that edge!) - for (auto u : inv_adjacent_vertices_range(straw.front(), g)) { - if (u == feeder) { - continue; - } - add_edge(u, feeder, g); - } - + const vector<NFAVertex> &straw) { + if (!g[rd.cyclic].char_reach.all()) { + // Create another cyclic feeder state with flipped reach. It has an + // edge from the repeat's cyclic state and pos_trigger, an edge to the + // straw, and edges from every vertex along the straw. + NFAVertex feeder = clone_vertex(g, rd.cyclic); + created.insert(feeder); + g[feeder].char_reach.flip(); + add_edge(feeder, feeder, g); + add_edge(rd.pos_trigger, feeder, g); + add_edge(rd.cyclic, feeder, g); + add_edge(feeder, straw.front(), g); + + // An edge from every vertex in the straw. 
+ for (auto v : straw) { + add_edge(v, feeder, g); + } + + // An edge to the feeder from the first vertex in the straw and all of + // its predecessors (other than the feeder itself, we've already + // created that edge!) + for (auto u : inv_adjacent_vertices_range(straw.front(), g)) { + if (u == feeder) { + continue; + } + add_edge(u, feeder, g); + } + DEBUG_PRINTF("added feeder %zu\n", g[feeder].index); - } else { - // No neg trigger means feeder is empty, and unnecessary. - assert(g[rd.pos_trigger].char_reach.all()); - } -} - -/** - * If we have a leading first repeat, we can split startDs so that it is not - * cyclic so that the repeat is only triggered once, rather than every byte. If we - * perform this transform we must create another cyclic state to retrigger the - * repeat after we see an escape for the repeat. - * - * We do not use the anchored start state to allow us to restart the NFA at a deep - * offset. - */ -static -bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, + } else { + // No neg trigger means feeder is empty, and unnecessary. + assert(g[rd.pos_trigger].char_reach.all()); + } +} + +/** + * If we have a leading first repeat, we can split startDs so that it is not + * cyclic so that the repeat is only triggered once, rather than every byte. If we + * perform this transform we must create another cyclic state to retrigger the + * repeat after we see an escape for the repeat. + * + * We do not use the anchored start state to allow us to restart the NFA at a deep + * offset. + */ +static +bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, unordered_set<NFAVertex> &created, - const vector<BoundedRepeatData> &all_repeats) { - assert(edge(g.startDs, g.startDs, g).second); - - // UE-1617: can rewire FIRST history cases that are preceded by - // startDs. 
- if (rd.type != REPEAT_FIRST) { - return false; - } - - const CharReach &cyc_cr = g[rd.cyclic].char_reach; - - // This transformation is only worth doing if this would allow us to - // accelerate the cyclic state (UE-2055). - if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("we wouldn't be able to accel this case\n"); - return false; - } - - vector<NFAVertex> straw; - NFAVertex pred = - walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); - if (pred != g.startDs) { - DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); - return false; - } - - // This transformation is only safe if the straw path from startDs that - // we've discovered can *only* lead to this repeat, since we're going to - // remove the self-loop on startDs. + const vector<BoundedRepeatData> &all_repeats) { + assert(edge(g.startDs, g.startDs, g).second); + + // UE-1617: can rewire FIRST history cases that are preceded by + // startDs. + if (rd.type != REPEAT_FIRST) { + return false; + } + + const CharReach &cyc_cr = g[rd.cyclic].char_reach; + + // This transformation is only worth doing if this would allow us to + // accelerate the cyclic state (UE-2055). + if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) { + DEBUG_PRINTF("we wouldn't be able to accel this case\n"); + return false; + } + + vector<NFAVertex> straw; + NFAVertex pred = + walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); + if (pred != g.startDs) { + DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); + return false; + } + + // This transformation is only safe if the straw path from startDs that + // we've discovered can *only* lead to this repeat, since we're going to + // remove the self-loop on startDs. 
if (proper_out_degree(g.startDs, g) > 1) { - DEBUG_PRINTF("startDs has other successors\n"); - return false; - } - for (const auto &v : straw) { - if (proper_out_degree(v, g) != 1) { + DEBUG_PRINTF("startDs has other successors\n"); + return false; + } + for (const auto &v : straw) { + if (proper_out_degree(v, g) != 1) { DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n", - g[v].index); - return false; - } - } - - if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("entry is narrow, could be accelerable\n"); - return false; - } - - assert(!straw.empty()); - - /* If there is overlap between the feeder and the first vertex in the straw - * fun things happen. TODO: handle fun things happening (requires more - * edges and more vertices). */ - if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) { - DEBUG_PRINTF("straw has `interesting' reach\n"); - return false; - } - - DEBUG_PRINTF("repeat can be improved by removing startDs loop!\n"); - - // Remove the self-loop on startDs! What a blast! - remove_edge(g.startDs, g.startDs, g); - - // Wire up feeder state to straw. - buildFeeder(g, rd, created, straw); - - return true; -} - -static -vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, - const vector<NFAVertex> &straw) { - // Straw runs from startDs to our pos trigger. - assert(!straw.empty()); - assert(edge(g.startDs, straw.front(), g).second); - assert(edge(straw.back(), rd.pos_trigger, g).second); - - vector<NFAVertex> own_straw; - for (const auto &v : straw) { - NFAVertex v2 = clone_vertex(g, v); - if (hasSelfLoop(v, g)) { - add_edge(v2, v2, g); - } - if (!own_straw.empty()) { - add_edge(own_straw.back(), v2, g); - } - own_straw.push_back(v2); - } - - // Wire our straw to start, not startDs. - add_edge(g.start, own_straw.front(), g); - - // Swap over to using our own straw to get to the POS trigger. 
- remove_edge(straw.back(), rd.pos_trigger, g); - add_edge(own_straw.back(), rd.pos_trigger, g); - - return own_straw; -} - -/** - * Specialized version of improveLeadingRepeat for outfixes, in which we can - * rewire the straw to start instead of removing the startDs self-loop. - */ -static -bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, + g[v].index); + return false; + } + } + + if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) { + DEBUG_PRINTF("entry is narrow, could be accelerable\n"); + return false; + } + + assert(!straw.empty()); + + /* If there is overlap between the feeder and the first vertex in the straw + * fun things happen. TODO: handle fun things happening (requires more + * edges and more vertices). */ + if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) { + DEBUG_PRINTF("straw has `interesting' reach\n"); + return false; + } + + DEBUG_PRINTF("repeat can be improved by removing startDs loop!\n"); + + // Remove the self-loop on startDs! What a blast! + remove_edge(g.startDs, g.startDs, g); + + // Wire up feeder state to straw. + buildFeeder(g, rd, created, straw); + + return true; +} + +static +vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, + const vector<NFAVertex> &straw) { + // Straw runs from startDs to our pos trigger. + assert(!straw.empty()); + assert(edge(g.startDs, straw.front(), g).second); + assert(edge(straw.back(), rd.pos_trigger, g).second); + + vector<NFAVertex> own_straw; + for (const auto &v : straw) { + NFAVertex v2 = clone_vertex(g, v); + if (hasSelfLoop(v, g)) { + add_edge(v2, v2, g); + } + if (!own_straw.empty()) { + add_edge(own_straw.back(), v2, g); + } + own_straw.push_back(v2); + } + + // Wire our straw to start, not startDs. + add_edge(g.start, own_straw.front(), g); + + // Swap over to using our own straw to get to the POS trigger. 
+ remove_edge(straw.back(), rd.pos_trigger, g); + add_edge(own_straw.back(), rd.pos_trigger, g); + + return own_straw; +} + +/** + * Specialized version of improveLeadingRepeat for outfixes, in which we can + * rewire the straw to start instead of removing the startDs self-loop. + */ +static +bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, unordered_set<NFAVertex> &created, - const vector<BoundedRepeatData> &all_repeats) { - assert(g.kind == NFA_OUTFIX); - - // UE-1617: can rewire FIRST history cases that are preceded by - // startDs. - if (rd.type != REPEAT_FIRST) { - return false; - } - - const CharReach &cyc_cr = g[rd.cyclic].char_reach; - - // This transformation is only worth doing if this would allow us to - // accelerate the cyclic state (UE-2055). - if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("we wouldn't be able to accel this case\n"); - return false; - } - - vector<NFAVertex> straw; - NFAVertex pred = - walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); - if (pred != g.startDs) { - DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); - return false; - } - - if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("entry is narrow, could be accelerable\n"); - return false; - } - - assert(!straw.empty()); - - /* If there is overlap between the feeder and the first vertex in the straw - * fun things happen. TODO: handle fun things happening (requires more - * edges and more vertices). */ - if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) { - DEBUG_PRINTF("straw has `interesting' reach\n"); - return false; - } - - DEBUG_PRINTF("repeat can be improved by rebuilding its entry\n"); - - const auto own_straw = makeOwnStraw(g, rd, straw); - insert(&created, own_straw); - - // Wire up feeder state to our new straw. - buildFeeder(g, rd, created, own_straw); - - // We may no longer need the original straw. 
- pruneUseless(g); - - return true; -} - -/** Returns true if doing the bounded repeat transformation on this case - * results in a smaller NFA model. */ -static -bool givesBetterModel(const NGHolder &g, const vector<ReachSubgraph> &rs) { - static const u32 MAX_FAST_STATES = 128; // bigger NFAs are fat and slow. - - // We use vertex count as an upper bound for the number of states. - u32 curr_states = num_vertices(g) - 2; // accepts don't have states - - if (curr_states <= MAX_FAST_STATES) { - return false; - } - if (curr_states > NFA_MAX_STATES) { - return true; - } - - u32 expected_states = curr_states; - for (const auto &rsi : rs) { - /* may be off as unpeeling not done yet */ - expected_states += 2; /* cyclic and pos */ - expected_states -= rsi.vertices.size(); - } - - return ROUNDUP_N(curr_states, 128) != ROUNDUP_N(expected_states, 128); -} - -/** True if this repeat terminates with a vertex that leads only to accept. */ -static -bool endsInAccept(const NGHolder &g, const ReachSubgraph &rsi) { - NFAVertex last = rsi.vertices.back(); - return getSoleDestVertex(g, last) == g.accept; -} - -static -bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) { - NFAVertex last = rsi.vertices.back(); - return getSoleDestVertex(g, last) == g.acceptEod; -} - -namespace { -class pfti_visitor : public boost::default_dfs_visitor { -public: + const vector<BoundedRepeatData> &all_repeats) { + assert(g.kind == NFA_OUTFIX); + + // UE-1617: can rewire FIRST history cases that are preceded by + // startDs. + if (rd.type != REPEAT_FIRST) { + return false; + } + + const CharReach &cyc_cr = g[rd.cyclic].char_reach; + + // This transformation is only worth doing if this would allow us to + // accelerate the cyclic state (UE-2055). 
+ if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) { + DEBUG_PRINTF("we wouldn't be able to accel this case\n"); + return false; + } + + vector<NFAVertex> straw; + NFAVertex pred = + walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); + if (pred != g.startDs) { + DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); + return false; + } + + if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) { + DEBUG_PRINTF("entry is narrow, could be accelerable\n"); + return false; + } + + assert(!straw.empty()); + + /* If there is overlap between the feeder and the first vertex in the straw + * fun things happen. TODO: handle fun things happening (requires more + * edges and more vertices). */ + if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) { + DEBUG_PRINTF("straw has `interesting' reach\n"); + return false; + } + + DEBUG_PRINTF("repeat can be improved by rebuilding its entry\n"); + + const auto own_straw = makeOwnStraw(g, rd, straw); + insert(&created, own_straw); + + // Wire up feeder state to our new straw. + buildFeeder(g, rd, created, own_straw); + + // We may no longer need the original straw. + pruneUseless(g); + + return true; +} + +/** Returns true if doing the bounded repeat transformation on this case + * results in a smaller NFA model. */ +static +bool givesBetterModel(const NGHolder &g, const vector<ReachSubgraph> &rs) { + static const u32 MAX_FAST_STATES = 128; // bigger NFAs are fat and slow. + + // We use vertex count as an upper bound for the number of states. 
+ u32 curr_states = num_vertices(g) - 2; // accepts don't have states + + if (curr_states <= MAX_FAST_STATES) { + return false; + } + if (curr_states > NFA_MAX_STATES) { + return true; + } + + u32 expected_states = curr_states; + for (const auto &rsi : rs) { + /* may be off as unpeeling not done yet */ + expected_states += 2; /* cyclic and pos */ + expected_states -= rsi.vertices.size(); + } + + return ROUNDUP_N(curr_states, 128) != ROUNDUP_N(expected_states, 128); +} + +/** True if this repeat terminates with a vertex that leads only to accept. */ +static +bool endsInAccept(const NGHolder &g, const ReachSubgraph &rsi) { + NFAVertex last = rsi.vertices.back(); + return getSoleDestVertex(g, last) == g.accept; +} + +static +bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) { + NFAVertex last = rsi.vertices.back(); + return getSoleDestVertex(g, last) == g.acceptEod; +} + +namespace { +class pfti_visitor : public boost::default_dfs_visitor { +public: pfti_visitor(unordered_map<NFAVertex, depth> &top_depths_in, - const depth &our_depth_in) - : top_depths(top_depths_in), our_depth(our_depth_in) {} - + const depth &our_depth_in) + : top_depths(top_depths_in), our_depth(our_depth_in) {} + void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) { DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index, - our_depth.str().c_str()); - - auto it = top_depths.find(v); - if (it != top_depths.end() && it->second != our_depth) { - // already seen at a different depth, remove from consideration. - it->second = depth::infinity(); - } else { - top_depths[v] = our_depth; - } - } + our_depth.str().c_str()); + + auto it = top_depths.find(v); + if (it != top_depths.end() && it->second != our_depth) { + // already seen at a different depth, remove from consideration. 
+ it->second = depth::infinity(); + } else { + top_depths[v] = our_depth; + } + } unordered_map<NFAVertex, depth> &top_depths; - const depth &our_depth; -}; -} // namespace - -static -void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops, - const NGHolder &g, + const depth &our_depth; +}; +} // namespace + +static +void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops, + const NGHolder &g, unordered_set<NFAVertex> *reached_by_fixed_tops) { - if (fixed_depth_tops.empty()) { - return; /* we will never find anything */ - } - - assert(!proper_out_degree(g.startDs, g)); + if (fixed_depth_tops.empty()) { + return; /* we will never find anything */ + } + + assert(!proper_out_degree(g.startDs, g)); unordered_map<NFAVertex, depth> top_depths; auto colours = make_small_color_map(g); - - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - - depth td = depth::infinity(); + + for (const auto &e : out_edges_range(g.start, g)) { + NFAVertex v = target(e, g); + if (v == g.startDs) { + continue; + } + + depth td = depth::infinity(); for (u32 top : g[e].tops) { if (!contains(fixed_depth_tops, top)) { td = depth::infinity(); @@ -2131,417 +2131,417 @@ void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops, td = depth::infinity(); break; } - } - + } + DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index, td.str().c_str()); - /* for each vertex reachable from v update its map to reflect that it is - * reachable from a top of depth td. */ - + /* for each vertex reachable from v update its map to reflect that it is + * reachable from a top of depth td. 
*/ + depth_first_visit(g, v, pfti_visitor(top_depths, td), colours); - } - - for (const auto &v_depth : top_depths) { - const NFAVertex v = v_depth.first; - const depth &d = v_depth.second; - if (d.is_finite()) { + } + + for (const auto &v_depth : top_depths) { + const NFAVertex v = v_depth.first; + const depth &d = v_depth.second; + if (d.is_finite()) { DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n", - g[v].index, d.str().c_str()); - reached_by_fixed_tops->insert(v); - } - } -} - -#ifndef NDEBUG -/** Assertion use only. Returns true if the given bounded repeats share any - * vertices, which we don't allow. */ -static -bool hasOverlappingRepeats(UNUSED const NGHolder &g, - const vector<BoundedRepeatData> &repeats) { + g[v].index, d.str().c_str()); + reached_by_fixed_tops->insert(v); + } + } +} + +#ifndef NDEBUG +/** Assertion use only. Returns true if the given bounded repeats share any + * vertices, which we don't allow. */ +static +bool hasOverlappingRepeats(UNUSED const NGHolder &g, + const vector<BoundedRepeatData> &repeats) { unordered_set<NFAVertex> involved; - - for (const auto &br : repeats) { - if (contains(involved, br.cyclic)) { + + for (const auto &br : repeats) { + if (contains(involved, br.cyclic)) { DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index); - return true; - } - if (contains(involved, br.pos_trigger)) { + return true; + } + if (contains(involved, br.pos_trigger)) { DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index); - return true; - } - for (auto v : br.tug_triggers) { - if (contains(involved, v)) { + return true; + } + for (auto v : br.tug_triggers) { + if (contains(involved, v)) { DEBUG_PRINTF("already seen tug %zu\n", g[v].index); - return true; - } - } - - involved.insert(br.cyclic); - involved.insert(br.pos_trigger); - involved.insert(br.tug_triggers.begin(), br.tug_triggers.end()); - } - - return false; -} - -#endif // NDEBUG - -/** - * Identifies so-called "nasty" repeats, in which the reachability of 
both the - * repeat itself and its tugs are wide, which means that executing the NFA will - * likely be bogged down in exception processing. - */ -static -bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi, + return true; + } + } + + involved.insert(br.cyclic); + involved.insert(br.pos_trigger); + involved.insert(br.tug_triggers.begin(), br.tug_triggers.end()); + } + + return false; +} + +#endif // NDEBUG + +/** + * Identifies so-called "nasty" repeats, in which the reachability of both the + * repeat itself and its tugs are wide, which means that executing the NFA will + * likely be bogged down in exception processing. + */ +static +bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi, const unordered_map<NFAVertex, NFAVertexDepth> &depths) { - if (num_vertices(g) > NFA_MAX_STATES) { - // We may have no choice but to implement this repeat to get the graph - // down to a tractable number of vertices. - return false; - } - - if (!generates_callbacks(g) && endsInAccept(g, rsi)) { - DEBUG_PRINTF("would generate a lazy tug, repeat is OK\n"); - return false; - } - - const NFAVertex first = rsi.vertices.front(); - DEBUG_PRINTF("min depth from startds = %s\n", - depths.at(first).fromStartDotStar.min.str().c_str()); - if (depths.at(first).fromStartDotStar.min > depth(2)) { - return false; - } - - NFAVertex last = rsi.vertices.back(); - const CharReach &cyclicreach = g[last].char_reach; - CharReach tugreach; - for (auto v : adjacent_vertices_range(last, g)) { - if (v == last || is_special(v, g)) { - continue; - } - tugreach |= g[v].char_reach; - } - // Deal with unpeeled cases. 
- if (tugreach.none()) { - tugreach = cyclicreach; - } - DEBUG_PRINTF("tugreach.count=%zu, cyclicreach.count=%zu\n", - tugreach.count(), cyclicreach.count()); - return (tugreach.count() > 200) && (cyclicreach.count() > 200); -} - -void analyseRepeats(NGHolder &g, const ReportManager *rm, - const map<u32, u32> &fixed_depth_tops, - const map<u32, vector<vector<CharReach>>> &triggers, - vector<BoundedRepeatData> *repeats, bool streaming, - bool simple_model_selection, const Grey &grey, - bool *reformed_start_ds) { - if (!grey.allowExtendedNFA || !grey.allowLimExNFA) { - return; - } - - // Quick sanity test. - assert(allMatchStatesHaveReports(g)); - -#ifndef NDEBUG - // So we can assert that the number of tops hasn't changed at the end of - // this analysis. + if (num_vertices(g) > NFA_MAX_STATES) { + // We may have no choice but to implement this repeat to get the graph + // down to a tractable number of vertices. + return false; + } + + if (!generates_callbacks(g) && endsInAccept(g, rsi)) { + DEBUG_PRINTF("would generate a lazy tug, repeat is OK\n"); + return false; + } + + const NFAVertex first = rsi.vertices.front(); + DEBUG_PRINTF("min depth from startds = %s\n", + depths.at(first).fromStartDotStar.min.str().c_str()); + if (depths.at(first).fromStartDotStar.min > depth(2)) { + return false; + } + + NFAVertex last = rsi.vertices.back(); + const CharReach &cyclicreach = g[last].char_reach; + CharReach tugreach; + for (auto v : adjacent_vertices_range(last, g)) { + if (v == last || is_special(v, g)) { + continue; + } + tugreach |= g[v].char_reach; + } + // Deal with unpeeled cases. 
+ if (tugreach.none()) { + tugreach = cyclicreach; + } + DEBUG_PRINTF("tugreach.count=%zu, cyclicreach.count=%zu\n", + tugreach.count(), cyclicreach.count()); + return (tugreach.count() > 200) && (cyclicreach.count() > 200); +} + +void analyseRepeats(NGHolder &g, const ReportManager *rm, + const map<u32, u32> &fixed_depth_tops, + const map<u32, vector<vector<CharReach>>> &triggers, + vector<BoundedRepeatData> *repeats, bool streaming, + bool simple_model_selection, const Grey &grey, + bool *reformed_start_ds) { + if (!grey.allowExtendedNFA || !grey.allowLimExNFA) { + return; + } + + // Quick sanity test. + assert(allMatchStatesHaveReports(g)); + +#ifndef NDEBUG + // So we can assert that the number of tops hasn't changed at the end of + // this analysis. const flat_set<u32> allTops = getTops(g); -#endif - - // Later on, we're (a little bit) dependent on depth information for - // unpeeling and so forth. Note that these depths MUST be maintained when - // new vertices are added. +#endif + + // Later on, we're (a little bit) dependent on depth information for + // unpeeling and so forth. Note that these depths MUST be maintained when + // new vertices are added. unordered_map<NFAVertex, NFAVertexDepth> depths; - findInitDepths(g, depths); - - // Construct our list of subgraphs with the same reach using BGL magic. - vector<ReachSubgraph> rs; - buildReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize); - - // Validate and split subgraphs. - checkReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize); - - // Identify which subgraphs represent bounded repeats in forms ("cliches") - // that we accept, and mark the others as bad. 
- for (auto &rsi: rs) { - if (!processSubgraph(g, rsi, grey.minExtBoundedRepeatSize)) { - rsi.bad = true; - continue; - } - - DEBUG_PRINTF("rsi min %s=max=%s\n", rsi.repeatMin.str().c_str(), - rsi.repeatMax.str().c_str()); - - // Identify repeats with wide cyclic and tug reach which will produce - // low-performance implementations and avoid doing them. - if (repeatIsNasty(g, rsi, depths)) { - DEBUG_PRINTF("marking nasty repeat as bad\n"); - rsi.bad = true; - } - } - - // Remove bad cases, then sort remaining subgraphs in descending size - // order. - rs.erase(remove_if(rs.begin(), rs.end(), - [](const ReachSubgraph &r) { return r.bad; }), - rs.end()); - stable_sort(rs.begin(), rs.end(), - [](const ReachSubgraph &a, const ReachSubgraph &b) { - return a.vertices.size() > b.vertices.size(); - }); - - if (!streaming && !givesBetterModel(g, rs)) { - /* in block mode, there is no state space so we are only looking for - * performance wins */ - DEBUG_PRINTF("repeat would not reduce NFA model size, skipping\n"); - return; - } - - if (rs.empty()) { - /* no good repeats */ - return; - } - - // Store a copy of the original, unmodified graph in case we need to revert - // back: in particular, due to tug cloning it is possible to build a graph - // that was bigger than the original. See UE-2370. FIXME: smarter analysis - // could make this unnecessary? - const unique_ptr<const NGHolder> orig_g(cloneHolder(g)); - + findInitDepths(g, depths); + + // Construct our list of subgraphs with the same reach using BGL magic. + vector<ReachSubgraph> rs; + buildReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize); + + // Validate and split subgraphs. + checkReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize); + + // Identify which subgraphs represent bounded repeats in forms ("cliches") + // that we accept, and mark the others as bad. 
+ for (auto &rsi: rs) { + if (!processSubgraph(g, rsi, grey.minExtBoundedRepeatSize)) { + rsi.bad = true; + continue; + } + + DEBUG_PRINTF("rsi min %s=max=%s\n", rsi.repeatMin.str().c_str(), + rsi.repeatMax.str().c_str()); + + // Identify repeats with wide cyclic and tug reach which will produce + // low-performance implementations and avoid doing them. + if (repeatIsNasty(g, rsi, depths)) { + DEBUG_PRINTF("marking nasty repeat as bad\n"); + rsi.bad = true; + } + } + + // Remove bad cases, then sort remaining subgraphs in descending size + // order. + rs.erase(remove_if(rs.begin(), rs.end(), + [](const ReachSubgraph &r) { return r.bad; }), + rs.end()); + stable_sort(rs.begin(), rs.end(), + [](const ReachSubgraph &a, const ReachSubgraph &b) { + return a.vertices.size() > b.vertices.size(); + }); + + if (!streaming && !givesBetterModel(g, rs)) { + /* in block mode, there is no state space so we are only looking for + * performance wins */ + DEBUG_PRINTF("repeat would not reduce NFA model size, skipping\n"); + return; + } + + if (rs.empty()) { + /* no good repeats */ + return; + } + + // Store a copy of the original, unmodified graph in case we need to revert + // back: in particular, due to tug cloning it is possible to build a graph + // that was bigger than the original. See UE-2370. FIXME: smarter analysis + // could make this unnecessary? + const unique_ptr<const NGHolder> orig_g(cloneHolder(g)); + unordered_set<NFAVertex> reached_by_fixed_tops; - if (is_triggered(g)) { - populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops); - } - - // Go to town on the remaining acceptable subgraphs. + if (is_triggered(g)) { + populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops); + } + + // Go to town on the remaining acceptable subgraphs. 
unordered_set<NFAVertex> created; - for (auto &rsi : rs) { + for (auto &rsi : rs) { DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n", - g[rsi.vertices.front()].index, - rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); - - if (!peelSubgraph(g, grey, rsi, created)) { - DEBUG_PRINTF("peel failed, skipping\n"); - continue; - } - - // Attempt to peel a vertex if we're up against startDs, for - // performance reasons. - peelStartDotStar(g, depths, grey, rsi); - - // Our peeling passes may have killed off this repeat. - if (rsi.bad) { - continue; - } - - selectHistoryScheme(g, rm, rsi, depths, reached_by_fixed_tops, triggers, - *repeats, simple_model_selection); - - if (!generates_callbacks(g) && endsInAccept(g, rsi)) { - DEBUG_PRINTF("accepty-rosy graph\n"); - replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created); - } else if (endsInAcceptEod(g, rsi)) { - DEBUG_PRINTF("accepty-rosy graph\n"); - replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created); - } else { - replaceSubgraphWithSpecial(g, rsi, repeats, depths, created); - } - - // Some of our analyses require correctly numbered vertices, so we - // renumber after changes. + g[rsi.vertices.front()].index, + rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); + + if (!peelSubgraph(g, grey, rsi, created)) { + DEBUG_PRINTF("peel failed, skipping\n"); + continue; + } + + // Attempt to peel a vertex if we're up against startDs, for + // performance reasons. + peelStartDotStar(g, depths, grey, rsi); + + // Our peeling passes may have killed off this repeat. 
+ if (rsi.bad) { + continue; + } + + selectHistoryScheme(g, rm, rsi, depths, reached_by_fixed_tops, triggers, + *repeats, simple_model_selection); + + if (!generates_callbacks(g) && endsInAccept(g, rsi)) { + DEBUG_PRINTF("accepty-rosy graph\n"); + replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created); + } else if (endsInAcceptEod(g, rsi)) { + DEBUG_PRINTF("accepty-rosy graph\n"); + replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created); + } else { + replaceSubgraphWithSpecial(g, rsi, repeats, depths, created); + } + + // Some of our analyses require correctly numbered vertices, so we + // renumber after changes. renumber_vertices(g); - } - - bool modified_start_ds = false; - - // We may be able to make improvements to the graph for performance - // reasons. Note that this may do 'orrible things like remove the startDs - // cycle, this should only happen quite late in the graph lifecycle. - if (repeats->size() == 1) { - if (g.kind == NFA_OUTFIX) { - improveLeadingRepeatOutfix(g, repeats->back(), created, *repeats); - // (Does not modify startDs, so we don't need to set - // reformed_start_ds for this case.) - } else { - modified_start_ds = - improveLeadingRepeat(g, repeats->back(), created, *repeats); - } - } - - if (reformed_start_ds) { - *reformed_start_ds = modified_start_ds; - } - - if (!repeats->empty()) { - if (num_vertices(g) > NFA_MAX_STATES) { - // We've managed to build an unimplementable NFA. Swap back to the - // original. - DEBUG_PRINTF("NFA has %zu vertices; swapping back to the " - "original graph\n", num_vertices(g)); - clear_graph(g); - assert(orig_g); - cloneHolder(g, *orig_g); - repeats->clear(); - } - - // Sanity test: we don't want any repeats that share special vertices - // as our construction code later can't cope with it. - assert(!hasOverlappingRepeats(g, *repeats)); - - // We have modified the graph, so we need to ensure that our edges - // and vertices are correctly numbered. 
+ } + + bool modified_start_ds = false; + + // We may be able to make improvements to the graph for performance + // reasons. Note that this may do 'orrible things like remove the startDs + // cycle, this should only happen quite late in the graph lifecycle. + if (repeats->size() == 1) { + if (g.kind == NFA_OUTFIX) { + improveLeadingRepeatOutfix(g, repeats->back(), created, *repeats); + // (Does not modify startDs, so we don't need to set + // reformed_start_ds for this case.) + } else { + modified_start_ds = + improveLeadingRepeat(g, repeats->back(), created, *repeats); + } + } + + if (reformed_start_ds) { + *reformed_start_ds = modified_start_ds; + } + + if (!repeats->empty()) { + if (num_vertices(g) > NFA_MAX_STATES) { + // We've managed to build an unimplementable NFA. Swap back to the + // original. + DEBUG_PRINTF("NFA has %zu vertices; swapping back to the " + "original graph\n", num_vertices(g)); + clear_graph(g); + assert(orig_g); + cloneHolder(g, *orig_g); + repeats->clear(); + } + + // Sanity test: we don't want any repeats that share special vertices + // as our construction code later can't cope with it. + assert(!hasOverlappingRepeats(g, *repeats)); + + // We have modified the graph, so we need to ensure that our edges + // and vertices are correctly numbered. renumber_vertices(g); renumber_edges(g); - // Remove stray report IDs. - clearReports(g); - } - - // Quick sanity tests. - assert(allMatchStatesHaveReports(g)); - assert(!is_triggered(g) || getTops(g) == allTops); -} - -/** - * \brief True if the non-special vertices in the given graph all have the same - * character reachability. 
- */ -static -bool allOneReach(const NGHolder &g) { - const CharReach *cr = nullptr; - for (const auto &v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - if (!cr) { - cr = &g[v].char_reach; - } else { - if (*cr != g[v].char_reach) { - return false; - } - } - } - return true; -} - -bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { - assert(allMatchStatesHaveReports(g)); - - DEBUG_PRINTF("entry\n"); - - // Must be start anchored. - assert(edge(g.startDs, g.startDs, g).second); + // Remove stray report IDs. + clearReports(g); + } + + // Quick sanity tests. + assert(allMatchStatesHaveReports(g)); + assert(!is_triggered(g) || getTops(g) == allTops); +} + +/** + * \brief True if the non-special vertices in the given graph all have the same + * character reachability. + */ +static +bool allOneReach(const NGHolder &g) { + const CharReach *cr = nullptr; + for (const auto &v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + if (!cr) { + cr = &g[v].char_reach; + } else { + if (*cr != g[v].char_reach) { + return false; + } + } + } + return true; +} + +bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { + assert(allMatchStatesHaveReports(g)); + + DEBUG_PRINTF("entry\n"); + + // Must be start anchored. + assert(edge(g.startDs, g.startDs, g).second); if (out_degree(g.startDs, g) > 1) { - DEBUG_PRINTF("Unanchored\n"); - return false; - } - - // Must not be EOD-anchored. - assert(edge(g.accept, g.acceptEod, g).second); + DEBUG_PRINTF("Unanchored\n"); + return false; + } + + // Must not be EOD-anchored. + assert(edge(g.accept, g.acceptEod, g).second); if (in_degree(g.acceptEod, g) > 1) { - DEBUG_PRINTF("EOD anchored\n"); - return false; - } - - // Must have precisely one top. + DEBUG_PRINTF("EOD anchored\n"); + return false; + } + + // Must have precisely one top. 
if (is_triggered(g) && !onlyOneTop(g)) { - DEBUG_PRINTF("Too many tops\n"); - return false; - } - - if (!allOneReach(g)) { - DEBUG_PRINTF("vertices with different reach\n"); - return false; - } - - // We allow this code to report true for any repeat, even for '.*' or '.+' - // cases. - const u32 minNumVertices = 1; - - vector<ReachSubgraph> rs; - buildReachSubgraphs(g, rs, minNumVertices); - checkReachSubgraphs(g, rs, minNumVertices); - if (rs.size() != 1) { - DEBUG_PRINTF("too many subgraphs\n"); - return false; - } - - ReachSubgraph &rsi = *rs.begin(); - if (!processSubgraph(g, rsi, minNumVertices)) { - DEBUG_PRINTF("not a supported repeat\n"); - return false; - } - - if (rsi.vertices.size() + N_SPECIALS != num_vertices(g)) { - DEBUG_PRINTF("repeat doesn't span graph\n"); - return false; - } - - assert(!rsi.bad); - assert(rsi.vertices.size() >= minNumVertices); - - const NFAVertex v = rsi.vertices.back(); - - repeat.reach = g[v].char_reach; - repeat.bounds.min = rsi.repeatMin; - repeat.bounds.max = rsi.repeatMax; - insert(&repeat.reports, g[v].reports); - - if (isVacuous(g)) { - // This graph might be a {0,N} or {0,} repeat. For this to be true, we - // must have found a {1,N} or {1,} repeat and the start vertex must - // have the same report set as the vertices in the repeat. - if (repeat.bounds.min == depth(1) && - g[g.start].reports == g[v].reports) { + DEBUG_PRINTF("Too many tops\n"); + return false; + } + + if (!allOneReach(g)) { + DEBUG_PRINTF("vertices with different reach\n"); + return false; + } + + // We allow this code to report true for any repeat, even for '.*' or '.+' + // cases. 
+ const u32 minNumVertices = 1; + + vector<ReachSubgraph> rs; + buildReachSubgraphs(g, rs, minNumVertices); + checkReachSubgraphs(g, rs, minNumVertices); + if (rs.size() != 1) { + DEBUG_PRINTF("too many subgraphs\n"); + return false; + } + + ReachSubgraph &rsi = *rs.begin(); + if (!processSubgraph(g, rsi, minNumVertices)) { + DEBUG_PRINTF("not a supported repeat\n"); + return false; + } + + if (rsi.vertices.size() + N_SPECIALS != num_vertices(g)) { + DEBUG_PRINTF("repeat doesn't span graph\n"); + return false; + } + + assert(!rsi.bad); + assert(rsi.vertices.size() >= minNumVertices); + + const NFAVertex v = rsi.vertices.back(); + + repeat.reach = g[v].char_reach; + repeat.bounds.min = rsi.repeatMin; + repeat.bounds.max = rsi.repeatMax; + insert(&repeat.reports, g[v].reports); + + if (isVacuous(g)) { + // This graph might be a {0,N} or {0,} repeat. For this to be true, we + // must have found a {1,N} or {1,} repeat and the start vertex must + // have the same report set as the vertices in the repeat. + if (repeat.bounds.min == depth(1) && + g[g.start].reports == g[v].reports) { repeat.bounds.min = depth(0); - DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str()); - } else { - DEBUG_PRINTF("not a supported repeat\n"); - return false; - } - } - - assert(all_reports(g) == set<ReportID>(begin(g[v].reports), - end(g[v].reports))); - return true; -} - -void findRepeats(const NGHolder &h, u32 minRepeatVertices, - vector<GraphRepeatInfo> *repeats_out) { - // Construct our list of subgraphs with the same reach using BGL magic. 
- vector<ReachSubgraph> rs; - buildReachSubgraphs(h, rs, minRepeatVertices); - checkReachSubgraphs(h, rs, minRepeatVertices); - - for (auto &rsi : rs) { - if (!processSubgraph(h, rsi, minRepeatVertices)) { - continue; - } - - DEBUG_PRINTF("rsi min=%s max=%s\n", rsi.repeatMin.str().c_str(), - rsi.repeatMax.str().c_str()); - - depth repeatMax = rsi.repeatMax; - - vector<BoundedRepeatData> all_repeats; /* we don't mutate the graph in - * this path */ - if (hasCyclicSupersetEntryPath(h, rsi, all_repeats)) { - DEBUG_PRINTF("selected FIRST history due to cyclic pred with " - "superset of reach\n"); - repeatMax = depth::infinity(); /* will continue to pump out matches */ - } - if (hasCyclicSupersetExitPath(h, rsi, all_repeats)) { - DEBUG_PRINTF("selected FIRST history due to cyclic succ with " - "superset of reach\n"); - repeatMax = depth::infinity(); /* will continue to pump out matches */ - } - - repeats_out->push_back(GraphRepeatInfo()); - GraphRepeatInfo &ri = repeats_out->back(); - ri.vertices.swap(rsi.vertices); - ri.repeatMin = rsi.repeatMin; - ri.repeatMax = repeatMax; - } -} - -} // namespace ue2 + DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str()); + } else { + DEBUG_PRINTF("not a supported repeat\n"); + return false; + } + } + + assert(all_reports(g) == set<ReportID>(begin(g[v].reports), + end(g[v].reports))); + return true; +} + +void findRepeats(const NGHolder &h, u32 minRepeatVertices, + vector<GraphRepeatInfo> *repeats_out) { + // Construct our list of subgraphs with the same reach using BGL magic. 
+ vector<ReachSubgraph> rs; + buildReachSubgraphs(h, rs, minRepeatVertices); + checkReachSubgraphs(h, rs, minRepeatVertices); + + for (auto &rsi : rs) { + if (!processSubgraph(h, rsi, minRepeatVertices)) { + continue; + } + + DEBUG_PRINTF("rsi min=%s max=%s\n", rsi.repeatMin.str().c_str(), + rsi.repeatMax.str().c_str()); + + depth repeatMax = rsi.repeatMax; + + vector<BoundedRepeatData> all_repeats; /* we don't mutate the graph in + * this path */ + if (hasCyclicSupersetEntryPath(h, rsi, all_repeats)) { + DEBUG_PRINTF("selected FIRST history due to cyclic pred with " + "superset of reach\n"); + repeatMax = depth::infinity(); /* will continue to pump out matches */ + } + if (hasCyclicSupersetExitPath(h, rsi, all_repeats)) { + DEBUG_PRINTF("selected FIRST history due to cyclic succ with " + "superset of reach\n"); + repeatMax = depth::infinity(); /* will continue to pump out matches */ + } + + repeats_out->push_back(GraphRepeatInfo()); + GraphRepeatInfo &ri = repeats_out->back(); + ri.vertices.swap(rsi.vertices); + ri.repeatMin = rsi.repeatMin; + ri.repeatMax = repeatMax; + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h index cfd804b7ef..330e33c340 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h @@ -1,160 +1,160 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Bounded repeat analysis. - */ - -#ifndef NG_REPEAT_H -#define NG_REPEAT_H - -#include "ng_holder.h" -#include "ue2common.h" -#include "nfa/repeat_internal.h" -#include "util/depth.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Bounded repeat analysis. + */ + +#ifndef NG_REPEAT_H +#define NG_REPEAT_H + +#include "ng_holder.h" +#include "ue2common.h" +#include "nfa/repeat_internal.h" +#include "util/depth.h" #include "util/flat_containers.h" - -#include <map> -#include <vector> - -namespace ue2 { - -class NGHolder; -class ReportManager; -struct Grey; - -/** - * \brief Everything you need to know about a bounded repeat that we have - * transformed. - */ -struct BoundedRepeatData { - BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z, - u32 minPeriod_in, NFAVertex cyc, NFAVertex pos, - const std::vector<NFAVertex> &tug_in) - : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in), - cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {} - - BoundedRepeatData() = delete; // no default construction allowed. 
- - enum RepeatType type; //!< selected type based on bounds and structure - depth repeatMin; //!< minimum repeat bound - depth repeatMax; //!< maximum repeat bound - u32 minPeriod; //!< min trigger period - NFAVertex cyclic; //!< cyclic vertex representing repeat in graph - NFAVertex pos_trigger; //!< positive trigger vertex - std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices -}; - -/** - * \brief Run the bounded repeat analysis and transform the graph where - * bounded repeats are found. - * - * \param h - * Graph to operate on. - * \param rm - * ReportManager, or nullptr if the graph's reports are internal (e.g. for - * Rose use). - * \param fixed_depth_tops - * Map of top to possible trigger depth. - * \param triggers - * Map of top to the vector of triggers (i.e. preceding literals/masks) - * \param repeats - * Repeat info is filled in for caller here. - * \param streaming - * True if we're in streaming mode. - * \param simple_model_selection - * Don't perform complex (and slow) model selection analysis, e.g. - * determining whether the repeat is sole entry. - * \param grey - * Grey box object. - * \param reformed_start_ds - * If supplied, this will be set to true if the graph was optimised for a - * leading first repeat, resulting in the output graph having no self-loop - * on startDs. - */ -void analyseRepeats(NGHolder &h, const ReportManager *rm, - const std::map<u32, u32> &fixed_depth_tops, - const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, - std::vector<BoundedRepeatData> *repeats, bool streaming, - bool simple_model_selection, const Grey &grey, - bool *reformed_start_ds = nullptr); - -/** - * \brief Information on repeats in a holder, returned from \ref findRepeats. 
- */ -struct GraphRepeatInfo { - depth repeatMin; /**< minimum bound */ - depth repeatMax; /**< effective max bound */ - std::vector<NFAVertex> vertices; /**< vertices involved in repeat */ -}; - -/** - * \brief Provides information on repeats in the graph. - */ -void findRepeats(const NGHolder &h, u32 minRepeatVertices, - std::vector<GraphRepeatInfo> *repeats_out); - -struct PureRepeat { - CharReach reach; - DepthMinMax bounds; + +#include <map> +#include <vector> + +namespace ue2 { + +class NGHolder; +class ReportManager; +struct Grey; + +/** + * \brief Everything you need to know about a bounded repeat that we have + * transformed. + */ +struct BoundedRepeatData { + BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z, + u32 minPeriod_in, NFAVertex cyc, NFAVertex pos, + const std::vector<NFAVertex> &tug_in) + : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in), + cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {} + + BoundedRepeatData() = delete; // no default construction allowed. + + enum RepeatType type; //!< selected type based on bounds and structure + depth repeatMin; //!< minimum repeat bound + depth repeatMax; //!< maximum repeat bound + u32 minPeriod; //!< min trigger period + NFAVertex cyclic; //!< cyclic vertex representing repeat in graph + NFAVertex pos_trigger; //!< positive trigger vertex + std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices +}; + +/** + * \brief Run the bounded repeat analysis and transform the graph where + * bounded repeats are found. + * + * \param h + * Graph to operate on. + * \param rm + * ReportManager, or nullptr if the graph's reports are internal (e.g. for + * Rose use). + * \param fixed_depth_tops + * Map of top to possible trigger depth. + * \param triggers + * Map of top to the vector of triggers (i.e. preceding literals/masks) + * \param repeats + * Repeat info is filled in for caller here. + * \param streaming + * True if we're in streaming mode. 
+ * \param simple_model_selection + * Don't perform complex (and slow) model selection analysis, e.g. + * determining whether the repeat is sole entry. + * \param grey + * Grey box object. + * \param reformed_start_ds + * If supplied, this will be set to true if the graph was optimised for a + * leading first repeat, resulting in the output graph having no self-loop + * on startDs. + */ +void analyseRepeats(NGHolder &h, const ReportManager *rm, + const std::map<u32, u32> &fixed_depth_tops, + const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, + std::vector<BoundedRepeatData> *repeats, bool streaming, + bool simple_model_selection, const Grey &grey, + bool *reformed_start_ds = nullptr); + +/** + * \brief Information on repeats in a holder, returned from \ref findRepeats. + */ +struct GraphRepeatInfo { + depth repeatMin; /**< minimum bound */ + depth repeatMax; /**< effective max bound */ + std::vector<NFAVertex> vertices; /**< vertices involved in repeat */ +}; + +/** + * \brief Provides information on repeats in the graph. + */ +void findRepeats(const NGHolder &h, u32 minRepeatVertices, + std::vector<GraphRepeatInfo> *repeats_out); + +struct PureRepeat { + CharReach reach; + DepthMinMax bounds; flat_set<ReportID> reports; - - bool operator==(const PureRepeat &a) const { - return reach == a.reach && bounds == a.bounds && reports == a.reports; - } - - bool operator!=(const PureRepeat &a) const { return !(*this == a); } - - bool operator<(const PureRepeat &a) const { - if (reach != a.reach) { - return reach < a.reach; - } - if (bounds != a.bounds) { - return bounds < a.bounds; - } - return reports < a.reports; - } -}; - -/** - * \brief Returns true and fills the given PureRepeat structure if the graph is - * wholly a repeat over a single character class. - * - * For example, something like: - * - * /^[a-z]{10,20}/ - * - * - Note: graph must not use SDS or EOD. 
- * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper - * bound on the repeat. - */ -bool isPureRepeat(const NGHolder &h, PureRepeat &r); - -} // namespace ue2 - -#endif // NG_REPEAT_H + + bool operator==(const PureRepeat &a) const { + return reach == a.reach && bounds == a.bounds && reports == a.reports; + } + + bool operator!=(const PureRepeat &a) const { return !(*this == a); } + + bool operator<(const PureRepeat &a) const { + if (reach != a.reach) { + return reach < a.reach; + } + if (bounds != a.bounds) { + return bounds < a.bounds; + } + return reports < a.reports; + } +}; + +/** + * \brief Returns true and fills the given PureRepeat structure if the graph is + * wholly a repeat over a single character class. + * + * For example, something like: + * + * /^[a-z]{10,20}/ + * + * - Note: graph must not use SDS or EOD. + * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper + * bound on the repeat. + */ +bool isPureRepeat(const NGHolder &h, PureRepeat &r); + +} // namespace ue2 + +#endif // NG_REPEAT_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp index 4e9b498df0..ed85863b08 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp @@ -1,70 +1,70 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Utility functions for working with Report ID sets. - */ -#include "ng_reports.h" - -#include "ng_holder.h" -#include "util/container.h" -#include "util/compile_context.h" -#include "util/graph_range.h" -#include "util/report_manager.h" - -using namespace std; - -namespace ue2 { - -/** Returns the set of all reports in the graph. */ -set<ReportID> all_reports(const NGHolder &g) { - set<ReportID> rv; - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - insert(&rv, g[v].reports); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - insert(&rv, g[v].reports); - } - - return rv; -} - -/** True if *all* reports in the graph are exhaustible. 
*/ -bool can_exhaust(const NGHolder &g, const ReportManager &rm) { - for (ReportID report_id : all_reports(g)) { - if (rm.getReport(report_id).ekey == INVALID_EKEY) { - return false; - } - } - - return true; -} - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Utility functions for working with Report ID sets. 
+ */ +#include "ng_reports.h" + +#include "ng_holder.h" +#include "util/container.h" +#include "util/compile_context.h" +#include "util/graph_range.h" +#include "util/report_manager.h" + +using namespace std; + +namespace ue2 { + +/** Returns the set of all reports in the graph. */ +set<ReportID> all_reports(const NGHolder &g) { + set<ReportID> rv; + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + insert(&rv, g[v].reports); + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + insert(&rv, g[v].reports); + } + + return rv; +} + +/** True if *all* reports in the graph are exhaustible. */ +bool can_exhaust(const NGHolder &g, const ReportManager &rm) { + for (ReportID report_id : all_reports(g)) { + if (rm.getReport(report_id).ekey == INVALID_EKEY) { + return false; + } + } + + return true; +} + void set_report(NGHolder &g, ReportID internal_report) { // First, wipe the report IDs on all vertices. for (auto v : vertices_range(g)) { @@ -85,22 +85,22 @@ void set_report(NGHolder &g, ReportID internal_report) { } } -/** Derive a maximum offset for the graph from the max_offset values of its - * reports. Returns MAX_OFFSET for inf. */ -u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) { - u64a maxOffset = 0; - set<ReportID> reports = all_reports(g); - assert(!reports.empty()); - - for (ReportID report_id : all_reports(g)) { - const Report &ir = rm.getReport(report_id); - if (ir.hasBounds()) { - maxOffset = max(maxOffset, ir.maxOffset); - } else { - return MAX_OFFSET; - } - } - return maxOffset; -} - -} // namespace ue2 +/** Derive a maximum offset for the graph from the max_offset values of its + * reports. Returns MAX_OFFSET for inf. 
*/ +u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) { + u64a maxOffset = 0; + set<ReportID> reports = all_reports(g); + assert(!reports.empty()); + + for (ReportID report_id : all_reports(g)) { + const Report &ir = rm.getReport(report_id); + if (ir.hasBounds()) { + maxOffset = max(maxOffset, ir.maxOffset); + } else { + return MAX_OFFSET; + } + } + return maxOffset; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h index 31c9530880..0f1b43c482 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h @@ -1,61 +1,61 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Utility functions for working with Report ID sets. - */ - -#ifndef NG_REPORTS_H -#define NG_REPORTS_H - -#include "ue2common.h" - -#include <set> - -namespace ue2 { - -class NGHolder; -class ReportManager; - -/** Returns the set of all reports in the graph. */ -std::set<ReportID> all_reports(const NGHolder &g); - -/** True if *all* reports in the graph are exhaustible. */ -bool can_exhaust(const NGHolder &g, const ReportManager &rm); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Utility functions for working with Report ID sets. + */ + +#ifndef NG_REPORTS_H +#define NG_REPORTS_H + +#include "ue2common.h" + +#include <set> + +namespace ue2 { + +class NGHolder; +class ReportManager; + +/** Returns the set of all reports in the graph. */ +std::set<ReportID> all_reports(const NGHolder &g); + +/** True if *all* reports in the graph are exhaustible. */ +bool can_exhaust(const NGHolder &g, const ReportManager &rm); + /** Replaces all existing reports on the holder with the provided internal * report id. */ void set_report(NGHolder &g, ReportID internal_report); -/** Derive a maximum offset for the graph from the max_offset values of its - * reports. Returns MAX_OFFSET for inf. */ -u64a findMaxOffset(const NGHolder &g, const ReportManager &rm); - -} // namespace ue2 - -#endif // NG_REPORTS_H +/** Derive a maximum offset for the graph from the max_offset values of its + * reports. Returns MAX_OFFSET for inf. 
*/ +u64a findMaxOffset(const NGHolder &g, const ReportManager &rm); + +} // namespace ue2 + +#endif // NG_REPORTS_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp index 704697e57f..c746877678 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp @@ -1,70 +1,70 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file - * \brief State numbering and late graph restructuring code. - */ -#include "ng_restructuring.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/graph_range.h" - -#include <algorithm> -#include <cassert> - -#include <boost/graph/transpose_graph.hpp> - -using namespace std; - -namespace ue2 { - -/** Connect the start vertex to each of the vertices in \p tops. This is useful - * temporarily for when we need to run a graph algorithm that expects a single - * source vertex. */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief State numbering and late graph restructuring code. + */ +#include "ng_restructuring.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/graph_range.h" + +#include <algorithm> +#include <cassert> + +#include <boost/graph/transpose_graph.hpp> + +using namespace std; + +namespace ue2 { + +/** Connect the start vertex to each of the vertices in \p tops. This is useful + * temporarily for when we need to run a graph algorithm that expects a single + * source vertex. */ static void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops, vector<NFAEdge> &tempEdges) { for (NFAVertex v : tops) { - assert(!isLeafNode(v, g)); - + assert(!isLeafNode(v, g)); + const NFAEdge &e = add_edge(g.start, v, g); tempEdges.push_back(e); - } -} - + } +} + /** * Returns true if start's successors (aside from startDs) are subset of * startDs's proper successors or if start has no successors other than startDs. */ -static +static bool startIsRedundant(const NGHolder &g) { /* We ignore startDs as the self-loop may have been stripped as an * optimisation for repeats (improveLeadingRepeats()). 
*/ @@ -92,130 +92,130 @@ bool startIsRedundant(const NGHolder &g) { static void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops, - vector<NFAVertex> &ordering) { - // First, wire up our "tops" to start so that we have a single source, - // which will give a nicer topo order. + vector<NFAVertex> &ordering) { + // First, wire up our "tops" to start so that we have a single source, + // which will give a nicer topo order. vector<NFAEdge> tempEdges; wireStartToTops(g, tops, tempEdges); - + renumber_vertices(g); - - vector<NFAVertex> temp = getTopoOrdering(g); - + + vector<NFAVertex> temp = getTopoOrdering(g); + remove_edges(tempEdges, g); - - // Move {start, startDs} to the end, so they'll be first when we reverse + + // Move {start, startDs} to the end, so they'll be first when we reverse // the ordering (if they are required). - temp.erase(remove(temp.begin(), temp.end(), g.startDs)); - temp.erase(remove(temp.begin(), temp.end(), g.start)); + temp.erase(remove(temp.begin(), temp.end(), g.startDs)); + temp.erase(remove(temp.begin(), temp.end(), g.start)); if (proper_out_degree(g.startDs, g)) { temp.push_back(g.startDs); } if (!startIsRedundant(g)) { temp.push_back(g.start); } - - // Walk ordering, remove vertices that shouldn't be participating in state - // numbering, such as accepts. - for (auto v : temp) { - if (is_any_accept(v, g)) { - continue; // accepts don't need states - } - - ordering.push_back(v); - } - - // Output of topo order was in reverse. - reverse(ordering.begin(), ordering.end()); -} - -// Returns the number of states. -static + + // Walk ordering, remove vertices that shouldn't be participating in state + // numbering, such as accepts. + for (auto v : temp) { + if (is_any_accept(v, g)) { + continue; // accepts don't need states + } + + ordering.push_back(v); + } + + // Output of topo order was in reverse. + reverse(ordering.begin(), ordering.end()); +} + +// Returns the number of states. 
+static unordered_map<NFAVertex, u32> -getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) { +getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) { unordered_map<NFAVertex, u32> states; - for (const auto &v : vertices_range(h)) { - states[v] = NO_STATE; - } - - u32 stateNum = 0; - for (auto v : ordering) { + for (const auto &v : vertices_range(h)) { + states[v] = NO_STATE; + } + + u32 stateNum = 0; + for (auto v : ordering) { DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum, - h[v].index); - states[v] = stateNum++; - } - return states; -} - -/** UE-1648: A state with a single successor that happens to be a predecessor - * can be given any ol' state ID by the topological ordering, so we sink it - * next to its pred. This enables better merging. */ -static -void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) { - deque<pair<NFAVertex, NFAVertex>> candidates; - - auto start = ordering.begin(); - for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) { - NFAVertex v = *it; - if (is_special(v, g)) { - continue; - } - - if (out_degree(v, g) == 1) { - NFAVertex t = *(adjacent_vertices(v, g).first); - if (v == t) { - continue; - } - if (edge(t, v, g).second && find(start, it, t) != ite) { - candidates.push_back(make_pair(v, t)); - } - } - } - - for (const auto &cand : candidates) { - NFAVertex v = cand.first, u = cand.second; - auto u_it = find(ordering.begin(), ordering.end(), u); - auto v_it = find(ordering.begin(), ordering.end(), v); - - // Only move candidates backwards in the ordering, and only move them - // when necessary. - if (u_it >= v_it || distance(u_it, v_it) == 1) { - continue; - } - + h[v].index); + states[v] = stateNum++; + } + return states; +} + +/** UE-1648: A state with a single successor that happens to be a predecessor + * can be given any ol' state ID by the topological ordering, so we sink it + * next to its pred. This enables better merging. 
*/ +static +void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) { + deque<pair<NFAVertex, NFAVertex>> candidates; + + auto start = ordering.begin(); + for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) { + NFAVertex v = *it; + if (is_special(v, g)) { + continue; + } + + if (out_degree(v, g) == 1) { + NFAVertex t = *(adjacent_vertices(v, g).first); + if (v == t) { + continue; + } + if (edge(t, v, g).second && find(start, it, t) != ite) { + candidates.push_back(make_pair(v, t)); + } + } + } + + for (const auto &cand : candidates) { + NFAVertex v = cand.first, u = cand.second; + auto u_it = find(ordering.begin(), ordering.end(), u); + auto v_it = find(ordering.begin(), ordering.end(), v); + + // Only move candidates backwards in the ordering, and only move them + // when necessary. + if (u_it >= v_it || distance(u_it, v_it) == 1) { + continue; + } + DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index); - - ordering.erase(v_it); - ordering.insert(++u_it, v); - } -} - + + ordering.erase(v_it); + ordering.insert(++u_it, v); + } +} + unordered_map<NFAVertex, u32> numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) { - DEBUG_PRINTF("numbering states for holder %p\n", &h); - - vector<NFAVertex> ordering; - getStateOrdering(h, tops, ordering); - - optimiseTightLoops(h, ordering); - + DEBUG_PRINTF("numbering states for holder %p\n", &h); + + vector<NFAVertex> ordering; + getStateOrdering(h, tops, ordering); + + optimiseTightLoops(h, ordering); + return getStateIndices(h, ordering); -} - +} + u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) { - if (state_ids.empty()) { - return 0; - } - - u32 max_state = 0; - for (const auto &m : state_ids) { - if (m.second != NO_STATE) { - max_state = max(m.second, max_state); - } - } - u32 num_states = max_state + 1; - - return num_states; -} - -} // namespace ue2 + if (state_ids.empty()) { + return 0; + } + + u32 max_state = 0; + for (const auto &m : 
state_ids) { + if (m.second != NO_STATE) { + max_state = max(m.second, max_state); + } + } + u32 num_states = max_state + 1; + + return num_states; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h index 75d19c6294..7c381748fc 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h @@ -1,64 +1,64 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief State numbering and late graph restructuring code. */ - -/** \file - * \brief State numbering and late graph restructuring code. - */ - -#ifndef NG_RESTRUCTURING_H -#define NG_RESTRUCTURING_H - -#include "ng_holder.h" -#include "ue2common.h" + +#ifndef NG_RESTRUCTURING_H +#define NG_RESTRUCTURING_H + +#include "ng_holder.h" +#include "ue2common.h" #include "util/flat_containers.h" - + #include <unordered_map> - -namespace ue2 { - -/** - * \brief Special state index value meaning that the vertex will not - * participate in an (NFA/DFA/etc) implementation. - */ -static constexpr u32 NO_STATE = ~0; - -/** - * \brief Gives each participating vertex in the graph a unique state index. - */ + +namespace ue2 { + +/** + * \brief Special state index value meaning that the vertex will not + * participate in an (NFA/DFA/etc) implementation. + */ +static constexpr u32 NO_STATE = ~0; + +/** + * \brief Gives each participating vertex in the graph a unique state index. + */ std::unordered_map<NFAVertex, u32> numberStates(NGHolder &h, const flat_set<NFAVertex> &tops); - -/** - * \brief Counts the number of states (vertices with state indices) in the - * graph. - */ + +/** + * \brief Counts the number of states (vertices with state indices) in the + * graph. 
+ */ u32 countStates(const std::unordered_map<NFAVertex, u32> &state_ids); - -} // namespace ue2 - -#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp index 0f932668c9..bc21d3a13b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp @@ -1,299 +1,299 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file - * \brief Reverse acceleration analysis. - */ -#include "ng_revacc.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ue2common.h" -#include "nfa/accel.h" -#include "nfa/nfa_internal.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/graph_range.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Reverse acceleration analysis. 
+ */ +#include "ng_revacc.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ue2common.h" +#include "nfa/accel.h" +#include "nfa/nfa_internal.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/graph_range.h" + #include <set> -using namespace std; - -namespace ue2 { - -static -bool isPseudoNoCaseChar(const CharReach &cr) { - return cr.count() == 2 && !(cr.find_first() & 32) - && cr.test(cr.find_first() | 32); -} - -static -bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth, - NFA *nfa) { - DEBUG_PRINTF("pure eod triggered pattern\n"); - - /* 2 char */ - for (u8 nocase = 0; nocase < 2; nocase++) { - for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) { - const CharReach &cr = rev_info.acceptEodReach[i]; - const CharReach &cr2 = rev_info.acceptEodReach[i - 1]; - - if (!nocase && cr.count() == 1 && cr2.count() == 1) { - assert(i < minWidth); - if (i >= minWidth) { - goto single; - } - nfa->rAccelType = ACCEL_RDEOD; - nfa->rAccelData.array[0] = (u8)cr.find_first(); - nfa->rAccelData.array[1] = (u8)cr2.find_first(); - nfa->rAccelOffset = i + 1; - DEBUG_PRINTF("raccel eod x2 %u %04hx\n", - nfa->rAccelOffset, nfa->rAccelData.dc); - return true; - } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr)) - && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) { - assert(i < minWidth); - if (i >= minWidth) { - goto single; - } - nfa->rAccelType = ACCEL_RDEOD_NOCASE; - nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */ - nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR; - nfa->rAccelOffset = i + 1; - DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n", - nfa->rAccelOffset, nfa->rAccelData.dc); - return true; - } - } - } - - single: - /* 1 char */ - for (u8 nocase = 0; nocase < 2; nocase++) { - for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { - const CharReach &cr = rev_info.acceptEodReach[i]; - if (!nocase && cr.count() == 1) { - assert(i < minWidth); - if (i >= minWidth) { - return false; - } - 
nfa->rAccelType = ACCEL_REOD; - nfa->rAccelData.c = (u8) cr.find_first(); - nfa->rAccelOffset = i + 1; - DEBUG_PRINTF("raccel eod %u %02hhx\n", - nfa->rAccelOffset, nfa->rAccelData.c); - return true; - } else if (nocase && isPseudoNoCaseChar(cr)) { - assert(i < minWidth); - if (i >= minWidth) { - return false; - } - nfa->rAccelType = ACCEL_REOD_NOCASE; - nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */ - nfa->rAccelOffset = i + 1; - DEBUG_PRINTF("raccel nc eod %u %02hhx\n", - nfa->rAccelOffset, nfa->rAccelData.c); - return true; - } - } - } - - return false; -} - -static -bool lookForFloatingSchemes(const RevAccInfo &rev_info, - const u32 minWidth, NFA *nfa) { - /* 2 char */ - for (u8 nocase = 0; nocase < 2; nocase++) { - for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) { - CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i]; - CharReach cr2 = rev_info.acceptEodReach[i - 1] - | rev_info.acceptReach[i - 1]; - if (!nocase && cr.count() == 1 && cr2.count() == 1) { - assert((u8)(i - 1) < minWidth); - if (i > minWidth) { - goto single; - } - nfa->rAccelType = ACCEL_RDVERM; - nfa->rAccelData.array[0] = (u8)cr.find_first(); - nfa->rAccelData.array[1] = (u8)cr2.find_first(); - nfa->rAccelOffset = i; - DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n", - nfa->rAccelOffset, nfa->rAccelData.array[0], - nfa->rAccelData.array[1]); - return true; - } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr)) - && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) { - assert((u8)(i - 1) < minWidth); - if (i > minWidth) { - goto single; - } - nfa->rAccelType = ACCEL_RDVERM_NOCASE; - nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; - nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR; - nfa->rAccelOffset = i; - DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n", - nfa->rAccelOffset, nfa->rAccelData.array[0], - nfa->rAccelData.array[1]); - return true; - } - } - } - - single: - /* 1 char */ - for (u8 nocase = 0; nocase < 2; nocase++) { - for (u8 i = 
0; i < MAX_RACCEL_OFFSET; i++) { - CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i]; - if (!nocase && cr.count() == 1) { - assert(i < minWidth); - if (i >= minWidth) { - return false; - } - nfa->rAccelType = ACCEL_RVERM; - nfa->rAccelData.c = (u8)cr.find_first(); - nfa->rAccelOffset = i + 1; - DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset, - nfa->rAccelData.c); - return true; - } else if (nocase && isPseudoNoCaseChar(cr)) { - assert(i < minWidth); - if (i >= minWidth) { - return false; - } - nfa->rAccelType = ACCEL_RVERM_NOCASE; - nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */ - nfa->rAccelOffset = i + 1; - DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset, - nfa->rAccelData.c); - return true; - } - } - } - - return false; -} - -void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info, - u32 min_width, bool eod_only) { - assert(nfa); - - if (!rev_info.valid) { - return; - } - - nfa->rAccelOffset = 1; - - assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any()); - if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) { - DEBUG_PRINTF("expected path to accept\n"); - return; - } - - if (rev_info.acceptReach[0].none()) { - /* eod only */ - - if (lookForEodSchemes(rev_info, min_width, nfa)) { - assert(nfa->rAccelOffset <= min_width); - return; - } - } - - if (eod_only) { - return; - } - - if (!lookForFloatingSchemes(rev_info, min_width, nfa)) { - DEBUG_PRINTF("failed to accelerate\n"); - } -} - -static -void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal, - vector<CharReach> *reach) { - set<NFAVertex> vset; - - for (auto v : inv_adjacent_vertices_range(terminal, g)) { - if (!is_special(v, g)) { - vset.insert(v); - } - } - - for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) { - set<NFAVertex> next; - - for (auto v : vset) { - const CharReach &cr = g[v].char_reach; - (*reach)[offset] |= cr; - - DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, 
cr.count(), - (*reach)[offset].count()); - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start || u == g.startDs) { - /* kill all subsequent offsets by setting to dot, setting - * to dot is in someways not accurate as there may be no - * data at all but neither case can be accelerated */ - for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) { - (*reach)[i].setall(); - } - break; - } else if (!is_special(u, g)) { - next.insert(u); - } - } - } - - swap(vset, next); - } -} - -void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) { - DEBUG_PRINTF("pop rev info\n"); - populateRevAccelInfo(g, g.accept, &rai.acceptReach); - populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach); - rai.valid = true; -} - -void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) { - DEBUG_PRINTF("merging ra\n"); - - dest.valid &= vic.valid; - - for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { - dest.acceptReach[i] |= vic.acceptReach[i]; - dest.acceptEodReach[i] |= vic.acceptEodReach[i]; - } -} - -RevAccInfo::RevAccInfo(void) - : valid(false), acceptReach(MAX_RACCEL_OFFSET), - acceptEodReach(MAX_RACCEL_OFFSET) {} - -} // namespace ue2 +using namespace std; + +namespace ue2 { + +static +bool isPseudoNoCaseChar(const CharReach &cr) { + return cr.count() == 2 && !(cr.find_first() & 32) + && cr.test(cr.find_first() | 32); +} + +static +bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth, + NFA *nfa) { + DEBUG_PRINTF("pure eod triggered pattern\n"); + + /* 2 char */ + for (u8 nocase = 0; nocase < 2; nocase++) { + for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) { + const CharReach &cr = rev_info.acceptEodReach[i]; + const CharReach &cr2 = rev_info.acceptEodReach[i - 1]; + + if (!nocase && cr.count() == 1 && cr2.count() == 1) { + assert(i < minWidth); + if (i >= minWidth) { + goto single; + } + nfa->rAccelType = ACCEL_RDEOD; + nfa->rAccelData.array[0] = (u8)cr.find_first(); + nfa->rAccelData.array[1] = (u8)cr2.find_first(); + 
nfa->rAccelOffset = i + 1; + DEBUG_PRINTF("raccel eod x2 %u %04hx\n", + nfa->rAccelOffset, nfa->rAccelData.dc); + return true; + } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr)) + && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) { + assert(i < minWidth); + if (i >= minWidth) { + goto single; + } + nfa->rAccelType = ACCEL_RDEOD_NOCASE; + nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */ + nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR; + nfa->rAccelOffset = i + 1; + DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n", + nfa->rAccelOffset, nfa->rAccelData.dc); + return true; + } + } + } + + single: + /* 1 char */ + for (u8 nocase = 0; nocase < 2; nocase++) { + for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { + const CharReach &cr = rev_info.acceptEodReach[i]; + if (!nocase && cr.count() == 1) { + assert(i < minWidth); + if (i >= minWidth) { + return false; + } + nfa->rAccelType = ACCEL_REOD; + nfa->rAccelData.c = (u8) cr.find_first(); + nfa->rAccelOffset = i + 1; + DEBUG_PRINTF("raccel eod %u %02hhx\n", + nfa->rAccelOffset, nfa->rAccelData.c); + return true; + } else if (nocase && isPseudoNoCaseChar(cr)) { + assert(i < minWidth); + if (i >= minWidth) { + return false; + } + nfa->rAccelType = ACCEL_REOD_NOCASE; + nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */ + nfa->rAccelOffset = i + 1; + DEBUG_PRINTF("raccel nc eod %u %02hhx\n", + nfa->rAccelOffset, nfa->rAccelData.c); + return true; + } + } + } + + return false; +} + +static +bool lookForFloatingSchemes(const RevAccInfo &rev_info, + const u32 minWidth, NFA *nfa) { + /* 2 char */ + for (u8 nocase = 0; nocase < 2; nocase++) { + for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) { + CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i]; + CharReach cr2 = rev_info.acceptEodReach[i - 1] + | rev_info.acceptReach[i - 1]; + if (!nocase && cr.count() == 1 && cr2.count() == 1) { + assert((u8)(i - 1) < minWidth); + if (i > minWidth) { + goto single; + } + 
nfa->rAccelType = ACCEL_RDVERM; + nfa->rAccelData.array[0] = (u8)cr.find_first(); + nfa->rAccelData.array[1] = (u8)cr2.find_first(); + nfa->rAccelOffset = i; + DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n", + nfa->rAccelOffset, nfa->rAccelData.array[0], + nfa->rAccelData.array[1]); + return true; + } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr)) + && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) { + assert((u8)(i - 1) < minWidth); + if (i > minWidth) { + goto single; + } + nfa->rAccelType = ACCEL_RDVERM_NOCASE; + nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; + nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR; + nfa->rAccelOffset = i; + DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n", + nfa->rAccelOffset, nfa->rAccelData.array[0], + nfa->rAccelData.array[1]); + return true; + } + } + } + + single: + /* 1 char */ + for (u8 nocase = 0; nocase < 2; nocase++) { + for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { + CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i]; + if (!nocase && cr.count() == 1) { + assert(i < minWidth); + if (i >= minWidth) { + return false; + } + nfa->rAccelType = ACCEL_RVERM; + nfa->rAccelData.c = (u8)cr.find_first(); + nfa->rAccelOffset = i + 1; + DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset, + nfa->rAccelData.c); + return true; + } else if (nocase && isPseudoNoCaseChar(cr)) { + assert(i < minWidth); + if (i >= minWidth) { + return false; + } + nfa->rAccelType = ACCEL_RVERM_NOCASE; + nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */ + nfa->rAccelOffset = i + 1; + DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset, + nfa->rAccelData.c); + return true; + } + } + } + + return false; +} + +void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info, + u32 min_width, bool eod_only) { + assert(nfa); + + if (!rev_info.valid) { + return; + } + + nfa->rAccelOffset = 1; + + assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any()); + if 
(rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) { + DEBUG_PRINTF("expected path to accept\n"); + return; + } + + if (rev_info.acceptReach[0].none()) { + /* eod only */ + + if (lookForEodSchemes(rev_info, min_width, nfa)) { + assert(nfa->rAccelOffset <= min_width); + return; + } + } + + if (eod_only) { + return; + } + + if (!lookForFloatingSchemes(rev_info, min_width, nfa)) { + DEBUG_PRINTF("failed to accelerate\n"); + } +} + +static +void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal, + vector<CharReach> *reach) { + set<NFAVertex> vset; + + for (auto v : inv_adjacent_vertices_range(terminal, g)) { + if (!is_special(v, g)) { + vset.insert(v); + } + } + + for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) { + set<NFAVertex> next; + + for (auto v : vset) { + const CharReach &cr = g[v].char_reach; + (*reach)[offset] |= cr; + + DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(), + (*reach)[offset].count()); + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start || u == g.startDs) { + /* kill all subsequent offsets by setting to dot, setting + * to dot is in someways not accurate as there may be no + * data at all but neither case can be accelerated */ + for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) { + (*reach)[i].setall(); + } + break; + } else if (!is_special(u, g)) { + next.insert(u); + } + } + } + + swap(vset, next); + } +} + +void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) { + DEBUG_PRINTF("pop rev info\n"); + populateRevAccelInfo(g, g.accept, &rai.acceptReach); + populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach); + rai.valid = true; +} + +void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) { + DEBUG_PRINTF("merging ra\n"); + + dest.valid &= vic.valid; + + for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { + dest.acceptReach[i] |= vic.acceptReach[i]; + dest.acceptEodReach[i] |= vic.acceptEodReach[i]; + } +} + +RevAccInfo::RevAccInfo(void) 
+ : valid(false), acceptReach(MAX_RACCEL_OFFSET), + acceptEodReach(MAX_RACCEL_OFFSET) {} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h index bde54574cb..0ab6a338c2 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h @@ -1,65 +1,65 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file - * \brief Reverse acceleration analysis. - */ - -#ifndef NG_REVACC_H -#define NG_REVACC_H - -#include "util/charreach.h" - -#include <vector> - -struct NFA; - -namespace ue2 { - -class NGHolder; - -#define MAX_RACCEL_OFFSET 16 - -struct RevAccInfo { - RevAccInfo(void); - bool valid; - std::vector<CharReach> acceptReach; /**< bytes which can appear n - * bytes before a match */ - std::vector<CharReach> acceptEodReach; /**< bytes which can appear n - * bytes before eod match */ -}; - -void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info, - u32 min_width, bool eod_only = false); - -void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g); -void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Reverse acceleration analysis. + */ + +#ifndef NG_REVACC_H +#define NG_REVACC_H + +#include "util/charreach.h" + +#include <vector> + +struct NFA; + +namespace ue2 { + +class NGHolder; + +#define MAX_RACCEL_OFFSET 16 + +struct RevAccInfo { + RevAccInfo(void); + bool valid; + std::vector<CharReach> acceptReach; /**< bytes which can appear n + * bytes before a match */ + std::vector<CharReach> acceptEodReach; /**< bytes which can appear n + * bytes before eod match */ +}; + +void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info, + u32 min_width, bool eod_only = false); + +void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g); +void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp index 86528b4a00..82ee226cec 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp @@ -1,93 +1,93 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Short Exhaustible Passthroughs. - * - * Analysis code for determining whether a graph should be treated specially - * because it is short and contains exhaustible reports; typically we turn - * these into outfixes rather than risk them becoming Rose literals. - * - * For example, the pattern: - * - * /[a-f]/H - * - * ... is far better suited to becoming a small outfix that generates one match - * and goes dead than being split into six one-byte Rose literals that end up - * in the literal matcher. 
- */ -#include "ng_sep.h" - -#include "grey.h" -#include "ng_holder.h" -#include "ng_reports.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/graph_range.h" - -using namespace std; - -namespace ue2 { - -static -bool checkFromVertex(const NGHolder &g, NFAVertex start) { - for (auto v : adjacent_vertices_range(start, g)) { - if (v == g.startDs) { - continue; - } - - assert(!is_special(v, g)); /* should not be vacuous */ - - if (!edge(g.startDs, v, g).second) { /* only floating starts */ - return false; - } else if (out_degree(v, g) == 1 - && edge(v, g.accept, g).second) { /* only floating end */ - ; /* possible sep */ - } else { - return false; - } - } - return true; -} - -bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) { - if (!grey.mergeSEP || !can_exhaust(g, rm)) { - return false; - } - - if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) { - return false; - } - - assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g)); - - DEBUG_PRINTF("graph is an SEP\n"); - return true; -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Short Exhaustible Passthroughs. + * + * Analysis code for determining whether a graph should be treated specially + * because it is short and contains exhaustible reports; typically we turn + * these into outfixes rather than risk them becoming Rose literals. + * + * For example, the pattern: + * + * /[a-f]/H + * + * ... is far better suited to becoming a small outfix that generates one match + * and goes dead than being split into six one-byte Rose literals that end up + * in the literal matcher. 
+ */ +#include "ng_sep.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_reports.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/graph_range.h" + +using namespace std; + +namespace ue2 { + +static +bool checkFromVertex(const NGHolder &g, NFAVertex start) { + for (auto v : adjacent_vertices_range(start, g)) { + if (v == g.startDs) { + continue; + } + + assert(!is_special(v, g)); /* should not be vacuous */ + + if (!edge(g.startDs, v, g).second) { /* only floating starts */ + return false; + } else if (out_degree(v, g) == 1 + && edge(v, g.accept, g).second) { /* only floating end */ + ; /* possible sep */ + } else { + return false; + } + } + return true; +} + +bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) { + if (!grey.mergeSEP || !can_exhaust(g, rm)) { + return false; + } + + if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) { + return false; + } + + assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g)); + + DEBUG_PRINTF("graph is an SEP\n"); + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_sep.h b/contrib/libs/hyperscan/src/nfagraph/ng_sep.h index 4a2bef34f7..d4195c5ef4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_sep.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_sep.h @@ -1,46 +1,46 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Short Exhaustible Passthroughs. - */ - -#ifndef NG_SEP_H -#define NG_SEP_H - -namespace ue2 { - -struct Grey; -class NGHolder; -class ReportManager; - -bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Short Exhaustible Passthroughs. + */ + +#ifndef NG_SEP_H +#define NG_SEP_H + +namespace ue2 { + +struct Grey; +class NGHolder; +class ReportManager; + +bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp index 9c2d9ba38d..9c07f2087c 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp @@ -1,268 +1,268 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder for cases representing small literal - * sets. - */ -#include "ng_small_literal_set.h" - -#include "grey.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose construction from NGHolder for cases representing small literal + * sets. + */ +#include "ng_small_literal_set.h" + +#include "grey.h" #include "ng_holder.h" -#include "ng_util.h" -#include "rose/rose_build.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/graph_range.h" -#include "util/order_check.h" -#include "util/ue2string.h" -#include "ue2common.h" - -#include <map> -#include <set> -#include <vector> -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_keys; - -namespace ue2 { - -/** \brief The maximum number of literals to accept per pattern. */ -static const size_t MAX_LITERAL_SET_SIZE = 30; - -/** - * \brief The maximum number of literals to accept per pattern where at least - * one is weak (has period < MIN_STRONG_PERIOD). 
- */ -static const size_t MAX_WEAK_LITERAL_SET_SIZE = 20; - -/** - * \brief The minimum string period to consider a literal "strong" (and not - * apply the weak size limit). - */ -static const size_t MIN_STRONG_PERIOD = 3; - -namespace { - -struct sls_literal { - bool anchored; - bool eod; - ue2_literal s; - - explicit sls_literal(bool a) : anchored(a), eod(false) {} - - sls_literal append(char c, bool nocase) const { - sls_literal rv(anchored); - rv.s = s; - rv.s.push_back(ue2_literal::elem(c, nocase)); - - return rv; - } -}; - -static -bool operator<(const sls_literal &a, const sls_literal &b) { - ORDER_CHECK(anchored); - ORDER_CHECK(eod); - ORDER_CHECK(s); - - return false; -} - -} // namespace - -static -bool checkLongMixedSensitivityLiterals( +#include "ng_util.h" +#include "rose/rose_build.h" +#include "util/compare.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/graph_range.h" +#include "util/order_check.h" +#include "util/ue2string.h" +#include "ue2common.h" + +#include <map> +#include <set> +#include <vector> +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +/** \brief The maximum number of literals to accept per pattern. */ +static const size_t MAX_LITERAL_SET_SIZE = 30; + +/** + * \brief The maximum number of literals to accept per pattern where at least + * one is weak (has period < MIN_STRONG_PERIOD). + */ +static const size_t MAX_WEAK_LITERAL_SET_SIZE = 20; + +/** + * \brief The minimum string period to consider a literal "strong" (and not + * apply the weak size limit). 
+ */ +static const size_t MIN_STRONG_PERIOD = 3; + +namespace { + +struct sls_literal { + bool anchored; + bool eod; + ue2_literal s; + + explicit sls_literal(bool a) : anchored(a), eod(false) {} + + sls_literal append(char c, bool nocase) const { + sls_literal rv(anchored); + rv.s = s; + rv.s.push_back(ue2_literal::elem(c, nocase)); + + return rv; + } +}; + +static +bool operator<(const sls_literal &a, const sls_literal &b) { + ORDER_CHECK(anchored); + ORDER_CHECK(eod); + ORDER_CHECK(s); + + return false; +} + +} // namespace + +static +bool checkLongMixedSensitivityLiterals( const map<sls_literal, flat_set<ReportID>> &literals) { - const size_t len = MAX_MASK2_WIDTH; - - for (const sls_literal &lit : literals | map_keys) { - if (mixed_sensitivity(lit.s) && lit.s.length() > len) { - return false; - } - } - - return true; -} - -static -bool findLiterals(const NGHolder &g, + const size_t len = MAX_MASK2_WIDTH; + + for (const sls_literal &lit : literals | map_keys) { + if (mixed_sensitivity(lit.s) && lit.s.length() > len) { + return false; + } + } + + return true; +} + +static +bool findLiterals(const NGHolder &g, map<sls_literal, flat_set<ReportID>> *literals) { - vector<NFAVertex> order = getTopoOrdering(g); - - vector<set<sls_literal>> built(num_vertices(g)); - vector<size_t> read_count(num_vertices(g)); - - for (auto it = order.rbegin(); it != order.rend(); ++it) { - NFAVertex v = *it; - set<sls_literal> &out = built[g[v].index]; - read_count[g[v].index] = out_degree(v, g); - + vector<NFAVertex> order = getTopoOrdering(g); + + vector<set<sls_literal>> built(num_vertices(g)); + vector<size_t> read_count(num_vertices(g)); + + for (auto it = order.rbegin(); it != order.rend(); ++it) { + NFAVertex v = *it; + set<sls_literal> &out = built[g[v].index]; + read_count[g[v].index] = out_degree(v, g); + DEBUG_PRINTF("setting read_count to %zu for %zu\n", - read_count[g[v].index], g[v].index); - - assert(out.empty()); - if (v == g.start) { - out.insert(sls_literal(true)); - 
continue; - } else if (v == g.startDs) { - out.insert(sls_literal(false)); - continue; - } - - bool eod = v == g.acceptEod; - bool accept = v == g.accept || v == g.acceptEod; - const CharReach &cr = g[v].char_reach; - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.accept) { - continue; - } - - if (u == g.start && edge(g.startDs, v, g).second) { - /* floating start states may have connections to start and - * startDs - don't create duplicate anchored literals */ - DEBUG_PRINTF("skipping as floating\n"); - continue; - } - - set<sls_literal> &in = built[g[u].index]; + read_count[g[v].index], g[v].index); + + assert(out.empty()); + if (v == g.start) { + out.insert(sls_literal(true)); + continue; + } else if (v == g.startDs) { + out.insert(sls_literal(false)); + continue; + } + + bool eod = v == g.acceptEod; + bool accept = v == g.accept || v == g.acceptEod; + const CharReach &cr = g[v].char_reach; + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.accept) { + continue; + } + + if (u == g.start && edge(g.startDs, v, g).second) { + /* floating start states may have connections to start and + * startDs - don't create duplicate anchored literals */ + DEBUG_PRINTF("skipping as floating\n"); + continue; + } + + set<sls_literal> &in = built[g[u].index]; DEBUG_PRINTF("getting from %zu (%zu reads to go)\n", - g[u].index, read_count[g[u].index]); - assert(!in.empty()); - assert(read_count[g[u].index]); - - for (const sls_literal &lit : in) { - if (accept) { - sls_literal accept_lit = lit; // copy - accept_lit.eod = eod; - insert(&(*literals)[accept_lit], g[u].reports); - continue; - } - - for (size_t c = cr.find_first(); c != cr.npos; - c = cr.find_next(c)) { - bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) - && cr.test(mytolower(c)); - - if (nocase && (char)c == mytolower(c)) { - continue; /* uppercase already handled us */ - } - - out.insert(lit.append((u8)c, nocase)); - - if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) { - 
DEBUG_PRINTF("too big %zu + %zu\n", out.size(), - literals->size()); - return false; - } - } - } - - read_count[g[u].index]--; - if (!read_count[g[u].index]) { + g[u].index, read_count[g[u].index]); + assert(!in.empty()); + assert(read_count[g[u].index]); + + for (const sls_literal &lit : in) { + if (accept) { + sls_literal accept_lit = lit; // copy + accept_lit.eod = eod; + insert(&(*literals)[accept_lit], g[u].reports); + continue; + } + + for (size_t c = cr.find_first(); c != cr.npos; + c = cr.find_next(c)) { + bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) + && cr.test(mytolower(c)); + + if (nocase && (char)c == mytolower(c)) { + continue; /* uppercase already handled us */ + } + + out.insert(lit.append((u8)c, nocase)); + + if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) { + DEBUG_PRINTF("too big %zu + %zu\n", out.size(), + literals->size()); + return false; + } + } + } + + read_count[g[u].index]--; + if (!read_count[g[u].index]) { DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); - in.clear(); - } - } - } - - return true; -} - -static + in.clear(); + } + } + } + + return true; +} + +static size_t min_period(const map<sls_literal, flat_set<ReportID>> &literals) { - size_t rv = SIZE_MAX; - - for (const sls_literal &lit : literals | map_keys) { - rv = min(rv, minStringPeriod(lit.s)); - } - DEBUG_PRINTF("min period %zu\n", rv); - return rv; -} - -// If this component is just a small set of literals and can be handled by -// Rose, feed it directly into rose. -bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g, - const CompileContext &cc) { - if (!cc.grey.allowSmallLiteralSet) { - return false; - } - - if (!isAcyclic(g)) { - /* literal sets would typically be acyclic... 
*/ - DEBUG_PRINTF("not acyclic\n"); - return false; - } - + size_t rv = SIZE_MAX; + + for (const sls_literal &lit : literals | map_keys) { + rv = min(rv, minStringPeriod(lit.s)); + } + DEBUG_PRINTF("min period %zu\n", rv); + return rv; +} + +// If this component is just a small set of literals and can be handled by +// Rose, feed it directly into rose. +bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g, + const CompileContext &cc) { + if (!cc.grey.allowSmallLiteralSet) { + return false; + } + + if (!isAcyclic(g)) { + /* literal sets would typically be acyclic... */ + DEBUG_PRINTF("not acyclic\n"); + return false; + } + if (!hasNarrowReachVertex(g, MAX_LITERAL_SET_SIZE * 2 + 1)) { DEBUG_PRINTF("vertex with wide reach found\n"); return false; } - DEBUG_PRINTF("looking for literals\n"); - + DEBUG_PRINTF("looking for literals\n"); + map<sls_literal, flat_set<ReportID>> literals; - if (!findLiterals(g, &literals)) { - DEBUG_PRINTF(":(\n"); - return false; - } - - assert(!literals.empty()); - - if (literals.size() > MAX_LITERAL_SET_SIZE) { - /* try a mask instead */ - DEBUG_PRINTF("too many literals\n"); - return false; - } - - size_t period = min_period(literals); - if (period < MIN_STRONG_PERIOD && - literals.size() > MAX_WEAK_LITERAL_SET_SIZE) { - DEBUG_PRINTF("too many literals with weak period\n"); - return false; - } - - if (!checkLongMixedSensitivityLiterals(literals)) { - DEBUG_PRINTF("long mixed\n"); - return false; - } - - DEBUG_PRINTF("adding %zu literals\n", literals.size()); - for (const auto &m : literals) { - const sls_literal &lit = m.first; - const auto &reports = m.second; - rose.add(lit.anchored, lit.eod, lit.s, reports); - } - - return true; -} - -} // namespace ue2 + if (!findLiterals(g, &literals)) { + DEBUG_PRINTF(":(\n"); + return false; + } + + assert(!literals.empty()); + + if (literals.size() > MAX_LITERAL_SET_SIZE) { + /* try a mask instead */ + DEBUG_PRINTF("too many literals\n"); + return false; + } + + size_t period = 
min_period(literals); + if (period < MIN_STRONG_PERIOD && + literals.size() > MAX_WEAK_LITERAL_SET_SIZE) { + DEBUG_PRINTF("too many literals with weak period\n"); + return false; + } + + if (!checkLongMixedSensitivityLiterals(literals)) { + DEBUG_PRINTF("long mixed\n"); + return false; + } + + DEBUG_PRINTF("adding %zu literals\n", literals.size()); + for (const auto &m : literals) { + const sls_literal &lit = m.first; + const auto &reports = m.second; + rose.add(lit.anchored, lit.eod, lit.s, reports); + } + + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h index e626627071..0beca09a96 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h @@ -1,50 +1,50 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder for cases representing small literal - * sets. - */ - -#ifndef NG_SMALL_LITERAL_SET_H -#define NG_SMALL_LITERAL_SET_H - -namespace ue2 { - -class RoseBuild; -class NGHolder; -struct CompileContext; - -/** \brief If the graph represents a small set of literals, feed them directly - * to rose. Returns true if successful. */ -bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h, - const CompileContext &cc); - -} // namespace ue2 - -#endif // NG_SMALL_LITERAL_SET_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose construction from NGHolder for cases representing small literal + * sets. + */ + +#ifndef NG_SMALL_LITERAL_SET_H +#define NG_SMALL_LITERAL_SET_H + +namespace ue2 { + +class RoseBuild; +class NGHolder; +struct CompileContext; + +/** \brief If the graph represents a small set of literals, feed them directly + * to rose. Returns true if successful. */ +bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h, + const CompileContext &cc); + +} // namespace ue2 + +#endif // NG_SMALL_LITERAL_SET_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp index d23ac408b0..7383817ad8 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp @@ -1,747 +1,747 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief SOM ("Start of Match") analysis. - */ + * \brief SOM ("Start of Match") analysis. + */ #include "ng_som.h" -#include "ng.h" -#include "ng_dump.h" -#include "ng_equivalence.h" -#include "ng_execute.h" -#include "ng_haig.h" -#include "ng_limex.h" -#include "ng_literal_analysis.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_reports.h" -#include "ng_som_add_redundancy.h" -#include "ng_som_util.h" -#include "ng_split.h" -#include "ng_util.h" +#include "ng.h" +#include "ng_dump.h" +#include "ng_equivalence.h" +#include "ng_execute.h" +#include "ng_haig.h" +#include "ng_limex.h" +#include "ng_literal_analysis.h" +#include "ng_prune.h" +#include "ng_redundancy.h" +#include "ng_region.h" +#include "ng_reports.h" +#include "ng_som_add_redundancy.h" +#include "ng_som_util.h" +#include "ng_split.h" +#include "ng_util.h" #include "ng_violet.h" -#include "ng_width.h" -#include "grey.h" -#include "ue2common.h" +#include "ng_width.h" +#include "grey.h" +#include "ue2common.h" #include "compiler/compiler.h" -#include "nfa/goughcompile.h" -#include "nfa/nfa_internal.h" // for MO_INVALID_IDX -#include "parser/position.h" 
-#include "som/som.h" -#include "rose/rose_build.h" -#include "rose/rose_in_util.h" -#include "util/alloc.h" -#include "util/compare.h" -#include "util/compile_error.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" -#include "util/make_unique.h" - -#include <algorithm> -#include <map> +#include "nfa/goughcompile.h" +#include "nfa/nfa_internal.h" // for MO_INVALID_IDX +#include "parser/position.h" +#include "som/som.h" +#include "rose/rose_build.h" +#include "rose/rose_in_util.h" +#include "util/alloc.h" +#include "util/compare.h" +#include "util/compile_error.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" +#include "util/make_unique.h" + +#include <algorithm> +#include <map> #include <unordered_map> #include <unordered_set> -#include <vector> - -using namespace std; - -namespace ue2 { - -static const size_t MAX_SOM_PLANS = 10; -static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000; - -#define MAX_REV_NFA_PREFIX 80 - -namespace { -struct som_plan { - som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i, - u32 parent_in) : prefix(p), escapes(e), is_reset(i), - no_implement(false), parent(parent_in) { } - shared_ptr<NGHolder> prefix; - CharReach escapes; - bool is_reset; - bool no_implement; - u32 parent; // index of parent plan in the vector. - - // Reporters: a list of vertices in the graph that must be have their - // reports updated at implementation time to report this plan's - // som_loc_out. - vector<NFAVertex> reporters; - - // Similar, but these report the som_loc_in. 
- vector<NFAVertex> reporters_in; -}; -} - -static -bool regionCanEstablishSom(const NGHolder &g, +#include <vector> + +using namespace std; + +namespace ue2 { + +static const size_t MAX_SOM_PLANS = 10; +static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000; + +#define MAX_REV_NFA_PREFIX 80 + +namespace { +struct som_plan { + som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i, + u32 parent_in) : prefix(p), escapes(e), is_reset(i), + no_implement(false), parent(parent_in) { } + shared_ptr<NGHolder> prefix; + CharReach escapes; + bool is_reset; + bool no_implement; + u32 parent; // index of parent plan in the vector. + + // Reporters: a list of vertices in the graph that must be have their + // reports updated at implementation time to report this plan's + // som_loc_out. + vector<NFAVertex> reporters; + + // Similar, but these report the som_loc_in. + vector<NFAVertex> reporters_in; +}; +} + +static +bool regionCanEstablishSom(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const u32 region, const vector<NFAVertex> &r_exits, - const vector<DepthMinMax> &depths) { - if (region == regions.at(g.accept) || - region == regions.at(g.acceptEod)) { - DEBUG_PRINTF("accept in region\n"); - return false; - } - - DEBUG_PRINTF("region %u\n", region); - for (UNUSED auto v : r_exits) { + const u32 region, const vector<NFAVertex> &r_exits, + const vector<DepthMinMax> &depths) { + if (region == regions.at(g.accept) || + region == regions.at(g.acceptEod)) { + DEBUG_PRINTF("accept in region\n"); + return false; + } + + DEBUG_PRINTF("region %u\n", region); + for (UNUSED auto v : r_exits) { DEBUG_PRINTF(" exit %zu\n", g[v].index); - } - - /* simple if each region exit is at fixed distance from SOM. Note SOM does - not include virtual starts */ - for (auto v : r_exits) { - assert(regions.at(v) == region); - const DepthMinMax &d = depths.at(g[v].index); - if (d.min != d.max) { + } + + /* simple if each region exit is at fixed distance from SOM. 
Note SOM does + not include virtual starts */ + for (auto v : r_exits) { + assert(regions.at(v) == region); + const DepthMinMax &d = depths.at(g[v].index); + if (d.min != d.max) { DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index, - d.min.str().c_str(), d.max.str().c_str()); - return false; - } - } + d.min.str().c_str(), d.max.str().c_str()); + return false; + } + } DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]), - g[r_exits[0]].index); - - return true; -} - -namespace { - -struct region_info { - region_info() : optional(false), dag(false) {} - vector<NFAVertex> enters; - vector<NFAVertex> exits; - vector<NFAVertex> full; - bool optional; /* skip edges around region */ - bool dag; /* completely acyclic */ -}; - -} - -static -void buildRegionMapping(const NGHolder &g, + g[r_exits[0]].index); + + return true; +} + +namespace { + +struct region_info { + region_info() : optional(false), dag(false) {} + vector<NFAVertex> enters; + vector<NFAVertex> exits; + vector<NFAVertex> full; + bool optional; /* skip edges around region */ + bool dag; /* completely acyclic */ +}; + +} + +static +void buildRegionMapping(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - map<u32, region_info> &info, - bool include_region_0 = false) { - for (auto v : vertices_range(g)) { - u32 region = regions.at(v); - if (!include_region_0 && (is_any_start(v, g) || region == 0)) { - continue; - } - assert(!region || !is_any_start(v, g)); - - if (is_any_accept(v, g)) { - continue; - } - - if (isRegionEntry(g, v, regions)) { - info[region].enters.push_back(v); - } - if (isRegionExit(g, v, regions)) { - info[region].exits.push_back(v); - } - info[region].full.push_back(v); - } - - for (auto &m : info) { - if (!m.second.enters.empty() - && isOptionalRegion(g, m.second.enters.front(), regions)) { - m.second.optional = true; - } - m.second.dag = true; /* will be cleared for cyclic regions later */ - } - - set<NFAEdge> be; - BackEdges<set<NFAEdge> > backEdgeVisitor(be); + 
map<u32, region_info> &info, + bool include_region_0 = false) { + for (auto v : vertices_range(g)) { + u32 region = regions.at(v); + if (!include_region_0 && (is_any_start(v, g) || region == 0)) { + continue; + } + assert(!region || !is_any_start(v, g)); + + if (is_any_accept(v, g)) { + continue; + } + + if (isRegionEntry(g, v, regions)) { + info[region].enters.push_back(v); + } + if (isRegionExit(g, v, regions)) { + info[region].exits.push_back(v); + } + info[region].full.push_back(v); + } + + for (auto &m : info) { + if (!m.second.enters.empty() + && isOptionalRegion(g, m.second.enters.front(), regions)) { + m.second.optional = true; + } + m.second.dag = true; /* will be cleared for cyclic regions later */ + } + + set<NFAEdge> be; + BackEdges<set<NFAEdge> > backEdgeVisitor(be); boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); - - for (const auto &e : be) { - NFAVertex u = source(e, g); - NFAVertex v = target(e, g); - if (is_special(u, g) || is_special(v, g)) { - assert(is_special(u, g) && is_special(v, g)); - continue; - } - u32 r = regions.at(v); - assert(regions.at(u) == r); - info[r].dag = false; - } - - if (include_region_0) { - info[0].dag = false; - } - - #ifdef DEBUG - for (const auto &m : info) { - u32 r = m.first; - const region_info &r_i = m.second; - DEBUG_PRINTF("region %u:%s%s\n", r, - r_i.dag ? " (dag)" : "", - r_i.optional ? " (optional)" : ""); - DEBUG_PRINTF(" enters:"); - for (u32 i = 0; i < r_i.enters.size(); i++) { + + for (const auto &e : be) { + NFAVertex u = source(e, g); + NFAVertex v = target(e, g); + if (is_special(u, g) || is_special(v, g)) { + assert(is_special(u, g) && is_special(v, g)); + continue; + } + u32 r = regions.at(v); + assert(regions.at(u) == r); + info[r].dag = false; + } + + if (include_region_0) { + info[0].dag = false; + } + + #ifdef DEBUG + for (const auto &m : info) { + u32 r = m.first; + const region_info &r_i = m.second; + DEBUG_PRINTF("region %u:%s%s\n", r, + r_i.dag ? 
" (dag)" : "", + r_i.optional ? " (optional)" : ""); + DEBUG_PRINTF(" enters:"); + for (u32 i = 0; i < r_i.enters.size(); i++) { printf(" %zu", g[r_i.enters[i]].index); - } - printf("\n"); - DEBUG_PRINTF(" exits:"); - for (u32 i = 0; i < r_i.exits.size(); i++) { + } + printf("\n"); + DEBUG_PRINTF(" exits:"); + for (u32 i = 0; i < r_i.exits.size(); i++) { printf(" %zu", g[r_i.exits[i]].index); - } - printf("\n"); - DEBUG_PRINTF(" all:"); - for (u32 i = 0; i < r_i.full.size(); i++) { + } + printf("\n"); + DEBUG_PRINTF(" all:"); + for (u32 i = 0; i < r_i.full.size(); i++) { printf(" %zu", g[r_i.full[i]].index); - } - printf("\n"); - } - #endif -} - -static -bool validateXSL(const NGHolder &g, + } + printf("\n"); + } + #endif +} + +static +bool validateXSL(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const u32 region, const CharReach &escapes, u32 *bad_region) { - /* need to check that the escapes escape all of the graph past region */ - u32 first_bad_region = ~0U; - for (auto v : vertices_range(g)) { - u32 v_region = regions.at(v); - if (!is_special(v, g) && v_region > region && - (escapes & g[v].char_reach).any()) { + const u32 region, const CharReach &escapes, u32 *bad_region) { + /* need to check that the escapes escape all of the graph past region */ + u32 first_bad_region = ~0U; + for (auto v : vertices_range(g)) { + u32 v_region = regions.at(v); + if (!is_special(v, g) && v_region > region && + (escapes & g[v].char_reach).any()) { DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index); - first_bad_region = MIN(first_bad_region, v_region); - } - } - - if (first_bad_region != ~0U) { - *bad_region = first_bad_region; - return false; - } - - return true; -} - -static -bool validateEXSL(const NGHolder &g, + first_bad_region = MIN(first_bad_region, v_region); + } + } + + if (first_bad_region != ~0U) { + *bad_region = first_bad_region; + return false; + } + + return true; +} + +static +bool validateEXSL(const NGHolder &g, const 
unordered_map<NFAVertex, u32> ®ions, - const u32 region, const CharReach &escapes, - const NGHolder &prefix, u32 *bad_region) { - /* EXSL: To be a valid EXSL with escapes e, we require that all states - * go dead after /[e][^e]*{subsequent prefix match}/. - */ - - /* TODO: this is overly conservative as it allow partial matches from the - * prefix to be considered even when the tail has processed some [^e] */ - - u32 first_bad_region = ~0U; - const vector<CharReach> escapes_vec(1, escapes); - const vector<CharReach> notescapes_vec(1, ~escapes); - + const u32 region, const CharReach &escapes, + const NGHolder &prefix, u32 *bad_region) { + /* EXSL: To be a valid EXSL with escapes e, we require that all states + * go dead after /[e][^e]*{subsequent prefix match}/. + */ + + /* TODO: this is overly conservative as it allow partial matches from the + * prefix to be considered even when the tail has processed some [^e] */ + + u32 first_bad_region = ~0U; + const vector<CharReach> escapes_vec(1, escapes); + const vector<CharReach> notescapes_vec(1, ~escapes); + flat_set<NFAVertex> states; - /* turn on all states past the prefix */ - DEBUG_PRINTF("region %u is cutover\n", region); - for (auto v : vertices_range(g)) { - if (!is_special(v, g) && regions.at(v) > region) { - states.insert(v); - } - } - - /* process the escapes */ - states = execute_graph(g, escapes_vec, states); - - /* flood with any number of not escapes */ + /* turn on all states past the prefix */ + DEBUG_PRINTF("region %u is cutover\n", region); + for (auto v : vertices_range(g)) { + if (!is_special(v, g) && regions.at(v) > region) { + states.insert(v); + } + } + + /* process the escapes */ + states = execute_graph(g, escapes_vec, states); + + /* flood with any number of not escapes */ flat_set<NFAVertex> prev_states; - while (prev_states != states) { - prev_states = states; - states = execute_graph(g, notescapes_vec, states); - insert(&states, prev_states); - } - - /* find input starts to use for when we 
are running the prefix through as - * when the escape character arrives we may be in matching the prefix - * already */ + while (prev_states != states) { + prev_states = states; + states = execute_graph(g, notescapes_vec, states); + insert(&states, prev_states); + } + + /* find input starts to use for when we are running the prefix through as + * when the escape character arrives we may be in matching the prefix + * already */ flat_set<NFAVertex> prefix_start_states; - for (auto v : vertices_range(prefix)) { - if (v != prefix.accept && v != prefix.acceptEod - /* and as we have already made it past the prefix once */ - && v != prefix.start) { - prefix_start_states.insert(v); - } - } - - prefix_start_states = - execute_graph(prefix, escapes_vec, prefix_start_states); - - assert(contains(prefix_start_states, prefix.startDs)); - /* see what happens after we feed it the prefix */ - states = execute_graph(g, prefix, prefix_start_states, states); - - for (auto v : states) { - assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be - * on */ - DEBUG_PRINTF("state still active\n"); - first_bad_region = MIN(first_bad_region, regions.at(v)); - } - - if (first_bad_region != ~0U) { - *bad_region = first_bad_region; - return false; - } - - return true; -} - -static -bool isPossibleLock(const NGHolder &g, - map<u32, region_info>::const_iterator region, - const map<u32, region_info> &info, - CharReach *escapes_out) { - /* TODO: we could also check for self-loops on curr region */ - - /* TODO: some straw-walking logic. 
lowish priority has we know there can - * only be optional regions between us and the cyclic */ - - assert(region != info.end()); - map<u32, region_info>::const_iterator next_region = region; - ++next_region; - if (next_region == info.end()) { - assert(0); /* odd */ - return false; - } - - const region_info &next_info = next_region->second; - if (next_info.enters.empty()) { - assert(0); /* odd */ - return false; - } - - if (next_info.full.size() == 1 && !next_info.dag) { - *escapes_out = ~g[next_info.full.front()].char_reach; - return true; - } - - return false; -} - -static -unique_ptr<NGHolder> + for (auto v : vertices_range(prefix)) { + if (v != prefix.accept && v != prefix.acceptEod + /* and as we have already made it past the prefix once */ + && v != prefix.start) { + prefix_start_states.insert(v); + } + } + + prefix_start_states = + execute_graph(prefix, escapes_vec, prefix_start_states); + + assert(contains(prefix_start_states, prefix.startDs)); + /* see what happens after we feed it the prefix */ + states = execute_graph(g, prefix, prefix_start_states, states); + + for (auto v : states) { + assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be + * on */ + DEBUG_PRINTF("state still active\n"); + first_bad_region = MIN(first_bad_region, regions.at(v)); + } + + if (first_bad_region != ~0U) { + *bad_region = first_bad_region; + return false; + } + + return true; +} + +static +bool isPossibleLock(const NGHolder &g, + map<u32, region_info>::const_iterator region, + const map<u32, region_info> &info, + CharReach *escapes_out) { + /* TODO: we could also check for self-loops on curr region */ + + /* TODO: some straw-walking logic. 
lowish priority has we know there can + * only be optional regions between us and the cyclic */ + + assert(region != info.end()); + map<u32, region_info>::const_iterator next_region = region; + ++next_region; + if (next_region == info.end()) { + assert(0); /* odd */ + return false; + } + + const region_info &next_info = next_region->second; + if (next_info.enters.empty()) { + assert(0); /* odd */ + return false; + } + + if (next_info.full.size() == 1 && !next_info.dag) { + *escapes_out = ~g[next_info.full.front()].char_reach; + return true; + } + + return false; +} + +static +unique_ptr<NGHolder> makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const region_info &curr, const region_info &next, - bool renumber = true) { - const vector<NFAVertex> &curr_exits = curr.exits; - const vector<NFAVertex> &next_enters = next.enters; - - assert(!next_enters.empty()); - assert(!curr_exits.empty()); - - unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>(); - NGHolder &prefix = *prefix_ptr; - - deque<NFAVertex> lhs_verts; - insert(&lhs_verts, lhs_verts.end(), vertices(g)); - + const region_info &curr, const region_info &next, + bool renumber = true) { + const vector<NFAVertex> &curr_exits = curr.exits; + const vector<NFAVertex> &next_enters = next.enters; + + assert(!next_enters.empty()); + assert(!curr_exits.empty()); + + unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>(); + NGHolder &prefix = *prefix_ptr; + + deque<NFAVertex> lhs_verts; + insert(&lhs_verts, lhs_verts.end(), vertices(g)); + unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix - fillHolder(&prefix, g, lhs_verts, &lhs_map); - prefix.kind = NFA_OUTFIX; - - // We need a reverse mapping to track regions. + fillHolder(&prefix, g, lhs_verts, &lhs_map); + prefix.kind = NFA_OUTFIX; + + // We need a reverse mapping to track regions. 
unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g - for (const auto &e : lhs_map) { - rev_map.emplace(e.second, e.first); - } - - clear_in_edges(prefix.accept, prefix); - clear_in_edges(prefix.acceptEod, prefix); - add_edge(prefix.accept, prefix.acceptEod, prefix); - - assert(!next_enters.empty()); + for (const auto &e : lhs_map) { + rev_map.emplace(e.second, e.first); + } + + clear_in_edges(prefix.accept, prefix); + clear_in_edges(prefix.acceptEod, prefix); + add_edge(prefix.accept, prefix.acceptEod, prefix); + + assert(!next_enters.empty()); assert(next_enters.front() != NGHolder::null_vertex()); - u32 dead_region = regions.at(next_enters.front()); - DEBUG_PRINTF("curr_region %u, dead_region %u\n", - regions.at(curr_exits.front()), dead_region); - for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) { - if (regions.at(v) >= dead_region) { - continue; - } - /* add edge to new accepts */ - NFAVertex p_v = lhs_map[v]; - add_edge(p_v, prefix.accept, prefix); - } - - assert(in_degree(prefix.accept, prefix) != 0); - - /* prune everything past the picked region */ - vector<NFAVertex> to_clear; - assert(contains(lhs_map, curr_exits.front())); - NFAVertex p_u = lhs_map[curr_exits.front()]; + u32 dead_region = regions.at(next_enters.front()); + DEBUG_PRINTF("curr_region %u, dead_region %u\n", + regions.at(curr_exits.front()), dead_region); + for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) { + if (regions.at(v) >= dead_region) { + continue; + } + /* add edge to new accepts */ + NFAVertex p_v = lhs_map[v]; + add_edge(p_v, prefix.accept, prefix); + } + + assert(in_degree(prefix.accept, prefix) != 0); + + /* prune everything past the picked region */ + vector<NFAVertex> to_clear; + assert(contains(lhs_map, curr_exits.front())); + NFAVertex p_u = lhs_map[curr_exits.front()]; DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index); - for (auto p_v : adjacent_vertices_range(p_u, prefix)) { - auto v = rev_map.at(p_v); - if (p_v == prefix.accept || 
regions.at(v) < dead_region) { - continue; - } - to_clear.push_back(p_v); - } - - for (auto v : to_clear) { + for (auto p_v : adjacent_vertices_range(p_u, prefix)) { + auto v = rev_map.at(p_v); + if (p_v == prefix.accept || regions.at(v) < dead_region) { + continue; + } + to_clear.push_back(p_v); + } + + for (auto v : to_clear) { DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index); - clear_in_edges(v, prefix); - } - - pruneUseless(prefix, renumber /* sometimes we want no renumber to keep - depth map valid */); - - assert(num_vertices(prefix) > N_SPECIALS); - return prefix_ptr; -} - -static -void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) { - const u32 temp_slot = UINT32_MAX; - /* update the som slot on the prefix report */ - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - auto &reports = g[v].reports; - assert(reports.size() == 1); - Report ir = rm.getReport(*reports.begin()); - if (ir.onmatch != temp_slot) { - continue; - } - ir.onmatch = real_slot; - ReportID rep = rm.getInternalId(ir); - - assert(reports.size() == 1); - reports.clear(); - reports.insert(rep); - } -} - -static + clear_in_edges(v, prefix); + } + + pruneUseless(prefix, renumber /* sometimes we want no renumber to keep + depth map valid */); + + assert(num_vertices(prefix) > N_SPECIALS); + return prefix_ptr; +} + +static +void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) { + const u32 temp_slot = UINT32_MAX; + /* update the som slot on the prefix report */ + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + auto &reports = g[v].reports; + assert(reports.size() == 1); + Report ir = rm.getReport(*reports.begin()); + if (ir.onmatch != temp_slot) { + continue; + } + ir.onmatch = real_slot; + ReportID rep = rm.getInternalId(ir); + + assert(reports.size() == 1); + reports.clear(); + reports.insert(rep); + } +} + +static void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type, u32 som_loc, const vector<DepthMinMax> 
&depths, bool prefix_by_rev) { - Report ir = makeCallback(0U, 0); - ir.type = ir_type; - ir.onmatch = som_loc; - - /* add report for storing in som location on new accepts */ - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (prefix_by_rev) { - ir.somDistance = MO_INVALID_IDX; /* will be populated properly - * later */ - } else { - const DepthMinMax &d = depths.at(g[v].index); - assert(d.min == d.max); - ir.somDistance = d.max; - } - ReportID rep = rm.getInternalId(ir); - - auto &reports = g[v].reports; - reports.clear(); - reports.insert(rep); - } -} - -static + Report ir = makeCallback(0U, 0); + ir.type = ir_type; + ir.onmatch = som_loc; + + /* add report for storing in som location on new accepts */ + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (prefix_by_rev) { + ir.somDistance = MO_INVALID_IDX; /* will be populated properly + * later */ + } else { + const DepthMinMax &d = depths.at(g[v].index); + assert(d.min == d.max); + ir.somDistance = d.max; + } + ReportID rep = rm.getInternalId(ir); + + auto &reports = g[v].reports; + reports.clear(); + reports.insert(rep); + } +} + +static void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) { - /* update the som action on the prefix report */ - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - auto &reports = g[v].reports; - assert(reports.size() == 1); - Report ir = rm.getReport(*reports.begin()); - ir.type = ir_type; - ReportID rep = rm.getInternalId(ir); - - assert(reports.size() == 1); - reports.clear(); - reports.insert(rep); - } -} - -static -void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g, - u32 rev_comp_id) { - /* update the action on the prefix report, to refer to a reverse nfa, - * report type is also adjusted. 
*/ - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - auto &reports = g[v].reports; - assert(reports.size() == 1); - Report ir = rm.getReport(*reports.begin()); - switch (ir.type) { - case INTERNAL_SOM_LOC_SET: - ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_IF_UNSET: - ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE; - break; - default: - assert(0); - break; - } - - ir.revNfaIndex = rev_comp_id; - ReportID rep = rm.getInternalId(ir); - - assert(reports.size() == 1); - reports.clear(); - reports.insert(rep); - } -} - -static -void setMidfixReports(ReportManager &rm, const som_plan &item, - const u32 som_slot_in, const u32 som_slot_out) { - assert(item.prefix); - NGHolder &g = *item.prefix; - - Report ir = makeCallback(0U, 0); - ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY - : INTERNAL_SOM_LOC_COPY_IF_WRITABLE; - ir.onmatch = som_slot_out; - ir.somDistance = som_slot_in; - ReportID rep = rm.getInternalId(ir); - - /* add report for storing in som location on new accepts */ - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - auto &reports = g[v].reports; - reports.clear(); - reports.insert(rep); - } -} - -static -bool finalRegion(const NGHolder &g, + /* update the som action on the prefix report */ + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + auto &reports = g[v].reports; + assert(reports.size() == 1); + Report ir = rm.getReport(*reports.begin()); + ir.type = ir_type; + ReportID rep = rm.getInternalId(ir); + + assert(reports.size() == 1); + reports.clear(); + reports.insert(rep); + } +} + +static +void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g, + u32 rev_comp_id) { + /* update the action on the prefix report, to refer to a reverse nfa, + * report type is also adjusted. 
*/ + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + auto &reports = g[v].reports; + assert(reports.size() == 1); + Report ir = rm.getReport(*reports.begin()); + switch (ir.type) { + case INTERNAL_SOM_LOC_SET: + ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_IF_UNSET: + ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE; + break; + default: + assert(0); + break; + } + + ir.revNfaIndex = rev_comp_id; + ReportID rep = rm.getInternalId(ir); + + assert(reports.size() == 1); + reports.clear(); + reports.insert(rep); + } +} + +static +void setMidfixReports(ReportManager &rm, const som_plan &item, + const u32 som_slot_in, const u32 som_slot_out) { + assert(item.prefix); + NGHolder &g = *item.prefix; + + Report ir = makeCallback(0U, 0); + ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY + : INTERNAL_SOM_LOC_COPY_IF_WRITABLE; + ir.onmatch = som_slot_out; + ir.somDistance = som_slot_in; + ReportID rep = rm.getInternalId(ir); + + /* add report for storing in som location on new accepts */ + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + auto &reports = g[v].reports; + reports.clear(); + reports.insert(rep); + } +} + +static +bool finalRegion(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - NFAVertex v) { - u32 region = regions.at(v); - for (auto w : adjacent_vertices_range(v, g)) { - if (w != g.accept && w != g.acceptEod && regions.at(w) != region) { - return false; - } - } - - return true; -} - -static -void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, + NFAVertex v) { + u32 region = regions.at(v); + for (auto w : adjacent_vertices_range(v, g)) { + if (w != g.accept && w != g.acceptEod && regions.at(w) != region) { + return false; + } + } + + return true; +} + +static +void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, NFAVertex v, ReportType ir_type, u64a 
param) { - assert(!g[v].reports.empty()); - - flat_set<ReportID> r_new; - - for (const ReportID &report_id : g[v].reports) { - Report ir = rm.getReport(report_id); - - if (ir.type != EXTERNAL_CALLBACK) { - /* we must have already done whatever magic we needed to do to this - * report */ - r_new.insert(report_id); - continue; - } - - ir.type = ir_type; - ir.somDistance = param; - ReportID rep = rm.getInternalId(ir); - + assert(!g[v].reports.empty()); + + flat_set<ReportID> r_new; + + for (const ReportID &report_id : g[v].reports) { + Report ir = rm.getReport(report_id); + + if (ir.type != EXTERNAL_CALLBACK) { + /* we must have already done whatever magic we needed to do to this + * report */ + r_new.insert(report_id); + continue; + } + + ir.type = ir_type; + ir.somDistance = param; + ReportID rep = rm.getInternalId(ir); + DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n", - g[v].index, report_id, rep, ir_type); - r_new.insert(rep); - } - g[v].reports = r_new; -} - -/* updates the reports on all vertices leading to the sink */ -static -void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink, - const vector<DepthMinMax> &depths) { - for (auto v : inv_adjacent_vertices_range(sink, g)) { - if (v == g.accept) { - continue; - } - - const DepthMinMax &d = depths.at(g[v].index); - assert(d.min == d.max); - replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL, - d.min); - } -} - -/* updates the reports on all the provided vertices */ -static -void makeSomRelReports(ReportManager &rm, NGHolder &g, - const vector<NFAVertex> &to_update, - const vector<DepthMinMax> &depths) { - for (auto v : to_update) { - const DepthMinMax &d = depths.at(g[v].index); - assert(d.min == d.max); - replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL, - d.min); - } -} - -static -void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) { - for (auto v : inv_adjacent_vertices_range(sink, g)) { - if (v == g.accept) { - 
continue; - } - replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS, - 0); - } -} - -static -void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) { - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, - som_loc); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } - replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, - som_loc); - } -} - -static -void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, - const vector<NFAVertex> &to_update, - u32 som_loc) { - for (auto v : to_update) { - replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, - som_loc); - } -} - -static -bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes, - u32 som_loc) { - ReportManager &rm = ng.rm; - - /* escaper = /prefix[^escapes]*[escapes]/ */ - DEBUG_PRINTF("creating escaper for %u\n", som_loc); - NGHolder h; - cloneHolder(h, prefix); - assert(h.kind == NFA_OUTFIX); - - NFAVertex u = add_vertex(h); - h[u].char_reach = ~escapes; - - NFAVertex v = add_vertex(h); - h[v].char_reach = escapes; - - for (auto w : inv_adjacent_vertices_range(h.accept, h)) { - add_edge(w, u, h); - add_edge(w, v, h); - h[w].reports.clear(); - } - - clear_in_edges(h.accept, h); - - add_edge(u, v, h); - add_edge(u, u, h); - add_edge(v, h.accept, h); - - Report ir = makeCallback(0U, 0); - ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE; - ir.onmatch = som_loc; - h[v].reports.insert(rm.getInternalId(ir)); - return ng.addHolder(h); -} - -static -void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator picked) { - /* NOTE: This is appropriate for firstMatchIsFirst */ - DEBUG_PRINTF("prepping for lock check\n"); - - NGHolder &midfix = *out; - - map<NFAVertex, NFAVertex> v_map; - v_map[g.start] = 
midfix.start; - v_map[g.startDs] = midfix.startDs; - - /* include the lock region */ + g[v].index, report_id, rep, ir_type); + r_new.insert(rep); + } + g[v].reports = r_new; +} + +/* updates the reports on all vertices leading to the sink */ +static +void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink, + const vector<DepthMinMax> &depths) { + for (auto v : inv_adjacent_vertices_range(sink, g)) { + if (v == g.accept) { + continue; + } + + const DepthMinMax &d = depths.at(g[v].index); + assert(d.min == d.max); + replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL, + d.min); + } +} + +/* updates the reports on all the provided vertices */ +static +void makeSomRelReports(ReportManager &rm, NGHolder &g, + const vector<NFAVertex> &to_update, + const vector<DepthMinMax> &depths) { + for (auto v : to_update) { + const DepthMinMax &d = depths.at(g[v].index); + assert(d.min == d.max); + replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL, + d.min); + } +} + +static +void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) { + for (auto v : inv_adjacent_vertices_range(sink, g)) { + if (v == g.accept) { + continue; + } + replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS, + 0); + } +} + +static +void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) { + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, + som_loc); + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } + replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, + som_loc); + } +} + +static +void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, + const vector<NFAVertex> &to_update, + u32 som_loc) { + for (auto v : to_update) { + replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, + som_loc); + } +} + +static +bool 
createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes, + u32 som_loc) { + ReportManager &rm = ng.rm; + + /* escaper = /prefix[^escapes]*[escapes]/ */ + DEBUG_PRINTF("creating escaper for %u\n", som_loc); + NGHolder h; + cloneHolder(h, prefix); + assert(h.kind == NFA_OUTFIX); + + NFAVertex u = add_vertex(h); + h[u].char_reach = ~escapes; + + NFAVertex v = add_vertex(h); + h[v].char_reach = escapes; + + for (auto w : inv_adjacent_vertices_range(h.accept, h)) { + add_edge(w, u, h); + add_edge(w, v, h); + h[w].reports.clear(); + } + + clear_in_edges(h.accept, h); + + add_edge(u, v, h); + add_edge(u, u, h); + add_edge(v, h.accept, h); + + Report ir = makeCallback(0U, 0); + ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE; + ir.onmatch = som_loc; + h[v].reports.insert(rm.getInternalId(ir)); + return ng.addHolder(h); +} + +static +void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator picked) { + /* NOTE: This is appropriate for firstMatchIsFirst */ + DEBUG_PRINTF("prepping for lock check\n"); + + NGHolder &midfix = *out; + + map<NFAVertex, NFAVertex> v_map; + v_map[g.start] = midfix.start; + v_map[g.startDs] = midfix.startDs; + + /* include the lock region */ assert(picked != info.end()); auto graph_last = next(picked); - + assert(!graph_last->second.dag); assert(graph_last->second.full.size() == 1); for (auto jt = graph_last; ; --jt) { - DEBUG_PRINTF("adding r %u to midfix\n", jt->first); - - /* add all vertices in region, create mapping */ - for (auto v : jt->second.full) { + DEBUG_PRINTF("adding r %u to midfix\n", jt->first); + + /* add all vertices in region, create mapping */ + for (auto v : jt->second.full) { DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); - if (contains(v_map, v)) { - continue; - } - - /* treat all virtual starts as happening anywhere, so that the - * virtual start is not counted as part of the SoM */ - if (is_virtual_start(v, g)) { - v_map[v] = 
midfix.startDs; - continue; - } - - NFAVertex vnew = add_vertex(g[v], midfix); - v_map[v] = vnew; - } - - /* add edges leaving region verts based on mapping */ - for (auto v : jt->second.full) { - NFAVertex u = v_map[v]; - for (auto w : adjacent_vertices_range(v, g)) { - if (w == g.accept || w == g.acceptEod) { - add_edge_if_not_present(u, midfix.accept, midfix); - continue; - } - if (!contains(v_map, w)) { - add_edge_if_not_present(u, midfix.accept, midfix); - } else { - add_edge_if_not_present(u, v_map[w], midfix); - } - } - } - + if (contains(v_map, v)) { + continue; + } + + /* treat all virtual starts as happening anywhere, so that the + * virtual start is not counted as part of the SoM */ + if (is_virtual_start(v, g)) { + v_map[v] = midfix.startDs; + continue; + } + + NFAVertex vnew = add_vertex(g[v], midfix); + v_map[v] = vnew; + } + + /* add edges leaving region verts based on mapping */ + for (auto v : jt->second.full) { + NFAVertex u = v_map[v]; + for (auto w : adjacent_vertices_range(v, g)) { + if (w == g.accept || w == g.acceptEod) { + add_edge_if_not_present(u, midfix.accept, midfix); + continue; + } + if (!contains(v_map, w)) { + add_edge_if_not_present(u, midfix.accept, midfix); + } else { + add_edge_if_not_present(u, v_map[w], midfix); + } + } + } + if (jt == info.begin()) { break; } @@ -750,1671 +750,1671 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, /* add edges from startds to the enters of all the initial optional * regions and the first mandatory region. 
*/ for (auto jt = info.begin(); ; ++jt) { - for (auto enter : jt->second.enters) { - assert(contains(v_map, enter)); - NFAVertex v = v_map[enter]; - add_edge_if_not_present(midfix.startDs, v, midfix); - } - + for (auto enter : jt->second.enters) { + assert(contains(v_map, enter)); + NFAVertex v = v_map[enter]; + add_edge_if_not_present(midfix.startDs, v, midfix); + } + if (!jt->second.optional) { - break; - } + break; + } if (jt == graph_last) { /* all regions are optional - add a direct edge to accept */ add_edge_if_not_present(midfix.startDs, midfix.accept, midfix); break; } - } - - assert(in_degree(midfix.accept, midfix)); + } + + assert(in_degree(midfix.accept, midfix)); renumber_vertices(midfix); -} - -static -void fillRoughMidfix(NGHolder *out, const NGHolder &g, +} + +static +void fillRoughMidfix(NGHolder *out, const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator picked) { - /* as we are not the first prefix, we are probably not acyclic. We need to - * generate an acyclic holder to acts a fake prefix to sentClearsTail. - * This will result in a more conservative estimate. */ - /* NOTE: This is not appropriate for firstMatchIsFirst */ - NGHolder &midfix = *out; - add_edge(midfix.startDs, midfix.accept, midfix); - - map<NFAVertex, NFAVertex> v_map; - - map<u32, region_info>::const_iterator jt = picked; - for (; jt->second.dag; --jt) { - DEBUG_PRINTF("adding r %u to midfix\n", jt->first); - if (!jt->second.optional) { - clear_out_edges(midfix.startDs, midfix); - add_edge(midfix.startDs, midfix.startDs, midfix); - } - - /* add all vertices in region, create mapping */ - for (auto v : jt->second.full) { + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator picked) { + /* as we are not the first prefix, we are probably not acyclic. We need to + * generate an acyclic holder to acts a fake prefix to sentClearsTail. 
+ * This will result in a more conservative estimate. */ + /* NOTE: This is not appropriate for firstMatchIsFirst */ + NGHolder &midfix = *out; + add_edge(midfix.startDs, midfix.accept, midfix); + + map<NFAVertex, NFAVertex> v_map; + + map<u32, region_info>::const_iterator jt = picked; + for (; jt->second.dag; --jt) { + DEBUG_PRINTF("adding r %u to midfix\n", jt->first); + if (!jt->second.optional) { + clear_out_edges(midfix.startDs, midfix); + add_edge(midfix.startDs, midfix.startDs, midfix); + } + + /* add all vertices in region, create mapping */ + for (auto v : jt->second.full) { DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); - NFAVertex vnew = add_vertex(g[v], midfix); - v_map[v] = vnew; - } - - /* add edges leaving region verts based on mapping */ - for (auto v : jt->second.full) { - NFAVertex u = v_map[v]; - for (auto w : adjacent_vertices_range(v, g)) { - if (w == g.accept || w == g.acceptEod) { - continue; - } - if (!contains(v_map, w)) { - add_edge_if_not_present(u, midfix.accept, midfix); - } else { - add_edge_if_not_present(u, v_map[w], midfix); - } - } - } - - /* add edges from startds to enters */ - for (auto enter : jt->second.enters) { - assert(contains(v_map, enter)); - NFAVertex v = v_map[enter]; - add_edge(midfix.startDs, v, midfix); - } - - if (jt == info.begin()) { - break; - } - } - - /* we can include the exits of the regions leading in */ - if (!jt->second.dag) { - u32 first_early_region = jt->first; - clear_out_edges(midfix.startDs, midfix); - add_edge(midfix.startDs, midfix.startDs, midfix); - - do { - for (auto v : jt->second.exits) { + NFAVertex vnew = add_vertex(g[v], midfix); + v_map[v] = vnew; + } + + /* add edges leaving region verts based on mapping */ + for (auto v : jt->second.full) { + NFAVertex u = v_map[v]; + for (auto w : adjacent_vertices_range(v, g)) { + if (w == g.accept || w == g.acceptEod) { + continue; + } + if (!contains(v_map, w)) { + add_edge_if_not_present(u, midfix.accept, midfix); + } else { + 
add_edge_if_not_present(u, v_map[w], midfix); + } + } + } + + /* add edges from startds to enters */ + for (auto enter : jt->second.enters) { + assert(contains(v_map, enter)); + NFAVertex v = v_map[enter]; + add_edge(midfix.startDs, v, midfix); + } + + if (jt == info.begin()) { + break; + } + } + + /* we can include the exits of the regions leading in */ + if (!jt->second.dag) { + u32 first_early_region = jt->first; + clear_out_edges(midfix.startDs, midfix); + add_edge(midfix.startDs, midfix.startDs, midfix); + + do { + for (auto v : jt->second.exits) { DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); - NFAVertex vnew = add_vertex(g[v], midfix); - v_map[v] = vnew; - - /* add edges from startds to new vertices */ - add_edge(midfix.startDs, vnew, midfix); - } - - /* add edges leaving region verts based on mapping */ - for (auto v : jt->second.exits) { - NFAVertex u = v_map[v]; - for (auto w : adjacent_vertices_range(v, g)) { - if (w == g.accept || w == g.acceptEod - || regions.at(w) <= first_early_region) { - continue; - } - if (!contains(v_map, w)) { - add_edge_if_not_present(u, midfix.accept, midfix); - } else { - add_edge_if_not_present(u, v_map[w], midfix); - } - } - } - } while (jt->second.optional && jt != info.begin() && (jt--)->first); - - if (jt->second.optional) { - assert(!jt->second.exits.empty()); - NFAVertex v = v_map[jt->second.exits.front()]; - for (auto w : adjacent_vertices_range(v, midfix)) { - add_edge(midfix.startDs, w, midfix); - } - } - } -} - -static -bool beginsWithDotStar(const NGHolder &g) { - bool hasDot = false; - - // We can ignore the successors of start, as matches that begin there will - // necessarily have a SOM of 0. - - set<NFAVertex> succ; - insert(&succ, adjacent_vertices(g.startDs, g)); - succ.erase(g.startDs); - - for (auto v : succ) { - // We want 'dot' states that aren't virtual starts. 
- if (g[v].char_reach.all() && - !g[v].assert_flags) { - hasDot = true; - set<NFAVertex> dotsucc; - insert(&dotsucc, adjacent_vertices(v, g)); - if (dotsucc != succ) { - DEBUG_PRINTF("failed dot-star succ check\n"); - return false; - } - } - } - - if (hasDot) { - DEBUG_PRINTF("begins with dot-star\n"); - } - return hasDot; -} - -static -bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in, - const u32 som_slot_out) { - assert(item.prefix); - assert(hasCorrectlyNumberedVertices(*item.prefix)); - - /* setup escaper for second som_location if required */ - if (item.escapes.any()) { - if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) { - return false; - } - } - - /* ensure we copy som from prev loc */ - setMidfixReports(ng.rm, item, som_slot_in, som_slot_out); - - /* add second prefix/1st midfix */ - if (!ng.addHolder(*item.prefix)) { - DEBUG_PRINTF("---addHolder failed---\n"); - return false; - } - - return true; -} - -static -bool isMandRegionBetween(map<u32, region_info>::const_iterator a, - map<u32, region_info>::const_iterator b) { - while (b != a) { - if (!b->second.optional) { - return true; - } - --b; - } - - return false; -} - -// Attempts to advance the current plan. Returns true if we advance to the end -// (woot!); updates picked, plan and bad_region. 
-static -bool advancePlan(const NGHolder &g, + NFAVertex vnew = add_vertex(g[v], midfix); + v_map[v] = vnew; + + /* add edges from startds to new vertices */ + add_edge(midfix.startDs, vnew, midfix); + } + + /* add edges leaving region verts based on mapping */ + for (auto v : jt->second.exits) { + NFAVertex u = v_map[v]; + for (auto w : adjacent_vertices_range(v, g)) { + if (w == g.accept || w == g.acceptEod + || regions.at(w) <= first_early_region) { + continue; + } + if (!contains(v_map, w)) { + add_edge_if_not_present(u, midfix.accept, midfix); + } else { + add_edge_if_not_present(u, v_map[w], midfix); + } + } + } + } while (jt->second.optional && jt != info.begin() && (jt--)->first); + + if (jt->second.optional) { + assert(!jt->second.exits.empty()); + NFAVertex v = v_map[jt->second.exits.front()]; + for (auto w : adjacent_vertices_range(v, midfix)) { + add_edge(midfix.startDs, w, midfix); + } + } + } +} + +static +bool beginsWithDotStar(const NGHolder &g) { + bool hasDot = false; + + // We can ignore the successors of start, as matches that begin there will + // necessarily have a SOM of 0. + + set<NFAVertex> succ; + insert(&succ, adjacent_vertices(g.startDs, g)); + succ.erase(g.startDs); + + for (auto v : succ) { + // We want 'dot' states that aren't virtual starts. 
+ if (g[v].char_reach.all() && + !g[v].assert_flags) { + hasDot = true; + set<NFAVertex> dotsucc; + insert(&dotsucc, adjacent_vertices(v, g)); + if (dotsucc != succ) { + DEBUG_PRINTF("failed dot-star succ check\n"); + return false; + } + } + } + + if (hasDot) { + DEBUG_PRINTF("begins with dot-star\n"); + } + return hasDot; +} + +static +bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in, + const u32 som_slot_out) { + assert(item.prefix); + assert(hasCorrectlyNumberedVertices(*item.prefix)); + + /* setup escaper for second som_location if required */ + if (item.escapes.any()) { + if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) { + return false; + } + } + + /* ensure we copy som from prev loc */ + setMidfixReports(ng.rm, item, som_slot_in, som_slot_out); + + /* add second prefix/1st midfix */ + if (!ng.addHolder(*item.prefix)) { + DEBUG_PRINTF("---addHolder failed---\n"); + return false; + } + + return true; +} + +static +bool isMandRegionBetween(map<u32, region_info>::const_iterator a, + map<u32, region_info>::const_iterator b) { + while (b != a) { + if (!b->second.optional) { + return true; + } + --b; + } + + return false; +} + +// Attempts to advance the current plan. Returns true if we advance to the end +// (woot!); updates picked, plan and bad_region. 
+static +bool advancePlan(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const NGHolder &prefix, bool stuck, - map<u32, region_info>::const_iterator &picked, - const map<u32, region_info>::const_iterator furthest, - const map<u32, region_info>::const_iterator furthest_lock, - const CharReach &next_escapes, som_plan &plan, - u32 *bad_region) { - u32 bad_region_r = 0; - u32 bad_region_x = 0; - u32 bad_region_e = 0; - DEBUG_PRINTF("curr %u\n", picked->first); - - if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) { - plan.is_reset = true; - picked = furthest; - DEBUG_PRINTF("Prefix clears tail, woot!\n"); - return true; - } else { - DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r); - } - - if (stuck) { - u32 to_region = furthest_lock->first; - if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) { - DEBUG_PRINTF("XSL\n"); - picked = furthest_lock; - plan.escapes = next_escapes; - return true; - } else { - DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x); - } - - if (validateEXSL(g, regions, to_region, next_escapes, prefix, - &bad_region_e)) { - DEBUG_PRINTF("EXSL\n"); - picked = furthest_lock; - plan.escapes = next_escapes; - return true; - } else { - DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e); - } - } else { - DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n"); - } - - assert(!plan.is_reset); - - *bad_region = max(bad_region_x, bad_region_e); - if (bad_region_r >= *bad_region) { - *bad_region = bad_region_r; - plan.is_reset = true; - plan.escapes.clear(); - picked = furthest; - } else { - picked = furthest_lock; - plan.escapes = next_escapes; - } - - DEBUG_PRINTF("first bad region now %u\n", *bad_region); - return false; -} - -static -bool addPlan(vector<som_plan> &plan, u32 parent) { - DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(), - parent); - - if (plan.size() >= MAX_SOM_PLANS) { - DEBUG_PRINTF("too many plans!\n"); - return false; - } - - 
plan.emplace_back(nullptr, CharReach(), false, parent); - return true; -} - -// Fetches all preds of {accept, acceptEod} for this graph. -static -void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) { + const NGHolder &prefix, bool stuck, + map<u32, region_info>::const_iterator &picked, + const map<u32, region_info>::const_iterator furthest, + const map<u32, region_info>::const_iterator furthest_lock, + const CharReach &next_escapes, som_plan &plan, + u32 *bad_region) { + u32 bad_region_r = 0; + u32 bad_region_x = 0; + u32 bad_region_e = 0; + DEBUG_PRINTF("curr %u\n", picked->first); + + if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) { + plan.is_reset = true; + picked = furthest; + DEBUG_PRINTF("Prefix clears tail, woot!\n"); + return true; + } else { + DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r); + } + + if (stuck) { + u32 to_region = furthest_lock->first; + if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) { + DEBUG_PRINTF("XSL\n"); + picked = furthest_lock; + plan.escapes = next_escapes; + return true; + } else { + DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x); + } + + if (validateEXSL(g, regions, to_region, next_escapes, prefix, + &bad_region_e)) { + DEBUG_PRINTF("EXSL\n"); + picked = furthest_lock; + plan.escapes = next_escapes; + return true; + } else { + DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e); + } + } else { + DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n"); + } + + assert(!plan.is_reset); + + *bad_region = max(bad_region_x, bad_region_e); + if (bad_region_r >= *bad_region) { + *bad_region = bad_region_r; + plan.is_reset = true; + plan.escapes.clear(); + picked = furthest; + } else { + picked = furthest_lock; + plan.escapes = next_escapes; + } + + DEBUG_PRINTF("first bad region now %u\n", *bad_region); + return false; +} + +static +bool addPlan(vector<som_plan> &plan, u32 parent) { + DEBUG_PRINTF("adding plan %zu with parent 
%u\n", plan.size(), + parent); + + if (plan.size() >= MAX_SOM_PLANS) { + DEBUG_PRINTF("too many plans!\n"); + return false; + } + + plan.emplace_back(nullptr, CharReach(), false, parent); + return true; +} + +// Fetches all preds of {accept, acceptEod} for this graph. +static +void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) { set<NFAVertex> tmp; - insert(&tmp, inv_adjacent_vertices(g.accept, g)); - insert(&tmp, inv_adjacent_vertices(g.acceptEod, g)); - tmp.erase(g.accept); - -#ifdef DEBUG - DEBUG_PRINTF("add reporters:"); - for (UNUSED auto v : tmp) { + insert(&tmp, inv_adjacent_vertices(g.accept, g)); + insert(&tmp, inv_adjacent_vertices(g.acceptEod, g)); + tmp.erase(g.accept); + +#ifdef DEBUG + DEBUG_PRINTF("add reporters:"); + for (UNUSED auto v : tmp) { printf(" %zu", g[v].index); - } - printf("\n"); -#endif - - reporters.insert(reporters.end(), tmp.begin(), tmp.end()); -} - -// Fetches all preds of {accept, acceptEod} in this region. -static -void addReporterVertices(const region_info &r, const NGHolder &g, - vector<NFAVertex> &reporters) { - for (auto v : r.exits) { - if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { + } + printf("\n"); +#endif + + reporters.insert(reporters.end(), tmp.begin(), tmp.end()); +} + +// Fetches all preds of {accept, acceptEod} in this region. +static +void addReporterVertices(const region_info &r, const NGHolder &g, + vector<NFAVertex> &reporters) { + for (auto v : r.exits) { + if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { DEBUG_PRINTF("add reporter %zu\n", g[v].index); - reporters.push_back(v); - } - } -} - -// Fetches the mappings of all preds of {accept, acceptEod} in this region. -static -void addMappedReporterVertices(const region_info &r, const NGHolder &g, + reporters.push_back(v); + } + } +} + +// Fetches the mappings of all preds of {accept, acceptEod} in this region. 
+static +void addMappedReporterVertices(const region_info &r, const NGHolder &g, const unordered_map<NFAVertex, NFAVertex> &mapping, - vector<NFAVertex> &reporters) { - for (auto v : r.exits) { - if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { + vector<NFAVertex> &reporters) { + for (auto v : r.exits) { + if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { DEBUG_PRINTF("adding v=%zu\n", g[v].index); auto it = mapping.find(v); - assert(it != mapping.end()); - reporters.push_back(it->second); - } - } -} - -// Clone a version of the graph, but only including the in-edges of `enter' -// from earlier regions. -static -void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, + assert(it != mapping.end()); + reporters.push_back(it->second); + } + } +} + +// Clone a version of the graph, but only including the in-edges of `enter' +// from earlier regions. +static +void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - NFAVertex entry, const vector<NFAVertex> &enters, + NFAVertex entry, const vector<NFAVertex> &enters, unordered_map<NFAVertex, NFAVertex> &orig_to_copy) { - orig_to_copy.clear(); - cloneHolder(out, g, &orig_to_copy); - - assert(contains(orig_to_copy, entry)); - const u32 region = regions.at(entry); - - for (auto v : enters) { - if (v == entry) { - continue; - } - assert(contains(orig_to_copy, v)); - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (regions.at(u) < region) { - assert(edge(orig_to_copy[u], orig_to_copy[v], out).second); - remove_edge(orig_to_copy[u], orig_to_copy[v], out); - } - } - } - - pruneUseless(out); -} - -static + orig_to_copy.clear(); + cloneHolder(out, g, &orig_to_copy); + + assert(contains(orig_to_copy, entry)); + const u32 region = regions.at(entry); + + for (auto v : enters) { + if (v == entry) { + continue; + } + assert(contains(orig_to_copy, v)); + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (regions.at(u) < 
region) { + assert(edge(orig_to_copy[u], orig_to_copy[v], out).second); + remove_edge(orig_to_copy[u], orig_to_copy[v], out); + } + } + } + + pruneUseless(out); +} + +static void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, - vector<NFAVertex> &enters) { - assert(!enters.empty()); - const u32 split_region = regions.at(enters.front()); - - vector<NFAVertex> new_enters; - - // Gather the list of vertices in the split region and subsequent regions. - vector<NFAVertex> tail_vertices; - for (auto v : vertices_range(g)) { - if (is_special(v, g) || regions.at(v) < split_region) { - continue; - } - tail_vertices.push_back(v); - } - - for (auto enter : enters) { + vector<NFAVertex> &enters) { + assert(!enters.empty()); + const u32 split_region = regions.at(enters.front()); + + vector<NFAVertex> new_enters; + + // Gather the list of vertices in the split region and subsequent regions. + vector<NFAVertex> tail_vertices; + for (auto v : vertices_range(g)) { + if (is_special(v, g) || regions.at(v) < split_region) { + continue; + } + tail_vertices.push_back(v); + } + + for (auto enter : enters) { DEBUG_PRINTF("processing enter %zu\n", g[enter].index); - map<NFAVertex, NFAVertex> orig_to_copy; - - // Make a copy of all of the tail vertices, storing region info along - // the way. - for (auto v : tail_vertices) { - auto v2 = clone_vertex(g, v); - orig_to_copy[v] = v2; - regions[v2] = regions.at(v); - } - - // Wire up the edges: edges from previous regions come from the - // original vertices, while edges internal to and beyond the split - // region go to the copies. 
- - for (const auto &m : orig_to_copy) { - NFAVertex v = m.first, v2 = m.second; - - for (const auto &e : out_edges_range(v, g)) { - NFAVertex t = target(e, g); - u32 t_region = regions.at(t); - if (t_region >= split_region && !is_special(t, g)) { - assert(contains(orig_to_copy, t)); - t = orig_to_copy[t]; - } - add_edge_if_not_present(v2, t, g[e], g); - } - - for (const auto &e : in_edges_range(v, g)) { - NFAVertex u = source(e, g); - if (regions.at(u) >= split_region && !is_special(u, g)) { - assert(contains(orig_to_copy, u)); - u = orig_to_copy[u]; - } - add_edge_if_not_present(u, v2, g[e], g); - } - - } - - // Clear the in-edges from earlier regions of the OTHER enters for this - // copy of the split region. - for (auto v : enters) { - if (v == enter) { - continue; - } - - remove_in_edge_if(orig_to_copy[v], - [&](const NFAEdge &e) { - NFAVertex u = source(e, g); - return regions.at(u) < split_region; + map<NFAVertex, NFAVertex> orig_to_copy; + + // Make a copy of all of the tail vertices, storing region info along + // the way. + for (auto v : tail_vertices) { + auto v2 = clone_vertex(g, v); + orig_to_copy[v] = v2; + regions[v2] = regions.at(v); + } + + // Wire up the edges: edges from previous regions come from the + // original vertices, while edges internal to and beyond the split + // region go to the copies. 
+ + for (const auto &m : orig_to_copy) { + NFAVertex v = m.first, v2 = m.second; + + for (const auto &e : out_edges_range(v, g)) { + NFAVertex t = target(e, g); + u32 t_region = regions.at(t); + if (t_region >= split_region && !is_special(t, g)) { + assert(contains(orig_to_copy, t)); + t = orig_to_copy[t]; + } + add_edge_if_not_present(v2, t, g[e], g); + } + + for (const auto &e : in_edges_range(v, g)) { + NFAVertex u = source(e, g); + if (regions.at(u) >= split_region && !is_special(u, g)) { + assert(contains(orig_to_copy, u)); + u = orig_to_copy[u]; + } + add_edge_if_not_present(u, v2, g[e], g); + } + + } + + // Clear the in-edges from earlier regions of the OTHER enters for this + // copy of the split region. + for (auto v : enters) { + if (v == enter) { + continue; + } + + remove_in_edge_if(orig_to_copy[v], + [&](const NFAEdge &e) { + NFAVertex u = source(e, g); + return regions.at(u) < split_region; }, g); - } - - new_enters.push_back(orig_to_copy[enter]); - } - - // Remove the original set of tail vertices. - remove_vertices(tail_vertices, g); - pruneUseless(g); - regions = assignRegions(g); - - enters.swap(new_enters); -} - -static -bool doTreePlanningIntl(NGHolder &g, + } + + new_enters.push_back(orig_to_copy[enter]); + } + + // Remove the original set of tail vertices. 
+ remove_vertices(tail_vertices, g); + pruneUseless(g); + regions = assignRegions(g); + + enters.swap(new_enters); +} + +static +bool doTreePlanningIntl(NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator picked, u32 bad_region, - u32 parent_plan, + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator picked, u32 bad_region, + u32 parent_plan, const unordered_map<NFAVertex, NFAVertex> ©_to_orig, - vector<som_plan> &plan, const Grey &grey) { - assert(picked != info.end()); - - DEBUG_PRINTF("picked=%u\n", picked->first); - DEBUG_PRINTF("parent is %u\n", parent_plan); - - map<u32, region_info>::const_iterator furthest; - - bool to_end = false; - while (!to_end) { - DEBUG_PRINTF("picked is %u\n", picked->first); - DEBUG_PRINTF("first bad region now %u\n", bad_region); - - furthest = info.find(bad_region); /* first bad */ - if (furthest == info.end()) { - DEBUG_PRINTF("no partition\n"); - return false; - } - --furthest; /* last region we can establish som for */ - - if (furthest->first <= picked->first) { - DEBUG_PRINTF("failed to make any progress\n"); - return false; - } - - map<u32, region_info>::const_iterator furthest_lock = furthest; - CharReach next_escapes; - bool lock_found; - /* The last possible lock in the range that we examine should be the - * best. If the previous plan is a lock, this follow as any early lock - * must have a reach that is a subset of the last plan's lock. If the - * last plan is a resetting plan ..., ?is this true? */ - do { - lock_found = isPossibleLock(g, furthest_lock, info, - &next_escapes); - } while (!lock_found && (--furthest_lock)->first > picked->first); - DEBUG_PRINTF("lock possible? 
%d\n", (int)lock_found); - - if (lock_found && !isMandRegionBetween(picked, furthest_lock)) { - lock_found = false; - } - - if (!isMandRegionBetween(picked, furthest)) { - return false; - } - - /* There is no certainty that the som at a reset location will always - * go forward */ - if (plan[parent_plan].is_reset && lock_found) { - NGHolder midfix; - DEBUG_PRINTF("checking if midfix is suitable for lock\n"); - fillHolderForLockCheck(&midfix, g, info, furthest_lock); - - if (!firstMatchIsFirst(midfix)) { - DEBUG_PRINTF("not stuck\n"); - lock_found = false; - } - } - - if (!addPlan(plan, parent_plan)) { - return false; - } - - to_end = false; - - if (lock_found && next_escapes.none()) { - picked = furthest_lock; - to_end = true; - } - - if (!to_end) { - NGHolder conservative_midfix; /* for use in reset, exsl analysis */ - fillRoughMidfix(&conservative_midfix, g, regions, info, furthest); - dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey); - - u32 old_bad_region = bad_region; - to_end = advancePlan(g, regions, conservative_midfix, lock_found, - picked, furthest, furthest_lock, next_escapes, - plan.back(), &bad_region); - if (!to_end - && bad_region <= old_bad_region) { /* we failed to progress */ - DEBUG_PRINTF("failed to make any progress\n"); - return false; - } - } - - /* handle direct edge to accepts from region */ - if (edge(furthest->second.exits.front(), g.accept, g).second - || edge(furthest->second.exits.front(), g.acceptEod, g).second) { - map<u32, region_info>::const_iterator it = furthest; - do { - addMappedReporterVertices(it->second, g, copy_to_orig, - plan.back().reporters_in); - } while (it != info.begin() && it->second.optional && (it--)->first); - } - - /* create second prefix */ - plan.back().prefix = makePrefix(g, regions, furthest->second, - next(furthest)->second); - parent_plan = plan.size() - 1; - } - - // The last region contributes reporters. If it's optional, the regions - // before it do as well. 
- map<u32, region_info>::const_reverse_iterator it = info.rbegin(); - do { - DEBUG_PRINTF("add mapped reporters for region %u\n", it->first); - addMappedReporterVertices(it->second, g, copy_to_orig, - plan.back().reporters); - } while (it->second.optional && it != info.rend() && - (++it)->first > furthest->first); - - return true; -} - -static -bool doTreePlanning(NGHolder &g, - map<u32, region_info>::const_iterator presplit, - map<u32, region_info>::const_iterator picked, - vector<som_plan> &plan, const Grey &grey) { - DEBUG_PRINTF("picked is %u\n", picked->first); - DEBUG_PRINTF("presplit is %u\n", presplit->first); - - map<u32, region_info>::const_iterator splitter = next(presplit); - vector<NFAVertex> enters = splitter->second.enters; // mutable copy - DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size()); - - if (enters.size() <= 1) { - // TODO: Splitting a region with one entry won't get us anywhere, but - // it shouldn't create buggy analyses either. See UE-1892. - DEBUG_PRINTF("nothing to split\n"); - return false; - } - - if (plan.size() + enters.size() > MAX_SOM_PLANS) { - DEBUG_PRINTF("splitting this tree would hit the plan limit.\n"); - return false; - } - - assert(!plan.empty()); - const u32 parent_plan = plan.size() - 1; - - // Make a copy of the graph, with the subgraph under each enter vertex - // duplicated without the edges into the other enter vertices. - // NOTE WELL: this will invalidate 'info' from the split point, but it's - // OK... we don't use it after this. 
- auto g_regions = assignRegions(g); - expandGraph(g, g_regions, enters); - dumpHolder(g, g_regions, 14, "som_expandedtree", grey); - - for (auto v : enters) { + vector<som_plan> &plan, const Grey &grey) { + assert(picked != info.end()); + + DEBUG_PRINTF("picked=%u\n", picked->first); + DEBUG_PRINTF("parent is %u\n", parent_plan); + + map<u32, region_info>::const_iterator furthest; + + bool to_end = false; + while (!to_end) { + DEBUG_PRINTF("picked is %u\n", picked->first); + DEBUG_PRINTF("first bad region now %u\n", bad_region); + + furthest = info.find(bad_region); /* first bad */ + if (furthest == info.end()) { + DEBUG_PRINTF("no partition\n"); + return false; + } + --furthest; /* last region we can establish som for */ + + if (furthest->first <= picked->first) { + DEBUG_PRINTF("failed to make any progress\n"); + return false; + } + + map<u32, region_info>::const_iterator furthest_lock = furthest; + CharReach next_escapes; + bool lock_found; + /* The last possible lock in the range that we examine should be the + * best. If the previous plan is a lock, this follow as any early lock + * must have a reach that is a subset of the last plan's lock. If the + * last plan is a resetting plan ..., ?is this true? */ + do { + lock_found = isPossibleLock(g, furthest_lock, info, + &next_escapes); + } while (!lock_found && (--furthest_lock)->first > picked->first); + DEBUG_PRINTF("lock possible? 
%d\n", (int)lock_found); + + if (lock_found && !isMandRegionBetween(picked, furthest_lock)) { + lock_found = false; + } + + if (!isMandRegionBetween(picked, furthest)) { + return false; + } + + /* There is no certainty that the som at a reset location will always + * go forward */ + if (plan[parent_plan].is_reset && lock_found) { + NGHolder midfix; + DEBUG_PRINTF("checking if midfix is suitable for lock\n"); + fillHolderForLockCheck(&midfix, g, info, furthest_lock); + + if (!firstMatchIsFirst(midfix)) { + DEBUG_PRINTF("not stuck\n"); + lock_found = false; + } + } + + if (!addPlan(plan, parent_plan)) { + return false; + } + + to_end = false; + + if (lock_found && next_escapes.none()) { + picked = furthest_lock; + to_end = true; + } + + if (!to_end) { + NGHolder conservative_midfix; /* for use in reset, exsl analysis */ + fillRoughMidfix(&conservative_midfix, g, regions, info, furthest); + dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey); + + u32 old_bad_region = bad_region; + to_end = advancePlan(g, regions, conservative_midfix, lock_found, + picked, furthest, furthest_lock, next_escapes, + plan.back(), &bad_region); + if (!to_end + && bad_region <= old_bad_region) { /* we failed to progress */ + DEBUG_PRINTF("failed to make any progress\n"); + return false; + } + } + + /* handle direct edge to accepts from region */ + if (edge(furthest->second.exits.front(), g.accept, g).second + || edge(furthest->second.exits.front(), g.acceptEod, g).second) { + map<u32, region_info>::const_iterator it = furthest; + do { + addMappedReporterVertices(it->second, g, copy_to_orig, + plan.back().reporters_in); + } while (it != info.begin() && it->second.optional && (it--)->first); + } + + /* create second prefix */ + plan.back().prefix = makePrefix(g, regions, furthest->second, + next(furthest)->second); + parent_plan = plan.size() - 1; + } + + // The last region contributes reporters. If it's optional, the regions + // before it do as well. 
+ map<u32, region_info>::const_reverse_iterator it = info.rbegin(); + do { + DEBUG_PRINTF("add mapped reporters for region %u\n", it->first); + addMappedReporterVertices(it->second, g, copy_to_orig, + plan.back().reporters); + } while (it->second.optional && it != info.rend() && + (++it)->first > furthest->first); + + return true; +} + +static +bool doTreePlanning(NGHolder &g, + map<u32, region_info>::const_iterator presplit, + map<u32, region_info>::const_iterator picked, + vector<som_plan> &plan, const Grey &grey) { + DEBUG_PRINTF("picked is %u\n", picked->first); + DEBUG_PRINTF("presplit is %u\n", presplit->first); + + map<u32, region_info>::const_iterator splitter = next(presplit); + vector<NFAVertex> enters = splitter->second.enters; // mutable copy + DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size()); + + if (enters.size() <= 1) { + // TODO: Splitting a region with one entry won't get us anywhere, but + // it shouldn't create buggy analyses either. See UE-1892. + DEBUG_PRINTF("nothing to split\n"); + return false; + } + + if (plan.size() + enters.size() > MAX_SOM_PLANS) { + DEBUG_PRINTF("splitting this tree would hit the plan limit.\n"); + return false; + } + + assert(!plan.empty()); + const u32 parent_plan = plan.size() - 1; + + // Make a copy of the graph, with the subgraph under each enter vertex + // duplicated without the edges into the other enter vertices. + // NOTE WELL: this will invalidate 'info' from the split point, but it's + // OK... we don't use it after this. + auto g_regions = assignRegions(g); + expandGraph(g, g_regions, enters); + dumpHolder(g, g_regions, 14, "som_expandedtree", grey); + + for (auto v : enters) { DEBUG_PRINTF("enter %zu\n", g[v].index); - - // For this entry vertex, construct a version of the graph without the - // other entries in this region (g_path), and calculate its depths and - // regions. 
- - NGHolder g_path; + + // For this entry vertex, construct a version of the graph without the + // other entries in this region (g_path), and calculate its depths and + // regions. + + NGHolder g_path; unordered_map<NFAVertex, NFAVertex> orig_to_copy; - cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy); - auto regions = assignRegions(g_path); - dumpHolder(g_path, regions, 14, "som_treepath", grey); - - map<u32, region_info> path_info; - buildRegionMapping(g_path, regions, path_info); - - // Translate 'picked' to the corresponding region iterator over the - // g_path graph. we can't trust the numbering, so we use a vertex - // instead. - NFAVertex picked_v = picked->second.enters.front(); - assert(contains(orig_to_copy, picked_v)); - u32 picked_region = regions.at(orig_to_copy[picked_v]); - map<u32, region_info>::const_iterator path_pick = - path_info.find(picked_region); - if (path_pick == path_info.end()) { - assert(0); // odd - return false; - } - - // Similarly, find our bad_region. - assert(contains(orig_to_copy, v)); - u32 bad_region = regions.at(orig_to_copy[v]); - - // It's possible that the region may have grown to include its - // successors, in which case we (currently) run screaming. Just - // checking the size should be sufficient here. - if (picked->second.full.size() != path_pick->second.full.size()) { - DEBUG_PRINTF("picked region has grown, bailing\n"); - return false; - } - - // Construct reverse mapping from vertices in g_path to g. + cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy); + auto regions = assignRegions(g_path); + dumpHolder(g_path, regions, 14, "som_treepath", grey); + + map<u32, region_info> path_info; + buildRegionMapping(g_path, regions, path_info); + + // Translate 'picked' to the corresponding region iterator over the + // g_path graph. we can't trust the numbering, so we use a vertex + // instead. 
+ NFAVertex picked_v = picked->second.enters.front(); + assert(contains(orig_to_copy, picked_v)); + u32 picked_region = regions.at(orig_to_copy[picked_v]); + map<u32, region_info>::const_iterator path_pick = + path_info.find(picked_region); + if (path_pick == path_info.end()) { + assert(0); // odd + return false; + } + + // Similarly, find our bad_region. + assert(contains(orig_to_copy, v)); + u32 bad_region = regions.at(orig_to_copy[v]); + + // It's possible that the region may have grown to include its + // successors, in which case we (currently) run screaming. Just + // checking the size should be sufficient here. + if (picked->second.full.size() != path_pick->second.full.size()) { + DEBUG_PRINTF("picked region has grown, bailing\n"); + return false; + } + + // Construct reverse mapping from vertices in g_path to g. unordered_map<NFAVertex, NFAVertex> copy_to_orig; - for (const auto &m : orig_to_copy) { - copy_to_orig.insert(make_pair(m.second, m.first)); - } - - bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick, - bad_region, parent_plan, - copy_to_orig, plan, grey); - if (!to_end) { - return false; - } - } - - return true; -} - -enum dsp_behaviour { - ALLOW_MODIFY_HOLDER, - DISALLOW_MODIFY_HOLDER /* say no to tree planning */ -}; - -static -bool doSomPlanning(NGHolder &g, bool stuck_in, + for (const auto &m : orig_to_copy) { + copy_to_orig.insert(make_pair(m.second, m.first)); + } + + bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick, + bad_region, parent_plan, + copy_to_orig, plan, grey); + if (!to_end) { + return false; + } + } + + return true; +} + +enum dsp_behaviour { + ALLOW_MODIFY_HOLDER, + DISALLOW_MODIFY_HOLDER /* say no to tree planning */ +}; + +static +bool doSomPlanning(NGHolder &g, bool stuck_in, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator picked, - vector<som_plan> &plan, - const Grey &grey, - dsp_behaviour behaviour = 
ALLOW_MODIFY_HOLDER) { - DEBUG_PRINTF("in picked is %u\n", picked->first); - - /* Need to verify how far the lock covers */ - u32 bad_region; - NGHolder *ap_pref = plan.back().prefix.get(); - NGHolder ap_temp; - if (hasBigCycles(*ap_pref)) { - fillRoughMidfix(&ap_temp, g, regions, info, picked); - ap_pref = &ap_temp; - } - - bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked, - picked, picked, plan.back().escapes, - plan.back(), &bad_region); - - if (to_end) { - DEBUG_PRINTF("advanced through the whole graph in one go!\n"); - addReporterVertices(g, plan.back().reporters); - return true; - } - - map<u32, region_info>::const_iterator prev_furthest = picked; - map<u32, region_info>::const_iterator furthest; - - furthest = info.find(bad_region); /* first bad */ - if (furthest == info.begin() || furthest == info.end()) { - DEBUG_PRINTF("no partition\n"); - return false; - } - --furthest; /* last region we can establish som for */ - - if (furthest->first <= picked->first) { - do_tree: - /* unable to establish SoM past the last picked region */ - if (behaviour == DISALLOW_MODIFY_HOLDER) { - /* tree planning mutates the graph */ - return false; - } - - DEBUG_PRINTF("failed to make any progress\n"); - assert(!plan.empty()); - if (plan.size() == 1) { - DEBUG_PRINTF("not handling initial alternations yet\n"); - return false; - } - plan.pop_back(); - return doTreePlanning(g, furthest, prev_furthest, plan, grey); - } - - furthest = picked; - while (!to_end) { - prev_furthest = furthest; - - DEBUG_PRINTF("prev further is %u\n", prev_furthest->first); - DEBUG_PRINTF("first bad region now %u\n", bad_region); - - furthest = info.find(bad_region); /* first bad */ - if (furthest == info.begin() || furthest == info.end()) { - DEBUG_PRINTF("no partition\n"); - return false; - } - --furthest; /* last region we can establish som for */ - - map<u32, region_info>::const_iterator furthest_lock = furthest; - CharReach next_escapes; - bool stuck; - do { - stuck = 
isPossibleLock(g, furthest_lock, info, &next_escapes); - } while (!stuck && (--furthest_lock)->first > prev_furthest->first); - DEBUG_PRINTF("lock possible? %d\n", (int)stuck); - DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first); - - if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) { - stuck = false; - } - - if (!isMandRegionBetween(prev_furthest, furthest)) { - DEBUG_PRINTF("no mand region between %u and %u\n", - prev_furthest->first, furthest->first); - return false; - } - - /* There is no certainty that the som at a reset location will always - * go forward */ - if (plan.back().is_reset && stuck) { - NGHolder midfix; - fillHolderForLockCheck(&midfix, g, info, furthest_lock); - - DEBUG_PRINTF("checking if midfix is suitable for lock\n"); - if (!firstMatchIsFirst(midfix)) { - DEBUG_PRINTF("not stuck\n"); - stuck = false; - } - } - - assert(!plan.empty()); - if (!addPlan(plan, plan.size() - 1)) { - return false; - } - - to_end = false; - - if (stuck && next_escapes.none()) { - picked = furthest_lock; - to_end = true; - } - - if (!to_end) { - NGHolder conservative_midfix; /* for use in reset, exsl analysis */ - fillRoughMidfix(&conservative_midfix, g, regions, info, furthest); - - u32 old_bad_region = bad_region; - to_end = advancePlan(g, regions, conservative_midfix, stuck, picked, - furthest, furthest_lock, next_escapes, - plan.back(), &bad_region); - - if (!to_end - && bad_region <= old_bad_region) { /* we failed to progress */ - goto do_tree; - } - } - - /* handle direct edge to accepts from region */ - if (edge(furthest->second.exits.front(), g.accept, g).second - || edge(furthest->second.exits.front(), g.acceptEod, g).second) { - map<u32, region_info>::const_iterator it = furthest; - do { - DEBUG_PRINTF("direct edge to accept from region %u\n", - it->first); - addReporterVertices(it->second, g, plan.back().reporters_in); - } while (it != info.begin() && it->second.optional - && (it--)->first); - } - - /* create second prefix */ - 
plan.back().prefix = makePrefix(g, regions, furthest->second, - next(furthest)->second); - } - DEBUG_PRINTF("(final) picked is %u\n", picked->first); - - // The last region contributes reporters. If it's optional, the regions - // before it do as well. - map<u32, region_info>::const_reverse_iterator it = info.rbegin(); - do { - DEBUG_PRINTF("region %u contributes reporters to last plan\n", - it->first); - addReporterVertices(it->second, g, plan.back().reporters); - } while (it->second.optional && it != info.rend() && - (++it)->first > furthest->first); - - DEBUG_PRINTF("done!\n"); - return true; -} - -static -void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, - UNUSED size_t num) { -#if defined(DEBUG) || defined(DUMP_PLANS) - DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, " - "parent=%u\n", - num, p.prefix.get(), - describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(), - p.is_reset, p.parent); - printf(" reporters:"); - for (auto v : p.reporters) { + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator picked, + vector<som_plan> &plan, + const Grey &grey, + dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) { + DEBUG_PRINTF("in picked is %u\n", picked->first); + + /* Need to verify how far the lock covers */ + u32 bad_region; + NGHolder *ap_pref = plan.back().prefix.get(); + NGHolder ap_temp; + if (hasBigCycles(*ap_pref)) { + fillRoughMidfix(&ap_temp, g, regions, info, picked); + ap_pref = &ap_temp; + } + + bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked, + picked, picked, plan.back().escapes, + plan.back(), &bad_region); + + if (to_end) { + DEBUG_PRINTF("advanced through the whole graph in one go!\n"); + addReporterVertices(g, plan.back().reporters); + return true; + } + + map<u32, region_info>::const_iterator prev_furthest = picked; + map<u32, region_info>::const_iterator furthest; + + furthest = info.find(bad_region); /* first bad */ + if (furthest == info.begin() || furthest == info.end()) { + 
DEBUG_PRINTF("no partition\n"); + return false; + } + --furthest; /* last region we can establish som for */ + + if (furthest->first <= picked->first) { + do_tree: + /* unable to establish SoM past the last picked region */ + if (behaviour == DISALLOW_MODIFY_HOLDER) { + /* tree planning mutates the graph */ + return false; + } + + DEBUG_PRINTF("failed to make any progress\n"); + assert(!plan.empty()); + if (plan.size() == 1) { + DEBUG_PRINTF("not handling initial alternations yet\n"); + return false; + } + plan.pop_back(); + return doTreePlanning(g, furthest, prev_furthest, plan, grey); + } + + furthest = picked; + while (!to_end) { + prev_furthest = furthest; + + DEBUG_PRINTF("prev further is %u\n", prev_furthest->first); + DEBUG_PRINTF("first bad region now %u\n", bad_region); + + furthest = info.find(bad_region); /* first bad */ + if (furthest == info.begin() || furthest == info.end()) { + DEBUG_PRINTF("no partition\n"); + return false; + } + --furthest; /* last region we can establish som for */ + + map<u32, region_info>::const_iterator furthest_lock = furthest; + CharReach next_escapes; + bool stuck; + do { + stuck = isPossibleLock(g, furthest_lock, info, &next_escapes); + } while (!stuck && (--furthest_lock)->first > prev_furthest->first); + DEBUG_PRINTF("lock possible? 
%d\n", (int)stuck); + DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first); + + if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) { + stuck = false; + } + + if (!isMandRegionBetween(prev_furthest, furthest)) { + DEBUG_PRINTF("no mand region between %u and %u\n", + prev_furthest->first, furthest->first); + return false; + } + + /* There is no certainty that the som at a reset location will always + * go forward */ + if (plan.back().is_reset && stuck) { + NGHolder midfix; + fillHolderForLockCheck(&midfix, g, info, furthest_lock); + + DEBUG_PRINTF("checking if midfix is suitable for lock\n"); + if (!firstMatchIsFirst(midfix)) { + DEBUG_PRINTF("not stuck\n"); + stuck = false; + } + } + + assert(!plan.empty()); + if (!addPlan(plan, plan.size() - 1)) { + return false; + } + + to_end = false; + + if (stuck && next_escapes.none()) { + picked = furthest_lock; + to_end = true; + } + + if (!to_end) { + NGHolder conservative_midfix; /* for use in reset, exsl analysis */ + fillRoughMidfix(&conservative_midfix, g, regions, info, furthest); + + u32 old_bad_region = bad_region; + to_end = advancePlan(g, regions, conservative_midfix, stuck, picked, + furthest, furthest_lock, next_escapes, + plan.back(), &bad_region); + + if (!to_end + && bad_region <= old_bad_region) { /* we failed to progress */ + goto do_tree; + } + } + + /* handle direct edge to accepts from region */ + if (edge(furthest->second.exits.front(), g.accept, g).second + || edge(furthest->second.exits.front(), g.acceptEod, g).second) { + map<u32, region_info>::const_iterator it = furthest; + do { + DEBUG_PRINTF("direct edge to accept from region %u\n", + it->first); + addReporterVertices(it->second, g, plan.back().reporters_in); + } while (it != info.begin() && it->second.optional + && (it--)->first); + } + + /* create second prefix */ + plan.back().prefix = makePrefix(g, regions, furthest->second, + next(furthest)->second); + } + DEBUG_PRINTF("(final) picked is %u\n", picked->first); + + // The 
last region contributes reporters. If it's optional, the regions + // before it do as well. + map<u32, region_info>::const_reverse_iterator it = info.rbegin(); + do { + DEBUG_PRINTF("region %u contributes reporters to last plan\n", + it->first); + addReporterVertices(it->second, g, plan.back().reporters); + } while (it->second.optional && it != info.rend() && + (++it)->first > furthest->first); + + DEBUG_PRINTF("done!\n"); + return true; +} + +static +void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, + UNUSED size_t num) { +#if defined(DEBUG) || defined(DUMP_PLANS) + DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, " + "parent=%u\n", + num, p.prefix.get(), + describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(), + p.is_reset, p.parent); + printf(" reporters:"); + for (auto v : p.reporters) { printf(" %zu", g[v].index); - } - printf("\n"); - printf(" reporters_in:"); - for (auto v : p.reporters_in) { + } + printf("\n"); + printf(" reporters_in:"); + for (auto v : p.reporters_in) { printf(" %zu", g[v].index); - } - printf("\n"); -#endif -} - -/** - * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too - * large exception as (1) if previous ng modification have been applied (other - * midfixes have been applied), ng will be an undefined state on return and (2) - * if the head of a pattern cannot be implemented we are generally unable to - * implement the full pattern. - */ -static + } + printf("\n"); +#endif +} + +/** + * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too + * large exception as (1) if previous ng modification have been applied (other + * midfixes have been applied), ng will be an undefined state on return and (2) + * if the head of a pattern cannot be implemented we are generally unable to + * implement the full pattern. 
+ */ +static void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, NGHolder &g, vector<som_plan> &plan, const u32 first_som_slot) { - ReportManager &rm = ng.rm; - SomSlotManager &ssm = ng.ssm; - - DEBUG_PRINTF("%zu plans\n", plan.size()); - assert(plan.size() <= MAX_SOM_PLANS); - assert(!plan.empty()); - - vector<u32> som_slots(plan.size()); - som_slots[0] = first_som_slot; - - // Root plan, which already has a SOM slot assigned (first_som_slot). - dumpSomPlan(g, plan.front(), 0); + ReportManager &rm = ng.rm; + SomSlotManager &ssm = ng.ssm; + + DEBUG_PRINTF("%zu plans\n", plan.size()); + assert(plan.size() <= MAX_SOM_PLANS); + assert(!plan.empty()); + + vector<u32> som_slots(plan.size()); + som_slots[0] = first_som_slot; + + // Root plan, which already has a SOM slot assigned (first_som_slot). + dumpSomPlan(g, plan.front(), 0); dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0, ng.cc.grey); - assert(plan.front().prefix); - if (plan.front().escapes.any() && !plan.front().is_reset) { - /* setup escaper for first som location */ - if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, - first_som_slot)) { + assert(plan.front().prefix); + if (plan.front().escapes.any() && !plan.front().is_reset) { + /* setup escaper for first som location */ + if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, + first_som_slot)) { throw CompileError(expr.index, "Pattern is too large."); - } - } - - assert(plan.front().reporters_in.empty()); - updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot); - - // Tree of plans, encoded in a vector. - vector<som_plan>::const_iterator it = plan.begin(); - for (++it; it != plan.end(); ++it) { - const u32 plan_num = it - plan.begin(); - dumpSomPlan(g, *it, plan_num); + } + } + + assert(plan.front().reporters_in.empty()); + updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot); + + // Tree of plans, encoded in a vector. 
+ vector<som_plan>::const_iterator it = plan.begin(); + for (++it; it != plan.end(); ++it) { + const u32 plan_num = it - plan.begin(); + dumpSomPlan(g, *it, plan_num); dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id, - plan_num, ng.cc.grey); - - assert(it->parent < plan_num); - u32 som_slot_in = som_slots[it->parent]; - u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes, - it->is_reset, som_slot_in); - som_slots[plan_num] = som_slot_out; - - assert(!it->no_implement); - if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { + plan_num, ng.cc.grey); + + assert(it->parent < plan_num); + u32 som_slot_in = som_slots[it->parent]; + u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes, + it->is_reset, som_slot_in); + som_slots[plan_num] = som_slot_out; + + assert(!it->no_implement); + if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { throw CompileError(expr.index, "Pattern is too large."); - } - updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); - updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); - } - - /* create prefix to set the som_loc */ - if (!plan.front().no_implement) { + } + updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); + updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); + } + + /* create prefix to set the som_loc */ + if (!plan.front().no_implement) { renumber_vertices(*plan.front().prefix); - assert(plan.front().prefix->kind == NFA_OUTFIX); - if (!ng.addHolder(*plan.front().prefix)) { + assert(plan.front().prefix->kind == NFA_OUTFIX); + if (!ng.addHolder(*plan.front().prefix)) { throw CompileError(expr.index, "Pattern is too large."); - } - } -} - -static -void anchorStarts(NGHolder &g) { - vector<NFAEdge> dead; - for (const auto &e : out_edges_range(g.startDs, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - add_edge_if_not_present(g.start, v, g[e], g); - dead.push_back(e); - } - remove_edges(dead, g); -} - -static -void 
setZeroReports(NGHolder &g) { - set<NFAVertex> acceptors; - insert(&acceptors, inv_adjacent_vertices(g.accept, g)); - insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g)); - acceptors.erase(g.accept); - - for (auto v : vertices_range(g)) { - auto &reports = g[v].reports; - reports.clear(); - - if (!contains(acceptors, v)) { - continue; - } - - // We use the report ID to store the offset adjustment used for virtual - // starts. - - if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { - reports.insert(1); - } else { - reports.insert(0); - } - } -} - -/* updates the reports on all vertices leading to the sink */ -static -void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink, - const ReportID report, const u32 comp_id) { - // Construct replacement report. - Report ir = rm.getReport(report); - ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA; - ir.revNfaIndex = comp_id; - ReportID new_report = rm.getInternalId(ir); - - for (auto v : inv_adjacent_vertices_range(sink, g)) { - if (v == g.accept) { - continue; - } - - auto &r = g[v].reports; - if (contains(r, report)) { - r.erase(report); - r.insert(new_report); - } - } -} - -static -void clearProperInEdges(NGHolder &g, const NFAVertex sink) { - vector<NFAEdge> dead; - for (const auto &e : in_edges_range(sink, g)) { - if (source(e, g) == g.accept) { - continue; - } - dead.push_back(e); - } - - if (dead.empty()) { - return; - } - - remove_edges(dead, g); - pruneUseless(g, false); -} - -namespace { -struct SomRevNfa { + } + } +} + +static +void anchorStarts(NGHolder &g) { + vector<NFAEdge> dead; + for (const auto &e : out_edges_range(g.startDs, g)) { + NFAVertex v = target(e, g); + if (v == g.startDs) { + continue; + } + add_edge_if_not_present(g.start, v, g[e], g); + dead.push_back(e); + } + remove_edges(dead, g); +} + +static +void setZeroReports(NGHolder &g) { + set<NFAVertex> acceptors; + insert(&acceptors, inv_adjacent_vertices(g.accept, g)); + insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g)); + 
acceptors.erase(g.accept); + + for (auto v : vertices_range(g)) { + auto &reports = g[v].reports; + reports.clear(); + + if (!contains(acceptors, v)) { + continue; + } + + // We use the report ID to store the offset adjustment used for virtual + // starts. + + if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { + reports.insert(1); + } else { + reports.insert(0); + } + } +} + +/* updates the reports on all vertices leading to the sink */ +static +void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink, + const ReportID report, const u32 comp_id) { + // Construct replacement report. + Report ir = rm.getReport(report); + ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA; + ir.revNfaIndex = comp_id; + ReportID new_report = rm.getInternalId(ir); + + for (auto v : inv_adjacent_vertices_range(sink, g)) { + if (v == g.accept) { + continue; + } + + auto &r = g[v].reports; + if (contains(r, report)) { + r.erase(report); + r.insert(new_report); + } + } +} + +static +void clearProperInEdges(NGHolder &g, const NFAVertex sink) { + vector<NFAEdge> dead; + for (const auto &e : in_edges_range(sink, g)) { + if (source(e, g) == g.accept) { + continue; + } + dead.push_back(e); + } + + if (dead.empty()) { + return; + } + + remove_edges(dead, g); + pruneUseless(g, false); +} + +namespace { +struct SomRevNfa { SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n) - : sink(s), report(r), nfa(move(n)) {} - NFAVertex sink; - ReportID report; + : sink(s), report(r), nfa(move(n)) {} + NFAVertex sink; + ReportID report; bytecode_ptr<NFA> nfa; -}; -} - -static +}; +} + +static bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g, const CompileContext &cc) { - // Create a reversed anchored version of this NFA which fires a zero report - // ID on accept. - NGHolder g_rev; - reverseHolder(g, g_rev); - anchorStarts(g_rev); - setZeroReports(g_rev); - - // Prep for actual construction. + // Create a reversed anchored version of this NFA which fires a zero report + // ID on accept. 
+ NGHolder g_rev; + reverseHolder(g, g_rev); + anchorStarts(g_rev); + setZeroReports(g_rev); + + // Prep for actual construction. renumber_vertices(g_rev); - g_rev.kind = NFA_REV_PREFIX; - reduceGraphEquivalences(g_rev, cc); - removeRedundancy(g_rev, SOM_NONE); - - DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev)); - + g_rev.kind = NFA_REV_PREFIX; + reduceGraphEquivalences(g_rev, cc); + removeRedundancy(g_rev, SOM_NONE); + + DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev)); + auto nfa = constructReversedNFA(g_rev, cc); - if (!nfa) { - return nfa; - } - - // Set some useful properties. - depth maxWidth = findMaxWidth(g); - if (maxWidth.is_finite()) { - nfa->maxWidth = (u32)maxWidth; - } else { - nfa->maxWidth = 0; - } - depth minWidth = findMinWidth(g); - nfa->minWidth = (u32)minWidth; - - return nfa; -} - -static -bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g, - const ReportID report, const NFAVertex sink, - const CompileContext &cc) { - // Clone the graph with ONLY the given report vertices on the given sink. - NGHolder g2; - cloneHolder(g2, g); - clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept); - pruneAllOtherReports(g2, report); - - if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) { - DEBUG_PRINTF("no work to do for this sink\n"); - return true; - } - + if (!nfa) { + return nfa; + } + + // Set some useful properties. + depth maxWidth = findMaxWidth(g); + if (maxWidth.is_finite()) { + nfa->maxWidth = (u32)maxWidth; + } else { + nfa->maxWidth = 0; + } + depth minWidth = findMinWidth(g); + nfa->minWidth = (u32)minWidth; + + return nfa; +} + +static +bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g, + const ReportID report, const NFAVertex sink, + const CompileContext &cc) { + // Clone the graph with ONLY the given report vertices on the given sink. + NGHolder g2; + cloneHolder(g2, g); + clearProperInEdges(g2, sink == g.accept ? 
g2.acceptEod : g2.accept); + pruneAllOtherReports(g2, report); + + if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) { + DEBUG_PRINTF("no work to do for this sink\n"); + return true; + } + renumber_vertices(g2); // for findMinWidth, findMaxWidth. - + auto nfa = makeBareSomRevNfa(g2, cc); - if (!nfa) { - DEBUG_PRINTF("couldn't build rev nfa\n"); - return false; - } - - som_nfas.emplace_back(sink, report, move(nfa)); - return true; -} - -static -bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { - ReportManager &rm = ng.rm; - - // FIXME might want to work on a graph without extra redundancy? - depth maxWidth = findMaxWidth(g); - DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str()); - - if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) { - DEBUG_PRINTF("too wide\n"); - return false; - } - - set<ReportID> reports = all_reports(g); - DEBUG_PRINTF("%zu reports\n", reports.size()); - - // We distinguish between reports and accept/acceptEod sinks in order to - // correctly handle cases which do different things on eod/normal accepts. - // Later, it might be more elegant to do this with a single NFA and - // multi-tops. - - vector<SomRevNfa> som_nfas; - - for (auto report : reports) { - if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) { - return false; - } - if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) { - return false; - } - } - - for (auto &som_nfa : som_nfas) { - assert(som_nfa.nfa); - - // Transfer ownership of the NFA to the SOM slot manager. - u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth); - - // Replace this report on 'g' with a SOM_REV_NFA report pointing at our - // new component. 
- makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id); - } - - if (ng.cc.streaming) { - assert(ng.ssm.somHistoryRequired() <= - max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable)); - } - - return true; -} - -static + if (!nfa) { + DEBUG_PRINTF("couldn't build rev nfa\n"); + return false; + } + + som_nfas.emplace_back(sink, report, move(nfa)); + return true; +} + +static +bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { + ReportManager &rm = ng.rm; + + // FIXME might want to work on a graph without extra redundancy? + depth maxWidth = findMaxWidth(g); + DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str()); + + if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) { + DEBUG_PRINTF("too wide\n"); + return false; + } + + set<ReportID> reports = all_reports(g); + DEBUG_PRINTF("%zu reports\n", reports.size()); + + // We distinguish between reports and accept/acceptEod sinks in order to + // correctly handle cases which do different things on eod/normal accepts. + // Later, it might be more elegant to do this with a single NFA and + // multi-tops. + + vector<SomRevNfa> som_nfas; + + for (auto report : reports) { + if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) { + return false; + } + if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) { + return false; + } + } + + for (auto &som_nfa : som_nfas) { + assert(som_nfa.nfa); + + // Transfer ownership of the NFA to the SOM slot manager. + u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth); + + // Replace this report on 'g' with a SOM_REV_NFA report pointing at our + // new component. 
+ makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id); + } + + if (ng.cc.streaming) { + assert(ng.ssm.somHistoryRequired() <= + max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable)); + } + + return true; +} + +static u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, - const CompileContext &cc) { - depth maxWidth = findMaxWidth(g); - - assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable)); - assert(all_reports(g).size() == 1); - - auto nfa = makeBareSomRevNfa(g, cc); - if (!nfa) { + const CompileContext &cc) { + depth maxWidth = findMaxWidth(g); + + assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable)); + assert(all_reports(g).size() == 1); + + auto nfa = makeBareSomRevNfa(g, cc); + if (!nfa) { throw CompileError(expr.index, "Pattern is too large."); - } - - if (ng.cc.streaming) { - assert(ng.ssm.somHistoryRequired() <= - max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable)); - } - - return ng.ssm.addRevNfa(move(nfa), maxWidth); -} - -static -bool is_literable(const NGHolder &g, NFAVertex v) { - const CharReach &cr = g[v].char_reach; - return cr.count() == 1 || cr.isCaselessChar(); -} - -static -void append(ue2_literal &s, const CharReach &cr) { - assert(cr.count() == 1 || cr.isCaselessChar()); - s.push_back(cr.find_first(), cr.isCaselessChar()); -} - -static -map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator lower_bound, - ue2_literal &s_out, const Grey &grey) { -#define MIN_LITERAL_LENGTH 3 - s_out.clear(); - bool past_lower = false; - ue2_literal s; - map<u32, region_info>::const_iterator it; - for (it = info.begin(); it != info.end(); ++it) { - if (it == lower_bound) { - past_lower = true; - } - if (!it->second.optional && it->second.dag - && it->second.full.size() == 1 - && is_literable(g, it->second.full.front())) { - append(s, g[it->second.full.front()].char_reach); - - if (s.length() >= 
grey.maxHistoryAvailable && past_lower) { - goto exit; - } - } else { - if (past_lower && it != lower_bound - && s.length() >= MIN_LITERAL_LENGTH) { - --it; - goto exit; - } - s.clear(); - } - } - - if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) { - --it; - s_out = s; - return it; - } - exit: - if (s.length() > grey.maxHistoryAvailable) { - ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable; - for (; jt != s.end(); ++jt) { - s_out.push_back(*jt); - } - } else { - s_out = s; - } - return it; -} - -static -bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g, + } + + if (ng.cc.streaming) { + assert(ng.ssm.somHistoryRequired() <= + max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable)); + } + + return ng.ssm.addRevNfa(move(nfa), maxWidth); +} + +static +bool is_literable(const NGHolder &g, NFAVertex v) { + const CharReach &cr = g[v].char_reach; + return cr.count() == 1 || cr.isCaselessChar(); +} + +static +void append(ue2_literal &s, const CharReach &cr) { + assert(cr.count() == 1 || cr.isCaselessChar()); + s.push_back(cr.find_first(), cr.isCaselessChar()); +} + +static +map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g, + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator lower_bound, + ue2_literal &s_out, const Grey &grey) { +#define MIN_LITERAL_LENGTH 3 + s_out.clear(); + bool past_lower = false; + ue2_literal s; + map<u32, region_info>::const_iterator it; + for (it = info.begin(); it != info.end(); ++it) { + if (it == lower_bound) { + past_lower = true; + } + if (!it->second.optional && it->second.dag + && it->second.full.size() == 1 + && is_literable(g, it->second.full.front())) { + append(s, g[it->second.full.front()].char_reach); + + if (s.length() >= grey.maxHistoryAvailable && past_lower) { + goto exit; + } + } else { + if (past_lower && it != lower_bound + && s.length() >= MIN_LITERAL_LENGTH) { + --it; + goto exit; + } + s.clear(); + } + } + + if 
(past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) { + --it; + s_out = s; + return it; + } + exit: + if (s.length() > grey.maxHistoryAvailable) { + ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable; + for (; jt != s.end(); ++jt) { + s_out.push_back(*jt); + } + } else { + s_out = s; + } + return it; +} + +static +bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator picked, - const Grey &grey, - vector<som_plan> *plan) { - DEBUG_PRINTF("trying to chain from %u\n", picked->first); - const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ - - shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second, - next(picked)->second); - - // Quick check to stop us from trying this on huge graphs, which causes us - // to spend forever in ng_execute looking at cases that will most like - // fail. See UE-2078. - size_t prefix_size = num_vertices(*prefix); - size_t total_size = num_vertices(g); - assert(total_size >= prefix_size); - if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) { - DEBUG_PRINTF("suffix has %zu vertices, fail\n", - total_size - prefix_size); - return false; - } - - clearReports(*prefix); - for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) { - (*prefix)[u].reports.insert(0); - } - - dumpHolder(*prefix, 0, "full_haiglit_prefix", grey); - - CharReach escapes; - bool stuck = isPossibleLock(g, picked, info, &escapes); - if (stuck) { - NGHolder gg; - fillHolderForLockCheck(&gg, g, info, picked); - - stuck = firstMatchIsFirst(gg); - } - - DEBUG_PRINTF("stuck = %d\n", (int)stuck); - - // Note: no-one should ever pay attention to the root plan's som_loc_in. 
- plan->emplace_back(prefix, escapes, false, 0); - plan->back().no_implement = true; - - dumpHolder(*plan->back().prefix, 22, "som_prefix", grey); - - /* don't allow tree planning to mutate the graph */ - if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey, - DISALLOW_MODIFY_HOLDER)) { - // Rollback SOM locations. - ssm.rollbackSomTo(numSomLocsBefore); - - DEBUG_PRINTF("fail to chain\n"); - return false; - } - - return true; -} - -static -void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) { - ReportID haig_report_id = rose.getNewNfaReport(); - DEBUG_PRINTF("setting report id of %u\n", haig_report_id); - - clearReports(h); - for (auto u : inv_adjacent_vertices_range(h.accept, h)) { - h[u].reports.clear(); - h[u].reports.insert(haig_report_id); - } -} - -static -bool tryHaig(RoseBuild &rose, NGHolder &g, + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator picked, + const Grey &grey, + vector<som_plan> *plan) { + DEBUG_PRINTF("trying to chain from %u\n", picked->first); + const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ + + shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second, + next(picked)->second); + + // Quick check to stop us from trying this on huge graphs, which causes us + // to spend forever in ng_execute looking at cases that will most like + // fail. See UE-2078. 
+ size_t prefix_size = num_vertices(*prefix); + size_t total_size = num_vertices(g); + assert(total_size >= prefix_size); + if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) { + DEBUG_PRINTF("suffix has %zu vertices, fail\n", + total_size - prefix_size); + return false; + } + + clearReports(*prefix); + for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) { + (*prefix)[u].reports.insert(0); + } + + dumpHolder(*prefix, 0, "full_haiglit_prefix", grey); + + CharReach escapes; + bool stuck = isPossibleLock(g, picked, info, &escapes); + if (stuck) { + NGHolder gg; + fillHolderForLockCheck(&gg, g, info, picked); + + stuck = firstMatchIsFirst(gg); + } + + DEBUG_PRINTF("stuck = %d\n", (int)stuck); + + // Note: no-one should ever pay attention to the root plan's som_loc_in. + plan->emplace_back(prefix, escapes, false, 0); + plan->back().no_implement = true; + + dumpHolder(*plan->back().prefix, 22, "som_prefix", grey); + + /* don't allow tree planning to mutate the graph */ + if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey, + DISALLOW_MODIFY_HOLDER)) { + // Rollback SOM locations. 
+ ssm.rollbackSomTo(numSomLocsBefore); + + DEBUG_PRINTF("fail to chain\n"); + return false; + } + + return true; +} + +static +void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) { + ReportID haig_report_id = rose.getNewNfaReport(); + DEBUG_PRINTF("setting report id of %u\n", haig_report_id); + + clearReports(h); + for (auto u : inv_adjacent_vertices_range(h.accept, h)) { + h[u].reports.clear(); + h[u].reports.insert(haig_report_id); + } +} + +static +bool tryHaig(RoseBuild &rose, NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - som_type som, u32 somPrecision, - map<u32, region_info>::const_iterator picked, - shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix, - const Grey &grey) { - DEBUG_PRINTF("trying to build a haig\n"); - shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second, - next(picked)->second); - prefix->kind = NFA_PREFIX; - setReportOnHaigPrefix(rose, *prefix); - dumpHolder(*prefix, 0, "haig_prefix", grey); - vector<vector<CharReach> > triggers; /* empty for prefix */ - *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey); - if (!*haig) { - DEBUG_PRINTF("failed to haig\n"); - return false; - } - *haig_prefix = prefix; - return true; -} - -static -void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix, - const shared_ptr<raw_som_dfa> &haig, - const ue2_literal &lit, const set<ReportID> &reports) { - assert(prefix && haig); - - DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str()); - - RoseInGraph ig; - RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig); - - assert(!reports.empty()); - RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(v, a, RoseInEdgeProps(0U, 0U), ig); - - calcVertexOffsets(ig); - - UNUSED bool rv = tb.addSombeRose(ig); - assert(rv); // TODO: recover 
from addRose failure -} - -static + som_type som, u32 somPrecision, + map<u32, region_info>::const_iterator picked, + shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix, + const Grey &grey) { + DEBUG_PRINTF("trying to build a haig\n"); + shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second, + next(picked)->second); + prefix->kind = NFA_PREFIX; + setReportOnHaigPrefix(rose, *prefix); + dumpHolder(*prefix, 0, "haig_prefix", grey); + vector<vector<CharReach> > triggers; /* empty for prefix */ + *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey); + if (!*haig) { + DEBUG_PRINTF("failed to haig\n"); + return false; + } + *haig_prefix = prefix; + return true; +} + +static +void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix, + const shared_ptr<raw_som_dfa> &haig, + const ue2_literal &lit, const set<ReportID> &reports) { + assert(prefix && haig); + + DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str()); + + RoseInGraph ig; + RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); + + add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig); + + assert(!reports.empty()); + RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig); + add_edge(v, a, RoseInEdgeProps(0U, 0U), ig); + + calcVertexOffsets(ig); + + UNUSED bool rv = tb.addSombeRose(ig); + assert(rv); // TODO: recover from addRose failure +} + +static sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - map<u32, region_info>::const_iterator lower_bound) { - DEBUG_PRINTF("entry\n"); - assert(g.kind == NFA_OUTFIX); - const CompileContext &cc = ng.cc; - ReportManager &rm = ng.rm; - SomSlotManager &ssm = ng.ssm; - + const map<u32, region_info> &info, + map<u32, region_info>::const_iterator 
lower_bound) { + DEBUG_PRINTF("entry\n"); + assert(g.kind == NFA_OUTFIX); + const CompileContext &cc = ng.cc; + ReportManager &rm = ng.rm; + SomSlotManager &ssm = ng.ssm; + if (!cc.grey.allowHaigLit) { - return SOMBE_FAIL; - } - - const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ - u32 som_loc = ssm.getPrivateSomSlot(); - + return SOMBE_FAIL; + } + + const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ + u32 som_loc = ssm.getPrivateSomSlot(); + if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { - // This is an optimisation: if we can't build a Haig from a portion of - // the graph, then we won't be able to manage it as an outfix either - // when we fall back. + // This is an optimisation: if we can't build a Haig from a portion of + // the graph, then we won't be able to manage it as an outfix either + // when we fall back. throw CompileError(expr.index, "Pattern is too large."); - } - - while (1) { - DEBUG_PRINTF("lower bound is %u\n", lower_bound->first); - ue2_literal s; - map<u32, region_info>::const_iterator lit - = findLaterLiteral(g, info, lower_bound, s, cc.grey); - if (lit == info.end()) { - DEBUG_PRINTF("failed to find literal\n"); - ssm.rollbackSomTo(numSomLocsBefore); - return SOMBE_FAIL; - } - DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(), - lit->first); - - if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) { - DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n"); - lower_bound = lit; - ++lower_bound; - continue; - } - - shared_ptr<raw_som_dfa> haig; - shared_ptr<NGHolder> haig_prefix; - map<u32, region_info>::const_iterator haig_reg = lit; - - if (edge(lit->second.exits.front(), g.acceptEod, g).second) { - /* TODO: handle */ - ssm.rollbackSomTo(numSomLocsBefore); - return SOMBE_FAIL; - } - - advance(haig_reg, -(s32)s.length()); - - if (!haig_reg->first && haig_reg->second.full.size() == 2) { - /* just starts */ - - /* TODO: make below assertion true, reset 
checks could be stronger - * (12356) - */ - /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey, - &plan)); */ - - lower_bound = lit; - ++lower_bound; - continue; /* somebody else should have been able to chain */ - } - - bool ok = true; - set<ReportID> rep; - if (next(lit) != info.end()) { - /* non terminal literal */ - - /* TODO: handle edges to accept ? */ - vector<som_plan> plan; - if (edge(lit->second.exits.front(), g.accept, g).second) { - insert(&rep, g[lit->second.exits.front()].reports); - remove_edge(lit->second.exits.front(), g.accept, g); - g[lit->second.exits.front()].reports.clear(); - - /* Note: we can mess with the graph as this is the last literal - * we will find and on failure the graph will be thrown away */ - } - - ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit, - cc.grey, &plan); - ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), - haig_reg, &haig, &haig_prefix, cc.grey); - - if (!ok) { - DEBUG_PRINTF(":( going to next attempt\n"); - goto next_try; - } - + } + + while (1) { + DEBUG_PRINTF("lower bound is %u\n", lower_bound->first); + ue2_literal s; + map<u32, region_info>::const_iterator lit + = findLaterLiteral(g, info, lower_bound, s, cc.grey); + if (lit == info.end()) { + DEBUG_PRINTF("failed to find literal\n"); + ssm.rollbackSomTo(numSomLocsBefore); + return SOMBE_FAIL; + } + DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(), + lit->first); + + if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) { + DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n"); + lower_bound = lit; + ++lower_bound; + continue; + } + + shared_ptr<raw_som_dfa> haig; + shared_ptr<NGHolder> haig_prefix; + map<u32, region_info>::const_iterator haig_reg = lit; + + if (edge(lit->second.exits.front(), g.acceptEod, g).second) { + /* TODO: handle */ + ssm.rollbackSomTo(numSomLocsBefore); + return SOMBE_FAIL; + } + + advance(haig_reg, -(s32)s.length()); + + if (!haig_reg->first && 
haig_reg->second.full.size() == 2) { + /* just starts */ + + /* TODO: make below assertion true, reset checks could be stronger + * (12356) + */ + /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey, + &plan)); */ + + lower_bound = lit; + ++lower_bound; + continue; /* somebody else should have been able to chain */ + } + + bool ok = true; + set<ReportID> rep; + if (next(lit) != info.end()) { + /* non terminal literal */ + + /* TODO: handle edges to accept ? */ + vector<som_plan> plan; + if (edge(lit->second.exits.front(), g.accept, g).second) { + insert(&rep, g[lit->second.exits.front()].reports); + remove_edge(lit->second.exits.front(), g.accept, g); + g[lit->second.exits.front()].reports.clear(); + + /* Note: we can mess with the graph as this is the last literal + * we will find and on failure the graph will be thrown away */ + } + + ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit, + cc.grey, &plan); + ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), + haig_reg, &haig, &haig_prefix, cc.grey); + + if (!ok) { + DEBUG_PRINTF(":( going to next attempt\n"); + goto next_try; + } + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); - - Report ir = makeCallback(0U, 0); - assert(!plan.empty()); - if (plan.front().is_reset) { - ir.type = INTERNAL_SOM_LOC_SET_FROM; - } else { - ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE; - } - ir.onmatch = som_loc; - rep.insert(rm.getInternalId(ir)); - } else { - /* terminal literal */ - ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg, - &haig, &haig_prefix, cc.grey); - - /* find report */ - insert(&rep, g[lit->second.exits.front()].reports); - - /* TODO: som_loc is unused */ - } - - if (ok) { - roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep); - if (next(lit) != info.end()) { - return SOMBE_HANDLED_INTERNAL; - } else { - ssm.rollbackSomTo(numSomLocsBefore); - return SOMBE_HANDLED_ALL; - } - } -next_try: - lower_bound = lit; - ++lower_bound; - } - assert(0); 
- return SOMBE_FAIL; -} - -static -bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits, - set<NFAVertex> *terminals) { - /* TODO: smarter (topo) */ -#define MAX_LEADING_LITERALS 20 - set<NFAVertex> s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); - - set<NFAVertex> sds_succ; - insert(&sds_succ, adjacent_vertices(g.startDs, g)); - - if (!is_subset_of(s_succ, sds_succ)) { - DEBUG_PRINTF("not floating\n"); - return false; - } - - sds_succ.erase(g.startDs); - - map<NFAVertex, vector<ue2_literal> > curr; - curr[g.startDs].push_back(ue2_literal()); - - map<NFAVertex, set<NFAVertex> > seen; - map<NFAVertex, vector<ue2_literal> > next; - - bool did_expansion = true; - while (did_expansion) { - did_expansion = false; - u32 count = 0; - assert(!curr.empty()); - for (const auto &m : curr) { - const NFAVertex u = m.first; - const vector<ue2_literal> &base = m.second; + + Report ir = makeCallback(0U, 0); + assert(!plan.empty()); + if (plan.front().is_reset) { + ir.type = INTERNAL_SOM_LOC_SET_FROM; + } else { + ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE; + } + ir.onmatch = som_loc; + rep.insert(rm.getInternalId(ir)); + } else { + /* terminal literal */ + ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg, + &haig, &haig_prefix, cc.grey); + + /* find report */ + insert(&rep, g[lit->second.exits.front()].reports); + + /* TODO: som_loc is unused */ + } + + if (ok) { + roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep); + if (next(lit) != info.end()) { + return SOMBE_HANDLED_INTERNAL; + } else { + ssm.rollbackSomTo(numSomLocsBefore); + return SOMBE_HANDLED_ALL; + } + } +next_try: + lower_bound = lit; + ++lower_bound; + } + assert(0); + return SOMBE_FAIL; +} + +static +bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits, + set<NFAVertex> *terminals) { + /* TODO: smarter (topo) */ +#define MAX_LEADING_LITERALS 20 + set<NFAVertex> s_succ; + insert(&s_succ, adjacent_vertices(g.start, g)); + + set<NFAVertex> sds_succ; + 
insert(&sds_succ, adjacent_vertices(g.startDs, g)); + + if (!is_subset_of(s_succ, sds_succ)) { + DEBUG_PRINTF("not floating\n"); + return false; + } + + sds_succ.erase(g.startDs); + + map<NFAVertex, vector<ue2_literal> > curr; + curr[g.startDs].push_back(ue2_literal()); + + map<NFAVertex, set<NFAVertex> > seen; + map<NFAVertex, vector<ue2_literal> > next; + + bool did_expansion = true; + while (did_expansion) { + did_expansion = false; + u32 count = 0; + assert(!curr.empty()); + for (const auto &m : curr) { + const NFAVertex u = m.first; + const vector<ue2_literal> &base = m.second; DEBUG_PRINTF("expanding from %zu\n", g[u].index); - for (auto v : adjacent_vertices_range(u, g)) { - if (v == g.startDs) { - continue; - } - if (contains(seen[u], v)) { - DEBUG_PRINTF("loop\n"); - goto skip_to_next_terminal; - } - if (is_any_accept(v, g) || is_match_vertex(v, g)) { - DEBUG_PRINTF("match\n"); - goto skip_to_next_terminal; - } + for (auto v : adjacent_vertices_range(u, g)) { + if (v == g.startDs) { + continue; + } + if (contains(seen[u], v)) { + DEBUG_PRINTF("loop\n"); + goto skip_to_next_terminal; + } + if (is_any_accept(v, g) || is_match_vertex(v, g)) { + DEBUG_PRINTF("match\n"); + goto skip_to_next_terminal; + } if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) { - DEBUG_PRINTF("wide\n"); - goto skip_to_next_terminal; - } - } - - for (auto v : adjacent_vertices_range(u, g)) { - assert(!contains(seen[u], v)); - if (v == g.startDs) { - continue; - } - insert(&seen[v], seen[u]); - seen[v].insert(v); - CharReach cr = g[v].char_reach; - vector<ue2_literal> &out = next[v]; - + DEBUG_PRINTF("wide\n"); + goto skip_to_next_terminal; + } + } + + for (auto v : adjacent_vertices_range(u, g)) { + assert(!contains(seen[u], v)); + if (v == g.startDs) { + continue; + } + insert(&seen[v], seen[u]); + seen[v].insert(v); + CharReach cr = g[v].char_reach; + vector<ue2_literal> &out = next[v]; + DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index, cr.count()); - for (size_t c = 
cr.find_first(); c != CharReach::npos; - c = cr.find_next(c)) { - bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) - && cr.test(mytolower(c)); - - if (nocase && (char)c == mytolower(c)) { - continue; /* uppercase already handled us */ - } - - for (const auto &lit : base) { - if (count >= MAX_LEADING_LITERALS) { - DEBUG_PRINTF("count %u\n", count); - goto exit; - } - did_expansion = true; - out.push_back(lit); - out.back().push_back(c, nocase); - count++; - if (out.back().length() > MAX_MASK2_WIDTH - && mixed_sensitivity(out.back())) { - goto exit; - } - - } - } - } - if (0) { - skip_to_next_terminal: - insert(&next[u], next[u].end(), base); - count += base.size(); - if (count > MAX_LEADING_LITERALS) { - DEBUG_PRINTF("count %u\n", count); - goto exit; - } - } - } - - curr.swap(next); - next.clear(); - }; - exit:; - for (const auto &m : curr) { - NFAVertex t = m.first; - if (t == g.startDs) { - assert(curr.size() == 1); - return false; - } - assert(!is_special(t, g)); - terminals->insert(t); - insert(lits, m.second); - } - assert(lits->size() <= MAX_LEADING_LITERALS); - return !lits->empty(); -} - -static -bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out, - NGHolder *rhs) { - DEBUG_PRINTF("looking for a leading literals\n"); - - set<NFAVertex> terms; - if (!leadingLiterals(g, lit_out, &terms)) { - return false; - } - - for (UNUSED const auto &lit : *lit_out) { - DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(), - lit.length()); - } - - /* need to validate that it is a clean split */ - assert(!terms.empty()); - set<NFAVertex> adj_term1; - insert(&adj_term1, adjacent_vertices(*terms.begin(), g)); - for (auto v : terms) { + for (size_t c = cr.find_first(); c != CharReach::npos; + c = cr.find_next(c)) { + bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) + && cr.test(mytolower(c)); + + if (nocase && (char)c == mytolower(c)) { + continue; /* uppercase already handled us */ + } + + for (const auto &lit : base) { + if 
(count >= MAX_LEADING_LITERALS) { + DEBUG_PRINTF("count %u\n", count); + goto exit; + } + did_expansion = true; + out.push_back(lit); + out.back().push_back(c, nocase); + count++; + if (out.back().length() > MAX_MASK2_WIDTH + && mixed_sensitivity(out.back())) { + goto exit; + } + + } + } + } + if (0) { + skip_to_next_terminal: + insert(&next[u], next[u].end(), base); + count += base.size(); + if (count > MAX_LEADING_LITERALS) { + DEBUG_PRINTF("count %u\n", count); + goto exit; + } + } + } + + curr.swap(next); + next.clear(); + }; + exit:; + for (const auto &m : curr) { + NFAVertex t = m.first; + if (t == g.startDs) { + assert(curr.size() == 1); + return false; + } + assert(!is_special(t, g)); + terminals->insert(t); + insert(lits, m.second); + } + assert(lits->size() <= MAX_LEADING_LITERALS); + return !lits->empty(); +} + +static +bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out, + NGHolder *rhs) { + DEBUG_PRINTF("looking for a leading literals\n"); + + set<NFAVertex> terms; + if (!leadingLiterals(g, lit_out, &terms)) { + return false; + } + + for (UNUSED const auto &lit : *lit_out) { + DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(), + lit.length()); + } + + /* need to validate that it is a clean split */ + assert(!terms.empty()); + set<NFAVertex> adj_term1; + insert(&adj_term1, adjacent_vertices(*terms.begin(), g)); + for (auto v : terms) { DEBUG_PRINTF("term %zu\n", g[v].index); - set<NFAVertex> temp; - insert(&temp, adjacent_vertices(v, g)); - if (temp != adj_term1) { - DEBUG_PRINTF("bad split\n"); - return false; - } - } - + set<NFAVertex> temp; + insert(&temp, adjacent_vertices(v, g)); + if (temp != adj_term1) { + DEBUG_PRINTF("bad split\n"); + return false; + } + } + unordered_map<NFAVertex, NFAVertex> rhs_map; - vector<NFAVertex> pivots; - insert(&pivots, pivots.end(), adj_term1); - splitRHS(g, pivots, rhs, &rhs_map); - - assert(is_triggered(*rhs)); - return true; -} - -static -void findBestLiteral(const NGHolder 
&g, + vector<NFAVertex> pivots; + insert(&pivots, pivots.end(), adj_term1); + splitRHS(g, pivots, rhs, &rhs_map); + + assert(is_triggered(*rhs)); + return true; +} + +static +void findBestLiteral(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - ue2_literal *lit_out, NFAVertex *v, - const CompileContext &cc) { - map<u32, region_info> info; - buildRegionMapping(g, regions, info, false); - - ue2_literal best; + ue2_literal *lit_out, NFAVertex *v, + const CompileContext &cc) { + map<u32, region_info> info; + buildRegionMapping(g, regions, info, false); + + ue2_literal best; NFAVertex best_v = NGHolder::null_vertex(); - - map<u32, region_info>::const_iterator lit = info.begin(); - while (1) { - ue2_literal s; - lit = findLaterLiteral(g, info, lit, s, cc.grey); - if (lit == info.end()) { - break; - } - DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(), - lit->first); - - if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) { - DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n"); - ++lit; - continue; - } - - if (s.length() > best.length()) { - best = s; - assert(!lit->second.exits.empty()); - best_v = lit->second.exits[0]; - } - - ++lit; - } - - lit_out->swap(best); - *v = best_v; -} - -static -bool splitOffBestLiteral(const NGHolder &g, + + map<u32, region_info>::const_iterator lit = info.begin(); + while (1) { + ue2_literal s; + lit = findLaterLiteral(g, info, lit, s, cc.grey); + if (lit == info.end()) { + break; + } + DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(), + lit->first); + + if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) { + DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n"); + ++lit; + continue; + } + + if (s.length() > best.length()) { + best = s; + assert(!lit->second.exits.empty()); + best_v = lit->second.exits[0]; + } + + ++lit; + } + + lit_out->swap(best); + *v = best_v; +} + +static +bool splitOffBestLiteral(const NGHolder &g, const unordered_map<NFAVertex, u32> 
®ions, - ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, - const CompileContext &cc) { + ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, + const CompileContext &cc) { NFAVertex v = NGHolder::null_vertex(); - - findBestLiteral(g, regions, lit_out, &v, cc); - if (lit_out->empty()) { - return false; - } - - DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str()); - + + findBestLiteral(g, regions, lit_out, &v, cc); + if (lit_out->empty()) { + return false; + } + + DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str()); + unordered_map<NFAVertex, NFAVertex> lhs_map; unordered_map<NFAVertex, NFAVertex> rhs_map; - - splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); - + + splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); + DEBUG_PRINTF("v = %zu\n", g[v].index); - - return true; -} - + + return true; +} + /** * Replace the given graph's EXTERNAL_CALLBACK reports with * EXTERNAL_CALLBACK_SOM_PASS reports. @@ -2442,706 +2442,706 @@ void makeReportsSomPass(ReportManager &rm, NGHolder &g) { } } -static -bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { - ue2_literal lit; - shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); - if (!ng.cc.grey.allowLitHaig) { - return false; - } - - dumpHolder(g, 90, "lithaig_full", ng.cc.grey); - - if (!splitOffLeadingLiteral(g, &lit, &*rhs)) { - DEBUG_PRINTF("no literal\n"); - return false; - } - - if (lit.length() < ng.cc.grey.minRoseLiteralLength) { - DEBUG_PRINTF("lit too short\n"); - return false; - } - - assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - +static +bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { + ue2_literal lit; + shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); + if (!ng.cc.grey.allowLitHaig) { + return false; + } + + dumpHolder(g, 90, "lithaig_full", ng.cc.grey); + + if (!splitOffLeadingLiteral(g, &lit, &*rhs)) { + DEBUG_PRINTF("no literal\n"); + return false; + } + + if (lit.length() < ng.cc.grey.minRoseLiteralLength) { + DEBUG_PRINTF("lit too short\n"); + return 
false; + } + + assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); + makeReportsSomPass(ng.rm, *rhs); - dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); - - vector<vector<CharReach> > triggers; - triggers.push_back(as_cr_seq(lit)); - - assert(rhs->kind == NFA_SUFFIX); - shared_ptr<raw_som_dfa> haig - = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers, - ng.cc.grey, false /* lit implies adv som */); - if (!haig) { - DEBUG_PRINTF("failed to haig\n"); - return false; - } - DEBUG_PRINTF("haig %p\n", haig.get()); - - RoseInGraph ig; - RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); - - RoseInVertex a - = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig); - add_edge(v, a, RoseInEdgeProps(haig), ig); - - calcVertexOffsets(ig); - - return ng.rose->addSombeRose(ig); -} - -static -bool doHaigLitHaigSom(NG &ng, NGHolder &g, + dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); + + vector<vector<CharReach> > triggers; + triggers.push_back(as_cr_seq(lit)); + + assert(rhs->kind == NFA_SUFFIX); + shared_ptr<raw_som_dfa> haig + = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers, + ng.cc.grey, false /* lit implies adv som */); + if (!haig) { + DEBUG_PRINTF("failed to haig\n"); + return false; + } + DEBUG_PRINTF("haig %p\n", haig.get()); + + RoseInGraph ig; + RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); + add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); + + RoseInVertex a + = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig); + add_edge(v, a, RoseInEdgeProps(haig), ig); + + calcVertexOffsets(ig); + + return ng.rose->addSombeRose(ig); +} + +static +bool doHaigLitHaigSom(NG &ng, NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - som_type som) { - if 
(!ng.cc.grey.allowLitHaig) { - return false; - } - - // In streaming mode, we can only delay up to our max available history. - const u32 max_delay = - ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX; - - ue2_literal lit; - shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); - shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); - if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { - return false; - } - - DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(), - lit.length()); - - if (lit.length() < ng.cc.grey.minRoseLiteralLength) { - DEBUG_PRINTF("lit too short\n"); - return false; - } - - assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - - if (edge(rhs->start, rhs->acceptEod, *rhs).second) { - return false; /* TODO: handle */ - } - + som_type som) { + if (!ng.cc.grey.allowLitHaig) { + return false; + } + + // In streaming mode, we can only delay up to our max available history. + const u32 max_delay = + ng.cc.streaming ? 
ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX; + + ue2_literal lit; + shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); + shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); + if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { + return false; + } + + DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(), + lit.length()); + + if (lit.length() < ng.cc.grey.minRoseLiteralLength) { + DEBUG_PRINTF("lit too short\n"); + return false; + } + + assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); + + if (edge(rhs->start, rhs->acceptEod, *rhs).second) { + return false; /* TODO: handle */ + } + makeReportsSomPass(ng.rm, *rhs); - dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey); - dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey); - - u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay); - - RoseInGraph ig; - RoseInVertex s - = add_vertex(RoseInVertexProps::makeStart(false), ig); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - bool lhs_all_vac = true; + dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey); + dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey); + + u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay); + + RoseInGraph ig; + RoseInVertex s + = add_vertex(RoseInVertexProps::makeStart(false), ig); + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); + + bool lhs_all_vac = true; NGHolder::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs); - ai != ae && lhs_all_vac; ++ai) { - if (!is_special(*ai, *lhs)) { - lhs_all_vac = false; - } - } - for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs); - ai != ae && lhs_all_vac; ++ai) { - if (!is_special(*ai, *lhs)) { - lhs_all_vac = false; - } - } - - if (lhs_all_vac) { - /* lhs is completely vacuous --> no prefix needed */ - add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); - } else { - assert(delay == lit.length()); - setReportOnHaigPrefix(*ng.rose, *lhs); - 
vector<vector<CharReach> > prefix_triggers; /* empty for prefix */ - assert(lhs->kind == NFA_PREFIX); - shared_ptr<raw_som_dfa> l_haig - = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(), - prefix_triggers, ng.cc.grey); - if (!l_haig) { - DEBUG_PRINTF("failed to haig\n"); - return false; - } - DEBUG_PRINTF("lhs haig %p\n", l_haig.get()); - - add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig); - } - - if (!edge(rhs->start, rhs->accept, *rhs).second) { - assert(rhs->kind == NFA_SUFFIX); - - vector<vector<CharReach> > triggers; - triggers.push_back(as_cr_seq(lit)); - - ue2_literal lit2; - if (getTrailingLiteral(g, &lit2) - && lit2.length() >= ng.cc.grey.minRoseLiteralLength - && minStringPeriod(lit2) >= 2) { - - /* TODO: handle delay */ - size_t overlap = maxOverlap(lit, lit2, 0); - u32 delay2 = min((size_t)max_delay, lit2.length() - overlap); - delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2); - rhs->kind = NFA_INFIX; - assert(delay2 <= lit2.length()); - setReportOnHaigPrefix(*ng.rose, *rhs); - - shared_ptr<raw_som_dfa> m_haig - = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), - triggers, ng.cc.grey, true); - DEBUG_PRINTF("mhs haig %p\n", m_haig.get()); - if (!m_haig) { - DEBUG_PRINTF("failed to haig\n"); - return false; - } - - RoseInVertex w - = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig); - add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig); - - NFAVertex reporter = getSoleSourceVertex(g, g.accept); - assert(reporter); - const auto &reports = g[reporter].reports; - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(w, a, RoseInEdgeProps(0U, 0U), ig); - } else { - /* TODO: analysis to see if som is in fact always increasing */ - shared_ptr<raw_som_dfa> r_haig - = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), - triggers, ng.cc.grey, true); - DEBUG_PRINTF("rhs haig %p\n", r_haig.get()); - if (!r_haig) { - DEBUG_PRINTF("failed to haig\n"); - return false; - } - RoseInVertex a - = 
add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), - ig); - add_edge(v, a, RoseInEdgeProps(r_haig), ig); - } - } else { - DEBUG_PRINTF("has start->accept edge\n"); + for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs); + ai != ae && lhs_all_vac; ++ai) { + if (!is_special(*ai, *lhs)) { + lhs_all_vac = false; + } + } + for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs); + ai != ae && lhs_all_vac; ++ai) { + if (!is_special(*ai, *lhs)) { + lhs_all_vac = false; + } + } + + if (lhs_all_vac) { + /* lhs is completely vacuous --> no prefix needed */ + add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); + } else { + assert(delay == lit.length()); + setReportOnHaigPrefix(*ng.rose, *lhs); + vector<vector<CharReach> > prefix_triggers; /* empty for prefix */ + assert(lhs->kind == NFA_PREFIX); + shared_ptr<raw_som_dfa> l_haig + = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(), + prefix_triggers, ng.cc.grey); + if (!l_haig) { + DEBUG_PRINTF("failed to haig\n"); + return false; + } + DEBUG_PRINTF("lhs haig %p\n", l_haig.get()); + + add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig); + } + + if (!edge(rhs->start, rhs->accept, *rhs).second) { + assert(rhs->kind == NFA_SUFFIX); + + vector<vector<CharReach> > triggers; + triggers.push_back(as_cr_seq(lit)); + + ue2_literal lit2; + if (getTrailingLiteral(g, &lit2) + && lit2.length() >= ng.cc.grey.minRoseLiteralLength + && minStringPeriod(lit2) >= 2) { + + /* TODO: handle delay */ + size_t overlap = maxOverlap(lit, lit2, 0); + u32 delay2 = min((size_t)max_delay, lit2.length() - overlap); + delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2); + rhs->kind = NFA_INFIX; + assert(delay2 <= lit2.length()); + setReportOnHaigPrefix(*ng.rose, *rhs); + + shared_ptr<raw_som_dfa> m_haig + = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), + triggers, ng.cc.grey, true); + DEBUG_PRINTF("mhs haig %p\n", m_haig.get()); + if (!m_haig) { + DEBUG_PRINTF("failed to haig\n"); + return false; + } + + RoseInVertex w + = 
add_vertex(RoseInVertexProps::makeLiteral(lit2), ig); + add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig); + + NFAVertex reporter = getSoleSourceVertex(g, g.accept); + assert(reporter); + const auto &reports = g[reporter].reports; + RoseInVertex a = + add_vertex(RoseInVertexProps::makeAccept(reports), ig); + add_edge(w, a, RoseInEdgeProps(0U, 0U), ig); + } else { + /* TODO: analysis to see if som is in fact always increasing */ + shared_ptr<raw_som_dfa> r_haig + = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), + triggers, ng.cc.grey, true); + DEBUG_PRINTF("rhs haig %p\n", r_haig.get()); + if (!r_haig) { + DEBUG_PRINTF("failed to haig\n"); + return false; + } + RoseInVertex a + = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), + ig); + add_edge(v, a, RoseInEdgeProps(r_haig), ig); + } + } else { + DEBUG_PRINTF("has start->accept edge\n"); if (in_degree(g.acceptEod, g) > 1) { - DEBUG_PRINTF("also has a path to EOD\n"); - return false; - } - NFAVertex reporter = getSoleSourceVertex(g, g.accept); - if (!reporter) { - return false; /* TODO: later */ - } - const auto &reports = g[reporter].reports; - assert(!reports.empty()); - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(v, a, RoseInEdgeProps(0U, 0U), ig); - } - - calcVertexOffsets(ig); - - return ng.rose->addSombeRose(ig); -} - -static -bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { - set<ue2_literal> lits; - shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); - if (!ng.cc.grey.allowLitHaig) { - return false; - } - - dumpHolder(g, 90, "lithaig_full", ng.cc.grey); - - if (!splitOffLeadingLiterals(g, &lits, &*rhs)) { - DEBUG_PRINTF("no literal\n"); - return false; - } - + DEBUG_PRINTF("also has a path to EOD\n"); + return false; + } + NFAVertex reporter = getSoleSourceVertex(g, g.accept); + if (!reporter) { + return false; /* TODO: later */ + } + const auto &reports = g[reporter].reports; + assert(!reports.empty()); + RoseInVertex a = + 
add_vertex(RoseInVertexProps::makeAccept(reports), ig); + add_edge(v, a, RoseInEdgeProps(0U, 0U), ig); + } + + calcVertexOffsets(ig); + + return ng.rose->addSombeRose(ig); +} + +static +bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { + set<ue2_literal> lits; + shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); + if (!ng.cc.grey.allowLitHaig) { + return false; + } + + dumpHolder(g, 90, "lithaig_full", ng.cc.grey); + + if (!splitOffLeadingLiterals(g, &lits, &*rhs)) { + DEBUG_PRINTF("no literal\n"); + return false; + } + makeReportsSomPass(ng.rm, *rhs); - dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); - - vector<vector<CharReach>> triggers; - for (const auto &lit : lits) { - if (lit.length() < ng.cc.grey.minRoseLiteralLength) { - DEBUG_PRINTF("lit too short\n"); - return false; - } - - assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - triggers.push_back(as_cr_seq(lit)); - } - - bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that - * we can promise ordering */ - - assert(rhs->kind == NFA_SUFFIX); - shared_ptr<raw_som_dfa> haig - = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers, - ng.cc.grey, unordered_som_triggers); - if (!haig) { - DEBUG_PRINTF("failed to haig\n"); - return false; - } - DEBUG_PRINTF("haig %p\n", haig.get()); - - RoseInGraph ig; - RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); - - RoseInVertex a - = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig); - - for (const auto &lit : lits) { - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); - add_edge(v, a, RoseInEdgeProps(haig), ig); - } - - calcVertexOffsets(ig); - - return ng.rose->addSombeRose(ig); -} - -static -bool trySombe(NG &ng, NGHolder &g, som_type som) { - if (doLitHaigSom(ng, g, som)) { - return true; - } - - auto regions = assignRegions(g); - - if (doHaigLitHaigSom(ng, g, regions, som)) { - return true; - } - - 
if (doMultiLitHaigSom(ng, g, som)) { - return true; - } - - return false; -} - -static -map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g, + dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); + + vector<vector<CharReach>> triggers; + for (const auto &lit : lits) { + if (lit.length() < ng.cc.grey.minRoseLiteralLength) { + DEBUG_PRINTF("lit too short\n"); + return false; + } + + assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); + triggers.push_back(as_cr_seq(lit)); + } + + bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that + * we can promise ordering */ + + assert(rhs->kind == NFA_SUFFIX); + shared_ptr<raw_som_dfa> haig + = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers, + ng.cc.grey, unordered_som_triggers); + if (!haig) { + DEBUG_PRINTF("failed to haig\n"); + return false; + } + DEBUG_PRINTF("haig %p\n", haig.get()); + + RoseInGraph ig; + RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); + + RoseInVertex a + = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig); + + for (const auto &lit : lits) { + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); + add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); + add_edge(v, a, RoseInEdgeProps(haig), ig); + } + + calcVertexOffsets(ig); + + return ng.rose->addSombeRose(ig); +} + +static +bool trySombe(NG &ng, NGHolder &g, som_type som) { + if (doLitHaigSom(ng, g, som)) { + return true; + } + + auto regions = assignRegions(g); + + if (doHaigLitHaigSom(ng, g, regions, som)) { + return true; + } + + if (doMultiLitHaigSom(ng, g, som)) { + return true; + } + + return false; +} + +static +map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - const vector<DepthMinMax> &depths) { - map<u32, region_info>::const_iterator picked = info.end(); - for (map<u32, region_info>::const_iterator it = info.begin(); 
- it != info.end(); ++it) { - if (it->second.exits.empty()) { - assert(it == info.begin()); - continue; - } - - if (!regionCanEstablishSom(g, regions, it->first, it->second.exits, - depths)) { - /* last region is as far as we can go */ - DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first); - break; - } - picked = it; - } - - return picked; -} - -static -map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g, + const map<u32, region_info> &info, + const vector<DepthMinMax> &depths) { + map<u32, region_info>::const_iterator picked = info.end(); + for (map<u32, region_info>::const_iterator it = info.begin(); + it != info.end(); ++it) { + if (it->second.exits.empty()) { + assert(it == info.begin()); + continue; + } + + if (!regionCanEstablishSom(g, regions, it->first, it->second.exits, + depths)) { + /* last region is as far as we can go */ + DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first); + break; + } + picked = it; + } + + return picked; +} + +static +map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - const vector<DepthMinMax> &depths, - const map<u32, region_info>::const_iterator &orig, - const CompileContext &cc) { - DEBUG_PRINTF("trying for later rev nfa cut\n"); - assert(orig != info.end()); - - vector<map<u32, region_info>::const_iterator> cands; - - map<u32, region_info>::const_iterator it = orig; - ++it; - for (; it != info.end(); ++it) { - /* for simplicity */ - if (it->second.exits.size() != 1 || it->second.optional) { - continue; - } - NFAVertex v = *it->second.exits.begin(); - - if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - continue; /* for simplicity would require external som nfa reports - * as well. 
*/ - } - - const depth &max_depth = depths[g[v].index].max; - if (max_depth > - depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */ - continue; - } - - if (max_depth > depth(MAX_REV_NFA_PREFIX)) { - /* probably not a good idea, anyway */ - continue; - } - - cands.push_back(it); - } - - while (!cands.empty()) { - map<u32, region_info>::const_iterator rv = cands.back(); - cands.pop_back(); - - NFAVertex v = *rv->second.exits.begin(); - - set<ue2_literal> lits = getLiteralSet(g, v); - compressAndScore(lits); - if (lits.empty()) { - next_region: - continue; - } - for (const auto &lit : lits) { - if (lit.length() <= 3 || minStringPeriod(lit) < 2) { - goto next_region; - } - } - - if (rv->second.enters.empty() - || find(rv->second.full.begin(), rv->second.full.end(), g.startDs) - != rv->second.full.end()) { - continue; - } - - if (!isMandRegionBetween(info.begin(), rv) - && info.begin()->second.optional) { - continue; - } - - /* check to see if it is a reasonable size */ - auto prefix = - makePrefix(g, regions, rv->second, next(rv)->second, false); - - NGHolder g_rev; - reverseHolder(*prefix, g_rev); - anchorStarts(g_rev); - + const map<u32, region_info> &info, + const vector<DepthMinMax> &depths, + const map<u32, region_info>::const_iterator &orig, + const CompileContext &cc) { + DEBUG_PRINTF("trying for later rev nfa cut\n"); + assert(orig != info.end()); + + vector<map<u32, region_info>::const_iterator> cands; + + map<u32, region_info>::const_iterator it = orig; + ++it; + for (; it != info.end(); ++it) { + /* for simplicity */ + if (it->second.exits.size() != 1 || it->second.optional) { + continue; + } + NFAVertex v = *it->second.exits.begin(); + + if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { + continue; /* for simplicity would require external som nfa reports + * as well. 
*/ + } + + const depth &max_depth = depths[g[v].index].max; + if (max_depth > + depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */ + continue; + } + + if (max_depth > depth(MAX_REV_NFA_PREFIX)) { + /* probably not a good idea, anyway */ + continue; + } + + cands.push_back(it); + } + + while (!cands.empty()) { + map<u32, region_info>::const_iterator rv = cands.back(); + cands.pop_back(); + + NFAVertex v = *rv->second.exits.begin(); + + set<ue2_literal> lits = getLiteralSet(g, v); + compressAndScore(lits); + if (lits.empty()) { + next_region: + continue; + } + for (const auto &lit : lits) { + if (lit.length() <= 3 || minStringPeriod(lit) < 2) { + goto next_region; + } + } + + if (rv->second.enters.empty() + || find(rv->second.full.begin(), rv->second.full.end(), g.startDs) + != rv->second.full.end()) { + continue; + } + + if (!isMandRegionBetween(info.begin(), rv) + && info.begin()->second.optional) { + continue; + } + + /* check to see if it is a reasonable size */ + auto prefix = + makePrefix(g, regions, rv->second, next(rv)->second, false); + + NGHolder g_rev; + reverseHolder(*prefix, g_rev); + anchorStarts(g_rev); + renumber_vertices(g_rev); - g_rev.kind = NFA_REV_PREFIX; - reduceGraphEquivalences(g_rev, cc); - removeRedundancy(g_rev, SOM_NONE); - - if (num_vertices(g_rev) > 128) { /* too big */ - continue; - } - - return rv; - } - - return info.end(); -} - -static -unique_ptr<NGHolder> makePrefixForChain(NGHolder &g, + g_rev.kind = NFA_REV_PREFIX; + reduceGraphEquivalences(g_rev, cc); + removeRedundancy(g_rev, SOM_NONE); + + if (num_vertices(g_rev) > 128) { /* too big */ + continue; + } + + return rv; + } + + return info.end(); +} + +static +unique_ptr<NGHolder> makePrefixForChain(NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, - const map<u32, region_info> &info, - const map<u32, region_info>::const_iterator &picked, - vector<DepthMinMax> *depths, bool prefix_by_rev, - ReportManager &rm) { - DEBUG_PRINTF("making prefix for chain attempt\n"); 
- auto prefix = - makePrefix(g, regions, picked->second, next(picked)->second, false); - - /* For the root SOM plan, we use a temporary SOM slot to start with so that - * we don't have to do any complicated rollback operations if the call to - * doSomPlanning() below fails. The temporary SOM slot is replaced with a - * real one afterwards. */ - const u32 temp_som_loc = UINT32_MAX; - setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE, - temp_som_loc, *depths, prefix_by_rev); - - /* handle direct edge to accepts from region */ - if (edge(picked->second.exits.front(), g.accept, g).second - || edge(picked->second.exits.front(), g.acceptEod, g).second) { - map<u32, region_info>::const_iterator it = picked; - do { - makeSomRelReports(rm, g, it->second.exits, *depths); - } while (it != info.begin() && it->second.optional && (it--)->first); - } - - depths->clear(); /* renumbering invalidates depths */ + const map<u32, region_info> &info, + const map<u32, region_info>::const_iterator &picked, + vector<DepthMinMax> *depths, bool prefix_by_rev, + ReportManager &rm) { + DEBUG_PRINTF("making prefix for chain attempt\n"); + auto prefix = + makePrefix(g, regions, picked->second, next(picked)->second, false); + + /* For the root SOM plan, we use a temporary SOM slot to start with so that + * we don't have to do any complicated rollback operations if the call to + * doSomPlanning() below fails. The temporary SOM slot is replaced with a + * real one afterwards. 
*/ + const u32 temp_som_loc = UINT32_MAX; + setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE, + temp_som_loc, *depths, prefix_by_rev); + + /* handle direct edge to accepts from region */ + if (edge(picked->second.exits.front(), g.accept, g).second + || edge(picked->second.exits.front(), g.acceptEod, g).second) { + map<u32, region_info>::const_iterator it = picked; + do { + makeSomRelReports(rm, g, it->second.exits, *depths); + } while (it != info.begin() && it->second.optional && (it--)->first); + } + + depths->clear(); /* renumbering invalidates depths */ renumber_vertices(*prefix); - - DEBUG_PRINTF("done\n"); - return prefix; -} - + + DEBUG_PRINTF("done\n"); + return prefix; +} + sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, - som_type som) { - assert(som); - DEBUG_PRINTF("som hello\n"); - ReportManager &rm = ng.rm; - SomSlotManager &ssm = ng.ssm; - const CompileContext &cc = ng.cc; - - // Special case: if g is completely anchored or begins with a dot-star, we - // know that we have an absolute SOM of zero all the time. + som_type som) { + assert(som); + DEBUG_PRINTF("som hello\n"); + ReportManager &rm = ng.rm; + SomSlotManager &ssm = ng.ssm; + const CompileContext &cc = ng.cc; + + // Special case: if g is completely anchored or begins with a dot-star, we + // know that we have an absolute SOM of zero all the time. if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) { - makeSomAbsReports(rm, g, g.accept); - makeSomAbsReports(rm, g, g.acceptEod); - return SOMBE_HANDLED_INTERNAL; - } - - if (!cc.grey.allowSomChain) { - return SOMBE_FAIL; - } - - // A pristine copy of the input graph, which must be restored to in paths - // that return false. Also used as the forward graph for som rev nfa - // construction. - NGHolder g_pristine; - cloneHolder(g_pristine, g); - - vector<DepthMinMax> depths = getDistancesFromSOM(g); - - // try a redundancy pass. 
- if (addSomRedundancy(g, depths)) { - depths = getDistancesFromSOM(g); // recalc - } - - auto regions = assignRegions(g); - - dumpHolder(g, regions, 11, "som_explode", cc.grey); - - map<u32, region_info> info; - buildRegionMapping(g, regions, info); - - map<u32, region_info>::const_iterator picked - = pickInitialSomCut(g, regions, info, depths); - DEBUG_PRINTF("picked %u\n", picked->first); - if (picked == info.end() || picked->second.exits.empty()) { - DEBUG_PRINTF("no regions/no progress possible\n"); - clear_graph(g); - cloneHolder(g, g_pristine); - if (doSomRevNfa(ng, g, cc)) { - return SOMBE_HANDLED_INTERNAL; - } else { - return SOMBE_FAIL; - } - } - - if (finalRegion(g, regions, picked->second.exits[0])) { - makeSomRelReports(rm, g, g.accept, depths); - makeSomRelReports(rm, g, g.acceptEod, depths); - return SOMBE_HANDLED_INTERNAL; - } - - if (doSomRevNfa(ng, g_pristine, cc)) { - clear_graph(g); - cloneHolder(g, g_pristine); - return SOMBE_HANDLED_INTERNAL; - } - - bool prefix_by_rev = false; - map<u32, region_info>::const_iterator picked_old = picked; - map<u32, region_info>::const_iterator rev_pick - = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc); - if (rev_pick != info.end()) { - DEBUG_PRINTF("found later rev prefix cut point\n"); - assert(rev_pick != picked); - picked = rev_pick; - prefix_by_rev = true; - } else { - /* sanity checks for picked region, these checks have already been done - * if we are using a prefix reverse nfa. 
*/ - if (picked->second.enters.empty() - || find(picked->second.full.begin(), picked->second.full.end(), - g.startDs) != picked->second.full.end()) { - clear_graph(g); - cloneHolder(g, g_pristine); - return SOMBE_FAIL; - } - - if (!isMandRegionBetween(info.begin(), picked) - && info.begin()->second.optional) { - clear_graph(g); - cloneHolder(g, g_pristine); - return SOMBE_FAIL; - } - } - - DEBUG_PRINTF("region %u is the final\n", picked->first); - - shared_ptr<NGHolder> prefix = makePrefixForChain( - g, regions, info, picked, &depths, prefix_by_rev, rm); - /* note depths cleared as we have renumbered */ - - CharReach escapes; - bool stuck = isPossibleLock(g, picked, info, &escapes); - if (stuck) { - DEBUG_PRINTF("investigating potential lock\n"); - - NGHolder gg; - fillHolderForLockCheck(&gg, g, info, picked); - - stuck = firstMatchIsFirst(gg); - } - - if (stuck && escapes.none()) { - /* leads directly to .* --> woot */ - DEBUG_PRINTF("initial slot is full lock\n"); - u32 som_loc = ssm.getSomSlot(*prefix, escapes, false, - SomSlotManager::NO_PARENT); - replaceTempSomSlot(rm, *prefix, som_loc); - - /* update all reports on g to report the som_loc's som */ - updateReportToUseRecordedSom(rm, g, som_loc); - - /* create prefix to set the som_loc */ - updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); - if (prefix_by_rev) { + makeSomAbsReports(rm, g, g.accept); + makeSomAbsReports(rm, g, g.acceptEod); + return SOMBE_HANDLED_INTERNAL; + } + + if (!cc.grey.allowSomChain) { + return SOMBE_FAIL; + } + + // A pristine copy of the input graph, which must be restored to in paths + // that return false. Also used as the forward graph for som rev nfa + // construction. + NGHolder g_pristine; + cloneHolder(g_pristine, g); + + vector<DepthMinMax> depths = getDistancesFromSOM(g); + + // try a redundancy pass. 
+ if (addSomRedundancy(g, depths)) { + depths = getDistancesFromSOM(g); // recalc + } + + auto regions = assignRegions(g); + + dumpHolder(g, regions, 11, "som_explode", cc.grey); + + map<u32, region_info> info; + buildRegionMapping(g, regions, info); + + map<u32, region_info>::const_iterator picked + = pickInitialSomCut(g, regions, info, depths); + DEBUG_PRINTF("picked %u\n", picked->first); + if (picked == info.end() || picked->second.exits.empty()) { + DEBUG_PRINTF("no regions/no progress possible\n"); + clear_graph(g); + cloneHolder(g, g_pristine); + if (doSomRevNfa(ng, g, cc)) { + return SOMBE_HANDLED_INTERNAL; + } else { + return SOMBE_FAIL; + } + } + + if (finalRegion(g, regions, picked->second.exits[0])) { + makeSomRelReports(rm, g, g.accept, depths); + makeSomRelReports(rm, g, g.acceptEod, depths); + return SOMBE_HANDLED_INTERNAL; + } + + if (doSomRevNfa(ng, g_pristine, cc)) { + clear_graph(g); + cloneHolder(g, g_pristine); + return SOMBE_HANDLED_INTERNAL; + } + + bool prefix_by_rev = false; + map<u32, region_info>::const_iterator picked_old = picked; + map<u32, region_info>::const_iterator rev_pick + = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc); + if (rev_pick != info.end()) { + DEBUG_PRINTF("found later rev prefix cut point\n"); + assert(rev_pick != picked); + picked = rev_pick; + prefix_by_rev = true; + } else { + /* sanity checks for picked region, these checks have already been done + * if we are using a prefix reverse nfa. 
*/ + if (picked->second.enters.empty() + || find(picked->second.full.begin(), picked->second.full.end(), + g.startDs) != picked->second.full.end()) { + clear_graph(g); + cloneHolder(g, g_pristine); + return SOMBE_FAIL; + } + + if (!isMandRegionBetween(info.begin(), picked) + && info.begin()->second.optional) { + clear_graph(g); + cloneHolder(g, g_pristine); + return SOMBE_FAIL; + } + } + + DEBUG_PRINTF("region %u is the final\n", picked->first); + + shared_ptr<NGHolder> prefix = makePrefixForChain( + g, regions, info, picked, &depths, prefix_by_rev, rm); + /* note depths cleared as we have renumbered */ + + CharReach escapes; + bool stuck = isPossibleLock(g, picked, info, &escapes); + if (stuck) { + DEBUG_PRINTF("investigating potential lock\n"); + + NGHolder gg; + fillHolderForLockCheck(&gg, g, info, picked); + + stuck = firstMatchIsFirst(gg); + } + + if (stuck && escapes.none()) { + /* leads directly to .* --> woot */ + DEBUG_PRINTF("initial slot is full lock\n"); + u32 som_loc = ssm.getSomSlot(*prefix, escapes, false, + SomSlotManager::NO_PARENT); + replaceTempSomSlot(rm, *prefix, som_loc); + + /* update all reports on g to report the som_loc's som */ + updateReportToUseRecordedSom(rm, g, som_loc); + + /* create prefix to set the som_loc */ + updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); + if (prefix_by_rev) { u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); - updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); - } + updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); + } renumber_vertices(*prefix); - if (!ng.addHolder(*prefix)) { - DEBUG_PRINTF("failed to add holder\n"); - clear_graph(g); - cloneHolder(g, g_pristine); - return SOMBE_FAIL; - } - - DEBUG_PRINTF("ok found initial lock\n"); - return SOMBE_HANDLED_INTERNAL; - } - - vector<som_plan> plan; - retry: - // Note: no-one should ever pay attention to the root plan's parent. 
- plan.push_back(som_plan(prefix, escapes, false, 0)); - dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey); - if (!prefix_by_rev) { - if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) { - DEBUG_PRINTF("failed\n"); - clear_graph(g); - cloneHolder(g, g_pristine); - return SOMBE_FAIL; - } - } else { - DEBUG_PRINTF("trying for som plan\n"); - if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey, - DISALLOW_MODIFY_HOLDER)) { - /* Note: the larger prefixes generated by reverse nfas may not - * advance as fair as the original prefix - so we should retry - * with a smaller prefix. */ - - prefix_by_rev = false; - stuck = false; /* if we reached a lock, then prefix_by_rev would not - * have advanced. */ - picked = picked_old; - plan.clear(); - depths = getDistancesFromSOM(g); /* due to renumbering, need to - * regenerate */ - prefix = makePrefixForChain(g, regions, info, picked, &depths, - prefix_by_rev, rm); - escapes.clear(); - DEBUG_PRINTF("retrying\n"); - goto retry; - } - } - DEBUG_PRINTF("som planning ok\n"); - - /* if the initial prefix is weak is if sombe approaches are better */ - if (findMinWidth(*prefix) <= depth(2)) { - DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n"); - NGHolder g2; - cloneHolder(g2, g_pristine); - if (trySombe(ng, g2, som)) { - return SOMBE_HANDLED_ALL; - } - } - - /* From this point we know that we are going to succeed or die horribly with - * a pattern too large. Anything done past this point can be considered - * committed to the compile. */ - - regions = assignRegions(g); // Update as g may have changed. 
- - DEBUG_PRINTF("-- get slot for initial plan\n"); - u32 som_loc; - if (plan[0].is_reset) { - som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions, - picked->first, &plan[0].no_implement); - } else { - som_loc = ssm.getSomSlot(*prefix, escapes, false, - SomSlotManager::NO_PARENT); - } - - replaceTempSomSlot(rm, *prefix, som_loc); - - if (plan.front().is_reset) { - updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); - } - if (prefix_by_rev && !plan.front().no_implement) { + if (!ng.addHolder(*prefix)) { + DEBUG_PRINTF("failed to add holder\n"); + clear_graph(g); + cloneHolder(g, g_pristine); + return SOMBE_FAIL; + } + + DEBUG_PRINTF("ok found initial lock\n"); + return SOMBE_HANDLED_INTERNAL; + } + + vector<som_plan> plan; + retry: + // Note: no-one should ever pay attention to the root plan's parent. + plan.push_back(som_plan(prefix, escapes, false, 0)); + dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey); + if (!prefix_by_rev) { + if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) { + DEBUG_PRINTF("failed\n"); + clear_graph(g); + cloneHolder(g, g_pristine); + return SOMBE_FAIL; + } + } else { + DEBUG_PRINTF("trying for som plan\n"); + if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey, + DISALLOW_MODIFY_HOLDER)) { + /* Note: the larger prefixes generated by reverse nfas may not + * advance as fair as the original prefix - so we should retry + * with a smaller prefix. */ + + prefix_by_rev = false; + stuck = false; /* if we reached a lock, then prefix_by_rev would not + * have advanced. 
*/ + picked = picked_old; + plan.clear(); + depths = getDistancesFromSOM(g); /* due to renumbering, need to + * regenerate */ + prefix = makePrefixForChain(g, regions, info, picked, &depths, + prefix_by_rev, rm); + escapes.clear(); + DEBUG_PRINTF("retrying\n"); + goto retry; + } + } + DEBUG_PRINTF("som planning ok\n"); + + /* if the initial prefix is weak is if sombe approaches are better */ + if (findMinWidth(*prefix) <= depth(2)) { + DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n"); + NGHolder g2; + cloneHolder(g2, g_pristine); + if (trySombe(ng, g2, som)) { + return SOMBE_HANDLED_ALL; + } + } + + /* From this point we know that we are going to succeed or die horribly with + * a pattern too large. Anything done past this point can be considered + * committed to the compile. */ + + regions = assignRegions(g); // Update as g may have changed. + + DEBUG_PRINTF("-- get slot for initial plan\n"); + u32 som_loc; + if (plan[0].is_reset) { + som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions, + picked->first, &plan[0].no_implement); + } else { + som_loc = ssm.getSomSlot(*prefix, escapes, false, + SomSlotManager::NO_PARENT); + } + + replaceTempSomSlot(rm, *prefix, som_loc); + + if (plan.front().is_reset) { + updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); + } + if (prefix_by_rev && !plan.front().no_implement) { u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); - updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); - } - + updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); + } + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); - - DEBUG_PRINTF("success\n"); - return SOMBE_HANDLED_INTERNAL; -} - + + DEBUG_PRINTF("success\n"); + return SOMBE_HANDLED_INTERNAL; +} + sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som) { - assert(som); - - DEBUG_PRINTF("som+haig hello\n"); - - // A pristine copy of the input graph, which must be restored to in paths - // that return false. 
Also used as the forward graph for som rev nfa - // construction. - NGHolder g_pristine; - cloneHolder(g_pristine, g); - - if (trySombe(ng, g, som)) { - return SOMBE_HANDLED_ALL; - } - - if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) { - return SOMBE_FAIL; - } - - // know that we have an absolute SOM of zero all the time. - assert(edge(g.startDs, g.startDs, g).second); - - vector<DepthMinMax> depths = getDistancesFromSOM(g); - - // try a redundancy pass. - if (addSomRedundancy(g, depths)) { - depths = getDistancesFromSOM(g); - } - - auto regions = assignRegions(g); - - dumpHolder(g, regions, 21, "som_explode", ng.cc.grey); - - map<u32, region_info> info; - buildRegionMapping(g, regions, info, true); - - sombe_rv rv = + assert(som); + + DEBUG_PRINTF("som+haig hello\n"); + + // A pristine copy of the input graph, which must be restored to in paths + // that return false. Also used as the forward graph for som rev nfa + // construction. + NGHolder g_pristine; + cloneHolder(g_pristine, g); + + if (trySombe(ng, g, som)) { + return SOMBE_HANDLED_ALL; + } + + if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) { + return SOMBE_FAIL; + } + + // know that we have an absolute SOM of zero all the time. + assert(edge(g.startDs, g.startDs, g).second); + + vector<DepthMinMax> depths = getDistancesFromSOM(g); + + // try a redundancy pass. 
+ if (addSomRedundancy(g, depths)) { + depths = getDistancesFromSOM(g); + } + + auto regions = assignRegions(g); + + dumpHolder(g, regions, 21, "som_explode", ng.cc.grey); + + map<u32, region_info> info; + buildRegionMapping(g, regions, info, true); + + sombe_rv rv = doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); - if (rv == SOMBE_FAIL) { - clear_graph(g); - cloneHolder(g, g_pristine); - } - return rv; -} - -} // namespace ue2 + if (rv == SOMBE_FAIL) { + clear_graph(g); + cloneHolder(g, g_pristine); + } + return rv; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.h b/contrib/libs/hyperscan/src/nfagraph/ng_som.h index ecae4c67fb..b39c239ba2 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.h @@ -1,81 +1,81 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief SOM ("Start of Match") analysis. - */ - -#ifndef NG_SOM_H -#define NG_SOM_H - -#include "som/som.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief SOM ("Start of Match") analysis. + */ + +#ifndef NG_SOM_H +#define NG_SOM_H + +#include "som/som.h" #include "ue2common.h" - -namespace ue2 { - + +namespace ue2 { + class ExpressionInfo; -class NG; -class NGHolder; +class NG; +class NGHolder; class ReportManager; -struct Grey; - -enum sombe_rv { - SOMBE_FAIL, - SOMBE_HANDLED_INTERNAL, - SOMBE_HANDLED_ALL -}; - -/** \brief Perform SOM analysis on the given graph. - * - * This function will replace report IDs and mutate the graph, then return - * SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still - * needs to be handled (rose, etc). - * - * Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has - * been handled in all its glory. - * - * Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be - * established. - * - * May throw a "Pattern too large" exception if prefixes of the - * pattern are too large to compile. - */ +struct Grey; + +enum sombe_rv { + SOMBE_FAIL, + SOMBE_HANDLED_INTERNAL, + SOMBE_HANDLED_ALL +}; + +/** \brief Perform SOM analysis on the given graph. + * + * This function will replace report IDs and mutate the graph, then return + * SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still + * needs to be handled (rose, etc). + * + * Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has + * been handled in all its glory. 
+ * + * Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be + * established. + * + * May throw a "Pattern too large" exception if prefixes of the + * pattern are too large to compile. + */ sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, - som_type som); - -/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. - * May also throw pattern too large if prefixes of the pattern are too large to - * compile. */ + som_type som); + +/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. + * May also throw pattern too large if prefixes of the pattern are too large to + * compile. */ sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, som_type som); - + void makeReportsSomPass(ReportManager &rm, NGHolder &g); -} // namespace ue2 - -#endif // NG_SOM_H +} // namespace ue2 + +#endif // NG_SOM_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp index 33544ec173..776d54f4f1 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp @@ -1,198 +1,198 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Add redundancy to graph to assist in SOM analysis. - * - * Currently patterns of the form: - * - * /(GET|POST).*foo/ - * - * baffle our SOM analysis as the T's get merged into one by our graph - * reductions and they lose the fixed depth property. One way to solve this is - * to tell the T vertex to go fork itself before we do the main SOM pass. - * - * Overall plan: - * - * 1. build a topo ordering - * 2. walk vertices in topo order - * 3. fix up vertices where possible - * 4. go home - * - * Vertex fix up plan: - * - * 1. consider depth of vertex - * - if vertex is at fixed depth continue to next vertex - * - if vertex can be at an unbounded depth continue to next vertex - * - if vertex has a pred which is not a fixed depth continue to next vertex - * 2. group preds by their depth - * 3. 
for each group: - * - create a clone of the vertex (vertex props and out edges) - * - create edges from each vertex in the group to the clone - * - work out the depth for the clone - * 4. blow away original vertex - * - * Originally in UE-1862. - */ -#include "ng_som_add_redundancy.h" - -#include "ng_dump.h" -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/container.h" -#include "util/depth.h" -#include "util/graph.h" -#include "util/graph_range.h" - -using namespace std; - -namespace ue2 { - -/** \brief Hard limit on the maximum number of new vertices to create. */ -static const size_t MAX_NEW_VERTICES = 32; - -static -const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g, - const vector<DepthMinMax> &depths) { - return depths.at(g[v].index); -} - -static -bool hasFloatingPred(NFAVertex v, const NGHolder &g, - const vector<DepthMinMax> &depths) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - const DepthMinMax &d = getDepth(u, g, depths); - if (d.min != d.max) { - return true; - } - } - return false; -} - -static -bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths, - set<NFAVertex> &dead, size_t *numNewVertices) { - map<depth, vector<NFAEdge>> predGroups; - for (const auto &e : in_edges_range(v, g)) { - const DepthMinMax &d = getDepth(source(e, g), g, depths); - assert(d.min == d.max); - predGroups[d.min].push_back(e); - } - - DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size()); - - if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) { - return false; - } - *numNewVertices += predGroups.size(); - - for (auto &group : predGroups) { - const depth &predDepth = group.first; - const vector<NFAEdge> &preds = group.second; - - // Clone v for this depth with all its associated out-edges. 
- u32 clone_idx = depths.size(); // next index to be used - NFAVertex clone = add_vertex(g[v], g); - depth clone_depth = predDepth + 1; - g[clone].index = clone_idx; - depths.push_back(DepthMinMax(clone_depth, clone_depth)); - DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx, - clone_depth.str().c_str()); - - // Add copies of the out-edges from v. - for (const auto &e : out_edges_range(v, g)) { - add_edge(clone, target(e, g), g[e], g); - } - - // Add in-edges from preds in this group. - for (const auto &e : preds) { - add_edge(source(e, g), clone, g[e], g); - } - } - - clear_vertex(v, g); - dead.insert(v); - return true; -} - -bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) { - DEBUG_PRINTF("entry\n"); - - const vector<NFAVertex> ordering = getTopoOrdering(g); - - set<NFAVertex> dead; - size_t numNewVertices = 0; - - for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) { - NFAVertex v = *it; - - if (is_special(v, g)) { - continue; - } + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Add redundancy to graph to assist in SOM analysis. + * + * Currently patterns of the form: + * + * /(GET|POST).*foo/ + * + * baffle our SOM analysis as the T's get merged into one by our graph + * reductions and they lose the fixed depth property. One way to solve this is + * to tell the T vertex to go fork itself before we do the main SOM pass. + * + * Overall plan: + * + * 1. build a topo ordering + * 2. walk vertices in topo order + * 3. fix up vertices where possible + * 4. go home + * + * Vertex fix up plan: + * + * 1. consider depth of vertex + * - if vertex is at fixed depth continue to next vertex + * - if vertex can be at an unbounded depth continue to next vertex + * - if vertex has a pred which is not a fixed depth continue to next vertex + * 2. group preds by their depth + * 3. for each group: + * - create a clone of the vertex (vertex props and out edges) + * - create edges from each vertex in the group to the clone + * - work out the depth for the clone + * 4. blow away original vertex + * + * Originally in UE-1862. + */ +#include "ng_som_add_redundancy.h" + +#include "ng_dump.h" +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/container.h" +#include "util/depth.h" +#include "util/graph.h" +#include "util/graph_range.h" + +using namespace std; + +namespace ue2 { + +/** \brief Hard limit on the maximum number of new vertices to create. 
*/ +static const size_t MAX_NEW_VERTICES = 32; + +static +const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g, + const vector<DepthMinMax> &depths) { + return depths.at(g[v].index); +} + +static +bool hasFloatingPred(NFAVertex v, const NGHolder &g, + const vector<DepthMinMax> &depths) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + const DepthMinMax &d = getDepth(u, g, depths); + if (d.min != d.max) { + return true; + } + } + return false; +} + +static +bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths, + set<NFAVertex> &dead, size_t *numNewVertices) { + map<depth, vector<NFAEdge>> predGroups; + for (const auto &e : in_edges_range(v, g)) { + const DepthMinMax &d = getDepth(source(e, g), g, depths); + assert(d.min == d.max); + predGroups[d.min].push_back(e); + } + + DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size()); + + if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) { + return false; + } + *numNewVertices += predGroups.size(); + + for (auto &group : predGroups) { + const depth &predDepth = group.first; + const vector<NFAEdge> &preds = group.second; + + // Clone v for this depth with all its associated out-edges. + u32 clone_idx = depths.size(); // next index to be used + NFAVertex clone = add_vertex(g[v], g); + depth clone_depth = predDepth + 1; + g[clone].index = clone_idx; + depths.push_back(DepthMinMax(clone_depth, clone_depth)); + DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx, + clone_depth.str().c_str()); + + // Add copies of the out-edges from v. + for (const auto &e : out_edges_range(v, g)) { + add_edge(clone, target(e, g), g[e], g); + } + + // Add in-edges from preds in this group. 
+ for (const auto &e : preds) { + add_edge(source(e, g), clone, g[e], g); + } + } + + clear_vertex(v, g); + dead.insert(v); + return true; +} + +bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) { + DEBUG_PRINTF("entry\n"); + + const vector<NFAVertex> ordering = getTopoOrdering(g); + + set<NFAVertex> dead; + size_t numNewVertices = 0; + + for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) { + NFAVertex v = *it; + + if (is_special(v, g)) { + continue; + } if (!in_degree(v, g)) { - continue; // unreachable, probably killed - } - - const DepthMinMax &d = getDepth(v, g, depths); - + continue; // unreachable, probably killed + } + + const DepthMinMax &d = getDepth(v, g, depths); + DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index, - d.str().c_str()); - - if (d.min == d.max) { - DEBUG_PRINTF("fixed depth\n"); - continue; - } - - if (d.max.is_unreachable()) { - DEBUG_PRINTF("unbounded depth\n"); - continue; - } - - if (hasFloatingPred(v, g, depths)) { - DEBUG_PRINTF("has floating pred\n"); - continue; - } - - if (!forkVertex(v, g, depths, dead, &numNewVertices)) { - DEBUG_PRINTF("new vertex limit reached\n"); - break; - } - } - - assert(numNewVertices <= MAX_NEW_VERTICES); - - if (dead.empty()) { - return false; // no changes made to the graph - } - - remove_vertices(dead, g); - return true; -} - -} // namespace ue2 + d.str().c_str()); + + if (d.min == d.max) { + DEBUG_PRINTF("fixed depth\n"); + continue; + } + + if (d.max.is_unreachable()) { + DEBUG_PRINTF("unbounded depth\n"); + continue; + } + + if (hasFloatingPred(v, g, depths)) { + DEBUG_PRINTF("has floating pred\n"); + continue; + } + + if (!forkVertex(v, g, depths, dead, &numNewVertices)) { + DEBUG_PRINTF("new vertex limit reached\n"); + break; + } + } + + assert(numNewVertices <= MAX_NEW_VERTICES); + + if (dead.empty()) { + return false; // no changes made to the graph + } + + remove_vertices(dead, g); + return true; +} + +} // namespace ue2 diff --git 
a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h index 890dc9c942..bec63ccd18 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h @@ -1,47 +1,47 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Add redundancy to graph to assist in SOM analysis. 
- */ - -#ifndef NG_SOM_ADD_REDUNDANCY_H -#define NG_SOM_ADD_REDUNDANCY_H - -#include "util/depth.h" -#include <vector> - -namespace ue2 { - -class NGHolder; - -bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Add redundancy to graph to assist in SOM analysis. 
+ */ + +#ifndef NG_SOM_ADD_REDUNDANCY_H +#define NG_SOM_ADD_REDUNDANCY_H + +#include "util/depth.h" +#include <vector> + +namespace ue2 { + +class NGHolder; + +bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp index 1e7a41bb0c..3d49bd15db 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp @@ -1,357 +1,357 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Utility functions related to SOM ("Start of Match"). - */ -#include "ng_som_util.h" - -#include "ng_depth.h" -#include "ng_execute.h" -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "util/container.h" -#include "util/graph_range.h" - -using namespace std; - -namespace ue2 { - -static -void wireSuccessorsToStart(NGHolder &g, NFAVertex u) { - for (auto v : adjacent_vertices_range(u, g)) { - add_edge_if_not_present(g.start, v, g); - } -} - -vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) { - // We operate on a temporary copy of the original graph here, so we don't - // have to mutate the original. - NGHolder g; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Utility functions related to SOM ("Start of Match"). + */ +#include "ng_som_util.h" + +#include "ng_depth.h" +#include "ng_execute.h" +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "util/container.h" +#include "util/graph_range.h" + +using namespace std; + +namespace ue2 { + +static +void wireSuccessorsToStart(NGHolder &g, NFAVertex u) { + for (auto v : adjacent_vertices_range(u, g)) { + add_edge_if_not_present(g.start, v, g); + } +} + +vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) { + // We operate on a temporary copy of the original graph here, so we don't + // have to mutate the original. + NGHolder g; unordered_map<NFAVertex, NFAVertex> vmap; // vertex in g_orig to vertex in g - cloneHolder(g, g_orig, &vmap); - - vector<NFAVertex> vstarts; - for (auto v : vertices_range(g)) { - if (is_virtual_start(v, g)) { - vstarts.push_back(v); - } - } - vstarts.push_back(g.startDs); - - // wire the successors of every virtual start or startDs to g.start. 
- for (auto v : vstarts) { - wireSuccessorsToStart(g, v); - } - - // drop the in-edges of every virtual start so that they don't participate - // in the depth calculation. - for (auto v : vstarts) { - clear_in_edges(v, g); - } - + cloneHolder(g, g_orig, &vmap); + + vector<NFAVertex> vstarts; + for (auto v : vertices_range(g)) { + if (is_virtual_start(v, g)) { + vstarts.push_back(v); + } + } + vstarts.push_back(g.startDs); + + // wire the successors of every virtual start or startDs to g.start. + for (auto v : vstarts) { + wireSuccessorsToStart(g, v); + } + + // drop the in-edges of every virtual start so that they don't participate + // in the depth calculation. + for (auto v : vstarts) { + clear_in_edges(v, g); + } + //dumpGraph("som_depth.dot", g); - + // Find depths, indexed by vertex index in g auto temp_depths = calcDepthsFrom(g, g.start); - - // Transfer depths, indexed by vertex index in g_orig. - vector<DepthMinMax> depths(num_vertices(g_orig)); - - for (auto v_orig : vertices_range(g_orig)) { - assert(contains(vmap, v_orig)); - NFAVertex v_new = vmap[v_orig]; - - u32 orig_idx = g_orig[v_orig].index; - - DepthMinMax &d = depths.at(orig_idx); - - if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) { - // StartDs and virtual starts always have zero depth. + + // Transfer depths, indexed by vertex index in g_orig. + vector<DepthMinMax> depths(num_vertices(g_orig)); + + for (auto v_orig : vertices_range(g_orig)) { + assert(contains(vmap, v_orig)); + NFAVertex v_new = vmap[v_orig]; + + u32 orig_idx = g_orig[v_orig].index; + + DepthMinMax &d = depths.at(orig_idx); + + if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) { + // StartDs and virtual starts always have zero depth. 
d = DepthMinMax(depth(0), depth(0)); - } else { - u32 new_idx = g[v_new].index; - d = temp_depths.at(new_idx); - } - } - - return depths; -} - -bool firstMatchIsFirst(const NGHolder &p) { - /* If the first match (by end offset) is not the first match (by start - * offset) then we can't create a lock after it. - * - * Consider: 4009:/(foobar|ob).*bugger/s - * - * We don't care about races on the last byte as they can be resolved easily - * at runtime /(foobar|obar).*hi/ - * - * It should be obvious we don't care about one match being a prefix - * of another as they share the same start offset. - * - * Therefore, the case were we cannot establish that the som does not - * regress is when there exists s1 and s2 in the language of p and s2 is a - * proper infix of s1. - * - * It is tempting to add the further restriction that there does not exist a - * prefix of s1 that is in the language of p (as in which case we would - * presume, the lock has already been set). However, we have no way of - * knowing if the lock can be cleared by some characters, and if so, if it - * is still set. TODO: if we knew the lock's escapes where we could verify - * that the rest of s1 does not clear the lock. (1) - */ - - DEBUG_PRINTF("entry\n"); - - /* If there are any big cycles throw up our hands in despair */ - if (hasBigCycles(p)) { - DEBUG_PRINTF("fail, big cycles\n"); - return false; - } - + } else { + u32 new_idx = g[v_new].index; + d = temp_depths.at(new_idx); + } + } + + return depths; +} + +bool firstMatchIsFirst(const NGHolder &p) { + /* If the first match (by end offset) is not the first match (by start + * offset) then we can't create a lock after it. + * + * Consider: 4009:/(foobar|ob).*bugger/s + * + * We don't care about races on the last byte as they can be resolved easily + * at runtime /(foobar|obar).*hi/ + * + * It should be obvious we don't care about one match being a prefix + * of another as they share the same start offset. 
+ * + * Therefore, the case were we cannot establish that the som does not + * regress is when there exists s1 and s2 in the language of p and s2 is a + * proper infix of s1. + * + * It is tempting to add the further restriction that there does not exist a + * prefix of s1 that is in the language of p (as in which case we would + * presume, the lock has already been set). However, we have no way of + * knowing if the lock can be cleared by some characters, and if so, if it + * is still set. TODO: if we knew the lock's escapes where we could verify + * that the rest of s1 does not clear the lock. (1) + */ + + DEBUG_PRINTF("entry\n"); + + /* If there are any big cycles throw up our hands in despair */ + if (hasBigCycles(p)) { + DEBUG_PRINTF("fail, big cycles\n"); + return false; + } + flat_set<NFAVertex> states; - /* turn on all states (except starts - avoid suffix matches) */ - /* If we were doing (1) we would also except states leading to accepts - - avoid prefix matches */ - for (auto v : vertices_range(p)) { - assert(!is_virtual_start(v, p)); - if (!is_special(v, p)) { + /* turn on all states (except starts - avoid suffix matches) */ + /* If we were doing (1) we would also except states leading to accepts - + avoid prefix matches */ + for (auto v : vertices_range(p)) { + assert(!is_virtual_start(v, p)); + if (!is_special(v, p)) { DEBUG_PRINTF("turning on %zu\n", p[v].index); - states.insert(v); - } - } - - /* run the prefix the main graph */ - states = execute_graph(p, p, states); - - for (auto v : states) { - /* need to check if this vertex may represent an infix match - ie - * it does not have an edge to accept. */ + states.insert(v); + } + } + + /* run the prefix the main graph */ + states = execute_graph(p, p, states); + + for (auto v : states) { + /* need to check if this vertex may represent an infix match - ie + * it does not have an edge to accept. 
*/ DEBUG_PRINTF("check %zu\n", p[v].index); - if (!edge(v, p.accept, p).second) { + if (!edge(v, p.accept, p).second) { DEBUG_PRINTF("fail %zu\n", p[v].index); - return false; - } - } - - DEBUG_PRINTF("done first is first check\n"); - return true; -} - -bool somMayGoBackwards(NFAVertex u, const NGHolder &g, + return false; + } + } + + DEBUG_PRINTF("done first is first check\n"); + return true; +} + +bool somMayGoBackwards(NFAVertex u, const NGHolder &g, const unordered_map<NFAVertex, u32> ®ion_map, - smgb_cache &cache) { - /* Need to ensure all matches of the graph g up to u contain no infixes - * which are also matches of the graph to u. - * - * This is basically the same as firstMatchIsFirst except we g is not - * always a dag. As we haven't gotten around to writing an execute_graph - * that operates on general graphs, we take some (hopefully) conservative - * short cuts. - * - * Note: if the u can be jumped we will take jump edges - * into account as a possibility of som going backwards - * - * TODO: write a generalised ng_execute_graph/make this less hacky - */ - assert(&g == &cache.g); - if (contains(cache.smgb, u)) { - return cache.smgb[u]; - } - + smgb_cache &cache) { + /* Need to ensure all matches of the graph g up to u contain no infixes + * which are also matches of the graph to u. + * + * This is basically the same as firstMatchIsFirst except we g is not + * always a dag. As we haven't gotten around to writing an execute_graph + * that operates on general graphs, we take some (hopefully) conservative + * short cuts. 
+ * + * Note: if the u can be jumped we will take jump edges + * into account as a possibility of som going backwards + * + * TODO: write a generalised ng_execute_graph/make this less hacky + */ + assert(&g == &cache.g); + if (contains(cache.smgb, u)) { + return cache.smgb[u]; + } + DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index); - - set<NFAEdge> be; - BackEdges<set<NFAEdge>> backEdgeVisitor(be); + + set<NFAEdge> be; + BackEdges<set<NFAEdge>> backEdgeVisitor(be); boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); - - bool rv; - if (0) { - exit: - DEBUG_PRINTF("using cached result\n"); - cache.smgb[u] = rv; - return rv; - } - - assert(contains(region_map, u)); - const u32 u_region = region_map.at(u); - - for (const auto &e : be) { - NFAVertex s = source(e, g); - NFAVertex t = target(e, g); - /* only need to worry about big cycles including/before u */ + + bool rv; + if (0) { + exit: + DEBUG_PRINTF("using cached result\n"); + cache.smgb[u] = rv; + return rv; + } + + assert(contains(region_map, u)); + const u32 u_region = region_map.at(u); + + for (const auto &e : be) { + NFAVertex s = source(e, g); + NFAVertex t = target(e, g); + /* only need to worry about big cycles including/before u */ DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index); - if (s != t && region_map.at(s) <= u_region) { - DEBUG_PRINTF("eek big cycle\n"); - rv = true; /* big cycle -> eek */ - goto exit; - } - } - + if (s != t && region_map.at(s) <= u_region) { + DEBUG_PRINTF("eek big cycle\n"); + rv = true; /* big cycle -> eek */ + goto exit; + } + } + unordered_map<NFAVertex, NFAVertex> orig_to_copy; - NGHolder c_g; - cloneHolder(c_g, g, &orig_to_copy); - + NGHolder c_g; + cloneHolder(c_g, g, &orig_to_copy); + /* treat virtual starts as unconditional - wire to startDs instead */ - for (NFAVertex v : vertices_range(g)) { - if (!is_virtual_start(v, g)) { - continue; - } - NFAVertex c_v = orig_to_copy[v]; - orig_to_copy[v] = c_g.startDs; - for 
(NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) { - add_edge_if_not_present(c_g.startDs, c_w, c_g); - } - clear_vertex(c_v, c_g); - } - + for (NFAVertex v : vertices_range(g)) { + if (!is_virtual_start(v, g)) { + continue; + } + NFAVertex c_v = orig_to_copy[v]; + orig_to_copy[v] = c_g.startDs; + for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) { + add_edge_if_not_present(c_g.startDs, c_w, c_g); + } + clear_vertex(c_v, c_g); + } + /* treat u as the only accept state */ - NFAVertex c_u = orig_to_copy[u]; - clear_in_edges(c_g.acceptEod, c_g); - add_edge(c_g.accept, c_g.acceptEod, c_g); - clear_in_edges(c_g.accept, c_g); - clear_out_edges(c_u, c_g); - if (hasSelfLoop(u, g)) { - add_edge(c_u, c_u, c_g); - } - add_edge(c_u, c_g.accept, c_g); - - set<NFAVertex> u_succ; - insert(&u_succ, adjacent_vertices(u, g)); - u_succ.erase(u); - - for (auto t : inv_adjacent_vertices_range(u, g)) { - if (t == u) { - continue; - } - for (auto v : adjacent_vertices_range(t, g)) { - if (contains(u_succ, v)) { + NFAVertex c_u = orig_to_copy[u]; + clear_in_edges(c_g.acceptEod, c_g); + add_edge(c_g.accept, c_g.acceptEod, c_g); + clear_in_edges(c_g.accept, c_g); + clear_out_edges(c_u, c_g); + if (hasSelfLoop(u, g)) { + add_edge(c_u, c_u, c_g); + } + add_edge(c_u, c_g.accept, c_g); + + set<NFAVertex> u_succ; + insert(&u_succ, adjacent_vertices(u, g)); + u_succ.erase(u); + + for (auto t : inv_adjacent_vertices_range(u, g)) { + if (t == u) { + continue; + } + for (auto v : adjacent_vertices_range(t, g)) { + if (contains(u_succ, v)) { /* due to virtual starts being aliased with normal starts in the * copy of the graph, we may have already added the edges. 
*/ add_edge_if_not_present(orig_to_copy[t], c_g.accept, c_g); - break; - } - } - } - - pruneUseless(c_g); - - be.clear(); + break; + } + } + } + + pruneUseless(c_g); + + be.clear(); boost::depth_first_search(c_g, visitor(backEdgeVisitor) .root_vertex(c_g.start)); - - for (const auto &e : be) { - NFAVertex s = source(e, c_g); - NFAVertex t = target(e, c_g); + + for (const auto &e : be) { + NFAVertex s = source(e, c_g); + NFAVertex t = target(e, c_g); DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index); - if (s != t) { - assert(0); - DEBUG_PRINTF("eek big cycle\n"); - rv = true; /* big cycle -> eek */ - goto exit; - } - } - - DEBUG_PRINTF("checking acyclic+selfloop graph\n"); - - rv = !firstMatchIsFirst(c_g); - DEBUG_PRINTF("som may regress? %d\n", (int)rv); - goto exit; -} - -bool sentClearsTail(const NGHolder &g, + if (s != t) { + assert(0); + DEBUG_PRINTF("eek big cycle\n"); + rv = true; /* big cycle -> eek */ + goto exit; + } + } + + DEBUG_PRINTF("checking acyclic+selfloop graph\n"); + + rv = !firstMatchIsFirst(c_g); + DEBUG_PRINTF("som may regress? %d\n", (int)rv); + goto exit; +} + +bool sentClearsTail(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ion_map, - const NGHolder &sent, u32 last_head_region, - u32 *bad_region) { - /* if a subsequent match from the prefix clears the rest of the pattern - * we can just keep track of the last match of the prefix. - * To see if this property holds, we could: - * - * 1A: turn on all states in the tail and run all strings that may - * match the prefix past the tail, if we are still in any states then - * this property does not hold. - * - * 1B: we turn on the initial states of the tail and run any strings which - * may finish any partial matches in the prefix and see if we end up with - * anything which would also imply that this property does not hold. - * - * OR - * - * 2: we just turn everything and run the prefix inputs past it and see what - * we are left with. 
I think that is equivalent to scheme 1 and is easier to - * implement. TODO: ponder - * - * Anyway, we are going with scheme 2 until further notice. - */ - - u32 first_bad_region = ~0U; + const NGHolder &sent, u32 last_head_region, + u32 *bad_region) { + /* if a subsequent match from the prefix clears the rest of the pattern + * we can just keep track of the last match of the prefix. + * To see if this property holds, we could: + * + * 1A: turn on all states in the tail and run all strings that may + * match the prefix past the tail, if we are still in any states then + * this property does not hold. + * + * 1B: we turn on the initial states of the tail and run any strings which + * may finish any partial matches in the prefix and see if we end up with + * anything which would also imply that this property does not hold. + * + * OR + * + * 2: we just turn everything and run the prefix inputs past it and see what + * we are left with. I think that is equivalent to scheme 1 and is easier to + * implement. TODO: ponder + * + * Anyway, we are going with scheme 2 until further notice. + */ + + u32 first_bad_region = ~0U; flat_set<NFAVertex> states; - /* turn on all states */ - DEBUG_PRINTF("region %u is cutover\n", last_head_region); - for (auto v : vertices_range(g)) { - if (v != g.accept && v != g.acceptEod) { - states.insert(v); - } - } - - for (UNUSED auto v : states) { + /* turn on all states */ + DEBUG_PRINTF("region %u is cutover\n", last_head_region); + for (auto v : vertices_range(g)) { + if (v != g.accept && v != g.acceptEod) { + states.insert(v); + } + } + + for (UNUSED auto v : states) { DEBUG_PRINTF("start state: %zu\n", g[v].index); - } - - /* run the prefix the main graph */ - states = execute_graph(g, sent, states); - - /* .. 
and check if we are left with anything in the tail region */ - for (auto v : states) { - if (v == g.start || v == g.startDs) { - continue; /* not in tail */ - } - + } + + /* run the prefix the main graph */ + states = execute_graph(g, sent, states); + + /* .. and check if we are left with anything in the tail region */ + for (auto v : states) { + if (v == g.start || v == g.startDs) { + continue; /* not in tail */ + } + DEBUG_PRINTF("v %zu is still on\n", g[v].index); - assert(v != g.accept && v != g.acceptEod); /* no cr */ - - assert(contains(region_map, v)); - const u32 v_region = region_map.at(v); - if (v_region > last_head_region) { - DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region); - first_bad_region = min(first_bad_region, v_region); - } - } - - if (first_bad_region != ~0U) { - DEBUG_PRINTF("first bad region is %u\n", first_bad_region); - *bad_region = first_bad_region; - return false; - } - - return true; -} - -} // namespace ue2 + assert(v != g.accept && v != g.acceptEod); /* no cr */ + + assert(contains(region_map, v)); + const u32 v_region = region_map.at(v); + if (v_region > last_head_region) { + DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region); + first_bad_region = min(first_bad_region, v_region); + } + } + + if (first_bad_region != ~0U) { + DEBUG_PRINTF("first bad region is %u\n", first_bad_region); + *bad_region = first_bad_region; + return false; + } + + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h index e2d38642c4..3f4fcb5b3a 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h @@ -1,84 +1,84 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain 
the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Utility functions related to SOM ("Start of Match"). - */ - -#ifndef NG_SOM_UTIL_H -#define NG_SOM_UTIL_H - -#include "ng_util.h" -#include "util/depth.h" - -#include <map> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Utility functions related to SOM ("Start of Match"). + */ + +#ifndef NG_SOM_UTIL_H +#define NG_SOM_UTIL_H + +#include "ng_util.h" +#include "util/depth.h" + +#include <map> #include <unordered_map> -#include <vector> - -namespace ue2 { - -class NGHolder; - -/** - * Returns min/max distance from start of match, index by vertex_id. - */ -std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g); - -/** - * Returns true if the first match by end-offset must always be the first match - * by start-offset. 
- */ -bool firstMatchIsFirst(const NGHolder &p); - -struct smgb_cache : public mbsb_cache { - explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {} - std::map<NFAVertex, bool> smgb; -}; - -bool somMayGoBackwards(NFAVertex u, const NGHolder &g, +#include <vector> + +namespace ue2 { + +class NGHolder; + +/** + * Returns min/max distance from start of match, index by vertex_id. + */ +std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g); + +/** + * Returns true if the first match by end-offset must always be the first match + * by start-offset. + */ +bool firstMatchIsFirst(const NGHolder &p); + +struct smgb_cache : public mbsb_cache { + explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {} + std::map<NFAVertex, bool> smgb; +}; + +bool somMayGoBackwards(NFAVertex u, const NGHolder &g, const std::unordered_map<NFAVertex, u32> ®ion_map, - smgb_cache &cache); - -/** - * Returns true if matching 'sent' causes all tail states in the main graph \a - * g to go dead. A tail state is any state with a region greater than - * \a last_head_region. - * - * - The graph \a sent must be a "kinda-DAG", where the only back-edges present - * are self-loops. - * - If the result is false, \a bad_region will be updated with the smallest - * region ID associated with a tail state that is still on. - */ -bool sentClearsTail(const NGHolder &g, + smgb_cache &cache); + +/** + * Returns true if matching 'sent' causes all tail states in the main graph \a + * g to go dead. A tail state is any state with a region greater than + * \a last_head_region. + * + * - The graph \a sent must be a "kinda-DAG", where the only back-edges present + * are self-loops. + * - If the result is false, \a bad_region will be updated with the smallest + * region ID associated with a tail state that is still on. 
+ */ +bool sentClearsTail(const NGHolder &g, const std::unordered_map<NFAVertex, u32> ®ion_map, - const NGHolder &sent, u32 last_head_region, - u32 *bad_region); - -} // namespace ue2 - -#endif // NG_SOM_UTIL_H + const NGHolder &sent, u32 last_head_region, + u32 *bad_region); + +} // namespace ue2 + +#endif // NG_SOM_UTIL_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp index 91a099fc38..73170a9104 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp @@ -1,244 +1,244 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Functions for splitting NFAGraphs into LHS and RHS. - */ -#include "ng_split.h" - -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_util.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" - -#include <map> -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -static -void clearAccepts(NGHolder &g) { - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - g[v].reports.clear(); - } - - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - g[v].reports.clear(); - } - - clear_in_edges(g.accept, g); - clear_in_edges(g.acceptEod, g); - add_edge(g.accept, g.acceptEod, g); -} - -static + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for splitting NFAGraphs into LHS and RHS. + */ +#include "ng_split.h" + +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_util.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" + +#include <map> +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +static +void clearAccepts(NGHolder &g) { + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + g[v].reports.clear(); + } + + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + g[v].reports.clear(); + } + + clear_in_edges(g.accept, g); + clear_in_edges(g.acceptEod, g); + add_edge(g.accept, g.acceptEod, g); +} + +static void filterSplitMap(const NGHolder &g, unordered_map<NFAVertex, NFAVertex> *out_map) { unordered_set<NFAVertex> verts; - insert(&verts, vertices(g)); + insert(&verts, vertices(g)); auto it = out_map->begin(); - while (it != out_map->end()) { + while (it != out_map->end()) { auto jt = it; - ++it; - if (!contains(verts, jt->second)) { - out_map->erase(jt); - } - } -} - -static -void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots, + ++it; + if (!contains(verts, jt->second)) 
{ + out_map->erase(jt); + } + } +} + +static +void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots, const vector<NFAVertex> &rhs_pivots, NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map) { - assert(lhs && lhs_map); - - cloneHolder(*lhs, base, lhs_map); - - clearAccepts(*lhs); - - for (auto pivot : pivots) { + assert(lhs && lhs_map); + + cloneHolder(*lhs, base, lhs_map); + + clearAccepts(*lhs); + + for (auto pivot : pivots) { DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index, - num_vertices(*lhs), lhs_map->size()); - assert(contains(*lhs_map, pivot)); - - for (auto v : rhs_pivots) { - assert(contains(*lhs_map, v)); - remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs); - } - - (*lhs)[(*lhs_map)[pivot]].reports.insert(0); - add_edge((*lhs_map)[pivot], lhs->accept, *lhs); - } - + num_vertices(*lhs), lhs_map->size()); + assert(contains(*lhs_map, pivot)); + + for (auto v : rhs_pivots) { + assert(contains(*lhs_map, v)); + remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs); + } + + (*lhs)[(*lhs_map)[pivot]].reports.insert(0); + add_edge((*lhs_map)[pivot], lhs->accept, *lhs); + } + /* should do the renumbering unconditionally as we know edges are already * misnumbered */ pruneUseless(*lhs, false); renumber_edges(*lhs); renumber_vertices(*lhs); - filterSplitMap(*lhs, lhs_map); - - switch (base.kind) { - case NFA_PREFIX: - case NFA_OUTFIX: - lhs->kind = NFA_PREFIX; - break; - case NFA_INFIX: - case NFA_SUFFIX: - lhs->kind = NFA_INFIX; - break; + filterSplitMap(*lhs, lhs_map); + + switch (base.kind) { + case NFA_PREFIX: + case NFA_OUTFIX: + lhs->kind = NFA_PREFIX; + break; + case NFA_INFIX: + case NFA_SUFFIX: + lhs->kind = NFA_INFIX; + break; case NFA_EAGER_PREFIX: /* Current code should not be assigning eager until well after all the * splitting is done. 
*/ assert(0); lhs->kind = NFA_EAGER_PREFIX; break; - case NFA_REV_PREFIX: + case NFA_REV_PREFIX: case NFA_OUTFIX_RAW: - assert(0); - break; - } -} - -void splitLHS(const NGHolder &base, NFAVertex pivot, + assert(0); + break; + } +} + +void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map) { - vector<NFAVertex> pivots(1, pivot); - vector<NFAVertex> rhs_pivots; - insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base)); - splitLHS(base, pivots, rhs_pivots, lhs, lhs_map); -} - -void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots, + vector<NFAVertex> pivots(1, pivot); + vector<NFAVertex> rhs_pivots; + insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base)); + splitLHS(base, pivots, rhs_pivots, lhs, lhs_map); +} + +void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots, NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { - assert(rhs && rhs_map); - - cloneHolder(*rhs, base, rhs_map); - - clear_out_edges(rhs->start, *rhs); - clear_out_edges(rhs->startDs, *rhs); - add_edge(rhs->start, rhs->startDs, *rhs); - add_edge(rhs->startDs, rhs->startDs, *rhs); - - for (auto pivot : pivots) { - assert(contains(*rhs_map, pivot)); + assert(rhs && rhs_map); + + cloneHolder(*rhs, base, rhs_map); + + clear_out_edges(rhs->start, *rhs); + clear_out_edges(rhs->startDs, *rhs); + add_edge(rhs->start, rhs->startDs, *rhs); + add_edge(rhs->startDs, rhs->startDs, *rhs); + + for (auto pivot : pivots) { + assert(contains(*rhs_map, pivot)); NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); (*rhs)[e].tops.insert(DEFAULT_TOP); - } + } /* should do the renumbering unconditionally as we know edges are already * misnumbered */ pruneUseless(*rhs, false); renumber_edges(*rhs); renumber_vertices(*rhs); - filterSplitMap(*rhs, rhs_map); - - switch (base.kind) { - case NFA_PREFIX: - case NFA_INFIX: - rhs->kind = NFA_INFIX; - break; - case NFA_SUFFIX: - case NFA_OUTFIX: - 
rhs->kind = NFA_SUFFIX; - break; + filterSplitMap(*rhs, rhs_map); + + switch (base.kind) { + case NFA_PREFIX: + case NFA_INFIX: + rhs->kind = NFA_INFIX; + break; + case NFA_SUFFIX: + case NFA_OUTFIX: + rhs->kind = NFA_SUFFIX; + break; case NFA_EAGER_PREFIX: /* Current code should not be assigning eager until well after all the * splitting is done. */ assert(0); rhs->kind = NFA_INFIX; break; - case NFA_REV_PREFIX: + case NFA_REV_PREFIX: case NFA_OUTFIX_RAW: - assert(0); - break; - } -} - -/** \brief Fills \a succ with the common successors of the vertices in \a - * pivots. */ -static -void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots, - vector<NFAVertex> &succ) { - assert(!pivots.empty()); - + assert(0); + break; + } +} + +/** \brief Fills \a succ with the common successors of the vertices in \a + * pivots. */ +static +void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots, + vector<NFAVertex> &succ) { + assert(!pivots.empty()); + set<NFAVertex> adj; set<NFAVertex> adj_temp; - - insert(&adj, adjacent_vertices(pivots.at(0), g)); - - for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) { - NFAVertex pivot = *it; - adj_temp.clear(); - for (auto v : adjacent_vertices_range(pivot, g)) { - if (contains(adj, v)) { - adj_temp.insert(v); - } - } - adj.swap(adj_temp); - } - - succ.insert(succ.end(), adj.begin(), adj.end()); -} - -void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots, + + insert(&adj, adjacent_vertices(pivots.at(0), g)); + + for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) { + NFAVertex pivot = *it; + adj_temp.clear(); + for (auto v : adjacent_vertices_range(pivot, g)) { + if (contains(adj, v)) { + adj_temp.insert(v); + } + } + adj.swap(adj_temp); + } + + succ.insert(succ.end(), adj.begin(), adj.end()); +} + +void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots, NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map, NGHolder *rhs, 
unordered_map<NFAVertex, NFAVertex> *rhs_map) { - DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size()); - - assert(!has_parallel_edge(base)); + DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size()); + + assert(!has_parallel_edge(base)); assert(isCorrectlyTopped(base)); - - /* RHS pivots are built from the common set of successors of pivots. */ - vector<NFAVertex> rhs_pivots; - findCommonSuccessors(base, pivots, rhs_pivots); - - /* generate lhs */ - splitLHS(base, pivots, rhs_pivots, lhs, lhs_map); - - /* generate the rhs */ - splitRHS(base, rhs_pivots, rhs, rhs_map); - - assert(!has_parallel_edge(*lhs)); - assert(!has_parallel_edge(*rhs)); + + /* RHS pivots are built from the common set of successors of pivots. */ + vector<NFAVertex> rhs_pivots; + findCommonSuccessors(base, pivots, rhs_pivots); + + /* generate lhs */ + splitLHS(base, pivots, rhs_pivots, lhs, lhs_map); + + /* generate the rhs */ + splitRHS(base, rhs_pivots, rhs, rhs_map); + + assert(!has_parallel_edge(*lhs)); + assert(!has_parallel_edge(*rhs)); assert(isCorrectlyTopped(*lhs)); assert(isCorrectlyTopped(*rhs)); -} - -void splitGraph(const NGHolder &base, NFAVertex pivot, +} + +void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map, NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { - vector<NFAVertex> pivots(1, pivot); - splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map); -} - -} // namespace ue2 + vector<NFAVertex> pivots(1, pivot); + splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.h b/contrib/libs/hyperscan/src/nfagraph/ng_split.h index 9ddc033257..3867cb76f6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_split.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.h @@ -1,76 +1,76 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * 
modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Functions for splitting NFAGraphs into LHS and RHS. - */ - -#ifndef NG_SPLIT_H -#define NG_SPLIT_H - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for splitting NFAGraphs into LHS and RHS. + */ + +#ifndef NG_SPLIT_H +#define NG_SPLIT_H + #include "ng_holder.h" #include <unordered_map> -#include <vector> - -namespace ue2 { - -class NGHolder; - -/** Note: pivot should be a vertex that dominates acceptEod. Treating 'in' - * allocated to rhs if they are reachable from the pivot. Conversely, a vertex - * is in the lhs if it is reachable from start without going through the - * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS. - * +#include <vector> + +namespace ue2 { + +class NGHolder; + +/** Note: pivot should be a vertex that dominates acceptEod. Treating 'in' + * allocated to rhs if they are reachable from the pivot. 
Conversely, a vertex + * is in the lhs if it is reachable from start without going through the + * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS. + * * Note: The RHS is setup to be triggered by TOP 0 * - * When multiple split vertices are provided: - * - RHS contains all vertices reachable from every pivot - * - LHS contains all vertices which are reachable from start ignoring any - * vertices which have an edge to every pivot - */ -void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, + * When multiple split vertices are provided: + * - RHS contains all vertices reachable from every pivot + * - LHS contains all vertices which are reachable from start ignoring any + * vertices which have an edge to every pivot + */ +void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, std::unordered_map<NFAVertex, NFAVertex> *lhs_map, - NGHolder *rhs, + NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map); - -void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots, - NGHolder *lhs, + +void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots, + NGHolder *lhs, std::unordered_map<NFAVertex, NFAVertex> *lhs_map, - NGHolder *rhs, + NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map); - -void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, + +void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, std::unordered_map<NFAVertex, NFAVertex> *lhs_map); - -void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots, + +void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots, NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map); - -} // namespace ue2 - -#endif // NG_SPLIT_H + +} // namespace ue2 + +#endif // NG_SPLIT_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp index 03495d1441..ac788157b0 100644 --- 
a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp @@ -1,324 +1,324 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA graph state squashing analysis. - * - * The basic idea behind the state squashing is that when we are in a cyclic - * state v there are certain other states which are completely irrelevant. 
This - * is used primarily by the determinisation process to produce smaller DFAs by - * not tracking irrelevant states. It's also used by the LimEx NFA model. - * - * Working out which states we can ignore mainly uses the post-dominator - * analysis. - * - * ### Dot Squash Masks: - * - * The following vertices are added to the squash mask: - * - (1) Any vertex post-dominated by the cyclic dot state - * - (2) Any other vertex post-dominated by the cyclic dot state's successors - * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state - - * provided the predecessor's successors are a subset of the cyclic state's - * successors [For (3), the term successor also includes report information] - * - * (2) and (3) allow us to get squash masks from .* as well as .+ - * - * The squash masks are not optimal especially in the case where there - * alternations on both sides - for example in: - * - * /foo(bar|baz).*(abc|xyz)/s - * - * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators - * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to - * do a more complete analysis based on cutting the graph and seeing which - * vertices are unreachable but the current approach is quick and probably - * adequate. - * - * - * ### Non-Dot Squash Masks: - * - * As for dot states. However, if anything in a pdom tree falls outside the - * character range of the cyclic state the whole pdom tree is ignored. Also when - * considering the predecessor's pdom tree it is necessary to verify that the - * predecessor's character reachability falls within that of the cyclic state. - * - * We could do better in this case by not throwing away the whole pdom tree - - * however the bits which we can keep are not clear from the pdom tree of the - * cyclic state - it probably can be based on the dom or pdom tree of the bad - * vertex. 
- * - * An example of us doing badly is: - * - * /HTTP.*Referer[^\n]*google/s - * - * as '[\\n]*' doesn't get a squash mask at all due to .* but we should be able - * to squash 'Referer'. - * - * ### Extension: - * - * If a state leads solely to a squashable state (or its immediate successors) - * with the same reachability we can make this state a squash state of any of - * the original states squashees which we postdominate. Could probably tighten - * this up but it would require thought. May not need to keep the original - * squasher around but that would also require thought. - * - * ### SOM Notes: - * - * If (left) start of match is required, it is illegal to squash any state which - * may result in an early start of match reaching the squashing state. - */ - -#include "config.h" - -#include "ng_squash.h" - -#include "ng_dominators.h" -#include "ng_dump.h" -#include "ng_holder.h" -#include "ng_prune.h" -#include "ng_region.h" -#include "ng_som_util.h" -#include "ng_util.h" -#include "util/container.h" -#include "util/graph_range.h" -#include "util/report_manager.h" -#include "ue2common.h" - -#include <deque> -#include <map> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA graph state squashing analysis. + * + * The basic idea behind the state squashing is that when we are in a cyclic + * state v there are certain other states which are completely irrelevant. This + * is used primarily by the determinisation process to produce smaller DFAs by + * not tracking irrelevant states. It's also used by the LimEx NFA model. + * + * Working out which states we can ignore mainly uses the post-dominator + * analysis. 
+ * + * ### Dot Squash Masks: + * + * The following vertices are added to the squash mask: + * - (1) Any vertex post-dominated by the cyclic dot state + * - (2) Any other vertex post-dominated by the cyclic dot state's successors + * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state - + * provided the predecessor's successors are a subset of the cyclic state's + * successors [For (3), the term successor also includes report information] + * + * (2) and (3) allow us to get squash masks from .* as well as .+ + * + * The squash masks are not optimal especially in the case where there + * alternations on both sides - for example in: + * + * /foo(bar|baz).*(abc|xyz)/s + * + * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators + * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to + * do a more complete analysis based on cutting the graph and seeing which + * vertices are unreachable but the current approach is quick and probably + * adequate. + * + * + * ### Non-Dot Squash Masks: + * + * As for dot states. However, if anything in a pdom tree falls outside the + * character range of the cyclic state the whole pdom tree is ignored. Also when + * considering the predecessor's pdom tree it is necessary to verify that the + * predecessor's character reachability falls within that of the cyclic state. + * + * We could do better in this case by not throwing away the whole pdom tree - + * however the bits which we can keep are not clear from the pdom tree of the + * cyclic state - it probably can be based on the dom or pdom tree of the bad + * vertex. + * + * An example of us doing badly is: + * + * /HTTP.*Referer[^\n]*google/s + * + * as '[\\n]*' doesn't get a squash mask at all due to .* but we should be able + * to squash 'Referer'. 
+ * + * ### Extension: + * + * If a state leads solely to a squashable state (or its immediate successors) + * with the same reachability we can make this state a squash state of any of + * the original states squashees which we postdominate. Could probably tighten + * this up but it would require thought. May not need to keep the original + * squasher around but that would also require thought. + * + * ### SOM Notes: + * + * If (left) start of match is required, it is illegal to squash any state which + * may result in an early start of match reaching the squashing state. + */ + +#include "config.h" + +#include "ng_squash.h" + +#include "ng_dominators.h" +#include "ng_dump.h" +#include "ng_holder.h" +#include "ng_prune.h" +#include "ng_region.h" +#include "ng_som_util.h" +#include "ng_util.h" +#include "util/container.h" +#include "util/graph_range.h" +#include "util/report_manager.h" +#include "ue2common.h" + +#include <deque> +#include <map> #include <unordered_map> #include <unordered_set> - -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/reverse_graph.hpp> - -using namespace std; - -namespace ue2 { - + +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/reverse_graph.hpp> + +using namespace std; + +namespace ue2 { + using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>; - -static + +static PostDomTree buildPDomTree(const NGHolder &g) { PostDomTree tree; tree.reserve(num_vertices(g)); - + auto postdominators = findPostDominators(g); - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - NFAVertex pdom = postdominators[v]; - if (pdom) { + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + NFAVertex pdom = postdominators[v]; + if (pdom) { DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index); - tree[pdom].insert(v); - } - } + tree[pdom].insert(v); + } + } return tree; -} - -/** - * Builds a squash mask based on the pdom tree of v and the given 
char reach. - * The built squash mask is a bit conservative for non-dot cases and could - * be improved with a bit of thought. - */ -static -void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, - const CharReach &cr, const NFAStateSet &init, - const vector<NFAVertex> &vByIndex, const PostDomTree &tree, - som_type som, const vector<DepthMinMax> &som_depths, +} + +/** + * Builds a squash mask based on the pdom tree of v and the given char reach. + * The built squash mask is a bit conservative for non-dot cases and could + * be improved with a bit of thought. + */ +static +void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, + const CharReach &cr, const NFAStateSet &init, + const vector<NFAVertex> &vByIndex, const PostDomTree &tree, + som_type som, const vector<DepthMinMax> &som_depths, const unordered_map<NFAVertex, u32> ®ion_map, - smgb_cache &cache) { + smgb_cache &cache) { DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index); - - vector<NFAVertex> q; - + + vector<NFAVertex> q; + auto it = tree.find(v); - if (it != tree.end()) { - q.insert(q.end(), it->second.begin(), it->second.end()); - } - - const u32 v_index = g[v].index; - - while (!q.empty()) { - NFAVertex u = q.back(); - q.pop_back(); - const CharReach &cru = g[u].char_reach; - - if ((cru & ~cr).any()) { - /* bail: bad cr on vertex u */ - /* TODO: this could be better - * - * we still need to ensure that we record any paths leading to u. - * Hence all vertices R which can reach u must be excluded from the - * squash mask. Note: R != pdom(u) and there may exist an x in (R - - * pdom(u)) which is in pdom(y) where y is in q. Clear ? - */ - mask.set(); - return; - } - - const u32 u_index = g[u].index; - - if (som) { - /* We cannot add a state u to the squash mask of v if it may have an - * earlier start of match offset. 
ie for us to add a state u to v - * maxSomDist(u) <= minSomDist(v) - */ - const depth &max_som_dist_u = som_depths[u_index].max; - const depth &min_som_dist_v = som_depths[v_index].min; - - if (max_som_dist_u.is_infinite()) { - /* it is hard to tell due to the INF if u can actually store an - * earlier SOM than w (state we are building the squash mask - * for) - need to think more deeply - */ - - if (mustBeSetBefore(u, v, g, cache) - && !somMayGoBackwards(u, g, region_map, cache)) { - DEBUG_PRINTF("u %u v %u\n", u_index, v_index); - goto squash_ok; - } - } - - if (max_som_dist_u > min_som_dist_v) { - /* u can't be squashed as it may be storing an earlier SOM */ - goto add_children_to_queue; - } - - } - - squash_ok: - mask.set(u_index); - DEBUG_PRINTF("pdom'ed %u\n", u_index); - add_children_to_queue: - it = tree.find(u); - if (it != tree.end()) { - q.insert(q.end(), it->second.begin(), it->second.end()); - } - } - - if (cr.all()) { - /* the init states aren't in the pdom tree. If all their succ states - * are set (or v), we can consider them post dominated */ - - /* Note: init states will always result in a later som */ - for (size_t i = init.find_first(); i != init.npos; - i = init.find_next(i)) { - /* Yes vacuous patterns do exist */ - NFAVertex iv = vByIndex[i]; - for (auto w : adjacent_vertices_range(iv, g)) { - if (w == g.accept || w == g.acceptEod) { - DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i); - goto next_init_state; - } - - u32 vert_id = g[w].index; - if (w != iv && w != v && !mask.test(vert_id)) { - DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id); - goto next_init_state; - } - } - DEBUG_PRINTF("pdom'ed %zu\n", i); - mask.set(i); - next_init_state:; - } - } - - mask.flip(); -} - -static -void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) { - for (auto w : adjacent_vertices_range(v, g)) { - if (!is_special(w, g)) { - succ.set(g[w].index); - } - } -} - -static -void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) 
{ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_special(u, g)) { - pred.set(g[u].index); - } - } -} - -static -void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, - const PostDomTree &pdom_tree, const NFAStateSet &init, + if (it != tree.end()) { + q.insert(q.end(), it->second.begin(), it->second.end()); + } + + const u32 v_index = g[v].index; + + while (!q.empty()) { + NFAVertex u = q.back(); + q.pop_back(); + const CharReach &cru = g[u].char_reach; + + if ((cru & ~cr).any()) { + /* bail: bad cr on vertex u */ + /* TODO: this could be better + * + * we still need to ensure that we record any paths leading to u. + * Hence all vertices R which can reach u must be excluded from the + * squash mask. Note: R != pdom(u) and there may exist an x in (R - + * pdom(u)) which is in pdom(y) where y is in q. Clear ? + */ + mask.set(); + return; + } + + const u32 u_index = g[u].index; + + if (som) { + /* We cannot add a state u to the squash mask of v if it may have an + * earlier start of match offset. 
ie for us to add a state u to v + * maxSomDist(u) <= minSomDist(v) + */ + const depth &max_som_dist_u = som_depths[u_index].max; + const depth &min_som_dist_v = som_depths[v_index].min; + + if (max_som_dist_u.is_infinite()) { + /* it is hard to tell due to the INF if u can actually store an + * earlier SOM than w (state we are building the squash mask + * for) - need to think more deeply + */ + + if (mustBeSetBefore(u, v, g, cache) + && !somMayGoBackwards(u, g, region_map, cache)) { + DEBUG_PRINTF("u %u v %u\n", u_index, v_index); + goto squash_ok; + } + } + + if (max_som_dist_u > min_som_dist_v) { + /* u can't be squashed as it may be storing an earlier SOM */ + goto add_children_to_queue; + } + + } + + squash_ok: + mask.set(u_index); + DEBUG_PRINTF("pdom'ed %u\n", u_index); + add_children_to_queue: + it = tree.find(u); + if (it != tree.end()) { + q.insert(q.end(), it->second.begin(), it->second.end()); + } + } + + if (cr.all()) { + /* the init states aren't in the pdom tree. If all their succ states + * are set (or v), we can consider them post dominated */ + + /* Note: init states will always result in a later som */ + for (size_t i = init.find_first(); i != init.npos; + i = init.find_next(i)) { + /* Yes vacuous patterns do exist */ + NFAVertex iv = vByIndex[i]; + for (auto w : adjacent_vertices_range(iv, g)) { + if (w == g.accept || w == g.acceptEod) { + DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i); + goto next_init_state; + } + + u32 vert_id = g[w].index; + if (w != iv && w != v && !mask.test(vert_id)) { + DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id); + goto next_init_state; + } + } + DEBUG_PRINTF("pdom'ed %zu\n", i); + mask.set(i); + next_init_state:; + } + } + + mask.flip(); +} + +static +void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) { + for (auto w : adjacent_vertices_range(v, g)) { + if (!is_special(w, g)) { + succ.set(g[w].index); + } + } +} + +static +void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) 
{ + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_special(u, g)) { + pred.set(g[u].index); + } + } +} + +static +void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, + const PostDomTree &pdom_tree, const NFAStateSet &init, unordered_map<NFAVertex, NFAStateSet> *squash, som_type som, const vector<DepthMinMax> &som_depths, const unordered_map<NFAVertex, u32> ®ion_map, - smgb_cache &cache) { - deque<NFAVertex> remaining; - for (const auto &m : *squash) { - remaining.push_back(m.first); - } - - while (!remaining.empty()) { - NFAVertex v = remaining.back(); - remaining.pop_back(); - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (is_special(u, g)) { - continue; - } - - if (g[v].char_reach != g[u].char_reach) { - continue; - } - - if (out_degree(u, g) != 1) { - continue; - } - - NFAStateSet u_squash(init.size()); + smgb_cache &cache) { + deque<NFAVertex> remaining; + for (const auto &m : *squash) { + remaining.push_back(m.first); + } + + while (!remaining.empty()) { + NFAVertex v = remaining.back(); + remaining.pop_back(); + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (is_special(u, g)) { + continue; + } + + if (g[v].char_reach != g[u].char_reach) { + continue; + } + + if (out_degree(u, g) != 1) { + continue; + } + + NFAStateSet u_squash(init.size()); size_t u_index = g[u].index; - - buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, - pdom_tree, som, som_depths, region_map, cache); - - u_squash.set(u_index); /* never clear ourselves */ - - if ((~u_squash).any()) { // i.e. some bits unset in mask + + buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, + pdom_tree, som, som_depths, region_map, cache); + + u_squash.set(u_index); /* never clear ourselves */ + + if ((~u_squash).any()) { // i.e. 
some bits unset in mask DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index, - g[v].index); - (*squash)[u] = u_squash; - remaining.push_back(u); - } - } - } -} - + g[v].index); + (*squash)[u] = u_squash; + remaining.push_back(u); + } + } + } +} + /* If there are redundant states in the graph, it may be possible for two * sibling .* states to try to squash each other -- which should be prevented. * @@ -330,7 +330,7 @@ void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, for (auto it = squash.begin(); it != squash.end();) { NFAVertex a = it->first; u32 a_index = g[a].index; - + NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ for (auto b_index = a_squash.find_first(); b_index != a_squash.npos; b_index = a_squash.find_next(b_index)) { @@ -365,336 +365,336 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) { unordered_map<NFAVertex, NFAStateSet> squash; - // Number of bits to use for all our masks. If we're a triggered graph, - // tops have already been assigned, so we don't have to account for them. - const u32 numStates = num_vertices(g); - - // Build post-dominator tree. + // Number of bits to use for all our masks. If we're a triggered graph, + // tops have already been assigned, so we don't have to account for them. + const u32 numStates = num_vertices(g); + + // Build post-dominator tree. auto pdom_tree = buildPDomTree(g); - - // Build list of vertices by state ID and a set of init states. + + // Build list of vertices by state ID and a set of init states. vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex()); - NFAStateSet initStates(numStates); - smgb_cache cache(g); - - // Mappings used for SOM mode calculations, otherwise left empty. 
- unordered_map<NFAVertex, u32> region_map; - vector<DepthMinMax> som_depths; - if (som) { - region_map = assignRegions(g); - som_depths = getDistancesFromSOM(g); - } - - for (auto v : vertices_range(g)) { - const u32 vert_id = g[v].index; - DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates); - assert(vert_id < numStates); - vByIndex[vert_id] = v; - - if (is_any_start(v, g) || !in_degree(v, g)) { - initStates.set(vert_id); - } - } - - for (u32 i = 0; i < numStates; i++) { - NFAVertex v = vByIndex[i]; + NFAStateSet initStates(numStates); + smgb_cache cache(g); + + // Mappings used for SOM mode calculations, otherwise left empty. + unordered_map<NFAVertex, u32> region_map; + vector<DepthMinMax> som_depths; + if (som) { + region_map = assignRegions(g); + som_depths = getDistancesFromSOM(g); + } + + for (auto v : vertices_range(g)) { + const u32 vert_id = g[v].index; + DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates); + assert(vert_id < numStates); + vByIndex[vert_id] = v; + + if (is_any_start(v, g) || !in_degree(v, g)) { + initStates.set(vert_id); + } + } + + for (u32 i = 0; i < numStates; i++) { + NFAVertex v = vByIndex[i]; assert(v != NGHolder::null_vertex()); - const CharReach &cr = g[v].char_reach; - - /* only non-init cyclics can be squashers */ - if (!hasSelfLoop(v, g) || initStates.test(i)) { - continue; - } - - DEBUG_PRINTF("state %u is cyclic\n", i); - - NFAStateSet mask(numStates), succ(numStates), pred(numStates); - buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som, - som_depths, region_map, cache); - buildSucc(succ, g, v); - buildPred(pred, g, v); - const auto &reports = g[v].reports; - - for (size_t j = succ.find_first(); j != succ.npos; - j = succ.find_next(j)) { - NFAVertex vj = vByIndex[j]; - NFAStateSet pred2(numStates); - buildPred(pred2, g, vj); - if (pred2 == pred) { - DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i); - NFAStateSet tmp(numStates); - buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, - 
som, som_depths, region_map, cache); - mask &= tmp; - } - } - - for (size_t j = pred.find_first(); j != pred.npos; - j = pred.find_next(j)) { - NFAVertex vj = vByIndex[j]; - NFAStateSet succ2(numStates); - buildSucc(succ2, g, vj); - /* we can use j as a basis for squashing if its succs are a subset - * of ours */ - if ((succ2 & ~succ).any()) { - continue; - } - - if (som) { - /* We cannot use j to add to the squash mask of v if it may - * have an earlier start of match offset. ie for us j as a - * basis for the squash mask of v we require: - * maxSomDist(j) <= minSomDist(v) - */ - - /* ** TODO ** */ - - const depth &max_som_dist_j = - som_depths[g[vj].index].max; - const depth &min_som_dist_v = - som_depths[g[v].index].min; - if (max_som_dist_j > min_som_dist_v || - max_som_dist_j.is_infinite()) { - /* j can't be used as it may be storing an earlier SOM */ - continue; - } - } - - const CharReach &crv = g[vj].char_reach; - - /* we also require that j's report information be a subset of ours - */ - bool seen_special = false; - for (auto w : adjacent_vertices_range(vj, g)) { - if (is_special(w, g)) { - if (!edge(v, w, g).second) { - goto next_j; - } - seen_special = true; - } - } - - // FIXME: should be subset check? - if (seen_special && g[vj].reports != reports) { - continue; - } - - /* ok we can use j */ - if ((crv & ~cr).none()) { - NFAStateSet tmp(numStates); - buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, - som, som_depths, region_map, cache); - mask &= tmp; - mask.reset(j); - } - - next_j:; - } - - mask.set(i); /* never clear ourselves */ - - if ((~mask).any()) { // i.e. 
some bits unset in mask - DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count()); - squash.emplace(v, mask); - } - } - - findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som, - som_depths, region_map, cache); - + const CharReach &cr = g[v].char_reach; + + /* only non-init cyclics can be squashers */ + if (!hasSelfLoop(v, g) || initStates.test(i)) { + continue; + } + + DEBUG_PRINTF("state %u is cyclic\n", i); + + NFAStateSet mask(numStates), succ(numStates), pred(numStates); + buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som, + som_depths, region_map, cache); + buildSucc(succ, g, v); + buildPred(pred, g, v); + const auto &reports = g[v].reports; + + for (size_t j = succ.find_first(); j != succ.npos; + j = succ.find_next(j)) { + NFAVertex vj = vByIndex[j]; + NFAStateSet pred2(numStates); + buildPred(pred2, g, vj); + if (pred2 == pred) { + DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i); + NFAStateSet tmp(numStates); + buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, + som, som_depths, region_map, cache); + mask &= tmp; + } + } + + for (size_t j = pred.find_first(); j != pred.npos; + j = pred.find_next(j)) { + NFAVertex vj = vByIndex[j]; + NFAStateSet succ2(numStates); + buildSucc(succ2, g, vj); + /* we can use j as a basis for squashing if its succs are a subset + * of ours */ + if ((succ2 & ~succ).any()) { + continue; + } + + if (som) { + /* We cannot use j to add to the squash mask of v if it may + * have an earlier start of match offset. 
ie for us j as a + * basis for the squash mask of v we require: + * maxSomDist(j) <= minSomDist(v) + */ + + /* ** TODO ** */ + + const depth &max_som_dist_j = + som_depths[g[vj].index].max; + const depth &min_som_dist_v = + som_depths[g[v].index].min; + if (max_som_dist_j > min_som_dist_v || + max_som_dist_j.is_infinite()) { + /* j can't be used as it may be storing an earlier SOM */ + continue; + } + } + + const CharReach &crv = g[vj].char_reach; + + /* we also require that j's report information be a subset of ours + */ + bool seen_special = false; + for (auto w : adjacent_vertices_range(vj, g)) { + if (is_special(w, g)) { + if (!edge(v, w, g).second) { + goto next_j; + } + seen_special = true; + } + } + + // FIXME: should be subset check? + if (seen_special && g[vj].reports != reports) { + continue; + } + + /* ok we can use j */ + if ((crv & ~cr).none()) { + NFAStateSet tmp(numStates); + buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, + som, som_depths, region_map, cache); + mask &= tmp; + mask.reset(j); + } + + next_j:; + } + + mask.set(i); /* never clear ourselves */ + + if ((~mask).any()) { // i.e. 
some bits unset in mask + DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count()); + squash.emplace(v, mask); + } + } + + findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som, + som_depths, region_map, cache); + clearMutualSquashers(g, vByIndex, squash); - return squash; -} - -#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */ - -/** Some squash states are clearly not advantageous in the NFA, as they do - * incur the cost of an exception: - * -# acyclic states - * -# squash only a few acyclic states - */ -void filterSquashers(const NGHolder &g, + return squash; +} + +#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */ + +/** Some squash states are clearly not advantageous in the NFA, as they do + * incur the cost of an exception: + * -# acyclic states + * -# squash only a few acyclic states + */ +void filterSquashers(const NGHolder &g, unordered_map<NFAVertex, NFAStateSet> &squash) { assert(hasCorrectlyNumberedVertices(g)); - DEBUG_PRINTF("filtering\n"); + DEBUG_PRINTF("filtering\n"); vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */ - for (auto v : vertices_range(g)) { - rev[g[v].index] = v; - } - - for (auto v : vertices_range(g)) { - if (!contains(squash, v)) { - continue; - } + for (auto v : vertices_range(g)) { + rev[g[v].index] = v; + } + + for (auto v : vertices_range(g)) { + if (!contains(squash, v)) { + continue; + } DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index); - - if (!hasSelfLoop(v, g)) { - DEBUG_PRINTF("acyclic\n"); - squash.erase(v); - continue; - } - - NFAStateSet squashed = squash[v]; - squashed.flip(); /* default sense for mask of survivors */ + + if (!hasSelfLoop(v, g)) { + DEBUG_PRINTF("acyclic\n"); + squash.erase(v); + continue; + } + + NFAStateSet squashed = squash[v]; + squashed.flip(); /* default sense for mask of survivors */ for (auto sq = squashed.find_first(); sq != squashed.npos; sq = squashed.find_next(sq)) { - NFAVertex u = rev[sq]; - if (hasSelfLoop(u, g)) { - 
DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); - goto next_vertex; - } - } - - if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) { - DEBUG_PRINTF("squash set too small\n"); - squash.erase(v); - continue; - } - - next_vertex:; - DEBUG_PRINTF("squash set ok\n"); - } -} - -static -void getHighlanderReporters(const NGHolder &g, const NFAVertex accept, - const ReportManager &rm, - set<NFAVertex> &verts) { - for (auto v : inv_adjacent_vertices_range(accept, g)) { - if (v == g.accept) { - continue; - } - - const auto &reports = g[v].reports; - if (reports.empty()) { - assert(0); - continue; - } - - // Must be _all_ highlander callback reports. - for (auto report : reports) { - const Report &ir = rm.getReport(report); - if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) { - goto next_vertex; - } - - // If there's any bounds, these are handled outside the NFA and - // probably shouldn't be pre-empted. - if (ir.hasBounds()) { - goto next_vertex; - } - } - - verts.insert(v); - next_vertex: - continue; - } -} - -static -void removeEdgesToAccept(NGHolder &g, NFAVertex v) { - const auto &reports = g[v].reports; - assert(!reports.empty()); - - // We remove any accept edge with a non-empty subset of the reports of v. 
- - set<NFAEdge> dead; - - for (const auto &e : in_edges_range(g.accept, g)) { - NFAVertex u = source(e, g); - const auto &r = g[u].reports; - if (!r.empty() && is_subset_of(r, reports)) { + NFAVertex u = rev[sq]; + if (hasSelfLoop(u, g)) { + DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); + goto next_vertex; + } + } + + if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) { + DEBUG_PRINTF("squash set too small\n"); + squash.erase(v); + continue; + } + + next_vertex:; + DEBUG_PRINTF("squash set ok\n"); + } +} + +static +void getHighlanderReporters(const NGHolder &g, const NFAVertex accept, + const ReportManager &rm, + set<NFAVertex> &verts) { + for (auto v : inv_adjacent_vertices_range(accept, g)) { + if (v == g.accept) { + continue; + } + + const auto &reports = g[v].reports; + if (reports.empty()) { + assert(0); + continue; + } + + // Must be _all_ highlander callback reports. + for (auto report : reports) { + const Report &ir = rm.getReport(report); + if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) { + goto next_vertex; + } + + // If there's any bounds, these are handled outside the NFA and + // probably shouldn't be pre-empted. + if (ir.hasBounds()) { + goto next_vertex; + } + } + + verts.insert(v); + next_vertex: + continue; + } +} + +static +void removeEdgesToAccept(NGHolder &g, NFAVertex v) { + const auto &reports = g[v].reports; + assert(!reports.empty()); + + // We remove any accept edge with a non-empty subset of the reports of v. 
+ + set<NFAEdge> dead; + + for (const auto &e : in_edges_range(g.accept, g)) { + NFAVertex u = source(e, g); + const auto &r = g[u].reports; + if (!r.empty() && is_subset_of(r, reports)) { DEBUG_PRINTF("vertex %zu\n", g[u].index); - dead.insert(e); - } - } - - for (const auto &e : in_edges_range(g.acceptEod, g)) { - NFAVertex u = source(e, g); - const auto &r = g[u].reports; - if (!r.empty() && is_subset_of(r, reports)) { + dead.insert(e); + } + } + + for (const auto &e : in_edges_range(g.acceptEod, g)) { + NFAVertex u = source(e, g); + const auto &r = g[u].reports; + if (!r.empty() && is_subset_of(r, reports)) { DEBUG_PRINTF("vertex %zu\n", g[u].index); - dead.insert(e); - } - } - - assert(!dead.empty()); - remove_edges(dead, g); -} - -static -vector<NFAVertex> findUnreachable(const NGHolder &g) { + dead.insert(e); + } + } + + assert(!dead.empty()); + remove_edges(dead, g); +} + +static +vector<NFAVertex> findUnreachable(const NGHolder &g) { const boost::reverse_graph<NGHolder, const NGHolder &> revg(g); - + unordered_map<NFAVertex, boost::default_color_type> colours; - colours.reserve(num_vertices(g)); - - depth_first_visit(revg, g.acceptEod, - make_dfs_visitor(boost::null_visitor()), - make_assoc_property_map(colours)); - - // Unreachable vertices are not in the colour map. - vector<NFAVertex> unreach; - for (auto v : vertices_range(revg)) { - if (!contains(colours, v)) { + colours.reserve(num_vertices(g)); + + depth_first_visit(revg, g.acceptEod, + make_dfs_visitor(boost::null_visitor()), + make_assoc_property_map(colours)); + + // Unreachable vertices are not in the colour map. + vector<NFAVertex> unreach; + for (auto v : vertices_range(revg)) { + if (!contains(colours, v)) { unreach.push_back(NFAVertex(v)); - } - } - return unreach; -} - -/** Populates squash masks for states that can be switched off by highlander - * (single match) reporters. 
*/ + } + } + return unreach; +} + +/** Populates squash masks for states that can be switched off by highlander + * (single match) reporters. */ unordered_map<NFAVertex, NFAStateSet> -findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { +findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { unordered_map<NFAVertex, NFAStateSet> squash; - - set<NFAVertex> verts; - getHighlanderReporters(g, g.accept, rm, verts); - getHighlanderReporters(g, g.acceptEod, rm, verts); - if (verts.empty()) { - DEBUG_PRINTF("no highlander reports\n"); - return squash; - } - - const u32 numStates = num_vertices(g); - - for (auto v : verts) { + + set<NFAVertex> verts; + getHighlanderReporters(g, g.accept, rm, verts); + getHighlanderReporters(g, g.acceptEod, rm, verts); + if (verts.empty()) { + DEBUG_PRINTF("no highlander reports\n"); + return squash; + } + + const u32 numStates = num_vertices(g); + + for (auto v : verts) { DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index, - g[v].reports.size()); - - // Find the set of vertices that lead to v or any other reporter with a - // subset of v's reports. We do this by creating a copy of the graph, - // cutting the appropriate out-edges to accept and seeing which - // vertices become unreachable. - + g[v].reports.size()); + + // Find the set of vertices that lead to v or any other reporter with a + // subset of v's reports. We do this by creating a copy of the graph, + // cutting the appropriate out-edges to accept and seeing which + // vertices become unreachable. 
+ unordered_map<NFAVertex, NFAVertex> orig_to_copy; - NGHolder h; - cloneHolder(h, g, &orig_to_copy); - removeEdgesToAccept(h, orig_to_copy[v]); - - vector<NFAVertex> unreach = findUnreachable(h); - DEBUG_PRINTF("can squash %zu vertices\n", unreach.size()); - if (unreach.empty()) { - continue; - } - - if (!contains(squash, v)) { - squash[v] = NFAStateSet(numStates); - squash[v].set(); - } - - NFAStateSet &mask = squash[v]; - - for (auto uv : unreach) { + NGHolder h; + cloneHolder(h, g, &orig_to_copy); + removeEdgesToAccept(h, orig_to_copy[v]); + + vector<NFAVertex> unreach = findUnreachable(h); + DEBUG_PRINTF("can squash %zu vertices\n", unreach.size()); + if (unreach.empty()) { + continue; + } + + if (!contains(squash, v)) { + squash[v] = NFAStateSet(numStates); + squash[v].set(); + } + + NFAStateSet &mask = squash[v]; + + for (auto uv : unreach) { DEBUG_PRINTF("squashes index %zu\n", h[uv].index); - mask.reset(h[uv].index); - } - } - - return squash; -} - -} // namespace ue2 + mask.reset(h[uv].index); + } + } + + return squash; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h index 489f541e84..16510ddd3a 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h @@ -1,72 +1,72 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA graph state squashing analysis. - */ -#ifndef NG_SQUASH_H -#define NG_SQUASH_H - -#include "ng_holder.h" -#include "som/som.h" -#include "ue2common.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA graph state squashing analysis. + */ +#ifndef NG_SQUASH_H +#define NG_SQUASH_H + +#include "ng_holder.h" +#include "som/som.h" +#include "ue2common.h" + #include <unordered_map> -#include <boost/dynamic_bitset.hpp> - -namespace ue2 { - -class NGHolder; -class ReportManager; - +#include <boost/dynamic_bitset.hpp> + +namespace ue2 { + +class NGHolder; +class ReportManager; + /** * Dynamically-sized bitset, as an NFA can have an arbitrary number of states. */ using NFAStateSet = boost::dynamic_bitset<>; - -/** - * Populates the squash mask for each vertex (i.e. the set of states to be left - * on during squashing). - * - * The NFAStateSet in the output map is indexed by vertex_index. - */ + +/** + * Populates the squash mask for each vertex (i.e. the set of states to be left + * on during squashing). + * + * The NFAStateSet in the output map is indexed by vertex_index. + */ std::unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som = SOM_NONE); - -/** Filters out squash states intended only for use in DFA construction. 
*/ -void filterSquashers(const NGHolder &g, + +/** Filters out squash states intended only for use in DFA construction. */ +void filterSquashers(const NGHolder &g, std::unordered_map<NFAVertex, NFAStateSet> &squash); - -/** Populates squash masks for states that can be switched off by highlander - * (single match) reporters. */ + +/** Populates squash masks for states that can be switched off by highlander + * (single match) reporters. */ std::unordered_map<NFAVertex, NFAStateSet> -findHighlanderSquashers(const NGHolder &g, const ReportManager &rm); - -} // namespace ue2 - -#endif // NG_SQUASH_H +findHighlanderSquashers(const NGHolder &g, const ReportManager &rm); + +} // namespace ue2 + +#endif // NG_SQUASH_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp index 5e627bb593..446c2ba317 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp @@ -1,193 +1,193 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Stop Alphabet calculation. - */ -#include "ng_stop.h" - -#include "ng_depth.h" -#include "ng_holder.h" -#include "ng_misc_opt.h" -#include "ng_util.h" -#include "ue2common.h" -#include "nfa/castlecompile.h" -#include "som/som.h" -#include "util/charreach.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/verify_types.h" - -#include <map> -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -/** Stop alphabet depth threshold. */ -static const u32 MAX_STOP_DEPTH = 8; - -namespace { - -/** Depths from start, startDs for this graph. */ -struct InitDepths { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Stop Alphabet calculation. + */ +#include "ng_stop.h" + +#include "ng_depth.h" +#include "ng_holder.h" +#include "ng_misc_opt.h" +#include "ng_util.h" +#include "ue2common.h" +#include "nfa/castlecompile.h" +#include "som/som.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/verify_types.h" + +#include <map> +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +/** Stop alphabet depth threshold. */ +static const u32 MAX_STOP_DEPTH = 8; + +namespace { + +/** Depths from start, startDs for this graph. 
*/ +struct InitDepths { explicit InitDepths(const NGHolder &g) : start(calcDepthsFrom(g, g.start)), startDs(calcDepthsFrom(g, g.startDs)) {} - - depth maxDist(const NGHolder &g, NFAVertex v) const { - u32 idx = g[v].index; - assert(idx < start.size() && idx < startDs.size()); - const depth &d_start = start.at(idx).max; - const depth &d_startDs = startDs.at(idx).max; - if (d_start.is_unreachable()) { - return d_startDs; - } else if (d_startDs.is_unreachable()) { - return d_start; - } - return max(d_start, d_startDs); - } - -private: - vector<DepthMinMax> start; - vector<DepthMinMax> startDs; -}; - -} // namespace - -/** Find the set of characters that are not present in the reachability of - * graph \p g after a certain depth (currently 8). If a character in this set - * is encountered, it means that the NFA is either dead or has not progressed + + depth maxDist(const NGHolder &g, NFAVertex v) const { + u32 idx = g[v].index; + assert(idx < start.size() && idx < startDs.size()); + const depth &d_start = start.at(idx).max; + const depth &d_startDs = startDs.at(idx).max; + if (d_start.is_unreachable()) { + return d_startDs; + } else if (d_startDs.is_unreachable()) { + return d_start; + } + return max(d_start, d_startDs); + } + +private: + vector<DepthMinMax> start; + vector<DepthMinMax> startDs; +}; + +} // namespace + +/** Find the set of characters that are not present in the reachability of + * graph \p g after a certain depth (currently 8). If a character in this set + * is encountered, it means that the NFA is either dead or has not progressed * more than 8 characters from its start states. * * This is only used to guide merging heuristics, use * findLeftOffsetStopAlphabet for real uses. 
*/ -CharReach findStopAlphabet(const NGHolder &g, som_type som) { - const depth max_depth(MAX_STOP_DEPTH); - const InitDepths depths(g); - const map<NFAVertex, BoundedRepeatSummary> no_vertices; - - CharReach stopcr; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - if (depths.maxDist(g, v) >= max_depth) { - if (som == SOM_NONE) { - stopcr |= reduced_cr(v, g, no_vertices); - } else { - stopcr |= g[v].char_reach; - } - } - } - - // Turn alphabet into stops. - stopcr.flip(); - - return stopcr; -} - -/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then - * build an eight-bit mask per character C, with each bit representing the - * depth before the location of character C (if encountered) that the NFA would - * be in a predictable start state. */ -vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) { - const depth max_depth(MAX_STOP_DEPTH); - const InitDepths depths(g); - const map<NFAVertex, BoundedRepeatSummary> no_vertices; - - vector<CharReach> reach(MAX_STOP_DEPTH); - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - CharReach v_cr; - if (som == SOM_NONE) { - v_cr = reduced_cr(v, g, no_vertices); - } else { - v_cr = g[v].char_reach; - } - - u32 d = min(max_depth, depths.maxDist(g, v)); - for (u32 i = 0; i < d; i++) { - reach[i] |= v_cr; - } - } - -#ifdef DEBUG - for (u32 i = 0; i < MAX_STOP_DEPTH; i++) { - DEBUG_PRINTF("depth %u, stop chars: ", i); - describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT); - printf("\n"); - } -#endif - - vector<u8> stop(N_CHARS, 0); - - for (u32 i = 0; i < MAX_STOP_DEPTH; i++) { - CharReach cr = ~reach[i]; // invert reach for stop chars. 
- const u8 mask = 1U << i; - for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) { - stop[c] |= mask; - } - } - - return stop; -} - -vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle, - UNUSED som_type som) { - const depth max_width = findMaxWidth(castle); - DEBUG_PRINTF("castle has reach %s and max width %s\n", - describeClass(castle.reach()).c_str(), - max_width.str().c_str()); - - const CharReach escape = ~castle.reach(); // invert reach for stop chars. - - u32 d = min(max_width, depth(MAX_STOP_DEPTH)); - const u8 mask = verify_u8((1U << d) - 1); - - vector<u8> stop(N_CHARS, 0); - - for (size_t c = escape.find_first(); c != escape.npos; - c = escape.find_next(c)) { - stop[c] |= mask; - } - - return stop; -} - -} // namespace ue2 +CharReach findStopAlphabet(const NGHolder &g, som_type som) { + const depth max_depth(MAX_STOP_DEPTH); + const InitDepths depths(g); + const map<NFAVertex, BoundedRepeatSummary> no_vertices; + + CharReach stopcr; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + if (depths.maxDist(g, v) >= max_depth) { + if (som == SOM_NONE) { + stopcr |= reduced_cr(v, g, no_vertices); + } else { + stopcr |= g[v].char_reach; + } + } + } + + // Turn alphabet into stops. + stopcr.flip(); + + return stopcr; +} + +/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then + * build an eight-bit mask per character C, with each bit representing the + * depth before the location of character C (if encountered) that the NFA would + * be in a predictable start state. 
*/ +vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) { + const depth max_depth(MAX_STOP_DEPTH); + const InitDepths depths(g); + const map<NFAVertex, BoundedRepeatSummary> no_vertices; + + vector<CharReach> reach(MAX_STOP_DEPTH); + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + CharReach v_cr; + if (som == SOM_NONE) { + v_cr = reduced_cr(v, g, no_vertices); + } else { + v_cr = g[v].char_reach; + } + + u32 d = min(max_depth, depths.maxDist(g, v)); + for (u32 i = 0; i < d; i++) { + reach[i] |= v_cr; + } + } + +#ifdef DEBUG + for (u32 i = 0; i < MAX_STOP_DEPTH; i++) { + DEBUG_PRINTF("depth %u, stop chars: ", i); + describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT); + printf("\n"); + } +#endif + + vector<u8> stop(N_CHARS, 0); + + for (u32 i = 0; i < MAX_STOP_DEPTH; i++) { + CharReach cr = ~reach[i]; // invert reach for stop chars. + const u8 mask = 1U << i; + for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) { + stop[c] |= mask; + } + } + + return stop; +} + +vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle, + UNUSED som_type som) { + const depth max_width = findMaxWidth(castle); + DEBUG_PRINTF("castle has reach %s and max width %s\n", + describeClass(castle.reach()).c_str(), + max_width.str().c_str()); + + const CharReach escape = ~castle.reach(); // invert reach for stop chars. 
+ + u32 d = min(max_width, depth(MAX_STOP_DEPTH)); + const u8 mask = verify_u8((1U << d) - 1); + + vector<u8> stop(N_CHARS, 0); + + for (size_t c = escape.find_first(); c != escape.npos; + c = escape.find_next(c)) { + stop[c] |= mask; + } + + return stop; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.h b/contrib/libs/hyperscan/src/nfagraph/ng_stop.h index 4a889dca09..8399047f7b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.h @@ -1,66 +1,66 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Stop Alphabet calculation. - */ - -#ifndef NG_STOP_H -#define NG_STOP_H - -#include "ue2common.h" -#include "som/som.h" - -#include <vector> - -namespace ue2 { - -struct CastleProto; -class CharReach; -class NGHolder; - -/** Find the set of characters that are not present in the reachability of - * graph \p g after a certain depth (currently 8). If a character in this set - * is encountered, it means that the NFA is either dead or has not progressed + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Stop Alphabet calculation. + */ + +#ifndef NG_STOP_H +#define NG_STOP_H + +#include "ue2common.h" +#include "som/som.h" + +#include <vector> + +namespace ue2 { + +struct CastleProto; +class CharReach; +class NGHolder; + +/** Find the set of characters that are not present in the reachability of + * graph \p g after a certain depth (currently 8). If a character in this set + * is encountered, it means that the NFA is either dead or has not progressed * more than 8 characters from its start states. * * This is only used to guide merging heuristics, use * findLeftOffsetStopAlphabet for real uses. */ -CharReach findStopAlphabet(const NGHolder &g, som_type som); - -/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then - * build an eight-bit mask per character C, with each bit representing the - * depth before the location of character C (if encountered) that the NFA would - * be in a predictable start state. */ -std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som); -std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som); - -} // namespace ue2 - -#endif +CharReach findStopAlphabet(const NGHolder &g, som_type som); + +/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. 
Then + * build an eight-bit mask per character C, with each bit representing the + * depth before the location of character C (if encountered) that the NFA would + * be in a predictable start state. */ +std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som); +std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp index 4ad5ff7875..6c7259f717 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp @@ -1,73 +1,73 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA graph merging ("uncalc") - * - * The file contains our collection of NFA graph merging strategies. - * - * NFAGraph merging is generally guided by the length of the common prefix - * between NFAGraph pairs. - */ -#include "grey.h" -#include "ng_holder.h" -#include "ng_limex.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_uncalc_components.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/graph_range.h" -#include "util/ue2string.h" - -#include <algorithm> -#include <deque> -#include <map> -#include <queue> -#include <set> -#include <vector> - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA graph merging ("uncalc") + * + * The file contains our collection of NFA graph merging strategies. + * + * NFAGraph merging is generally guided by the length of the common prefix + * between NFAGraph pairs. + */ +#include "grey.h" +#include "ng_holder.h" +#include "ng_limex.h" +#include "ng_redundancy.h" +#include "ng_region.h" +#include "ng_uncalc_components.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/graph_range.h" +#include "util/ue2string.h" + +#include <algorithm> +#include <deque> +#include <map> +#include <queue> +#include <set> +#include <vector> + #include <boost/range/adaptor/map.hpp> -using namespace std; +using namespace std; using boost::adaptors::map_values; - -namespace ue2 { - -static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */ - -/** Sentinel value meaning no component has yet been selected. */ + +namespace ue2 { + +static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */ + +/** Sentinel value meaning no component has yet been selected. 
*/ static const u32 NO_COMPONENT = ~0U; - + static const u32 UNUSED_STATE = ~0U; - + namespace { struct ranking_info { explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) { @@ -77,15 +77,15 @@ struct ranking_info { for (NFAVertex v : to_vertex) { to_rank[v] = rank++; - } + } for (NFAVertex v : vertices_range(h)) { if (!contains(to_rank, v)) { to_rank[v] = UNUSED_STATE; } } - } - + } + NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); } u32 get(NFAVertex v) const { return to_rank.at(v); } u32 size() const { return (u32)to_vertex.size(); } @@ -94,279 +94,279 @@ struct ranking_info { to_rank[v] = rank; to_vertex.push_back(v); return rank; - } - + } + private: vector<NFAVertex> to_vertex; unordered_map<NFAVertex, u32> to_rank; }; -} - -static never_inline -bool cplVerticesMatch(const NGHolder &ga, NFAVertex va, - const NGHolder &gb, NFAVertex vb) { - // Must have the same reachability. - if (ga[va].char_reach != gb[vb].char_reach) { - return false; - } - - // If they're start vertices, they must be the same one. - if (is_any_start(va, ga) || is_any_start(vb, gb)) { - if (ga[va].index != gb[vb].index) { - return false; - } - } - - bool va_accept = edge(va, ga.accept, ga).second; - bool vb_accept = edge(vb, gb.accept, gb).second; - bool va_acceptEod = edge(va, ga.acceptEod, ga).second; - bool vb_acceptEod = edge(vb, gb.acceptEod, gb).second; - - // Must have the same accept/acceptEod edges. - if (va_accept != vb_accept || va_acceptEod != vb_acceptEod) { - return false; - } - - return true; -} - -static never_inline +} + +static never_inline +bool cplVerticesMatch(const NGHolder &ga, NFAVertex va, + const NGHolder &gb, NFAVertex vb) { + // Must have the same reachability. + if (ga[va].char_reach != gb[vb].char_reach) { + return false; + } + + // If they're start vertices, they must be the same one. 
+ if (is_any_start(va, ga) || is_any_start(vb, gb)) { + if (ga[va].index != gb[vb].index) { + return false; + } + } + + bool va_accept = edge(va, ga.accept, ga).second; + bool vb_accept = edge(vb, gb.accept, gb).second; + bool va_acceptEod = edge(va, ga.acceptEod, ga).second; + bool vb_acceptEod = edge(vb, gb.acceptEod, gb).second; + + // Must have the same accept/acceptEod edges. + if (va_accept != vb_accept || va_acceptEod != vb_acceptEod) { + return false; + } + + return true; +} + +static never_inline u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, const NGHolder &gb, const ranking_info &b_ranking) { u32 ml = min(a_ranking.size(), b_ranking.size()); - if (ml > 65535) { - ml = 65535; - } - - // Count the number of common vertices which share reachability, report and - // "startedness" properties. - u32 max = 0; - for (; max < ml; max++) { + if (ml > 65535) { + ml = 65535; + } + + // Count the number of common vertices which share reachability, report and + // "startedness" properties. 
+ u32 max = 0; + for (; max < ml; max++) { if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) { - break; - } - } - - return max; -} - + break; + } + } + + return max; +} + static u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, const NGHolder &gb, const ranking_info &b_ranking) { - /* upper bound on the common region based on local properties */ + /* upper bound on the common region based on local properties */ u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking); - DEBUG_PRINTF("cpl upper bound %u\n", max); - - while (max > 0) { - /* shrink max region based on in-edges from outside the region */ - for (size_t j = max; j > 0; j--) { + DEBUG_PRINTF("cpl upper bound %u\n", max); + + while (max > 0) { + /* shrink max region based on in-edges from outside the region */ + for (size_t j = max; j > 0; j--) { NFAVertex a_v = a_ranking.at(j - 1); NFAVertex b_v = b_ranking.at(j - 1); for (auto u : inv_adjacent_vertices_range(a_v, ga)) { u32 state_id = a_ranking.get(u); if (state_id != UNUSED_STATE && state_id >= max) { - max = j - 1; - DEBUG_PRINTF("lowering max to %u\n", max); - goto next_vertex; - } - } - + max = j - 1; + DEBUG_PRINTF("lowering max to %u\n", max); + goto next_vertex; + } + } + for (auto u : inv_adjacent_vertices_range(b_v, gb)) { u32 state_id = b_ranking.get(u); if (state_id != UNUSED_STATE && state_id >= max) { - max = j - 1; - DEBUG_PRINTF("lowering max to %u\n", max); - goto next_vertex; - } - } - - next_vertex:; - } - - /* Ensure that every pair of vertices has same out-edges to vertices in - the region. */ + max = j - 1; + DEBUG_PRINTF("lowering max to %u\n", max); + goto next_vertex; + } + } + + next_vertex:; + } + + /* Ensure that every pair of vertices has same out-edges to vertices in + the region. 
*/ for (size_t i = 0; i < max; i++) { - size_t a_count = 0; - size_t b_count = 0; - + size_t a_count = 0; + size_t b_count = 0; + for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) { u32 sid = a_ranking.get(target(a_edge, ga)); if (sid == UNUSED_STATE || sid >= max) { - continue; - } - - a_count++; - + continue; + } + + a_count++; + NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); - + if (!b_edge) { - max = i; - DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", - max, i, sid); + max = i; + DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", + max, i, sid); goto try_smaller; - } - + } + if (ga[a_edge].tops != gb[b_edge].tops) { - max = i; + max = i; DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); goto try_smaller; - } - } - + } + } + for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) { u32 sid = b_ranking.get(b_v); if (sid == UNUSED_STATE || sid >= max) { - continue; - } - - b_count++; - } - - if (a_count != b_count) { - max = i; + continue; + } + + b_count++; + } + + if (a_count != b_count) { + max = i; DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu," " b_count=%zu)\n", max, a_count, b_count); goto try_smaller; - } - } - + } + } + DEBUG_PRINTF("survived checks, returning cpl %u\n", max); return max; try_smaller:; - } - - DEBUG_PRINTF("failed to find any common region\n"); - return 0; -} - + } + + DEBUG_PRINTF("failed to find any common region\n"); + return 0; +} + u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) { return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb)); } -static never_inline +static never_inline void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { assert(&dest != &vic); auto dest_info = ranking_info(dest); auto vic_info = ranking_info(vic); - map<NFAVertex, NFAVertex> vmap; // vic -> dest - - vmap[vic.start] = dest.start; - vmap[vic.startDs] = dest.startDs; - vmap[vic.accept] = dest.accept; - vmap[vic.acceptEod] = 
dest.acceptEod; + map<NFAVertex, NFAVertex> vmap; // vic -> dest + + vmap[vic.start] = dest.start; + vmap[vic.startDs] = dest.startDs; + vmap[vic.accept] = dest.accept; + vmap[vic.acceptEod] = dest.acceptEod; vmap[NGHolder::null_vertex()] = NGHolder::null_vertex(); - - // For vertices in the common len, add to vmap and merge in the reports, if - // any. - for (u32 i = 0; i < common_len; i++) { + + // For vertices in the common len, add to vmap and merge in the reports, if + // any. + for (u32 i = 0; i < common_len; i++) { NFAVertex v_old = vic_info.at(i); NFAVertex v = dest_info.at(i); - vmap[v_old] = v; - - const auto &reports = vic[v_old].reports; - dest[v].reports.insert(reports.begin(), reports.end()); - } - + vmap[v_old] = v; + + const auto &reports = vic[v_old].reports; + dest[v].reports.insert(reports.begin(), reports.end()); + } + // Add in vertices beyond the common len for (u32 i = common_len; i < vic_info.size(); i++) { NFAVertex v_old = vic_info.at(i); - - if (is_special(v_old, vic)) { - // Dest already has start vertices, just merge the reports. - u32 idx = vic[v_old].index; - NFAVertex v = dest.getSpecialVertex(idx); - const auto &reports = vic[v_old].reports; - dest[v].reports.insert(reports.begin(), reports.end()); - continue; - } - - NFAVertex v = add_vertex(vic[v_old], dest); + + if (is_special(v_old, vic)) { + // Dest already has start vertices, just merge the reports. 
+ u32 idx = vic[v_old].index; + NFAVertex v = dest.getSpecialVertex(idx); + const auto &reports = vic[v_old].reports; + dest[v].reports.insert(reports.begin(), reports.end()); + continue; + } + + NFAVertex v = add_vertex(vic[v_old], dest); dest_info.add_to_tail(v); - vmap[v_old] = v; - } - - /* add edges */ - DEBUG_PRINTF("common_len=%zu\n", common_len); - for (const auto &e : edges_range(vic)) { + vmap[v_old] = v; + } + + /* add edges */ + DEBUG_PRINTF("common_len=%zu\n", common_len); + for (const auto &e : edges_range(vic)) { NFAVertex u_old = source(e, vic); NFAVertex v_old = target(e, vic); NFAVertex u = vmap[u_old]; NFAVertex v = vmap[v_old]; - bool uspecial = is_special(u, dest); - bool vspecial = is_special(v, dest); - - // Skip stylised edges that are already present. - if (uspecial && vspecial && edge(u, v, dest).second) { - continue; - } - - // We're in the common region if v's state ID is low enough, unless v - // is a special (an accept), in which case we use u's state ID. + bool uspecial = is_special(u, dest); + bool vspecial = is_special(v, dest); + + // Skip stylised edges that are already present. + if (uspecial && vspecial && edge(u, v, dest).second) { + continue; + } + + // We're in the common region if v's state ID is low enough, unless v + // is a special (an accept), in which case we use u's state ID. bool in_common_region = dest_info.get(v) < common_len; if (vspecial && dest_info.get(u) < common_len) { - in_common_region = true; - } - + in_common_region = true; + } + DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n", dest[u].index, dest_info.get(u), dest[v].index, dest_info.get(v), - in_common_region ? " [common]" : ""); - - if (in_common_region) { - if (!is_special(v, dest)) { - DEBUG_PRINTF("skipping common edge\n"); - assert(edge(u, v, dest).second); - // Should never merge edges with different top values. + in_common_region ? 
" [common]" : ""); + + if (in_common_region) { + if (!is_special(v, dest)) { + DEBUG_PRINTF("skipping common edge\n"); + assert(edge(u, v, dest).second); + // Should never merge edges with different top values. assert(vic[e].tops == dest[edge(u, v, dest)].tops); - continue; - } else { - assert(is_any_accept(v, dest)); - // If the edge exists in both graphs, skip it. - if (edge(u, v, dest).second) { - DEBUG_PRINTF("skipping common edge to accept\n"); - continue; - } - } - } - - assert(!edge(u, v, dest).second); - add_edge(u, v, vic[e], dest); - } - + continue; + } else { + assert(is_any_accept(v, dest)); + // If the edge exists in both graphs, skip it. + if (edge(u, v, dest).second) { + DEBUG_PRINTF("skipping common edge to accept\n"); + continue; + } + } + } + + assert(!edge(u, v, dest).second); + add_edge(u, v, vic[e], dest); + } + renumber_edges(dest); renumber_vertices(dest); -} - -namespace { -struct NfaMergeCandidateH { - NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in, - u32 tb_in) - : cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {} - - size_t cpl; //!< common prefix length - NGHolder *first; //!< first component to merge - NGHolder *second; //!< second component to merge - u32 tie_breaker; //!< for determinism - - bool operator<(const NfaMergeCandidateH &other) const { - if (cpl != other.cpl) { - return cpl < other.cpl; - } else { - return tie_breaker < other.tie_breaker; - } - } -}; - -} // end namespace - -/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. 
*/ -static +} + +namespace { +struct NfaMergeCandidateH { + NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in, + u32 tb_in) + : cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {} + + size_t cpl; //!< common prefix length + NGHolder *first; //!< first component to merge + NGHolder *second; //!< second component to merge + u32 tie_breaker; //!< for determinism + + bool operator<(const NfaMergeCandidateH &other) const { + if (cpl != other.cpl) { + return cpl < other.cpl; + } else { + return tie_breaker < other.tie_breaker; + } + } +}; + +} // end namespace + +/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */ +static bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, const ReportManager *rm, const CompileContext &cc) { size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl; - + combinedStateCount -= 2 * 2; /* discount accepts from both */ if (is_triggered(ha)) { @@ -377,130 +377,130 @@ bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, combinedStateCount += tops.size(); } - if (combinedStateCount > FAST_STATE_LIMIT) { - // More complex implementability check. - NGHolder h_temp; - cloneHolder(h_temp, ha); - assert(h_temp.kind == hb.kind); - mergeNfaComponent(h_temp, hb, cpl); - reduceImplementableGraph(h_temp, SOM_NONE, rm, cc); - u32 numStates = isImplementableNFA(h_temp, rm, cc); - DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates); - if (!numStates) { - DEBUG_PRINTF("not implementable\n"); - return false; - } else if (numStates > FAST_STATE_LIMIT) { - DEBUG_PRINTF("too many states to merge\n"); - return false; - } - } - - return true; -} - -/** Returns true if the graph has start vertices that are compatible for - * merging. Rose may generate all sorts of wacky vacuous cases, and the merge - * code isn't currently up to handling them. 
*/ -static -bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) { - // Start and startDs must have the same self-loops. - return (edge(ga.startDs, ga.startDs, ga).second == - edge(gb.startDs, gb.startDs, gb).second) && - (edge(ga.start, ga.start, ga).second == - edge(gb.start, gb.start, gb).second); -} - -static never_inline -void buildNfaMergeQueue(const vector<NGHolder *> &cluster, - priority_queue<NfaMergeCandidateH> *pq) { - const size_t cs = cluster.size(); - assert(cs < NO_COMPONENT); - - // First, make sure all holders have numbered states and collect their - // counts. + if (combinedStateCount > FAST_STATE_LIMIT) { + // More complex implementability check. + NGHolder h_temp; + cloneHolder(h_temp, ha); + assert(h_temp.kind == hb.kind); + mergeNfaComponent(h_temp, hb, cpl); + reduceImplementableGraph(h_temp, SOM_NONE, rm, cc); + u32 numStates = isImplementableNFA(h_temp, rm, cc); + DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates); + if (!numStates) { + DEBUG_PRINTF("not implementable\n"); + return false; + } else if (numStates > FAST_STATE_LIMIT) { + DEBUG_PRINTF("too many states to merge\n"); + return false; + } + } + + return true; +} + +/** Returns true if the graph has start vertices that are compatible for + * merging. Rose may generate all sorts of wacky vacuous cases, and the merge + * code isn't currently up to handling them. */ +static +bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) { + // Start and startDs must have the same self-loops. + return (edge(ga.startDs, ga.startDs, ga).second == + edge(gb.startDs, gb.startDs, gb).second) && + (edge(ga.start, ga.start, ga).second == + edge(gb.start, gb.start, gb).second); +} + +static never_inline +void buildNfaMergeQueue(const vector<NGHolder *> &cluster, + priority_queue<NfaMergeCandidateH> *pq) { + const size_t cs = cluster.size(); + assert(cs < NO_COMPONENT); + + // First, make sure all holders have numbered states and collect their + // counts. 
vector<ranking_info> states_map; states_map.reserve(cs); - for (size_t i = 0; i < cs; i++) { - assert(cluster[i]); + for (size_t i = 0; i < cs; i++) { + assert(cluster[i]); assert(states_map.size() == i); const NGHolder &g = *(cluster[i]); states_map.emplace_back(g); - } - - vector<u16> seen_cpl(cs * cs, 0); - vector<u32> best_comp(cs, NO_COMPONENT); - - /* TODO: understand, explain */ - for (u32 ci = 0; ci < cs; ci++) { - for (u32 cj = ci + 1; cj < cs; cj++) { - u16 cpl = 0; - bool calc = false; - - if (best_comp[ci] != NO_COMPONENT) { - u32 bc = best_comp[ci]; - if (seen_cpl[bc + cs * cj] < seen_cpl[bc + cs * ci]) { - cpl = seen_cpl[bc + cs * cj]; - DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl); - calc = true; - } - } - - if (!calc && best_comp[cj] != NO_COMPONENT) { - u32 bc = best_comp[cj]; - if (seen_cpl[bc + cs * ci] < seen_cpl[bc + cs * cj]) { - cpl = seen_cpl[bc + cs * ci]; - DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl); - calc = true; - } - } - - NGHolder &g_i = *(cluster[ci]); - NGHolder &g_j = *(cluster[cj]); - - if (!compatibleStarts(g_i, g_j)) { - continue; - } - - if (!calc) { - cpl = commonPrefixLength(g_i, states_map[ci], - g_j, states_map[cj]); - } - - seen_cpl[ci + cs * cj] = cpl; - seen_cpl[cj + cs * ci] = cpl; - - if (best_comp[cj] == NO_COMPONENT - || seen_cpl[best_comp[cj] + cs * cj] < cpl) { - best_comp[cj] = ci; - } - - DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl); - - pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj], - ci * cs + cj)); - } - } -} - + } + + vector<u16> seen_cpl(cs * cs, 0); + vector<u32> best_comp(cs, NO_COMPONENT); + + /* TODO: understand, explain */ + for (u32 ci = 0; ci < cs; ci++) { + for (u32 cj = ci + 1; cj < cs; cj++) { + u16 cpl = 0; + bool calc = false; + + if (best_comp[ci] != NO_COMPONENT) { + u32 bc = best_comp[ci]; + if (seen_cpl[bc + cs * cj] < seen_cpl[bc + cs * ci]) { + cpl = seen_cpl[bc + cs * cj]; + DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl); + calc = true; + } + } + 
+ if (!calc && best_comp[cj] != NO_COMPONENT) { + u32 bc = best_comp[cj]; + if (seen_cpl[bc + cs * ci] < seen_cpl[bc + cs * cj]) { + cpl = seen_cpl[bc + cs * ci]; + DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl); + calc = true; + } + } + + NGHolder &g_i = *(cluster[ci]); + NGHolder &g_j = *(cluster[cj]); + + if (!compatibleStarts(g_i, g_j)) { + continue; + } + + if (!calc) { + cpl = commonPrefixLength(g_i, states_map[ci], + g_j, states_map[cj]); + } + + seen_cpl[ci + cs * cj] = cpl; + seen_cpl[cj + cs * ci] = cpl; + + if (best_comp[cj] == NO_COMPONENT + || seen_cpl[best_comp[cj] + cs * cj] < cpl) { + best_comp[cj] = ci; + } + + DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl); + + pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj], + ci * cs + cj)); + } + } +} + /** * True if the graphs have mergeable starts. * * Nowadays, this means that any vacuous edges must have the same tops. In * addition, mixed-accept cases need to have matching reports. */ -static -bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { +static +bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { if (!isVacuous(h1) || !isVacuous(h2)) { return true; } - + // Vacuous edges from startDs should not occur: we have better ways to // implement true dot-star relationships. Just in case they do, ban them // from being merged unless they have identical reports. if (is_match_vertex(h1.startDs, h1) || is_match_vertex(h2.startDs, h2)) { assert(0); return false; - } + } /* TODO: relax top checks if reports match */ @@ -509,88 +509,88 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { NFAEdge e2_accept = edge(h2.start, h2.accept, h2); if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { return false; - } - + } + // If both graphs have edge (start, acceptEod), the tops must match. 
NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { return false; } - + // If one graph has an edge to accept and the other has an edge to // acceptEod, the reports must match for the merge to be safe. if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) { if (h1[h1.start].reports != h2[h2.start].reports) { - return false; - } - } - - return true; -} - -/** Merge graph \p ga into graph \p gb. Returns false on failure. */ + return false; + } + } + + return true; +} + +/** Merge graph \p ga into graph \p gb. Returns false on failure. */ bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, - const CompileContext &cc) { - assert(ga.kind == gb.kind); - + const CompileContext &cc) { + assert(ga.kind == gb.kind); + // Vacuous NFAs require special checks on their starts to ensure that tops // match, and that reports match for mixed-accept cases. - if (!mergeableStarts(ga, gb)) { - DEBUG_PRINTF("starts aren't mergeable\n"); - return false; - } - + if (!mergeableStarts(ga, gb)) { + DEBUG_PRINTF("starts aren't mergeable\n"); + return false; + } + u32 cpl = commonPrefixLength(ga, gb); if (!shouldMerge(gb, ga, cpl, rm, cc)) { - return false; - } - - mergeNfaComponent(gb, ga, cpl); - reduceImplementableGraph(gb, SOM_NONE, rm, cc); - return true; -} - + return false; + } + + mergeNfaComponent(gb, ga, cpl); + reduceImplementableGraph(gb, SOM_NONE, rm, cc); + return true; +} + map<NGHolder *, NGHolder *> mergeNfaCluster(const vector<NGHolder *> &cluster, const ReportManager *rm, const CompileContext &cc) { map<NGHolder *, NGHolder *> merged; - if (cluster.size() < 2) { + if (cluster.size() < 2) { return merged; - } - - DEBUG_PRINTF("new cluster, size %zu\n", cluster.size()); - - priority_queue<NfaMergeCandidateH> pq; - buildNfaMergeQueue(cluster, &pq); - - while (!pq.empty()) { - NGHolder &pholder = *pq.top().first; - NGHolder &vholder = 
*pq.top().second; - pq.pop(); - - if (contains(merged, &pholder) || contains(merged, &vholder)) { - DEBUG_PRINTF("dead\n"); - continue; - } - - if (!mergeNfaPair(vholder, pholder, rm, cc)) { - DEBUG_PRINTF("merge failed\n"); - continue; - } - - merged.emplace(&vholder, &pholder); - - // Seek closure. - for (auto &m : merged) { - if (m.second == &vholder) { - m.second = &pholder; - } - } - } + } + + DEBUG_PRINTF("new cluster, size %zu\n", cluster.size()); + + priority_queue<NfaMergeCandidateH> pq; + buildNfaMergeQueue(cluster, &pq); + + while (!pq.empty()) { + NGHolder &pholder = *pq.top().first; + NGHolder &vholder = *pq.top().second; + pq.pop(); + + if (contains(merged, &pholder) || contains(merged, &vholder)) { + DEBUG_PRINTF("dead\n"); + continue; + } + + if (!mergeNfaPair(vholder, pholder, rm, cc)) { + DEBUG_PRINTF("merge failed\n"); + continue; + } + + merged.emplace(&vholder, &pholder); + + // Seek closure. + for (auto &m : merged) { + if (m.second == &vholder) { + m.second = &pholder; + } + } + } return merged; -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h index b0f42670a3..57bb242289 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h @@ -1,74 +1,74 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA graph merging ("uncalc") - */ - -#ifndef NG_UNCALC_COMPONENTS_H -#define NG_UNCALC_COMPONENTS_H - -#include <map> -#include <vector> - -namespace ue2 { - -struct CompileContext; -struct Grey; -class NGHolder; -class ReportManager; - -/** - * \brief Returns the common prefix length for a pair of graphs. - * - * The CPL is calculated based the topological ordering given by the state - * indices for each graph. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA graph merging ("uncalc") + */ + +#ifndef NG_UNCALC_COMPONENTS_H +#define NG_UNCALC_COMPONENTS_H + +#include <map> +#include <vector> + +namespace ue2 { + +struct CompileContext; +struct Grey; +class NGHolder; +class ReportManager; + +/** + * \brief Returns the common prefix length for a pair of graphs. + * + * The CPL is calculated based the topological ordering given by the state + * indices for each graph. + */ u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); - -/** - * \brief Merge the group of graphs in \p cluster where possible. - * + +/** + * \brief Merge the group of graphs in \p cluster where possible. + * * The (from, to) mapping of merged graphs is returned. - */ + */ std::map<NGHolder *, NGHolder *> mergeNfaCluster(const std::vector<NGHolder *> &cluster, const ReportManager *rm, const CompileContext &cc); - -/** - * \brief Merge graph \p ga into graph \p gb. - * - * Returns false on failure. 
On success, \p gb is reduced via \ref - * reduceImplementableGraph and renumbered. - */ + +/** + * \brief Merge graph \p ga into graph \p gb. + * + * Returns false on failure. On success, \p gb is reduced via \ref + * reduceImplementableGraph and renumbered. + */ bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, - const CompileContext &cc); - -} // namespace ue2 - -#endif + const CompileContext &cc); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp index 89500fe39e..a9afaa304d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp @@ -1,303 +1,303 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief UTF-8 transforms and operations. - */ -#include "ng_utf8.h" - -#include "ng.h" -#include "ng_prune.h" -#include "ng_util.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief UTF-8 transforms and operations. + */ +#include "ng_utf8.h" + +#include "ng.h" +#include "ng_prune.h" +#include "ng_util.h" #include "compiler/compiler.h" -#include "util/graph_range.h" -#include "util/unicode_def.h" - -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -static +#include "util/graph_range.h" +#include "util/unicode_def.h" + +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +static void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { if (in_degree(v, g) != 1) { - DEBUG_PRINTF("unexpected pred\n"); - assert(0); /* should be true due to the early stage of this analysis */ - return; - } - + DEBUG_PRINTF("unexpected pred\n"); + assert(0); /* should be true due to the early stage of this analysis */ + return; + } + CharReach &cr = g[v].char_reach; - if (pred_char == 0xe0) { - assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); - if (cr == CharReach(0xa0, 0xbf)) { - cr |= CharReach(0x80, 0x9f); - } - } else if (pred_char == 0xf0) { - assert(cr.isSubsetOf(CharReach(0x90, 0xbf))); - if (cr == CharReach(0x90, 0xbf)) { - cr |= CharReach(0x80, 0x8f); - } - } else if (pred_char == 0xf4) { - assert(cr.isSubsetOf(CharReach(0x80, 0x8f))); - if (cr == CharReach(0x80, 0x8f)) { - cr |= CharReach(0x90, 0xbf); - } - } else { - assert(0); /* unexpected pred */ - } -} - -/** \brief Relax forbidden UTF-8 sequences. 
- * - * Some byte sequences can not appear in valid UTF-8 as they encode code points - * above \\x{10ffff} or they represent overlong encodings. As we require valid - * UTF-8 input, we have no defined behaviour in these cases, as a result we can - * accept them if it simplifies the graph. */ + if (pred_char == 0xe0) { + assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); + if (cr == CharReach(0xa0, 0xbf)) { + cr |= CharReach(0x80, 0x9f); + } + } else if (pred_char == 0xf0) { + assert(cr.isSubsetOf(CharReach(0x90, 0xbf))); + if (cr == CharReach(0x90, 0xbf)) { + cr |= CharReach(0x80, 0x8f); + } + } else if (pred_char == 0xf4) { + assert(cr.isSubsetOf(CharReach(0x80, 0x8f))); + if (cr == CharReach(0x80, 0x8f)) { + cr |= CharReach(0x90, 0xbf); + } + } else { + assert(0); /* unexpected pred */ + } +} + +/** \brief Relax forbidden UTF-8 sequences. + * + * Some byte sequences can not appear in valid UTF-8 as they encode code points + * above \\x{10ffff} or they represent overlong encodings. As we require valid + * UTF-8 input, we have no defined behaviour in these cases, as a result we can + * accept them if it simplifies the graph. 
*/ void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { if (!expr.utf8) { - return; - } - - const CharReach e0(0xe0); - const CharReach f0(0xf0); - const CharReach f4(0xf4); - + return; + } + + const CharReach e0(0xe0); + const CharReach f0(0xf0); + const CharReach f4(0xf4); + for (auto v : vertices_range(g)) { const CharReach &cr = g[v].char_reach; - if (cr == e0 || cr == f0 || cr == f4) { - u8 pred_char = cr.find_first(); + if (cr == e0 || cr == f0 || cr == f4) { + u8 pred_char = cr.find_first(); for (auto t : adjacent_vertices_range(v, g)) { allowIllegal(g, t, pred_char); - } - } - } -} - -static -bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (contains(s, u)) { - return true; - } - } - return false; -} - -static -bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) { - for (auto w : adjacent_vertices_range(v, g)) { - if (contains(s, w)) { - return true; - } - } - return false; -} - -static -void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) { - set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */ - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - const CharReach &cr = h[v].char_reach; - if (!isutf8ascii(cr) && !isutf8start(cr)) { - bad.insert(v); - } - } - - for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { - const CharReach &cr = h[v].char_reach; - if (!isutf8ascii(cr) && !isutf8start(cr)) { - bad.insert(v); - } - } - - // we want to be careful with asserts connected to starts - // as well as they may not finish a code point - for (auto v : vertices_range(h)) { - if (is_virtual_start(v, h)) { - bad.insert(v); - insert(&bad, adjacent_vertices(v, h)); - } - } - - /* we cannot handle vertices connected to accept as would report matches in - * the middle of codepoints. 
acceptEod is not a problem as the input must - * end at a codepoint boundary */ - bad.insert(h.accept); - - // If we're in SOM mode, we don't want to mess with vertices that have a - // direct edge from startDs. - if (som) { - insert(&bad, adjacent_vertices(h.startDs, h)); - } - - set<NFAVertex> already_seeds; /* already marked as seeds */ - for (auto v : vertices_range(h)) { - const CharReach &cr = h[v].char_reach; - - if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) { - continue; - } - - if (hasSuccInSet(h, v, bad)) { - continue; - } - - // Skip vertices that are directly connected to other vertices already - // in the seeds list: we can't collapse two of these directly next to - // each other. - if (hasPredInSet(h, v, already_seeds) || - hasSuccInSet(h, v, already_seeds)) { - continue; - } - + } + } + } +} + +static +bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (contains(s, u)) { + return true; + } + } + return false; +} + +static +bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) { + for (auto w : adjacent_vertices_range(v, g)) { + if (contains(s, w)) { + return true; + } + } + return false; +} + +static +void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) { + set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */ + for (auto v : inv_adjacent_vertices_range(h.accept, h)) { + const CharReach &cr = h[v].char_reach; + if (!isutf8ascii(cr) && !isutf8start(cr)) { + bad.insert(v); + } + } + + for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { + const CharReach &cr = h[v].char_reach; + if (!isutf8ascii(cr) && !isutf8start(cr)) { + bad.insert(v); + } + } + + // we want to be careful with asserts connected to starts + // as well as they may not finish a code point + for (auto v : vertices_range(h)) { + if (is_virtual_start(v, h)) { + bad.insert(v); + insert(&bad, adjacent_vertices(v, h)); + } + } + + /* we cannot 
handle vertices connected to accept as would report matches in + * the middle of codepoints. acceptEod is not a problem as the input must + * end at a codepoint boundary */ + bad.insert(h.accept); + + // If we're in SOM mode, we don't want to mess with vertices that have a + // direct edge from startDs. + if (som) { + insert(&bad, adjacent_vertices(h.startDs, h)); + } + + set<NFAVertex> already_seeds; /* already marked as seeds */ + for (auto v : vertices_range(h)) { + const CharReach &cr = h[v].char_reach; + + if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) { + continue; + } + + if (hasSuccInSet(h, v, bad)) { + continue; + } + + // Skip vertices that are directly connected to other vertices already + // in the seeds list: we can't collapse two of these directly next to + // each other. + if (hasPredInSet(h, v, already_seeds) || + hasSuccInSet(h, v, already_seeds)) { + continue; + } + DEBUG_PRINTF("%zu is a seed\n", h[v].index); - seeds->push_back(v); - already_seeds.insert(v); - } -} - -static -bool expandCyclic(NGHolder &h, NFAVertex v) { + seeds->push_back(v); + already_seeds.insert(v); + } +} + +static +bool expandCyclic(NGHolder &h, NFAVertex v) { DEBUG_PRINTF("inspecting %zu\n", h[v].index); - bool changes = false; - + bool changes = false; + auto v_preds = preds(v, h); auto v_succs = succs(v, h); - set<NFAVertex> start_siblings; - set<NFAVertex> end_siblings; - - CharReach &v_cr = h[v].char_reach; - - /* We need to find start vertices which have all of our preds. - * As we have a self loop, it must be one of our succs. */ - for (auto a : adjacent_vertices_range(v, h)) { + set<NFAVertex> start_siblings; + set<NFAVertex> end_siblings; + + CharReach &v_cr = h[v].char_reach; + + /* We need to find start vertices which have all of our preds. + * As we have a self loop, it must be one of our succs. 
*/ + for (auto a : adjacent_vertices_range(v, h)) { auto a_preds = preds(a, h); - - if (a_preds == v_preds && isutf8start(h[a].char_reach)) { + + if (a_preds == v_preds && isutf8start(h[a].char_reach)) { DEBUG_PRINTF("%zu is a start v\n", h[a].index); - start_siblings.insert(a); - } - } - - /* We also need to find full cont vertices which have all our own succs; - * As we have a self loop, it must be one of our preds. */ - for (auto a : inv_adjacent_vertices_range(v, h)) { + start_siblings.insert(a); + } + } + + /* We also need to find full cont vertices which have all our own succs; + * As we have a self loop, it must be one of our preds. */ + for (auto a : inv_adjacent_vertices_range(v, h)) { auto a_succs = succs(a, h); - - if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { + + if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index); - end_siblings.insert(a); - } - } - - for (auto s : start_siblings) { - if (out_degree(s, h) != 1) { - continue; - } - - const CharReach &cr = h[s].char_reach; - if (cr.isSubsetOf(UTF_TWO_START_CR)) { - if (end_siblings.find(*adjacent_vertices(s, h).first) - == end_siblings.end()) { + end_siblings.insert(a); + } + } + + for (auto s : start_siblings) { + if (out_degree(s, h) != 1) { + continue; + } + + const CharReach &cr = h[s].char_reach; + if (cr.isSubsetOf(UTF_TWO_START_CR)) { + if (end_siblings.find(*adjacent_vertices(s, h).first) + == end_siblings.end()) { DEBUG_PRINTF("%zu is odd\n", h[s].index); - continue; - } - } else if (cr.isSubsetOf(UTF_THREE_START_CR)) { - NFAVertex m = *adjacent_vertices(s, h).first; - - if (h[m].char_reach != UTF_CONT_CR - || out_degree(m, h) != 1) { - continue; - } - if (end_siblings.find(*adjacent_vertices(m, h).first) - == end_siblings.end()) { + continue; + } + } else if (cr.isSubsetOf(UTF_THREE_START_CR)) { + NFAVertex m = *adjacent_vertices(s, h).first; + + if (h[m].char_reach != UTF_CONT_CR + || out_degree(m, h) != 1) { + 
continue; + } + if (end_siblings.find(*adjacent_vertices(m, h).first) + == end_siblings.end()) { DEBUG_PRINTF("%zu is odd\n", h[s].index); - continue; - } - } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) { - NFAVertex m1 = *adjacent_vertices(s, h).first; - - if (h[m1].char_reach != UTF_CONT_CR - || out_degree(m1, h) != 1) { - continue; - } - - NFAVertex m2 = *adjacent_vertices(m1, h).first; - - if (h[m2].char_reach != UTF_CONT_CR - || out_degree(m2, h) != 1) { - continue; - } - - if (end_siblings.find(*adjacent_vertices(m2, h).first) - == end_siblings.end()) { + continue; + } + } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) { + NFAVertex m1 = *adjacent_vertices(s, h).first; + + if (h[m1].char_reach != UTF_CONT_CR + || out_degree(m1, h) != 1) { + continue; + } + + NFAVertex m2 = *adjacent_vertices(m1, h).first; + + if (h[m2].char_reach != UTF_CONT_CR + || out_degree(m2, h) != 1) { + continue; + } + + if (end_siblings.find(*adjacent_vertices(m2, h).first) + == end_siblings.end()) { DEBUG_PRINTF("%zu is odd\n", h[s].index); - continue; - } - } else { + continue; + } + } else { DEBUG_PRINTF("%zu is bad\n", h[s].index); - continue; - } - - v_cr |= cr; - clear_vertex(s, h); - changes = true; - } - - if (changes) { - v_cr |= UTF_CONT_CR; /* we need to add in cont reach */ - v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require - * valid unicode data */ - v_cr.set(0xc1); - v_cr |= CharReach(0xf5, 0xff); - } - - return changes; -} - -/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex - * where possible, based on the assumption that we will always be matching - * against well-formed input. 
*/ -void utf8DotRestoration(NGHolder &h, bool som) { - vector<NFAVertex> seeds; /* cyclic ascii vertices */ - findSeeds(h, som, &seeds); - - bool changes = false; - for (auto v : seeds) { - changes |= expandCyclic(h, v); - } - - if (changes) { - pruneUseless(h); - } -} - -} // namespace ue2 + continue; + } + + v_cr |= cr; + clear_vertex(s, h); + changes = true; + } + + if (changes) { + v_cr |= UTF_CONT_CR; /* we need to add in cont reach */ + v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require + * valid unicode data */ + v_cr.set(0xc1); + v_cr |= CharReach(0xf5, 0xff); + } + + return changes; +} + +/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex + * where possible, based on the assumption that we will always be matching + * against well-formed input. */ +void utf8DotRestoration(NGHolder &h, bool som) { + vector<NFAVertex> seeds; /* cyclic ascii vertices */ + findSeeds(h, som, &seeds); + + bool changes = false; + for (auto v : seeds) { + changes |= expandCyclic(h, v); + } + + if (changes) { + pruneUseless(h); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h index 7c4288336f..0300088039 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h @@ -1,57 +1,57 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief UTF-8 transforms and operations. - */ - -#ifndef NG_UTF8_H -#define NG_UTF8_H - -namespace ue2 { - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief UTF-8 transforms and operations. + */ + +#ifndef NG_UTF8_H +#define NG_UTF8_H + +namespace ue2 { + class ExpressionInfo; -class NGHolder; - -/** \brief Relax forbidden UTF-8 sequences. - * - * Some byte sequences can not appear in valid UTF-8 as they encode code points - * above \\x{10ffff} or they represent overlong encodings. As we require valid - * UTF-8 input, we have no defined behaviour in these cases, as a result we can - * accept them if it simplifies the graph. */ +class NGHolder; + +/** \brief Relax forbidden UTF-8 sequences. + * + * Some byte sequences can not appear in valid UTF-8 as they encode code points + * above \\x{10ffff} or they represent overlong encodings. As we require valid + * UTF-8 input, we have no defined behaviour in these cases, as a result we can + * accept them if it simplifies the graph. */ void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr); - -/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex - * where possible, based on the assumption that we will always be matching - * against well-formed input. 
- */ -void utf8DotRestoration(NGHolder &h, bool som); - -} // namespace ue2 - -#endif + +/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex + * where possible, based on the assumption that we will always be matching + * against well-formed input. + */ +void utf8DotRestoration(NGHolder &h, bool som); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp index cb2b710358..630193b19b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp @@ -1,191 +1,191 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Miscellaneous NFA graph utilities. - */ -#include "ng_util.h" - -#include "grey.h" -#include "ng_dump.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Miscellaneous NFA graph utilities. + */ +#include "ng_util.h" + +#include "grey.h" +#include "ng_dump.h" #include "ng_prune.h" -#include "ue2common.h" -#include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. -#include "parser/position.h" -#include "util/graph_range.h" +#include "ue2common.h" +#include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. +#include "parser/position.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" -#include "util/make_unique.h" -#include "util/order_check.h" -#include "util/ue2string.h" -#include "util/report_manager.h" - +#include "util/make_unique.h" +#include "util/order_check.h" +#include "util/ue2string.h" +#include "util/report_manager.h" + #include <limits> -#include <map> -#include <set> +#include <map> +#include <set> #include <unordered_map> #include <unordered_set> -#include <boost/graph/filtered_graph.hpp> -#include <boost/graph/topological_sort.hpp> -#include <boost/range/adaptor/map.hpp> - -using namespace std; +#include <boost/graph/filtered_graph.hpp> +#include <boost/graph/topological_sort.hpp> +#include <boost/range/adaptor/map.hpp> + +using namespace std; using boost::make_filtered_graph; -using boost::make_assoc_property_map; - -namespace ue2 { - -NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { +using boost::make_assoc_property_map; + +namespace ue2 { + +NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { 
assert(a != NGHolder::null_vertex()); - + NGHolder::out_edge_iterator ii, iie; - tie(ii, iie) = out_edges(a, g); - if (ii == iie) { + tie(ii, iie) = out_edges(a, g); + if (ii == iie) { return NGHolder::null_vertex(); - } - NFAVertex b = target(*ii, g); - if (a == b) { - ++ii; - if (ii == iie) { + } + NFAVertex b = target(*ii, g); + if (a == b) { + ++ii; + if (ii == iie) { return NGHolder::null_vertex(); - } - - b = target(*ii, g); - if (++ii != iie) { + } + + b = target(*ii, g); + if (++ii != iie) { return NGHolder::null_vertex(); - } - } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) { + } + } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) { return NGHolder::null_vertex(); - } - - assert(a != b); - return b; -} - -NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) { + } + + assert(a != b); + return b; +} + +NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) { assert(a != NGHolder::null_vertex()); - - u32 idegree = in_degree(a, g); - if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) { + + u32 idegree = in_degree(a, g); + if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) { return NGHolder::null_vertex(); - } - + } + NGHolder::in_edge_iterator ii, iie; - tie(ii, iie) = in_edges(a, g); - if (ii == iie) { + tie(ii, iie) = in_edges(a, g); + if (ii == iie) { return NGHolder::null_vertex(); - } - NFAVertex b = source(*ii, g); - if (a == b) { - ++ii; - if (ii == iie) { + } + NFAVertex b = source(*ii, g); + if (a == b) { + ++ii; + if (ii == iie) { return NGHolder::null_vertex(); - } - - b = source(*ii, g); - } - - assert(a != b); - return b; -} - -NFAVertex clone_vertex(NGHolder &g, NFAVertex v) { - NFAVertex clone = add_vertex(g); - u32 idx = g[clone].index; - g[clone] = g[v]; - g[clone].index = idx; - - return clone; -} - -void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { - for (const auto &e : out_edges_range(source, g)) { - NFAVertex t = target(e, g); - if (edge(dest, t, g).second) { 
- continue; - } + } + + b = source(*ii, g); + } + + assert(a != b); + return b; +} + +NFAVertex clone_vertex(NGHolder &g, NFAVertex v) { + NFAVertex clone = add_vertex(g); + u32 idx = g[clone].index; + g[clone] = g[v]; + g[clone].index = idx; + + return clone; +} + +void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { + for (const auto &e : out_edges_range(source, g)) { + NFAVertex t = target(e, g); + if (edge(dest, t, g).second) { + continue; + } NFAEdge clone = add_edge(dest, t, g); - u32 idx = g[clone].index; - g[clone] = g[e]; - g[clone].index = idx; - } -} - -void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { - for (const auto &e : in_edges_range(s, g)) { - NFAVertex ss = source(e, g); - assert(!edge(ss, dest, g).second); + u32 idx = g[clone].index; + g[clone] = g[e]; + g[clone].index = idx; + } +} + +void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { + for (const auto &e : in_edges_range(s, g)) { + NFAVertex ss = source(e, g); + assert(!edge(ss, dest, g).second); NFAEdge clone = add_edge(ss, dest, g); - u32 idx = g[clone].index; - g[clone] = g[e]; - g[clone].index = idx; - } -} - -bool onlyOneTop(const NGHolder &g) { + u32 idx = g[clone].index; + g[clone] = g[e]; + g[clone].index = idx; + } +} + +bool onlyOneTop(const NGHolder &g) { return getTops(g).size() == 1; -} - -namespace { -struct CycleFound {}; -struct DetectCycles : public boost::default_dfs_visitor { - explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {} +} + +namespace { +struct CycleFound {}; +struct DetectCycles : public boost::default_dfs_visitor { + explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {} void back_edge(const NFAEdge &e, const NGHolder &g) const { - NFAVertex u = source(e, g), v = target(e, g); - // We ignore the startDs self-loop. - if (u == startDs && v == startDs) { - return; - } - // Any other back-edge indicates a cycle. + NFAVertex u = source(e, g), v = target(e, g); + // We ignore the startDs self-loop. 
+ if (u == startDs && v == startDs) { + return; + } + // Any other back-edge indicates a cycle. DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index); - throw CycleFound(); - } -private: - const NFAVertex startDs; -}; -} // namespace - -bool isVacuous(const NGHolder &h) { - return edge(h.start, h.accept, h).second - || edge(h.start, h.acceptEod, h).second - || edge(h.startDs, h.accept, h).second - || edge(h.startDs, h.acceptEod, h).second; -} - -bool isAnchored(const NGHolder &g) { - for (auto v : adjacent_vertices_range(g.startDs, g)) { - if (v != g.startDs) { - return false; - } - } - return true; -} - + throw CycleFound(); + } +private: + const NFAVertex startDs; +}; +} // namespace + +bool isVacuous(const NGHolder &h) { + return edge(h.start, h.accept, h).second + || edge(h.start, h.acceptEod, h).second + || edge(h.startDs, h.accept, h).second + || edge(h.startDs, h.acceptEod, h).second; +} + +bool isAnchored(const NGHolder &g) { + for (auto v : adjacent_vertices_range(g.startDs, g)) { + if (v != g.startDs) { + return false; + } + } + return true; +} + bool isFloating(const NGHolder &g) { for (auto v : adjacent_vertices_range(g.start, g)) { if (v != g.startDs && !edge(g.startDs, v, g).second) { @@ -195,99 +195,99 @@ bool isFloating(const NGHolder &g) { return true; } -bool isAcyclic(const NGHolder &g) { - try { +bool isAcyclic(const NGHolder &g) { + try { boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g), g.start); - } catch (const CycleFound &) { - return false; - } - - return true; -} - -/** True if the graph has a cycle reachable from the given source vertex. */ -bool hasReachableCycle(const NGHolder &g, NFAVertex src) { - assert(hasCorrectlyNumberedVertices(g)); - - try { - // Use depth_first_visit, rather than depth_first_search, so that we - // only search from src. + } catch (const CycleFound &) { + return false; + } + + return true; +} + +/** True if the graph has a cycle reachable from the given source vertex. 
*/ +bool hasReachableCycle(const NGHolder &g, NFAVertex src) { + assert(hasCorrectlyNumberedVertices(g)); + + try { + // Use depth_first_visit, rather than depth_first_search, so that we + // only search from src. boost::depth_first_visit(g, src, DetectCycles(g), make_small_color_map(g)); } catch (const CycleFound &) { - return true; - } - - return false; -} - -bool hasBigCycles(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - set<NFAEdge> dead; - BackEdges<set<NFAEdge>> backEdgeVisitor(dead); + return true; + } + + return false; +} + +bool hasBigCycles(const NGHolder &g) { + assert(hasCorrectlyNumberedVertices(g)); + set<NFAEdge> dead; + BackEdges<set<NFAEdge>> backEdgeVisitor(dead); boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g), g.start); - - for (const auto &e : dead) { - if (source(e, g) != target(e, g)) { - return true; - } - } - - return false; -} - + + for (const auto &e : dead) { + if (source(e, g) != target(e, g)) { + return true; + } + } + + return false; +} + bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) { return any_of_in(vertices_range(g), [&](NFAVertex v) { return !is_special(v, g) && g[v].char_reach.count() < max_reach_count; }); -} - -bool can_never_match(const NGHolder &g) { - assert(edge(g.accept, g.acceptEod, g).second); +} + +bool can_never_match(const NGHolder &g) { + assert(edge(g.accept, g.acceptEod, g).second); if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { - DEBUG_PRINTF("no paths into accept\n"); - return true; - } - - return false; -} - -bool can_match_at_eod(const NGHolder &h) { + DEBUG_PRINTF("no paths into accept\n"); + return true; + } + + return false; +} + +bool can_match_at_eod(const NGHolder &h) { if (in_degree(h.acceptEod, h) > 1) { - DEBUG_PRINTF("more than one edge to acceptEod\n"); - return true; - } - - for (auto e : in_edges_range(h.accept, h)) { - if (h[e].assert_flags) { - DEBUG_PRINTF("edge to accept has assert flags %d\n", - 
h[e].assert_flags); - return true; - } - } - - return false; -} - -bool can_only_match_at_eod(const NGHolder &g) { + DEBUG_PRINTF("more than one edge to acceptEod\n"); + return true; + } + + for (auto e : in_edges_range(h.accept, h)) { + if (h[e].assert_flags) { + DEBUG_PRINTF("edge to accept has assert flags %d\n", + h[e].assert_flags); + return true; + } + } + + return false; +} + +bool can_only_match_at_eod(const NGHolder &g) { NGHolder::in_edge_iterator ie, ee; - tie(ie, ee) = in_edges(g.accept, g); - - return ie == ee; -} - -bool matches_everywhere(const NGHolder &h) { + tie(ie, ee) = in_edges(g.accept, g); + + return ie == ee; +} + +bool matches_everywhere(const NGHolder &h) { NFAEdge e = edge(h.startDs, h.accept, h); - + return e && !h[e].assert_flags; -} - -bool is_virtual_start(NFAVertex v, const NGHolder &g) { - return g[v].assert_flags & POS_FLAG_VIRTUAL_START; -} - +} + +bool is_virtual_start(NFAVertex v, const NGHolder &g) { + return g[v].assert_flags & POS_FLAG_VIRTUAL_START; +} + static void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) { // Start is last element of reverse topo ordering. @@ -329,110 +329,110 @@ void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) { } } -vector<NFAVertex> getTopoOrdering(const NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - - // Use the same colour map for both DFS and topological_sort below: avoids - // having to reallocate it, etc. +vector<NFAVertex> getTopoOrdering(const NGHolder &g) { + assert(hasCorrectlyNumberedVertices(g)); + + // Use the same colour map for both DFS and topological_sort below: avoids + // having to reallocate it, etc. 
auto colors = make_small_color_map(g); - + using EdgeSet = unordered_set<NFAEdge>; - EdgeSet backEdges; - BackEdges<EdgeSet> be(backEdges); - + EdgeSet backEdges; + BackEdges<EdgeSet> be(backEdges); + depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors)); - + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges)); - - vector<NFAVertex> ordering; + + vector<NFAVertex> ordering; ordering.reserve(num_vertices(g)); topological_sort(acyclic_g, back_inserter(ordering), color_map(colors)); - + reorderSpecials(g, ordering); - return ordering; -} - -static -void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, + return ordering; +} + +static +void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, decltype(make_small_color_map(NGHolder())) &colors) { - set<NFAVertex> s; - insert(&s, adjacent_vertices(u, g)); - - set<NFAEdge> dead; // Edges leading to u or u's successors. - - for (auto v : inv_adjacent_vertices_range(u, g)) { - for (const auto &e : out_edges_range(v, g)) { - NFAVertex t = target(e, g); - if (t == u || contains(s, t)) { - dead.insert(e); - } - } - } - + set<NFAVertex> s; + insert(&s, adjacent_vertices(u, g)); + + set<NFAEdge> dead; // Edges leading to u or u's successors. 
+ + for (auto v : inv_adjacent_vertices_range(u, g)) { + for (const auto &e : out_edges_range(v, g)) { + NFAVertex t = target(e, g); + if (t == u || contains(s, t)) { + dead.insert(e); + } + } + } + auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); - + depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()), colors); -} - -bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, - mbsb_cache &cache) { - assert(&cache.g == &g); - auto key = make_pair(g[u].index, g[v].index); - DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size()); - if (contains(cache.cache, key)) { - DEBUG_PRINTF("cache hit\n"); - return cache.cache[key]; - } - +} + +bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, + mbsb_cache &cache) { + assert(&cache.g == &g); + auto key = make_pair(g[u].index, g[v].index); + DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size()); + if (contains(cache.cache, key)) { + DEBUG_PRINTF("cache hit\n"); + return cache.cache[key]; + } + auto colors = make_small_color_map(g); mustBeSetBefore_int(u, g, colors); - - for (auto vi : vertices_range(g)) { + + for (auto vi : vertices_range(g)) { auto key2 = make_pair(g[u].index, g[vi].index); DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second); - assert(!contains(cache.cache, key2)); + assert(!contains(cache.cache, key2)); bool value = get(colors, vi) == small_color::white; - cache.cache[key2] = value; - assert(contains(cache.cache, key2)); - } + cache.cache[key2] = value; + assert(contains(cache.cache, key2)); + } DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second, - cache.cache.size()); - return cache.cache[key]; -} - -void appendLiteral(NGHolder &h, const ue2_literal &s) { - DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str()); - vector<NFAVertex> tail; - assert(in_degree(h.acceptEod, h) == 1); - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - tail.push_back(v); - } - assert(!tail.empty()); - - for (auto v : tail) 
{ - remove_edge(v, h.accept, h); - } - - for (const auto &c : s) { - NFAVertex v = add_vertex(h); - h[v].char_reach = c; - for (auto u : tail) { - add_edge(u, v, h); - } - tail.clear(); - tail.push_back(v); - } - - for (auto v : tail) { - add_edge(v, h.accept, h); - } -} - + cache.cache.size()); + return cache.cache[key]; +} + +void appendLiteral(NGHolder &h, const ue2_literal &s) { + DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str()); + vector<NFAVertex> tail; + assert(in_degree(h.acceptEod, h) == 1); + for (auto v : inv_adjacent_vertices_range(h.accept, h)) { + tail.push_back(v); + } + assert(!tail.empty()); + + for (auto v : tail) { + remove_edge(v, h.accept, h); + } + + for (const auto &c : s) { + NFAVertex v = add_vertex(h); + h[v].char_reach = c; + for (auto u : tail) { + add_edge(u, v, h); + } + tail.clear(); + tail.push_back(v); + } + + for (auto v : tail) { + add_edge(v, h.accept, h); + } +} + flat_set<u32> getTops(const NGHolder &h) { flat_set<u32> tops; - for (const auto &e : out_edges_range(h.start, h)) { + for (const auto &e : out_edges_range(h.start, h)) { insert(&tops, h[e].tops); } return tops; @@ -442,165 +442,165 @@ void setTops(NGHolder &h, u32 top) { for (const auto &e : out_edges_range(h.start, h)) { assert(h[e].tops.empty()); if (target(e, h) == h.startDs) { - continue; - } + continue; + } h[e].tops.insert(top); - } -} - -void clearReports(NGHolder &g) { - DEBUG_PRINTF("clearing reports without an accept edge\n"); + } +} + +void clearReports(NGHolder &g) { + DEBUG_PRINTF("clearing reports without an accept edge\n"); unordered_set<NFAVertex> allow; - insert(&allow, inv_adjacent_vertices(g.accept, g)); - insert(&allow, inv_adjacent_vertices(g.acceptEod, g)); - allow.erase(g.accept); // due to stylised edge. 
- - for (auto v : vertices_range(g)) { - if (contains(allow, v)) { - continue; - } - g[v].reports.clear(); - } -} - -void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) { - for (auto v : vertices_range(g)) { - auto &reports = g[v].reports; - if (contains(reports, r_old)) { - reports.insert(r_new); - } - } -} - -static -void fillHolderOutEdges(NGHolder &out, const NGHolder &in, + insert(&allow, inv_adjacent_vertices(g.accept, g)); + insert(&allow, inv_adjacent_vertices(g.acceptEod, g)); + allow.erase(g.accept); // due to stylised edge. + + for (auto v : vertices_range(g)) { + if (contains(allow, v)) { + continue; + } + g[v].reports.clear(); + } +} + +void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) { + for (auto v : vertices_range(g)) { + auto &reports = g[v].reports; + if (contains(reports, r_old)) { + reports.insert(r_new); + } + } +} + +static +void fillHolderOutEdges(NGHolder &out, const NGHolder &in, const unordered_map<NFAVertex, NFAVertex> &v_map, - NFAVertex u) { - NFAVertex u_new = v_map.at(u); - - for (auto e : out_edges_range(u, in)) { - NFAVertex v = target(e, in); - - if (is_special(u, in) && is_special(v, in)) { - continue; - } - - auto it = v_map.find(v); - if (it == v_map.end()) { - continue; - } - NFAVertex v_new = it->second; - assert(!edge(u_new, v_new, out).second); - add_edge(u_new, v_new, in[e], out); - } -} - -void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv, + NFAVertex u) { + NFAVertex u_new = v_map.at(u); + + for (auto e : out_edges_range(u, in)) { + NFAVertex v = target(e, in); + + if (is_special(u, in) && is_special(v, in)) { + continue; + } + + auto it = v_map.find(v); + if (it == v_map.end()) { + continue; + } + NFAVertex v_new = it->second; + assert(!edge(u_new, v_new, out).second); + add_edge(u_new, v_new, in[e], out); + } +} + +void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv, unordered_map<NFAVertex, NFAVertex> *v_map_out) { - NGHolder &out = 
*outp; + NGHolder &out = *outp; unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out; - - out.kind = in.kind; - - for (auto v : vv) { - if (is_special(v, in)) { - continue; - } - v_map[v] = add_vertex(in[v], out); - } - - for (u32 i = 0; i < N_SPECIALS; i++) { - v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i); - } - - DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size()); - - fillHolderOutEdges(out, in, v_map, in.start); - fillHolderOutEdges(out, in, v_map, in.startDs); - - for (auto u : vv) { - if (is_special(u, in)) { - continue; - } - fillHolderOutEdges(out, in, v_map, u); - } - + + out.kind = in.kind; + + for (auto v : vv) { + if (is_special(v, in)) { + continue; + } + v_map[v] = add_vertex(in[v], out); + } + + for (u32 i = 0; i < N_SPECIALS; i++) { + v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i); + } + + DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size()); + + fillHolderOutEdges(out, in, v_map, in.start); + fillHolderOutEdges(out, in, v_map, in.startDs); + + for (auto u : vv) { + if (is_special(u, in)) { + continue; + } + fillHolderOutEdges(out, in, v_map, u); + } + renumber_edges(out); renumber_vertices(out); -} - -void cloneHolder(NGHolder &out, const NGHolder &in) { - assert(hasCorrectlyNumberedVertices(in)); +} + +void cloneHolder(NGHolder &out, const NGHolder &in) { + assert(hasCorrectlyNumberedVertices(in)); assert(hasCorrectlyNumberedVertices(out)); - out.kind = in.kind; - - // Note: depending on the state of the input graph, some stylized edges - // (e.g. start->startDs) may not exist. This must be propagated to the - // output graph as well. - - /* remove the existing special edges */ - clear_vertex(out.startDs, out); - clear_vertex(out.accept, out); + out.kind = in.kind; + + // Note: depending on the state of the input graph, some stylized edges + // (e.g. start->startDs) may not exist. This must be propagated to the + // output graph as well. 
+ + /* remove the existing special edges */ + clear_vertex(out.startDs, out); + clear_vertex(out.accept, out); renumber_edges(out); - - vector<NFAVertex> out_mapping(num_vertices(in)); - out_mapping[NODE_START] = out.start; - out_mapping[NODE_START_DOTSTAR] = out.startDs; - out_mapping[NODE_ACCEPT] = out.accept; - out_mapping[NODE_ACCEPT_EOD] = out.acceptEod; - - for (auto v : vertices_range(in)) { - u32 i = in[v].index; - - /* special vertices are already in the out graph */ - if (i >= N_SPECIALS) { - assert(!out_mapping[i]); - out_mapping[i] = add_vertex(in[v], out); - } - - out[out_mapping[i]] = in[v]; - } - - for (auto e : edges_range(in)) { - u32 si = in[source(e, in)].index; - u32 ti = in[target(e, in)].index; - - DEBUG_PRINTF("adding edge %u->%u\n", si, ti); - - NFAVertex s = out_mapping[si]; - NFAVertex t = out_mapping[ti]; + + vector<NFAVertex> out_mapping(num_vertices(in)); + out_mapping[NODE_START] = out.start; + out_mapping[NODE_START_DOTSTAR] = out.startDs; + out_mapping[NODE_ACCEPT] = out.accept; + out_mapping[NODE_ACCEPT_EOD] = out.acceptEod; + + for (auto v : vertices_range(in)) { + u32 i = in[v].index; + + /* special vertices are already in the out graph */ + if (i >= N_SPECIALS) { + assert(!out_mapping[i]); + out_mapping[i] = add_vertex(in[v], out); + } + + out[out_mapping[i]] = in[v]; + } + + for (auto e : edges_range(in)) { + u32 si = in[source(e, in)].index; + u32 ti = in[target(e, in)].index; + + DEBUG_PRINTF("adding edge %u->%u\n", si, ti); + + NFAVertex s = out_mapping[si]; + NFAVertex t = out_mapping[ti]; NFAEdge e2 = add_edge(s, t, out); - out[e2] = in[e]; - } - - // Safety checks. + out[e2] = in[e]; + } + + // Safety checks. 
assert(num_vertices(in) == num_vertices(out)); assert(num_edges(in) == num_edges(out)); - assert(hasCorrectlyNumberedVertices(out)); -} - -void cloneHolder(NGHolder &out, const NGHolder &in, + assert(hasCorrectlyNumberedVertices(out)); +} + +void cloneHolder(NGHolder &out, const NGHolder &in, unordered_map<NFAVertex, NFAVertex> *mapping) { - cloneHolder(out, in); - vector<NFAVertex> out_verts(num_vertices(in)); - for (auto v : vertices_range(out)) { - out_verts[out[v].index] = v; - } - - mapping->clear(); - - for (auto v : vertices_range(in)) { - (*mapping)[v] = out_verts[in[v].index]; - assert((*mapping)[v]); - } -} - -unique_ptr<NGHolder> cloneHolder(const NGHolder &in) { - unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(); - cloneHolder(*h, in); - return h; -} - + cloneHolder(out, in); + vector<NFAVertex> out_verts(num_vertices(in)); + for (auto v : vertices_range(out)) { + out_verts[out[v].index] = v; + } + + mapping->clear(); + + for (auto v : vertices_range(in)) { + (*mapping)[v] = out_verts[in[v].index]; + assert((*mapping)[v]); + } +} + +unique_ptr<NGHolder> cloneHolder(const NGHolder &in) { + unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(); + cloneHolder(*h, in); + return h; +} + void reverseHolder(const NGHolder &g_in, NGHolder &g) { // Make the BGL do the grunt work. 
unordered_map<NFAVertex, NFAVertex> vertexMap; @@ -734,58 +734,58 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, return delay; } -#ifndef NDEBUG +#ifndef NDEBUG -bool allMatchStatesHaveReports(const NGHolder &g) { +bool allMatchStatesHaveReports(const NGHolder &g) { unordered_set<NFAVertex> reporters; - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (g[v].reports.empty()) { + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (g[v].reports.empty()) { DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); - return false; - } + return false; + } reporters.insert(v); - } + } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; // stylised edge - } - if (g[v].reports.empty()) { + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; // stylised edge + } + if (g[v].reports.empty()) { DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); - return false; - } + return false; + } reporters.insert(v); - } - - for (auto v : vertices_range(g)) { + } + + for (auto v : vertices_range(g)) { if (!contains(reporters, v) && !g[v].reports.empty()) { DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n", g[v].index); return false; - } - } + } + } return true; -} - +} + bool isCorrectlyTopped(const NGHolder &g) { if (is_triggered(g)) { for (const auto &e : out_edges_range(g.start, g)) { if (g[e].tops.empty() != (target(e, g) == g.startDs)) { return false; } - } + } } else { for (const auto &e : out_edges_range(g.start, g)) { if (!g[e].tops.empty()) { return false; } } - } + } return true; -} - -#endif // NDEBUG +} -} // namespace ue2 +#endif // NDEBUG + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_util.h index a2d0d9b7d6..0f89b64dc9 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_util.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.h @@ -1,44 
+1,44 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Miscellaneous NFA graph utilities. - */ -#ifndef NG_UTIL_H -#define NG_UTIL_H - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Miscellaneous NFA graph utilities. 
+ */ +#ifndef NG_UTIL_H +#define NG_UTIL_H + #include "ng_depth.h" -#include "ng_holder.h" -#include "ue2common.h" +#include "ng_holder.h" +#include "ue2common.h" #include "util/flat_containers.h" -#include "util/graph.h" -#include "util/graph_range.h" - +#include "util/graph.h" +#include "util/graph_range.h" + #include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor #include <algorithm> @@ -46,12 +46,12 @@ #include <unordered_map> #include <vector> -namespace ue2 { - -struct Grey; -struct ue2_literal; -class ReportManager; - +namespace ue2 { + +struct Grey; +struct ue2_literal; +class ReportManager; + template<class VertexDepth> depth maxDistFromInit(const VertexDepth &vd) { if (vd.fromStart.max.is_unreachable()) { @@ -62,7 +62,7 @@ depth maxDistFromInit(const VertexDepth &vd) { return std::max(vd.fromStartDotStar.max, vd.fromStart.max); } } - + template<class VertexDepth> depth maxDistFromStartOfData(const VertexDepth &vd) { if (vd.fromStartDotStar.max.is_reachable()) { @@ -73,21 +73,21 @@ depth maxDistFromStartOfData(const VertexDepth &vd) { } } -/** True if the given vertex is a dot (reachable on any character). */ -template<class GraphT> -static really_inline -bool is_dot(NFAVertex v, const GraphT &g) { - return g[v].char_reach.all(); -} - -/** adds successors of v to s */ -template<class U> -static really_inline -void succ(const NGHolder &g, NFAVertex v, U *s) { +/** True if the given vertex is a dot (reachable on any character). 
*/ +template<class GraphT> +static really_inline +bool is_dot(NFAVertex v, const GraphT &g) { + return g[v].char_reach.all(); +} + +/** adds successors of v to s */ +template<class U> +static really_inline +void succ(const NGHolder &g, NFAVertex v, U *s) { auto rv = adjacent_vertices(v, g); s->insert(rv.first, rv.second); -} - +} + template<class ContTemp = flat_set<NFAVertex>> ContTemp succs(NFAVertex u, const NGHolder &g) { ContTemp rv; @@ -95,14 +95,14 @@ ContTemp succs(NFAVertex u, const NGHolder &g) { return rv; } -/** adds predecessors of v to s */ -template<class U> -static really_inline -void pred(const NGHolder &g, NFAVertex v, U *p) { +/** adds predecessors of v to s */ +template<class U> +static really_inline +void pred(const NGHolder &g, NFAVertex v, U *p) { auto rv = inv_adjacent_vertices(v, g); p->insert(rv.first, rv.second); -} - +} + template<class ContTemp = flat_set<NFAVertex>> ContTemp preds(NFAVertex u, const NGHolder &g) { ContTemp rv; @@ -110,15 +110,15 @@ ContTemp preds(NFAVertex u, const NGHolder &g) { return rv; } -/** returns a vertex with an out edge from v and is not v. - * v must have exactly one out-edge excluding self-loops. +/** returns a vertex with an out edge from v and is not v. + * v must have exactly one out-edge excluding self-loops. * will return NGHolder::null_vertex() if the preconditions don't hold. - */ -NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v); - -/** Like getSoleDestVertex but for in-edges */ -NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v); - + */ +NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v); + +/** Like getSoleDestVertex but for in-edges */ +NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v); + /** \brief edge filtered graph. 
* * This will give you a view over the graph that has none of the edges from @@ -159,159 +159,159 @@ bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) { return bad_vertex_filter<VertexSet>(v); } -/** Visitor that records back edges */ -template <typename BackEdgeSet> -class BackEdges : public boost::default_dfs_visitor { -public: - explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {} - template <class EdgeT, class GraphT> - void back_edge(const EdgeT &e, const GraphT &) { - backEdges.insert(e); // Remove this back edge only - } - BackEdgeSet &backEdges; -}; - -/** Returns true if the vertex is either of the real starts (NODE_START, - * NODE_START_DOTSTAR). */ -template <typename GraphT> -static really_inline +/** Visitor that records back edges */ +template <typename BackEdgeSet> +class BackEdges : public boost::default_dfs_visitor { +public: + explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {} + template <class EdgeT, class GraphT> + void back_edge(const EdgeT &e, const GraphT &) { + backEdges.insert(e); // Remove this back edge only + } + BackEdgeSet &backEdges; +}; + +/** Returns true if the vertex is either of the real starts (NODE_START, + * NODE_START_DOTSTAR). 
*/ +template <typename GraphT> +static really_inline bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) { - u32 i = g[v].index; - return i == NODE_START || i == NODE_START_DOTSTAR; -} - -bool is_virtual_start(NFAVertex v, const NGHolder &g); - -template <typename GraphT> + u32 i = g[v].index; + return i == NODE_START || i == NODE_START_DOTSTAR; +} + +bool is_virtual_start(NFAVertex v, const NGHolder &g); + +template <typename GraphT> bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) { - u32 i = g[v].index; - return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD; -} - -/** returns true iff v has an edge to accept or acceptEod */ -template <typename GraphT> + u32 i = g[v].index; + return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD; +} + +/** returns true iff v has an edge to accept or acceptEod */ +template <typename GraphT> bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) { - return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second; -} - -/** Generate a reverse topological ordering for a back-edge filtered version of + return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second; +} + +/** Generate a reverse topological ordering for a back-edge filtered version of * our graph (as it must be a DAG and correctly numbered). * * Note: we ensure that we produce a topo ordering that begins with acceptEod * and accept (if present) and ends with startDs followed by start. */ -std::vector<NFAVertex> getTopoOrdering(const NGHolder &g); - -bool onlyOneTop(const NGHolder &g); - +std::vector<NFAVertex> getTopoOrdering(const NGHolder &g); + +bool onlyOneTop(const NGHolder &g); + /** Return the set of the tops on the given graph. */ -flat_set<u32> getTops(const NGHolder &h); - +flat_set<u32> getTops(const NGHolder &h); + /** Initialise the tops on h to the provide top. Assumes that h is triggered and * no tops have been set on h. 
*/ void setTops(NGHolder &h, u32 top = DEFAULT_TOP); -/** adds a vertex to g with all the same vertex properties as \p v (aside from - * index) */ -NFAVertex clone_vertex(NGHolder &g, NFAVertex v); - -/** - * \brief Copies all out-edges from source to target. - * - * Edge properties (aside from index) are preserved and duplicate edges are - * skipped. - */ -void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest); - -/** - * \brief Copies all in-edges from source to target. - * - * Edge properties (aside from index) are preserved. - */ -void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest); - -/** \brief True if the graph contains an edge from one of {start, startDs} to - * one of {accept, acceptEod}. */ -bool isVacuous(const NGHolder &h); - -/** \brief True if the graph contains no floating vertices (startDs has no - * proper successors). */ -bool isAnchored(const NGHolder &h); - +/** adds a vertex to g with all the same vertex properties as \p v (aside from + * index) */ +NFAVertex clone_vertex(NGHolder &g, NFAVertex v); + +/** + * \brief Copies all out-edges from source to target. + * + * Edge properties (aside from index) are preserved and duplicate edges are + * skipped. + */ +void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest); + +/** + * \brief Copies all in-edges from source to target. + * + * Edge properties (aside from index) are preserved. + */ +void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest); + +/** \brief True if the graph contains an edge from one of {start, startDs} to + * one of {accept, acceptEod}. */ +bool isVacuous(const NGHolder &h); + +/** \brief True if the graph contains no floating vertices (startDs has no + * proper successors). */ +bool isAnchored(const NGHolder &h); + /** \brief True if the graph contains no anchored vertices (start has no * successors aside from startDs or vertices connected to startDs). 
*/ bool isFloating(const NGHolder &h); -/** True if the graph contains no back-edges at all, other than the - * startDs self-loop. */ -bool isAcyclic(const NGHolder &g); - -/** True if the graph has a cycle reachable from the given source vertex. */ -bool hasReachableCycle(const NGHolder &g, NFAVertex src); - -/** True if g has any cycles which are not self-loops. */ -bool hasBigCycles(const NGHolder &g); - +/** True if the graph contains no back-edges at all, other than the + * startDs self-loop. */ +bool isAcyclic(const NGHolder &g); + +/** True if the graph has a cycle reachable from the given source vertex. */ +bool hasReachableCycle(const NGHolder &g, NFAVertex src); + +/** True if g has any cycles which are not self-loops. */ +bool hasBigCycles(const NGHolder &g); + /** * \brief True if g has at least one non-special vertex with reach smaller than * max_reach_count. The default of 200 is pretty conservative. */ bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200); -/** Returns the set of all vertices that appear in any of the graph's cycles. */ -std::set<NFAVertex> findVerticesInCycles(const NGHolder &g); - -bool can_never_match(const NGHolder &g); - -/* \brief Does the graph have any edges leading into acceptEod (aside from - * accept) or will it have after resolving asserts? */ -bool can_match_at_eod(const NGHolder &h); - -bool can_only_match_at_eod(const NGHolder &g); - -/** \brief Does this graph become a "firehose", matching between every - * byte? 
*/ -bool matches_everywhere(const NGHolder &h); - - -struct mbsb_cache { - explicit mbsb_cache(const NGHolder &gg) : g(gg) {} - std::map<std::pair<u32, u32>, bool> cache; - const NGHolder &g; -}; - -/* weaker than straight domination as allows jump edges */ -bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, - mbsb_cache &cache); - -/* adds the literal 's' to the end of the graph before h.accept */ -void appendLiteral(NGHolder &h, const ue2_literal &s); - -/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in - * \a in). A vertex mapping is returned in \a v_map_out. */ -void fillHolder(NGHolder *outp, const NGHolder &in, - const std::deque<NFAVertex> &vv, +/** Returns the set of all vertices that appear in any of the graph's cycles. */ +std::set<NFAVertex> findVerticesInCycles(const NGHolder &g); + +bool can_never_match(const NGHolder &g); + +/* \brief Does the graph have any edges leading into acceptEod (aside from + * accept) or will it have after resolving asserts? */ +bool can_match_at_eod(const NGHolder &h); + +bool can_only_match_at_eod(const NGHolder &g); + +/** \brief Does this graph become a "firehose", matching between every + * byte? */ +bool matches_everywhere(const NGHolder &h); + + +struct mbsb_cache { + explicit mbsb_cache(const NGHolder &gg) : g(gg) {} + std::map<std::pair<u32, u32>, bool> cache; + const NGHolder &g; +}; + +/* weaker than straight domination as allows jump edges */ +bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, + mbsb_cache &cache); + +/* adds the literal 's' to the end of the graph before h.accept */ +void appendLiteral(NGHolder &h, const ue2_literal &s); + +/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in + * \a in). A vertex mapping is returned in \a v_map_out. 
*/ +void fillHolder(NGHolder *outp, const NGHolder &in, + const std::deque<NFAVertex> &vv, std::unordered_map<NFAVertex, NFAVertex> *v_map_out); - -/** \brief Clone the graph in \a in into graph \a out, returning a vertex - * mapping in \a v_map_out. */ -void cloneHolder(NGHolder &out, const NGHolder &in, + +/** \brief Clone the graph in \a in into graph \a out, returning a vertex + * mapping in \a v_map_out. */ +void cloneHolder(NGHolder &out, const NGHolder &in, std::unordered_map<NFAVertex, NFAVertex> *v_map_out); - -/** \brief Clone the graph in \a in into graph \a out. */ -void cloneHolder(NGHolder &out, const NGHolder &in); - -/** \brief Build a clone of graph \a in and return a pointer to it. */ -std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in); - -/** \brief Clear all reports on vertices that do not have an edge to accept or - * acceptEod. */ -void clearReports(NGHolder &g); - -/** \brief Add report \a r_new to every vertex that already has report \a - * r_old. */ -void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); - + +/** \brief Clone the graph in \a in into graph \a out. */ +void cloneHolder(NGHolder &out, const NGHolder &in); + +/** \brief Build a clone of graph \a in and return a pointer to it. */ +std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in); + +/** \brief Clear all reports on vertices that do not have an edge to accept or + * acceptEod. */ +void clearReports(NGHolder &g); + +/** \brief Add report \a r_new to every vertex that already has report \a + * r_old. */ +void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); + /** Construct a reversed copy of an arbitrary NGHolder, mapping starts to * accepts. 
*/ void reverseHolder(const NGHolder &g, NGHolder &out); @@ -321,8 +321,8 @@ void reverseHolder(const NGHolder &g, NGHolder &out); u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 max_delay, bool overhang_ok = true); -#ifndef NDEBUG - +#ifndef NDEBUG + // Assertions: only available in internal builds. /** @@ -330,8 +330,8 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, * with edges to accept or acceptEod have at least one report ID. Additionally, * checks that ONLY vertices with edges to accept or acceptEod has reports. */ -bool allMatchStatesHaveReports(const NGHolder &g); - +bool allMatchStatesHaveReports(const NGHolder &g); + /** * Assertion: returns true if the graph is triggered and all edges out of start * have tops OR if the graph is not-triggered and all edges out of start have no @@ -339,7 +339,7 @@ bool allMatchStatesHaveReports(const NGHolder &g); */ bool isCorrectlyTopped(const NGHolder &g); #endif // NDEBUG - -} // namespace ue2 - -#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp index d1123dff49..71ec2e4bab 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp @@ -1,143 +1,143 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Build code for vacuous graphs. - */ -#include "ng_vacuous.h" - -#include "grey.h" -#include "ng.h" -#include "ng_util.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Build code for vacuous graphs. + */ +#include "ng_vacuous.h" + +#include "grey.h" +#include "ng.h" +#include "ng_util.h" #include "compiler/compiler.h" - -using namespace std; - -namespace ue2 { - -static + +using namespace std; + +namespace ue2 { + +static ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) { Report ir = rm.getBasicInternalReport(expr); - - // Apply any extended params. + + // Apply any extended params. if (expr.min_offset || expr.max_offset != MAX_OFFSET) { ir.minOffset = expr.min_offset; ir.maxOffset = expr.max_offset; - } - + } + assert(!expr.min_length); // should be handled elsewhere. - - return rm.getInternalId(ir); -} - -static + + return rm.getInternalId(ir); +} + +static void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { const ReportID r = getInternalId(rm, expr); - - boundary.report_at_0_eod.insert(r); - boundary.report_at_0.insert(r); - - // Replace the graph with a '.+'. 
- - clear_graph(g); - clearReports(g); - remove_edge(g.start, g.accept, g); - remove_edge(g.start, g.acceptEod, g); - remove_edge(g.startDs, g.accept, g); - remove_edge(g.startDs, g.acceptEod, g); - - NFAVertex v = add_vertex(g); - g[v].char_reach.setall(); - g[v].reports.insert(r); - add_edge(v, v, g); - add_edge(g.start, v, g); - add_edge(g.startDs, v, g); - add_edge(v, g.accept, g); -} - -static -void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, + + boundary.report_at_0_eod.insert(r); + boundary.report_at_0.insert(r); + + // Replace the graph with a '.+'. + + clear_graph(g); + clearReports(g); + remove_edge(g.start, g.accept, g); + remove_edge(g.start, g.acceptEod, g); + remove_edge(g.startDs, g.accept, g); + remove_edge(g.startDs, g.acceptEod, g); + + NFAVertex v = add_vertex(g); + g[v].char_reach.setall(); + g[v].reports.insert(r); + add_edge(v, v, g); + add_edge(g.start, v, g); + add_edge(g.startDs, v, g); + add_edge(v, g.accept, g); +} + +static +void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { boundary.report_at_0.insert(getInternalId(rm, expr)); - remove_edge(g.start, g.accept, g); - remove_edge(g.start, g.acceptEod, g); - g[g.start].reports.clear(); -} - -static -void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, + remove_edge(g.start, g.accept, g); + remove_edge(g.start, g.acceptEod, g); + g[g.start].reports.clear(); +} + +static +void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { boundary.report_at_eod.insert(getInternalId(rm, expr)); - remove_edge(g.startDs, g.acceptEod, g); - remove_edge(g.start, g.acceptEod, g); - g[g.start].reports.clear(); - g[g.startDs].reports.clear(); -} - -static -void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, + remove_edge(g.startDs, g.acceptEod, g); + remove_edge(g.start, g.acceptEod, g); + g[g.start].reports.clear(); + 
g[g.startDs].reports.clear(); +} + +static +void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { boundary.report_at_0_eod.insert(getInternalId(rm, expr)); - remove_edge(g.start, g.acceptEod, g); - g[g.start].reports.clear(); -} - -bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, + remove_edge(g.start, g.acceptEod, g); + g[g.start].reports.clear(); +} + +bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, const ExpressionInfo &expr) { - if (edge(g.startDs, g.accept, g).second) { - // e.g. '.*'; match "between" every byte - DEBUG_PRINTF("graph is firehose\n"); + if (edge(g.startDs, g.accept, g).second) { + // e.g. '.*'; match "between" every byte + DEBUG_PRINTF("graph is firehose\n"); makeFirehose(boundary, rm, g, expr); - return true; - } - - bool work_done = false; - - if (edge(g.start, g.accept, g).second) { - DEBUG_PRINTF("creating anchored acceptor\n"); + return true; + } + + bool work_done = false; + + if (edge(g.start, g.accept, g).second) { + DEBUG_PRINTF("creating anchored acceptor\n"); makeAnchoredAcceptor(boundary, rm, g, expr); - work_done = true; - } - - if (edge(g.startDs, g.acceptEod, g).second) { - DEBUG_PRINTF("creating end-anchored acceptor\n"); + work_done = true; + } + + if (edge(g.startDs, g.acceptEod, g).second) { + DEBUG_PRINTF("creating end-anchored acceptor\n"); makeEndAnchoredAcceptor(boundary, rm, g, expr); - work_done = true; - } - - if (edge(g.start, g.acceptEod, g).second) { - DEBUG_PRINTF("creating nothing acceptor\n"); + work_done = true; + } + + if (edge(g.start, g.acceptEod, g).second) { + DEBUG_PRINTF("creating nothing acceptor\n"); makeNothingAcceptor(boundary, rm, g, expr); - work_done = true; - } - - return work_done; -} - -} // namespace ue2 + work_done = true; + } + + return work_done; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h 
index c33cb312de..12ad62d812 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h @@ -1,49 +1,49 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Build code for vacuous graphs. 
- */ - -#ifndef NG_VACUOUS_H -#define NG_VACUOUS_H - -namespace ue2 { - -struct BoundaryReports; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Build code for vacuous graphs. + */ + +#ifndef NG_VACUOUS_H +#define NG_VACUOUS_H + +namespace ue2 { + +struct BoundaryReports; class ExpressionInfo; class NGHolder; -class ReportManager; - -// Returns true if a "vacuous" reporter was created. 
-bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, +class ReportManager; + +// Returns true if a "vacuous" reporter was created. +bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); - -} // namespace ue2 - -#endif // NG_VACUOUS_H + +} // namespace ue2 + +#endif // NG_VACUOUS_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp index 219241ca55..f2d4fb73e4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp @@ -1,237 +1,237 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Functions for finding the min/max width of the input required to - * match a pattern. - */ -#include "ng_width.h" - -#include "ng_holder.h" -#include "ng_util.h" -#include "ue2common.h" -#include "util/depth.h" -#include "util/graph.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for finding the min/max width of the input required to + * match a pattern. + */ +#include "ng_width.h" + +#include "ng_holder.h" +#include "ng_util.h" +#include "ue2common.h" +#include "util/depth.h" +#include "util/graph.h" #include "util/graph_small_color_map.h" - -#include <deque> -#include <vector> - -#include <boost/graph/breadth_first_search.hpp> -#include <boost/graph/dag_shortest_paths.hpp> -#include <boost/graph/filtered_graph.hpp> - -using namespace std; - -namespace ue2 { - -namespace { - -/** - * Filter out special edges, or in the top-specific variant, start edges that - * don't have the right top set. - */ -struct SpecialEdgeFilter { - SpecialEdgeFilter() {} - explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} + +#include <deque> +#include <vector> + +#include <boost/graph/breadth_first_search.hpp> +#include <boost/graph/dag_shortest_paths.hpp> +#include <boost/graph/filtered_graph.hpp> + +using namespace std; + +namespace ue2 { + +namespace { + +/** + * Filter out special edges, or in the top-specific variant, start edges that + * don't have the right top set. 
+ */ +struct SpecialEdgeFilter { + SpecialEdgeFilter() {} + explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) - : h(&h_in), single_top(true), top(top_in) {} - - bool operator()(const NFAEdge &e) const { + : h(&h_in), single_top(true), top(top_in) {} + + bool operator()(const NFAEdge &e) const { NFAVertex u = source(e, *h); NFAVertex v = target(e, *h); if ((is_any_start(u, *h) && is_any_start(v, *h)) || (is_any_accept(u, *h) && is_any_accept(v, *h))) { - return false; - } - if (single_top) { + return false; + } + if (single_top) { if (u == h->start && !contains((*h)[e].tops, top)) { - return false; - } - if (u == h->startDs) { - return false; - } - } - return true; - - } -private: - const NGHolder *h = nullptr; - bool single_top = false; - u32 top = 0; -}; - -} // namespace - -static -depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, - NFAVertex src) { - if (isLeafNode(src, h)) { - return depth::unreachable(); - } - + return false; + } + if (u == h->startDs) { + return false; + } + } + return true; + + } +private: + const NGHolder *h = nullptr; + bool single_top = false; + u32 top = 0; +}; + +} // namespace + +static +depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, + NFAVertex src) { + if (isLeafNode(src, h)) { + return depth::unreachable(); + } + boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter); - - assert(hasCorrectlyNumberedVertices(h)); - const size_t num = num_vertices(h); - vector<depth> distance(num, depth::unreachable()); - distance.at(g[src].index) = depth(0); - - auto index_map = get(&NFAGraphVertexProps::index, g); - - // Since we are interested in the single-source shortest paths on a graph - // with the same weight on every edge, using BFS will be faster than - // Dijkstra here. 
+ + assert(hasCorrectlyNumberedVertices(h)); + const size_t num = num_vertices(h); + vector<depth> distance(num, depth::unreachable()); + distance.at(g[src].index) = depth(0); + + auto index_map = get(&NFAGraphVertexProps::index, g); + + // Since we are interested in the single-source shortest paths on a graph + // with the same weight on every edge, using BFS will be faster than + // Dijkstra here. breadth_first_search(g, src, - visitor(make_bfs_visitor(record_distances( - make_iterator_property_map(distance.begin(), index_map), + visitor(make_bfs_visitor(record_distances( + make_iterator_property_map(distance.begin(), index_map), boost::on_tree_edge())))); - - DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n", - distance.at(NODE_ACCEPT).str().c_str(), - distance.at(NODE_ACCEPT_EOD).str().c_str()); - - depth d = min(distance.at(NODE_ACCEPT), distance.at(NODE_ACCEPT_EOD)); - - if (d.is_unreachable()) { - return d; - } - - assert(d.is_finite()); - assert(d > depth(0)); - return d - depth(1); -} - -static -depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, - NFAVertex src) { + + DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n", + distance.at(NODE_ACCEPT).str().c_str(), + distance.at(NODE_ACCEPT_EOD).str().c_str()); + + depth d = min(distance.at(NODE_ACCEPT), distance.at(NODE_ACCEPT_EOD)); + + if (d.is_unreachable()) { + return d; + } + + assert(d.is_finite()); + assert(d > depth(0)); + return d - depth(1); +} + +static +depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, + NFAVertex src) { if (isLeafNode(src, h)) { - return depth::unreachable(); - } - - if (hasReachableCycle(h, src)) { - // There's a cycle reachable from this src, so we have inf width. - return depth::infinity(); - } - + return depth::unreachable(); + } + + if (hasReachableCycle(h, src)) { + // There's a cycle reachable from this src, so we have inf width. 
+ return depth::infinity(); + } + boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter); - - assert(hasCorrectlyNumberedVertices(h)); - const size_t num = num_vertices(h); - vector<int> distance(num); + + assert(hasCorrectlyNumberedVertices(h)); + const size_t num = num_vertices(h); + vector<int> distance(num); auto colors = make_small_color_map(h); - - auto index_map = get(&NFAGraphVertexProps::index, g); - - // DAG shortest paths with negative edge weights. + + auto index_map = get(&NFAGraphVertexProps::index, g); + + // DAG shortest paths with negative edge weights. dag_shortest_paths(g, src, - distance_map(make_iterator_property_map(distance.begin(), index_map)) - .weight_map(boost::make_constant_property<NFAEdge>(-1)) + distance_map(make_iterator_property_map(distance.begin(), index_map)) + .weight_map(boost::make_constant_property<NFAEdge>(-1)) .color_map(colors)); - - depth acceptDepth, acceptEodDepth; + + depth acceptDepth, acceptEodDepth; if (get(colors, h.accept) == small_color::white) { - acceptDepth = depth::unreachable(); - } else { + acceptDepth = depth::unreachable(); + } else { acceptDepth = depth(-1 * distance.at(NODE_ACCEPT)); - } + } if (get(colors, h.acceptEod) == small_color::white) { - acceptEodDepth = depth::unreachable(); - } else { + acceptEodDepth = depth::unreachable(); + } else { acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD)); - } - - depth d; - if (acceptDepth.is_unreachable()) { - d = acceptEodDepth; - } else if (acceptEodDepth.is_unreachable()) { - d = acceptDepth; - } else { - d = max(acceptDepth, acceptEodDepth); - } - - if (d.is_unreachable()) { + } + + depth d; + if (acceptDepth.is_unreachable()) { + d = acceptEodDepth; + } else if (acceptEodDepth.is_unreachable()) { + d = acceptDepth; + } else { + d = max(acceptDepth, acceptEodDepth); + } + + if (d.is_unreachable()) { assert(findMinWidth(h, filter, src).is_unreachable()); - return d; - } - - // Invert sign and subtract one for start transition. 
- assert(d.is_finite() && d > depth(0)); - return d - depth(1); -} - -static -depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { - depth startDepth = findMinWidth(h, filter, h.start); - depth dotstarDepth = findMinWidth(h, filter, h.startDs); - DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), - dotstarDepth.str().c_str()); - if (startDepth.is_unreachable()) { - assert(dotstarDepth.is_finite()); - return dotstarDepth; - } else if (dotstarDepth.is_unreachable()) { - assert(startDepth.is_finite()); - return startDepth; - } else { - assert(min(startDepth, dotstarDepth).is_finite()); - return min(startDepth, dotstarDepth); - } -} - -depth findMinWidth(const NGHolder &h) { - return findMinWidth(h, SpecialEdgeFilter(h)); -} - -depth findMinWidth(const NGHolder &h, u32 top) { - return findMinWidth(h, SpecialEdgeFilter(h, top)); -} - -static -depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { - depth startDepth = findMaxWidth(h, filter, h.start); - depth dotstarDepth = findMaxWidth(h, filter, h.startDs); - DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), - dotstarDepth.str().c_str()); - if (startDepth.is_unreachable()) { - return dotstarDepth; - } else if (dotstarDepth.is_unreachable()) { - return startDepth; - } else { - return max(startDepth, dotstarDepth); - } -} - -depth findMaxWidth(const NGHolder &h) { - return findMaxWidth(h, SpecialEdgeFilter(h)); -} - -depth findMaxWidth(const NGHolder &h, u32 top) { - return findMaxWidth(h, SpecialEdgeFilter(h, top)); -} - -} // namespace ue2 + return d; + } + + // Invert sign and subtract one for start transition. 
+ assert(d.is_finite() && d > depth(0)); + return d - depth(1); +} + +static +depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { + depth startDepth = findMinWidth(h, filter, h.start); + depth dotstarDepth = findMinWidth(h, filter, h.startDs); + DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), + dotstarDepth.str().c_str()); + if (startDepth.is_unreachable()) { + assert(dotstarDepth.is_finite()); + return dotstarDepth; + } else if (dotstarDepth.is_unreachable()) { + assert(startDepth.is_finite()); + return startDepth; + } else { + assert(min(startDepth, dotstarDepth).is_finite()); + return min(startDepth, dotstarDepth); + } +} + +depth findMinWidth(const NGHolder &h) { + return findMinWidth(h, SpecialEdgeFilter(h)); +} + +depth findMinWidth(const NGHolder &h, u32 top) { + return findMinWidth(h, SpecialEdgeFilter(h, top)); +} + +static +depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { + depth startDepth = findMaxWidth(h, filter, h.start); + depth dotstarDepth = findMaxWidth(h, filter, h.startDs); + DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), + dotstarDepth.str().c_str()); + if (startDepth.is_unreachable()) { + return dotstarDepth; + } else if (dotstarDepth.is_unreachable()) { + return startDepth; + } else { + return max(startDepth, dotstarDepth); + } +} + +depth findMaxWidth(const NGHolder &h) { + return findMaxWidth(h, SpecialEdgeFilter(h)); +} + +depth findMaxWidth(const NGHolder &h, u32 top) { + return findMaxWidth(h, SpecialEdgeFilter(h, top)); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.h b/contrib/libs/hyperscan/src/nfagraph/ng_width.h index 871e8a9343..ecc3c100ae 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_width.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.h @@ -1,74 +1,74 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * 
modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Functions for finding the min/max width of the input required to - * match a pattern. - */ - -#ifndef NG_WIDTH_H -#define NG_WIDTH_H - -#include "ue2common.h" -#include "util/depth.h" - -namespace ue2 { - -class NGHolder; - -/** - * \brief Compute the minimum width in bytes of an input that will match the - * given graph. - */ -depth findMinWidth(const NGHolder &h); - -/** - * \brief Compute the minimum width in bytes of an input that will match the - * given graph, considering only paths activated by the given top. 
- */ -depth findMinWidth(const NGHolder &h, u32 top); - -/** - * \brief Compute the maximum width in bytes of an input that will match the - * given graph. - * - * If there is no bound on the maximum width, returns infinity. - */ -depth findMaxWidth(const NGHolder &h); - -/** - * \brief Compute the maximum width in bytes of an input that will match the - * given graph, considering only paths activated by the given top. - * - * If there is no bound on the maximum width, returns infinity. - */ -depth findMaxWidth(const NGHolder &h, u32 top); - -} // namespace ue2 - -#endif // NG_WIDTH_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for finding the min/max width of the input required to + * match a pattern. + */ + +#ifndef NG_WIDTH_H +#define NG_WIDTH_H + +#include "ue2common.h" +#include "util/depth.h" + +namespace ue2 { + +class NGHolder; + +/** + * \brief Compute the minimum width in bytes of an input that will match the + * given graph. + */ +depth findMinWidth(const NGHolder &h); + +/** + * \brief Compute the minimum width in bytes of an input that will match the + * given graph, considering only paths activated by the given top. + */ +depth findMinWidth(const NGHolder &h, u32 top); + +/** + * \brief Compute the maximum width in bytes of an input that will match the + * given graph. + * + * If there is no bound on the maximum width, returns infinity. + */ +depth findMaxWidth(const NGHolder &h); + +/** + * \brief Compute the maximum width in bytes of an input that will match the + * given graph, considering only paths activated by the given top. + * + * If there is no bound on the maximum width, returns infinity. + */ +depth findMaxWidth(const NGHolder &h, u32 top); + +} // namespace ue2 + +#endif // NG_WIDTH_H |