diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfagraph | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph')
84 files changed, 8718 insertions, 8718 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.cpp b/contrib/libs/hyperscan/src/nfagraph/ng.cpp index 8dccf9863d..2d987102af 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng.cpp @@ -27,10 +27,10 @@ */ /** \file - * \brief NG and graph handling. + * \brief NG and graph handling. */ -#include "ng.h" - +#include "ng.h" + #include "grey.h" #include "ng_anchored_acyclic.h" #include "ng_anchored_dots.h" @@ -42,7 +42,7 @@ #include "ng_equivalence.h" #include "ng_extparam.h" #include "ng_fixed_width.h" -#include "ng_fuzzy.h" +#include "ng_fuzzy.h" #include "ng_haig.h" #include "ng_literal_component.h" #include "ng_literal_decorated.h" @@ -58,14 +58,14 @@ #include "ng_small_literal_set.h" #include "ng_som.h" #include "ng_vacuous.h" -#include "ng_violet.h" +#include "ng_violet.h" #include "ng_utf8.h" #include "ng_util.h" #include "ng_width.h" #include "ue2common.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" -#include "rose/rose_build.h" +#include "rose/rose_build.h" #include "smallwrite/smallwrite_build.h" #include "util/compile_error.h" #include "util/container.h" @@ -78,15 +78,15 @@ using namespace std; namespace ue2 { -NG::NG(const CompileContext &in_cc, size_t num_patterns, - unsigned in_somPrecision) +NG::NG(const CompileContext &in_cc, size_t num_patterns, + unsigned in_somPrecision) : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength), minWidth(depth::infinity()), rm(in_cc.grey), ssm(in_somPrecision), cc(in_cc), - smwr(makeSmallWriteBuilder(num_patterns, rm, cc)), - rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) { + smwr(makeSmallWriteBuilder(num_patterns, rm, cc)), + rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) { } NG::~NG() { @@ -102,16 +102,16 @@ NG::~NG() { * \throw CompileError if SOM cannot be supported for the component. 
*/ static -bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, +bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, const som_type som, const u32 comp_id) { DEBUG_PRINTF("doing som\n"); - dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey); + dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey); assert(hasCorrectlyNumberedVertices(g)); - assert(allMatchStatesHaveReports(g)); + assert(allMatchStatesHaveReports(g)); // First, we try the "SOM chain" support in ng_som.cpp. - sombe_rv rv = doSom(ng, g, expr, comp_id, som); + sombe_rv rv = doSom(ng, g, expr, comp_id, som); if (rv == SOMBE_HANDLED_INTERNAL) { return false; } else if (rv == SOMBE_HANDLED_ALL) { @@ -120,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, assert(rv == SOMBE_FAIL); /* Next, Sombe style approaches */ - rv = doSomWithHaig(ng, g, expr, comp_id, som); + rv = doSomWithHaig(ng, g, expr, comp_id, som); if (rv == SOMBE_HANDLED_INTERNAL) { return false; } else if (rv == SOMBE_HANDLED_ALL) { @@ -134,8 +134,8 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, vector<vector<CharReach> > triggers; /* empty for outfix */ assert(g.kind == NFA_OUTFIX); - dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey); - makeReportsSomPass(ng.rm, g); + dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey); + makeReportsSomPass(ng.rm, g); auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, ng.cc.grey); if (haig) { @@ -147,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, /* Our various strategies for supporting SOM for this pattern have failed. 
* Provide a generic pattern not supported/too large return value as it is * unclear what the meaning of a specific SOM error would be */ - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); assert(0); // unreachable return false; @@ -173,7 +173,7 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, changed |= removeEdgeRedundancy(g, som, cc); changed |= reduceGraphEquivalences(g, cc); changed |= removeRedundancy(g, som); - changed |= removeCyclicPathRedundancy(g); + changed |= removeCyclicPathRedundancy(g); if (!changed) { DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass); break; @@ -202,35 +202,35 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, } static -bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, - const som_type som, const u32 comp_id) { +bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, + const som_type som, const u32 comp_id) { const CompileContext &cc = ng.cc; - assert(hasCorrectlyNumberedVertices(g)); + assert(hasCorrectlyNumberedVertices(g)); DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", - expr.index, comp_id, num_vertices(g), num_edges(g)); + expr.index, comp_id, num_vertices(g), num_edges(g)); - dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - - assert(allMatchStatesHaveReports(g)); + dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - reduceExtendedParams(g, ng.rm, som); - reduceGraph(g, som, expr.utf8, cc); + assert(allMatchStatesHaveReports(g)); - dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); + reduceExtendedParams(g, ng.rm, som); + reduceGraph(g, som, expr.utf8, cc); + dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); + // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { removeRegionRedundancy(g, som); } - // We might be done at this point: if we've run out of vertices, we can - // stop processing. 
- if (num_vertices(g) == N_SPECIALS) { - DEBUG_PRINTF("all vertices claimed\n"); - return true; - } - + // We might be done at this point: if we've run out of vertices, we can + // stop processing. + if (num_vertices(g) == N_SPECIALS) { + DEBUG_PRINTF("all vertices claimed\n"); + return true; + } + // "Short Exhaustible Passthrough" patterns always become outfixes. if (!som && isSEP(g, ng.rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); @@ -241,13 +241,13 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, // Start Of Match handling. if (som) { - if (addComponentSom(ng, g, expr, som, comp_id)) { + if (addComponentSom(ng, g, expr, som, comp_id)) { return true; } } - assert(allMatchStatesHaveReports(g)); - + assert(allMatchStatesHaveReports(g)); + if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { return true; } @@ -261,11 +261,11 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, return true; } - if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { return true; } - if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { + if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { return true; } @@ -278,7 +278,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, return true; } - if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { return true; } @@ -293,7 +293,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, // Returns true if all components have been added. 
static -bool processComponents(NG &ng, ExpressionInfo &expr, +bool processComponents(NG &ng, ExpressionInfo &expr, deque<unique_ptr<NGHolder>> &g_comp, const som_type som) { const u32 num_components = g_comp.size(); @@ -303,7 +303,7 @@ bool processComponents(NG &ng, ExpressionInfo &expr, if (!g_comp[i]) { continue; } - if (addComponent(ng, *g_comp[i], expr, som, i)) { + if (addComponent(ng, *g_comp[i], expr, som, i)) { g_comp[i].reset(); continue; } @@ -323,70 +323,70 @@ bool processComponents(NG &ng, ExpressionInfo &expr, return false; } -bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { - assert(g_ptr); - NGHolder &g = *g_ptr; - +bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { + assert(g_ptr); + NGHolder &g = *g_ptr; + // remove reports that aren't on vertices connected to accept. - clearReports(g); + clearReports(g); - som_type som = expr.som; - if (som && isVacuous(g)) { - throw CompileError(expr.index, "Start of match is not " + som_type som = expr.som; + if (som && isVacuous(g)) { + throw CompileError(expr.index, "Start of match is not " "currently supported for patterns which match an " "empty buffer."); } - dumpDotWrapper(g, expr, "01_initial", cc.grey); - assert(allMatchStatesHaveReports(g)); + dumpDotWrapper(g, expr, "01_initial", cc.grey); + assert(allMatchStatesHaveReports(g)); /* ensure utf8 starts at cp boundary */ - ensureCodePointStart(rm, g, expr); - - if (can_never_match(g)) { - throw CompileError(expr.index, "Pattern can never match."); - } - - bool hamming = expr.hamm_distance > 0; - u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance; - - DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? 
"true" : "false"); - - // validate graph's suitability for fuzzing before resolving asserts - validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); - - resolveAsserts(rm, g, expr); - dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey); - assert(allMatchStatesHaveReports(g)); - - make_fuzzy(g, e_dist, hamming, cc.grey); - dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey); - - pruneUseless(g); - pruneEmptyVertices(g); - - if (can_never_match(g)) { - throw CompileError(expr.index, "Pattern can never match."); - } - - optimiseVirtualStarts(g); /* good for som */ - - propagateExtendedParams(g, expr, rm); - reduceExtendedParams(g, rm, som); - - // We may have removed all the edges to accept, in which case this - // expression cannot match. - if (can_never_match(g)) { - throw CompileError(expr.index, "Extended parameter constraints can not " - "be satisfied for any match from this " - "expression."); - } - - if (any_of_in(all_reports(g), [&](ReportID id) { - return rm.getReport(id).minLength; - })) { - // We have at least one report with a minimum length constraint, which - // we currently use SOM to satisfy. + ensureCodePointStart(rm, g, expr); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + bool hamming = expr.hamm_distance > 0; + u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance; + + DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? 
"true" : "false"); + + // validate graph's suitability for fuzzing before resolving asserts + validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); + + resolveAsserts(rm, g, expr); + dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey); + assert(allMatchStatesHaveReports(g)); + + make_fuzzy(g, e_dist, hamming, cc.grey); + dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey); + + pruneUseless(g); + pruneEmptyVertices(g); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + optimiseVirtualStarts(g); /* good for som */ + + propagateExtendedParams(g, expr, rm); + reduceExtendedParams(g, rm, som); + + // We may have removed all the edges to accept, in which case this + // expression cannot match. + if (can_never_match(g)) { + throw CompileError(expr.index, "Extended parameter constraints can not " + "be satisfied for any match from this " + "expression."); + } + + if (any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength; + })) { + // We have at least one report with a minimum length constraint, which + // we currently use SOM to satisfy. som = SOM_LEFT; ssm.somPrecision(8); } @@ -398,104 +398,104 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { // first, we can perform graph work that can be done on an individual // expression basis. - if (expr.utf8) { - relaxForbiddenUtf8(g, expr); + if (expr.utf8) { + relaxForbiddenUtf8(g, expr); } - if (all_of_in(all_reports(g), [&](ReportID id) { - const auto &report = rm.getReport(id); - return report.ekey != INVALID_EKEY && !report.minLength && - !report.minOffset; - })) { + if (all_of_in(all_reports(g), [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.ekey != INVALID_EKEY && !report.minLength && + !report.minOffset; + })) { // In highlander mode: if we don't have constraints on our reports that // may prevent us accepting our first match (i.e. 
extended params) we // can prune the other out-edges of all vertices connected to accept. - // TODO: shift the report checking down into pruneHighlanderAccepts() - // to allow us to handle the parts we can in mixed cases. - pruneHighlanderAccepts(g, rm); + // TODO: shift the report checking down into pruneHighlanderAccepts() + // to allow us to handle the parts we can in mixed cases. + pruneHighlanderAccepts(g, rm); } - dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey); + dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey); // If we're a vacuous pattern, we can handle this early. - if (splitOffVacuous(boundary, rm, g, expr)) { + if (splitOffVacuous(boundary, rm, g, expr)) { DEBUG_PRINTF("split off vacuous\n"); } // We might be done at this point: if we've run out of vertices, we can // stop processing. - if (num_vertices(g) == N_SPECIALS) { + if (num_vertices(g) == N_SPECIALS) { DEBUG_PRINTF("all vertices claimed by vacuous handling\n"); return true; } // Now that vacuous edges have been removed, update the min width exclusive // of boundary reports. - minWidth = min(minWidth, findMinWidth(g)); + minWidth = min(minWidth, findMinWidth(g)); // Add the pattern to the small write builder. - smwr->add(g, expr); + smwr->add(g, expr); if (!som) { - removeSiblingsOfStartDotStar(g); + removeSiblingsOfStartDotStar(g); } - dumpDotWrapper(g, expr, "03_early", cc.grey); - - // Perform a reduction pass to merge sibling character classes together. - if (cc.grey.performGraphSimplification) { - removeRedundancy(g, som); - prunePathsRedundantWithSuccessorOfCyclics(g, som); - } - - dumpDotWrapper(g, expr, "04_reduced", cc.grey); + dumpDotWrapper(g, expr, "03_early", cc.grey); + // Perform a reduction pass to merge sibling character classes together. 
+ if (cc.grey.performGraphSimplification) { + removeRedundancy(g, som); + prunePathsRedundantWithSuccessorOfCyclics(g, som); + } + + dumpDotWrapper(g, expr, "04_reduced", cc.grey); + // If we've got some literals that span the graph from start to accept, we // can split them off into Rose from here. if (!som) { - if (splitOffLiterals(*this, g)) { + if (splitOffLiterals(*this, g)) { DEBUG_PRINTF("some vertices claimed by literals\n"); } } // We might be done at this point: if we've run out of vertices, we can // stop processing. - if (num_vertices(g) == N_SPECIALS) { + if (num_vertices(g) == N_SPECIALS) { DEBUG_PRINTF("all vertices claimed before calc components\n"); return true; } - // Split the graph into a set of connected components and process those. - // Note: this invalidates g_ptr. + // Split the graph into a set of connected components and process those. + // Note: this invalidates g_ptr. - auto g_comp = calcComponents(std::move(g_ptr), cc.grey); + auto g_comp = calcComponents(std::move(g_ptr), cc.grey); assert(!g_comp.empty()); if (!som) { - for (auto &gc : g_comp) { - assert(gc); - reformLeadingDots(*gc); + for (auto &gc : g_comp) { + assert(gc); + reformLeadingDots(*gc); } - recalcComponents(g_comp, cc.grey); + recalcComponents(g_comp, cc.grey); } - if (processComponents(*this, expr, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } // If we're in prefiltering mode, we can run the prefilter reductions and // have another shot at accepting the graph. 
- if (cc.grey.prefilterReductions && expr.prefilter) { - for (auto &gc : g_comp) { - if (!gc) { + if (cc.grey.prefilterReductions && expr.prefilter) { + for (auto &gc : g_comp) { + if (!gc) { continue; } - prefilterReductions(*gc, cc); + prefilterReductions(*gc, cc); } - if (processComponents(*this, expr, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } } @@ -505,7 +505,7 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { if (g_comp[i]) { DEBUG_PRINTF("could not compile component %u with %zu vertices\n", i, num_vertices(*g_comp[i])); - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -514,60 +514,60 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { } /** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */ -bool NG::addHolder(NGHolder &g) { - DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); - assert(allMatchStatesHaveReports(g)); - assert(hasCorrectlyNumberedVertices(g)); +bool NG::addHolder(NGHolder &g) { + DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); + assert(allMatchStatesHaveReports(g)); + assert(hasCorrectlyNumberedVertices(g)); /* We don't update the global minWidth here as we care about the min width * of the whole pattern - not a just a prefix of it. */ bool prefilter = false; - //dumpDotComp(comp, g, *this, 20, "prefix_init"); + //dumpDotComp(comp, g, *this, 20, "prefix_init"); som_type som = SOM_NONE; /* the prefixes created by the SOM code do not themselves track som */ bool utf8 = false; // handling done earlier - reduceGraph(g, som, utf8, cc); + reduceGraph(g, som, utf8, cc); // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { - removeRegionRedundancy(g, som); + removeRegionRedundancy(g, som); } // "Short Exhaustible Passthrough" patterns always become outfixes. 
- if (isSEP(g, rm, cc.grey)) { + if (isSEP(g, rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); - if (rose->addOutfix(g)) { + if (rose->addOutfix(g)) { return true; } } - if (splitOffAnchoredAcyclic(*rose, g, cc)) { + if (splitOffAnchoredAcyclic(*rose, g, cc)) { return true; } - if (handleSmallLiteralSets(*rose, g, cc) - || handleFixedWidth(*rose, g, cc.grey)) { + if (handleSmallLiteralSets(*rose, g, cc) + || handleFixedWidth(*rose, g, cc.grey)) { return true; } - if (handleDecoratedLiterals(*rose, g, cc)) { + if (handleDecoratedLiterals(*rose, g, cc)) { return true; } - if (doViolet(*rose, g, prefilter, false, rm, cc)) { + if (doViolet(*rose, g, prefilter, false, rm, cc)) { return true; } - if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { + if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { return true; } - if (doViolet(*rose, g, prefilter, true, rm, cc)) { + if (doViolet(*rose, g, prefilter, true, rm, cc)) { return true; } DEBUG_PRINTF("trying for outfix\n"); - if (rose->addOutfix(g)) { + if (rose->addOutfix(g)) { DEBUG_PRINTF("ok\n"); return true; } @@ -617,8 +617,8 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, minWidth = min(minWidth, depth(literal.length())); - /* inform small write handler about this literal */ - smwr->add(literal, id); + /* inform small write handler about this literal */ + smwr->add(literal, id); return true; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.h b/contrib/libs/hyperscan/src/nfagraph/ng.h index a5a9077d4f..d7f46bddcf 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng.h @@ -27,7 +27,7 @@ */ /** \file - * \brief NG declaration. + * \brief NG declaration. 
*/ #ifndef NG_H @@ -42,7 +42,7 @@ #include "util/compile_context.h" #include "util/depth.h" #include "util/graph.h" -#include "util/noncopyable.h" +#include "util/noncopyable.h" #include "util/report_manager.h" #include <deque> @@ -56,26 +56,26 @@ namespace ue2 { struct CompileContext; struct ue2_literal; -class ExpressionInfo; +class ExpressionInfo; class RoseBuild; class SmallWriteBuild; -class NG : noncopyable { +class NG : noncopyable { public: - NG(const CompileContext &in_cc, size_t num_patterns, - unsigned in_somPrecision); + NG(const CompileContext &in_cc, size_t num_patterns, + unsigned in_somPrecision); ~NG(); /** \brief Consumes a pattern, returns false or throws a CompileError * exception if the graph cannot be consumed. */ - bool addGraph(ExpressionInfo &expr, std::unique_ptr<NGHolder> g_ptr); + bool addGraph(ExpressionInfo &expr, std::unique_ptr<NGHolder> g_ptr); /** \brief Consumes a graph, cut-down version of addGraph for use by SOM * processing. */ bool addHolder(NGHolder &h); - /** \brief Adds a literal to Rose, used by literal shortcut passes (instead - * of using \ref addGraph) */ + /** \brief Adds a literal to Rose, used by literal shortcut passes (instead + * of using \ref addGraph) */ bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, bool highlander, som_type som, bool quiet); @@ -94,7 +94,7 @@ public: BoundaryReports boundary; const CompileContext cc; - const std::unique_ptr<SmallWriteBuild> smwr; //!< SmallWrite builder. + const std::unique_ptr<SmallWriteBuild> smwr; //!< SmallWrite builder. const std::unique_ptr<RoseBuild> rose; //!< Rose builder. }; @@ -102,8 +102,8 @@ public: * * Shared with the small write compiler. 
*/ -void reduceGraph(NGHolder &g, som_type som, bool utf8, - const CompileContext &cc); +void reduceGraph(NGHolder &g, som_type som, bool utf8, + const CompileContext &cc); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp index 9a13376d19..11d6861d69 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -119,7 +119,7 @@ NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts, } if (dotq.empty()) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } const DotInfo &dot = dotq.top(); @@ -165,10 +165,10 @@ void reformAnchoredRepeatsComponent(NGHolder &g, return; } - NFAVertex dotV = NGHolder::null_vertex(); + NFAVertex dotV = NGHolder::null_vertex(); set<NFAVertex> otherV; dotV = findReformable(g, compAnchoredStarts, otherV); - if (dotV == NGHolder::null_vertex()) { + if (dotV == NGHolder::null_vertex()) { DEBUG_PRINTF("no candidate reformable dot found.\n"); return; } @@ -202,13 +202,13 @@ void reformAnchoredRepeatsComponent(NGHolder &g, } if (!isStartNode(dotV, g.start, g, true)) { - DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index); + DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index); return; } /* get bounds */ depth min; - depth max(1); + depth max(1); if (selfLoop) { // A self-loop indicates that this is a '.+' or '.*' @@ -229,9 +229,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g, } } - min = depth(0); + min = depth(0); } else { - min = depth(1); + min = depth(1); } *startBegin = min; @@ -249,7 +249,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, 
remove_edge(g.start, v, g); } - DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); clear_vertex(dotV, g); dead.insert(dotV); compAnchoredStarts.erase(dotV); @@ -268,10 +268,10 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } while (true) { - NFAVertex dotV = NGHolder::null_vertex(); + NFAVertex dotV = NGHolder::null_vertex(); set<NFAVertex> otherV; dotV = findReformable(g, compUnanchoredStarts, otherV); - if (dotV == NGHolder::null_vertex()) { + if (dotV == NGHolder::null_vertex()) { DEBUG_PRINTF("no candidate reformable dot found.\n"); return; } @@ -313,21 +313,21 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } // A self-loop indicates that this is a '.+' or '.*' - DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index); + DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index); *startEnd = depth::infinity(); remove_edge(dotV, dotV, g); return; } if (!isStartNode(dotV, g.startDs, g, true)) { - DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", - g[dotV].index); + DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", + g[dotV].index); return; } /* get bounds */ - depth min(1); - depth max(1); + depth min(1); + depth max(1); if (selfLoop) { // A self-loop indicates that this is a '.+' or '.*' @@ -349,7 +349,7 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, DEBUG_PRINTF("min greater than one, skipping\n"); return; } - min = depth(0); + min = depth(0); } *startBegin += min; @@ -363,14 +363,14 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, compUnanchoredStarts.clear(); for (auto t : adjacent_vertices_range(dotV, g)) { if (t != dotV) { - DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index); + DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index); add_edge(g.startDs, t, g); add_edge(g.start, t, g); compUnanchoredStarts.insert(t); } } - DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); dead.insert(dotV); 
clear_vertex(dotV, g); compUnanchoredStarts.erase(dotV); @@ -417,7 +417,7 @@ bool gatherParticipants(const NGHolder &g, if (isOptionalDot(t, v, g)) { // another dot; bail if we've seen it once already if (dots.find(t) != dots.end()) { - DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index); + DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index); return false; } dots.insert(t); @@ -433,7 +433,7 @@ bool gatherParticipants(const NGHolder &g, for (auto w : adjacent_vertices_range(v, g)) { succ.insert(w); if (!edge(start, w, g).second) { - DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n", + DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n", g[w].index); return false; } @@ -465,7 +465,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, // The first of our optional dots must be connected to start. The jump edge // past it will be verified in gatherParticipants(). If start is // graph.start, it should not be connected to startDs. - NFAVertex initialDot = NGHolder::null_vertex(); + NFAVertex initialDot = NGHolder::null_vertex(); for (auto v : adjacent_vertices_range(start, g)) { if (is_special(v, g)) { continue; @@ -475,7 +475,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, return; } initialDot = v; - DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index); + DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index); } } @@ -502,14 +502,14 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, startEnd->str().c_str()); if (start == g.start && startEnd->is_infinite()) { - *startEnd = depth(dots.size()); + *startEnd = depth(dots.size()); } else if (startEnd->is_finite()) { *startEnd += dots.size(); } assert(startEnd->is_reachable()); // Connect our successor vertices to both start and startDs. 
- for (auto v : succ) { + for (auto v : succ) { add_edge_if_not_present(g.start, v, g); add_edge_if_not_present(g.startDs, v, g); } @@ -634,8 +634,8 @@ void restoreLeadingDots(NGHolder &g, const depth &startBegin, } addDotsBetween(g, root, rhs, startBegin, startEnd); - renumber_vertices(g); - renumber_edges(g); + renumber_vertices(g); + renumber_edges(g); } // Entry point. diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp index 8812afadb7..e4603514a0 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,7 +47,7 @@ #include "ng_prune.h" #include "ng_redundancy.h" #include "ng_util.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "parser/position.h" // for POS flags #include "util/bitutils.h" // for findAndClearLSB_32 #include "util/boundary_reports.h" @@ -102,7 +102,7 @@ vector<NFAEdge> getAsserts(const NGHolder &g) { static void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) { - DEBUG_PRINTF("%zu needs splitting\n", g[v].index); + DEBUG_PRINTF("%zu needs splitting\n", g[v].index); to_split->emplace(g[v].index, v); } @@ -185,49 +185,49 @@ void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts, } static -void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, - NFAVertex v, s32 adj) { +void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, s32 adj) { // Don't try and set the report ID of a special vertex. assert(!is_special(v, g)); // If there's a report set already, we're replacing it. 
g[v].reports.clear(); - Report ir = rm.getBasicInternalReport(expr, adj); + Report ir = rm.getBasicInternalReport(expr, adj); g[v].reports.insert(rm.getInternalId(ir)); - DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); + DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static -NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, - NFAVertex v, const CharReach &cr_mask) { +NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, const CharReach &cr_mask) { NFAVertex clone = clone_vertex(g, v); g[clone].char_reach &= cr_mask; clone_out_edges(g, v, clone); clone_in_edges(g, v, clone); if (v == g.startDs) { - if (expr.utf8) { + if (expr.utf8) { g[clone].char_reach &= ~UTF_START_CR; } DEBUG_PRINTF("marked as virt\n"); g[clone].assert_flags = POS_FLAG_VIRTUAL_START; - setReportId(rm, g, expr, clone, 0); + setReportId(rm, g, expr, clone, 0); } return clone; } static -void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, - NFAVertex v, bool ucp) { +void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, bool ucp) { assert(v != g.start); assert(v != g.accept); assert(v != g.acceptEod); - DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp); + DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp); CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; @@ -235,14 +235,14 @@ void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; // Split v into word/nonword vertices with only asserting out-edges. 
- NFAVertex w_out = makeClone(rm, g, expr, v, cr_word); - NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword); + NFAVertex w_out = makeClone(rm, g, expr, v, cr_word); + NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword); remove_out_edge_if(w_out, has_no_assert, g); remove_out_edge_if(nw_out, has_no_assert, g); // Split v into word/nonword vertices with only asserting in-edges. - NFAVertex w_in = makeClone(rm, g, expr, v, cr_word); - NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword); + NFAVertex w_in = makeClone(rm, g, expr, v, cr_word); + NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword); remove_in_edge_if(w_in, has_no_assert, g); remove_in_edge_if(nw_in, has_no_assert, g); @@ -253,8 +253,8 @@ void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, } static -void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, - set<NFAEdge> *dead) { +void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + set<NFAEdge> *dead) { for (const auto &e : edges_range(g)) { u32 flags = g[e].assert_flags; if (!flags) { @@ -271,8 +271,8 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, bool impassable = true; bool ucp = flags & UCP_ASSERT_FLAGS; - DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n", - g[u].index, g[v].index, flags, (int)ucp); + DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n", + g[u].index, g[v].index, flags, (int)ucp); while (flags && impassable) { u32 flag = 1U << findAndClearLSB_32(&flags); switch (flag) { @@ -367,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, } else if (v_w) { /* need to add a word byte */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, expr, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_WORD; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -376,19 +376,19 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo 
&expr, } else { /* need to add a non word byte or see eod */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, expr, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_NONWORD; add_edge(vv, g.accept, g); g[e].assert_flags = 0; add_edge(u, vv, g[e], g); - /* there may already be a different edge from start to eod if so - * we need to make it unconditional and alive - */ - if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { + /* there may already be a different edge from start to eod if so + * we need to make it unconditional and alive + */ + if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { g[start_eod].assert_flags = 0; dead->erase(start_eod); - } else { - add_edge(u, g.acceptEod, g[e], g); + } else { + add_edge(u, g.acceptEod, g[e], g); } dead->insert(e); } @@ -420,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, } else if (v_w) { /* need to add a word byte */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, expr, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_WORD_UCP_PRE; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -429,19 +429,19 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, } else { /* need to add a non word byte or see eod */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, expr, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; add_edge(vv, g.accept, g); g[e].assert_flags = 0; add_edge(u, vv, g[e], g); - /* there may already be a different edge from start to eod if so - * we need to make it unconditional and alive - */ - if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { + /* there may already be a different edge from start to eod if so + * we need to make it unconditional and alive + */ + if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { g[start_eod].assert_flags = 0; dead->erase(start_eod); - } else { - add_edge(u, g.acceptEod, g[e], g); + } else { + add_edge(u, g.acceptEod, 
g[e], g); } dead->insert(e); } @@ -454,8 +454,8 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, } } -void resolveAsserts(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr) { +void resolveAsserts(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { vector<NFAEdge> asserts = getAsserts(g); if (asserts.empty()) { return; @@ -465,41 +465,41 @@ void resolveAsserts(ReportManager &rm, NGHolder &g, map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */ findSplitters(g, asserts, &to_split, &to_split_ucp); if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } for (const auto &m : to_split) { assert(!contains(to_split_ucp, m.first)); - splitVertex(rm, g, expr, m.second, false); + splitVertex(rm, g, expr, m.second, false); } for (const auto &m : to_split_ucp) { - splitVertex(rm, g, expr, m.second, true); + splitVertex(rm, g, expr, m.second, true); } set<NFAEdge> dead; - resolveEdges(rm, g, expr, &dead); + resolveEdges(rm, g, expr, &dead); remove_edges(dead, g); - renumber_vertices(g); + renumber_vertices(g); pruneUseless(g); pruneEmptyVertices(g); - renumber_vertices(g); - renumber_edges(g); + renumber_vertices(g); + renumber_edges(g); clearReports(g); } -void ensureCodePointStart(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr) { +void ensureCodePointStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { /* In utf8 mode there is an implicit assertion that we start at codepoint * boundaries. Assert resolution handles the badness coming from asserts. * The only other source of trouble is startDs->accept connections. 
*/ - NFAEdge orig = edge(g.startDs, g.accept, g); - if (expr.utf8 && orig) { - DEBUG_PRINTF("rectifying %u\n", expr.report); - Report ir = rm.getBasicInternalReport(expr); + NFAEdge orig = edge(g.startDs, g.accept, g); + if (expr.utf8 && orig) { + DEBUG_PRINTF("rectifying %u\n", expr.report); + Report ir = rm.getBasicInternalReport(expr); ReportID rep = rm.getInternalId(ir); NFAVertex v_a = add_vertex(g); @@ -550,8 +550,8 @@ void ensureCodePointStart(ReportManager &rm, NGHolder &g, add_edge(g.start, v_4, g); add_edge(g.startDs, v_4, g); remove_edge(orig, g); - renumber_edges(g); - clearReports(g); + renumber_edges(g); + clearReports(g); } } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h index 2534f57147..04d619752d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,14 +36,14 @@ namespace ue2 { struct BoundaryReports; -class ExpressionInfo; -class NGHolder; +class ExpressionInfo; +class NGHolder; class ReportManager; -void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); +void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); -void ensureCodePointStart(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr); +void ensureCodePointStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp index 60f667f491..a740eab65e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,11 +28,11 @@ /** \file * \brief: NFA Graph Builder: used by Glushkov construction to construct an - * NGHolder from a parsed expression. + * NGHolder from a parsed expression. */ - -#include "ng_builder.h" - + +#include "ng_builder.h" + #include "grey.h" #include "ng.h" #include "ng_util.h" @@ -81,7 +81,7 @@ public: void cloneRegion(Position first, Position last, unsigned posOffset) override; - BuiltExpression getGraph() override; + BuiltExpression getGraph() override; private: /** fetch a vertex given its Position ID. */ @@ -96,12 +96,12 @@ private: /** \brief Greybox: used for resource limits. */ const Grey &grey; - /** \brief Underlying graph. */ - unique_ptr<NGHolder> graph; - - /** \brief Underlying expression info. */ - ExpressionInfo expr; + /** \brief Underlying graph. */ + unique_ptr<NGHolder> graph; + /** \brief Underlying expression info. */ + ExpressionInfo expr; + /** \brief mapping from position to vertex. Use \ref getVertex for access. 
* */ vector<NFAVertex> id2vertex; @@ -113,9 +113,9 @@ private: } // namespace NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, - const ParsedExpression &parsed) - : rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()), - expr(parsed.expr), vertIdx(N_SPECIALS) { + const ParsedExpression &parsed) + : rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()), + expr(parsed.expr), vertIdx(N_SPECIALS) { // Reserve space for a reasonably-sized NFA id2vertex.reserve(64); @@ -133,8 +133,8 @@ NFABuilderImpl::~NFABuilderImpl() { NFAVertex NFABuilderImpl::getVertex(Position pos) const { assert(id2vertex.size() >= pos); const NFAVertex v = id2vertex[pos]; - assert(v != NGHolder::null_vertex()); - assert((*graph)[v].index == pos); + assert(v != NGHolder::null_vertex()); + assert((*graph)[v].index == pos); return v; } @@ -149,10 +149,10 @@ void NFABuilderImpl::addVertex(Position pos) { id2vertex.resize(pos + 1); } id2vertex[pos] = v; - (*graph)[v].index = pos; + (*graph)[v].index = pos; } -BuiltExpression NFABuilderImpl::getGraph() { +BuiltExpression NFABuilderImpl::getGraph() { DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n", num_vertices(*graph), num_edges(*graph)); @@ -163,13 +163,13 @@ BuiltExpression NFABuilderImpl::getGraph() { throw CompileError("Pattern too large."); } - return { expr, move(graph) }; + return { expr, move(graph) }; } void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { - Report ir = rm.getBasicInternalReport(expr, offsetAdjust); + Report ir = rm.getBasicInternalReport(expr, offsetAdjust); DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n", - pos, expr.report, offsetAdjust, ir.ekey); + pos, expr.report, offsetAdjust, ir.ekey); NFAVertex v = getVertex(pos); auto &reports = (*graph)[v].reports; @@ -179,24 +179,24 @@ void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) { NFAVertex v = getVertex(pos); 
- (*graph)[v].char_reach |= cr; + (*graph)[v].char_reach |= cr; } void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) { NFAVertex v = getVertex(pos); - (*graph)[v].assert_flags |= flag; + (*graph)[v].assert_flags |= flag; } u32 NFABuilderImpl::getAssertFlag(Position pos) { NFAVertex v = getVertex(pos); - return (*graph)[v].assert_flags; + return (*graph)[v].assert_flags; } pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) { // assert that the edge doesn't already exist - assert(edge(u, v, *graph).second == false); + assert(edge(u, v, *graph).second == false); - return add_edge(u, v, *graph); + return add_edge(u, v, *graph); } void NFABuilderImpl::addEdge(Position startPos, Position endPos) { @@ -209,16 +209,16 @@ void NFABuilderImpl::addEdge(Position startPos, Position endPos) { if ((u == graph->start || u == graph->startDs) && v == graph->startDs) { /* standard special -> special edges already exist */ - assert(edge(u, v, *graph).second == true); + assert(edge(u, v, *graph).second == true); return; } - assert(edge(u, v, *graph).second == false); + assert(edge(u, v, *graph).second == false); addEdge(u, v); } bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const { - return edge(getVertex(startPos), getVertex(endPos), *graph).second; + return edge(getVertex(startPos), getVertex(endPos), *graph).second; } Position NFABuilderImpl::getStart() const { @@ -252,7 +252,7 @@ Position NFABuilderImpl::makePositions(size_t nPositions) { } void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) { - NGHolder &g = *graph; + NGHolder &g = *graph; assert(posOffset > 0); // walk the nodes between first and last and copy their vertex properties diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h index 9f71b62235..6ae1eea15f 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h @@ -1,5 +1,5 @@ 
/* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ /** \file * \brief: NFA Graph Builder: used by Glushkov construction to construct an - * NGHolder from a parsed expression. + * NGHolder from a parsed expression. */ #ifndef NG_BUILDER_H @@ -37,7 +37,7 @@ #include "ue2common.h" #include "parser/position.h" -#include "util/noncopyable.h" +#include "util/noncopyable.h" #include <memory> @@ -45,14 +45,14 @@ namespace ue2 { class CharReach; class ReportManager; -struct BuiltExpression; +struct BuiltExpression; struct CompileContext; class ParsedExpression; /** \brief Abstract builder interface. Use \ref makeNFABuilder to construct * one. Used by GlushkovBuildState. */ -class NFABuilder : noncopyable { +class NFABuilder : noncopyable { public: virtual ~NFABuilder(); @@ -83,10 +83,10 @@ public: unsigned posOffset) = 0; /** - * \brief Returns the built NGHolder graph and ExpressionInfo. + * \brief Returns the built NGHolder graph and ExpressionInfo. * Note that this builder cannot be used after this call. */ - virtual BuiltExpression getGraph() = 0; + virtual BuiltExpression getGraph() = 0; }; /** Construct a usable NFABuilder. 
*/ diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp index 3e9454eeed..daa78e1052 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp @@ -54,7 +54,7 @@ #include "ng_holder.h" #include "ng_prune.h" #include "ng_util.h" -#include "grey.h" +#include "grey.h" #include "ue2common.h" #include "util/graph_range.h" #include "util/graph_undirected.h" @@ -64,7 +64,7 @@ #include <vector> #include <boost/graph/connected_components.hpp> -#include <boost/graph/filtered_graph.hpp> +#include <boost/graph/filtered_graph.hpp> using namespace std; @@ -164,7 +164,7 @@ flat_set<NFAVertex> findHeadShell(const NGHolder &g, } for (UNUSED auto v : shell) { - DEBUG_PRINTF("shell: %zu\n", g[v].index); + DEBUG_PRINTF("shell: %zu\n", g[v].index); } return shell; @@ -186,7 +186,7 @@ flat_set<NFAVertex> findTailShell(const NGHolder &g, } for (UNUSED auto v : shell) { - DEBUG_PRINTF("shell: %zu\n", g[v].index); + DEBUG_PRINTF("shell: %zu\n", g[v].index); } return shell; @@ -211,8 +211,8 @@ vector<NFAEdge> findShellEdges(const NGHolder &g, if ((is_special(u, g) || contains(head_shell, u)) && (is_special(v, g) || contains(tail_shell, v))) { - DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index, - g[v].index); + DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index, + g[v].index); shell_edges.push_back(e); } } @@ -220,50 +220,50 @@ vector<NFAEdge> findShellEdges(const NGHolder &g, return shell_edges; } -template<typename GetAdjRange> -bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell, - GetAdjRange adj_range_func) { - if (shell.empty()) { - DEBUG_PRINTF("no shell\n"); - return false; +template<typename GetAdjRange> +bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell, + GetAdjRange adj_range_func) { + if (shell.empty()) { + DEBUG_PRINTF("no shell\n"); + return false; } - - 
NFAVertex exit_vertex = NGHolder::null_vertex(); - for (auto u : shell) { - for (auto v : adj_range_func(u, g)) { - if (contains(shell, v)) { - continue; - } - if (!exit_vertex) { - exit_vertex = v; - continue; - } - if (exit_vertex == v) { - continue; - } - return false; - } - } - - return true; + + NFAVertex exit_vertex = NGHolder::null_vertex(); + for (auto u : shell) { + for (auto v : adj_range_func(u, g)) { + if (contains(shell, v)) { + continue; + } + if (!exit_vertex) { + exit_vertex = v; + continue; + } + if (exit_vertex == v) { + continue; + } + return false; + } + } + + return true; } -/** - * True if all edges out of vertices in the head shell lead to at most a single - * outside vertex, or the inverse for the tail shell. - */ +/** + * True if all edges out of vertices in the head shell lead to at most a single + * outside vertex, or the inverse for the tail shell. + */ static -bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell, - const flat_set<NFAVertex> &tail_shell) { - if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) { - DEBUG_PRINTF("head shell has only one path through it\n"); - return true; +bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell, + const flat_set<NFAVertex> &tail_shell) { + if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) { + DEBUG_PRINTF("head shell has only one path through it\n"); + return true; } - if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) { - DEBUG_PRINTF("tail shell has only one path into it\n"); - return true; - } - return false; + if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) { + DEBUG_PRINTF("tail shell has only one path into it\n"); + return true; + } + return false; } /** @@ -271,44 +271,44 @@ bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell, * one or more connected components, adding them to the comps deque. 
*/ static -void splitIntoComponents(unique_ptr<NGHolder> g, - deque<unique_ptr<NGHolder>> &comps, +void splitIntoComponents(unique_ptr<NGHolder> g, + deque<unique_ptr<NGHolder>> &comps, const depth &max_head_depth, const depth &max_tail_depth, bool *shell_comp) { - DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g)); + DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g)); assert(shell_comp); *shell_comp = false; // Compute "shell" head and tail subgraphs. - auto depths = calcBidiDepths(*g); - auto head_shell = findHeadShell(*g, depths, max_head_depth); - auto tail_shell = findTailShell(*g, depths, max_tail_depth); + auto depths = calcBidiDepths(*g); + auto head_shell = findHeadShell(*g, depths, max_head_depth); + auto tail_shell = findTailShell(*g, depths, max_tail_depth); for (auto v : head_shell) { tail_shell.erase(v); } - if (head_shell.size() + tail_shell.size() + N_SPECIALS >= - num_vertices(*g)) { + if (head_shell.size() + tail_shell.size() + N_SPECIALS >= + num_vertices(*g)) { DEBUG_PRINTF("all in shell component\n"); - comps.push_back(std::move(g)); + comps.push_back(std::move(g)); *shell_comp = true; return; } - // Find edges connecting the head and tail shells directly. - vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell); + // Find edges connecting the head and tail shells directly. + vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell); DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", head_shell.size(), tail_shell.size(), shell_edges.size()); - // If there are no shell edges and only one path out of the head shell or - // into the tail shell, we aren't going to find more than one component. 
- if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) { - DEBUG_PRINTF("single component\n"); - comps.push_back(std::move(g)); - return; - } + // If there are no shell edges and only one path out of the head shell or + // into the tail shell, we aren't going to find more than one component. + if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) { + DEBUG_PRINTF("single component\n"); + comps.push_back(std::move(g)); + return; + } auto ug = make_undirected_graph(*g); @@ -318,18 +318,18 @@ void splitIntoComponents(unique_ptr<NGHolder> g, bad_vertices.insert(head_shell.begin(), head_shell.end()); bad_vertices.insert(tail_shell.begin(), tail_shell.end()); - auto filtered_ug = boost::make_filtered_graph( + auto filtered_ug = boost::make_filtered_graph( ug, boost::keep_all(), make_bad_vertex_filter(&bad_vertices)); - // Actually run the connected components algorithm. + // Actually run the connected components algorithm. map<NFAVertex, u32> split_components; const u32 num = connected_components( - filtered_ug, boost::make_assoc_property_map(split_components)); + filtered_ug, boost::make_assoc_property_map(split_components)); assert(num > 0); if (num == 1 && shell_edges.empty()) { DEBUG_PRINTF("single component\n"); - comps.push_back(std::move(g)); + comps.push_back(std::move(g)); return; } @@ -342,27 +342,27 @@ void splitIntoComponents(unique_ptr<NGHolder> g, NFAVertex v = m.first; u32 c = m.second; verts[c].push_back(v); - DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); + DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); } - unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder + unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder for (auto &vv : verts) { // Shells are in every component. vv.insert(vv.end(), begin(head_shell), end(head_shell)); vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - /* Sort for determinism. 
Still required as NFAUndirectedVertex have - * no deterministic ordering (split_components map). */ - sort(begin(vv), end(vv)); + /* Sort for determinism. Still required as NFAUndirectedVertex have + * no deterministic ordering (split_components map). */ + sort(begin(vv), end(vv)); auto gc = ue2::make_unique<NGHolder>(); v_map.clear(); - fillHolder(gc.get(), *g, vv, &v_map); + fillHolder(gc.get(), *g, vv, &v_map); // Remove shell edges, which will get their own component. for (const auto &e : shell_edges) { - auto cu = v_map.at(source(e, *g)); - auto cv = v_map.at(target(e, *g)); + auto cu = v_map.at(source(e, *g)); + auto cv = v_map.at(target(e, *g)); assert(edge(cu, cv, *gc).second); remove_edge(cu, cv, *gc); } @@ -381,7 +381,7 @@ void splitIntoComponents(unique_ptr<NGHolder> g, auto gc = ue2::make_unique<NGHolder>(); v_map.clear(); - fillHolder(gc.get(), *g, vv, &v_map); + fillHolder(gc.get(), *g, vv, &v_map); pruneUseless(*gc); DEBUG_PRINTF("shell edge component %zu has %zu vertices\n", @@ -390,12 +390,12 @@ void splitIntoComponents(unique_ptr<NGHolder> g, *shell_comp = true; } - // Ensure that only vertices with accept edges have reports. - for (auto &gc : comps) { - assert(gc); - clearReports(*gc); - } - + // Ensure that only vertices with accept edges have reports. + for (auto &gc : comps) { + assert(gc); + clearReports(*gc); + } + // We should never produce empty component graphs. assert(all_of(begin(comps), end(comps), [](const unique_ptr<NGHolder> &g_comp) { @@ -403,39 +403,39 @@ void splitIntoComponents(unique_ptr<NGHolder> g, })); } -deque<unique_ptr<NGHolder>> calcComponents(unique_ptr<NGHolder> g, - const Grey &grey) { +deque<unique_ptr<NGHolder>> calcComponents(unique_ptr<NGHolder> g, + const Grey &grey) { deque<unique_ptr<NGHolder>> comps; // For trivial cases, we needn't bother running the full // connected_components algorithm. 
- if (!grey.calcComponents || isAlternationOfClasses(*g)) { - comps.push_back(std::move(g)); + if (!grey.calcComponents || isAlternationOfClasses(*g)) { + comps.push_back(std::move(g)); return comps; } bool shell_comp = false; - splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH), - depth(MAX_TAIL_SHELL_DEPTH), &shell_comp); + splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH), + depth(MAX_TAIL_SHELL_DEPTH), &shell_comp); if (shell_comp) { DEBUG_PRINTF("re-running on shell comp\n"); assert(!comps.empty()); - auto sc = std::move(comps.back()); + auto sc = std::move(comps.back()); comps.pop_back(); - splitIntoComponents(std::move(sc), comps, depth(0), depth(0), - &shell_comp); + splitIntoComponents(std::move(sc), comps, depth(0), depth(0), + &shell_comp); } DEBUG_PRINTF("finished; split into %zu components\n", comps.size()); return comps; } -void recalcComponents(deque<unique_ptr<NGHolder>> &comps, const Grey &grey) { - if (!grey.calcComponents) { - return; - } - +void recalcComponents(deque<unique_ptr<NGHolder>> &comps, const Grey &grey) { + if (!grey.calcComponents) { + return; + } + deque<unique_ptr<NGHolder>> out; for (auto &gc : comps) { @@ -444,13 +444,13 @@ void recalcComponents(deque<unique_ptr<NGHolder>> &comps, const Grey &grey) { } if (isAlternationOfClasses(*gc)) { - out.push_back(std::move(gc)); + out.push_back(std::move(gc)); continue; } - auto gc_comps = calcComponents(std::move(gc), grey); - out.insert(end(out), std::make_move_iterator(begin(gc_comps)), - std::make_move_iterator(end(gc_comps))); + auto gc_comps = calcComponents(std::move(gc), grey); + out.insert(end(out), std::make_move_iterator(begin(gc_comps)), + std::make_move_iterator(end(gc_comps))); } // Replace comps with our recalculated list. 
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h index 1bcdc5f81e..97f0f9b5df 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,15 +39,15 @@ namespace ue2 { class NGHolder; -struct Grey; +struct Grey; bool isAlternationOfClasses(const NGHolder &g); -std::deque<std::unique_ptr<NGHolder>> -calcComponents(std::unique_ptr<NGHolder> g, const Grey &grey); +std::deque<std::unique_ptr<NGHolder>> +calcComponents(std::unique_ptr<NGHolder> g, const Grey &grey); -void recalcComponents(std::deque<std::unique_ptr<NGHolder>> &comps, - const Grey &grey); +void recalcComponents(std::deque<std::unique_ptr<NGHolder>> &comps, + const Grey &grey); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp index 0b24bf07a8..928455fbd2 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,11 +62,11 @@ #include "ng_prune.h" #include "ng_util.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" -#include <algorithm> +#include <algorithm> #include 
<boost/graph/depth_first_search.hpp> #include <boost/graph/reverse_graph.hpp> @@ -101,7 +101,7 @@ class SearchVisitor : public boost::default_dfs_visitor { template<class Vertex, class Graph> void discover_vertex(const Vertex &v, const Graph &g) const { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); if (is_special(v, g)) { DEBUG_PRINTF("start or accept\n"); throw SearchFailed(); @@ -125,17 +125,17 @@ class SearchVisitor : public boost::default_dfs_visitor { } // namespace -template<class Graph, class ColorMap> +template<class Graph, class ColorMap> static bool searchForward(const Graph &g, const CharReach &reach, - ColorMap &colours, + ColorMap &colours, const flat_set<typename Graph::vertex_descriptor> &s, typename Graph::vertex_descriptor w) { - colours.fill(small_color::white); + colours.fill(small_color::white); try { - depth_first_visit(g, w, SearchVisitor(reach), colours, - VertexInSet<typename Graph::vertex_descriptor, Graph>(s)); - } catch (SearchFailed &) { + depth_first_visit(g, w, SearchVisitor(reach), colours, + VertexInSet<typename Graph::vertex_descriptor, Graph>(s)); + } catch (SearchFailed &) { return false; } @@ -143,14 +143,14 @@ bool searchForward(const Graph &g, const CharReach &reach, } static -NFAEdge to_raw(const NFAEdge &e, const NGHolder &) { +NFAEdge to_raw(const NFAEdge &e, const NGHolder &) { return e; } static -NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e, - const reverse_graph<NGHolder, NGHolder &> &g) { - return get(boost::edge_underlying, g, e); +NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e, + const reverse_graph<NGHolder, NGHolder &> &g) { + return get(boost::edge_underlying, g, e); } /* returns true if we did stuff */ @@ -164,9 +164,9 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, typedef typename Graph::vertex_descriptor vertex_descriptor; - // Colour map used for depth_first_visit(). 
- auto colours = make_small_color_map(g); - + // Colour map used for depth_first_visit(). + auto colours = make_small_color_map(g); + // precalc successors of v. flat_set<vertex_descriptor> succ_v; insert(&succ_v, adjacent_vertices(v, g)); @@ -182,7 +182,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, continue; } - DEBUG_PRINTF("- checking u %zu\n", g[u].index); + DEBUG_PRINTF("- checking u %zu\n", g[u].index); // let s be intersection(succ(u), succ(v)) s.clear(); @@ -203,18 +203,18 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, continue; } - DEBUG_PRINTF(" - checking w %zu\n", g[w].index); + DEBUG_PRINTF(" - checking w %zu\n", g[w].index); if (!searchForward(g, reach, colours, succ_v, w)) { - continue; + continue; } - - DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index); - /* we are currently iterating over the in-edges of v, so it - would be unwise to remove edges to v. However, */ - assert(w != v); /* as v is in s */ - remove_edge(to_raw(e_u, g), raw); - did_stuff = true; + + DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index); + /* we are currently iterating over the in-edges of v, so it + would be unwise to remove edges to v. However, */ + assert(w != v); /* as v is in s */ + remove_edge(to_raw(e_u, g), raw); + did_stuff = true; } } @@ -231,7 +231,7 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { continue; } - DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index); + DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index); did_stuff |= removeCyclicPathRedundancy(g, v, raw); } @@ -239,10 +239,10 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { } bool removeCyclicPathRedundancy(NGHolder &g) { - assert(hasCorrectlyNumberedVertices(g)); - + assert(hasCorrectlyNumberedVertices(g)); + // Forward pass. 
- bool f_changed = cyclicPathRedundancyPass(g, g); + bool f_changed = cyclicPathRedundancyPass(g, g); if (f_changed) { DEBUG_PRINTF("edges removed by forward pass\n"); pruneUseless(g); @@ -250,8 +250,8 @@ bool removeCyclicPathRedundancy(NGHolder &g) { // Reverse pass. DEBUG_PRINTF("REVERSE PASS\n"); - typedef reverse_graph<NGHolder, NGHolder &> RevGraph; - RevGraph revg(g); + typedef reverse_graph<NGHolder, NGHolder &> RevGraph; + RevGraph revg(g); bool r_changed = cyclicPathRedundancyPass(revg, g); if (r_changed) { DEBUG_PRINTF("edges removed by reverse pass\n"); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp index 6c90326ce4..1335b6e267 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,34 +26,34 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief NFA graph vertex depth calculations. 
*/ #include "ng_depth.h" #include "ng_util.h" #include "ue2common.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include <deque> #include <vector> -#include <boost/graph/breadth_first_search.hpp> +#include <boost/graph/breadth_first_search.hpp> #include <boost/graph/dag_shortest_paths.hpp> #include <boost/graph/depth_first_search.hpp> #include <boost/graph/filtered_graph.hpp> -#include <boost/graph/property_maps/constant_property_map.hpp> +#include <boost/graph/property_maps/constant_property_map.hpp> #include <boost/graph/reverse_graph.hpp> #include <boost/graph/topological_sort.hpp> -#include <boost/range/adaptor/reversed.hpp> +#include <boost/range/adaptor/reversed.hpp> using namespace std; using boost::filtered_graph; -using boost::make_filtered_graph; +using boost::make_filtered_graph; using boost::make_constant_property; using boost::reverse_graph; -using boost::adaptors::reverse; +using boost::adaptors::reverse; namespace ue2 { @@ -125,37 +125,37 @@ private: } // namespace -template<class Graph> +template<class Graph> static -vector<bool> findLoopReachable(const Graph &g, - const typename Graph::vertex_descriptor src) { - vector<bool> deadNodes(num_vertices(g)); - - using Edge = typename Graph::edge_descriptor; - using Vertex = typename Graph::vertex_descriptor; - using EdgeSet = set<Edge>; - +vector<bool> findLoopReachable(const Graph &g, + const typename Graph::vertex_descriptor src) { + vector<bool> deadNodes(num_vertices(g)); + + using Edge = typename Graph::edge_descriptor; + using Vertex = typename Graph::vertex_descriptor; + using EdgeSet = set<Edge>; + EdgeSet deadEdges; BackEdges<EdgeSet> be(deadEdges); - auto colors = make_small_color_map(g); + auto colors = make_small_color_map(g); - depth_first_search(g, be, colors, src); - auto af = make_bad_edge_filter(&deadEdges); - auto acyclic_g = make_filtered_graph(g, af); + depth_first_search(g, be, colors, src); + auto af = 
make_bad_edge_filter(&deadEdges); + auto acyclic_g = make_filtered_graph(g, af); - vector<Vertex> topoOrder; /* actually reverse topological order */ + vector<Vertex> topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); - topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors)); + topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors)); for (const auto &e : deadEdges) { - size_t srcIdx = g[source(e, g)].index; + size_t srcIdx = g[source(e, g)].index; if (srcIdx != NODE_START_DOTSTAR) { deadNodes[srcIdx] = true; } } - for (auto v : reverse(topoOrder)) { + for (auto v : reverse(topoOrder)) { for (const auto &e : in_edges_range(v, g)) { if (deadNodes[g[source(e, g)].index]) { deadNodes[g[v].index] = true; @@ -163,19 +163,19 @@ vector<bool> findLoopReachable(const Graph &g, } } } - - return deadNodes; + + return deadNodes; } template <class GraphT> static -void calcDepthFromSource(const GraphT &g, +void calcDepthFromSource(const GraphT &g, typename GraphT::vertex_descriptor srcVertex, - const vector<bool> &deadNodes, vector<int> &dMin, - vector<int> &dMax) { + const vector<bool> &deadNodes, vector<int> &dMin, + vector<int> &dMax) { typedef typename GraphT::edge_descriptor EdgeT; - const size_t numVerts = num_vertices(g); + const size_t numVerts = num_vertices(g); NodeFilter<GraphT> nf(&deadNodes, &g); StartFilter<GraphT> sf(&g); @@ -201,22 +201,22 @@ void calcDepthFromSource(const GraphT &g, using boost::make_iterator_property_map; - auto min_index_map = get(vertex_index, mindist_g); + auto min_index_map = get(vertex_index, mindist_g); breadth_first_search(mindist_g, srcVertex, visitor(make_bfs_visitor(record_distances( - make_iterator_property_map(dMin.begin(), - min_index_map), - boost::on_tree_edge()))) - .color_map(make_small_color_map(mindist_g))); + make_iterator_property_map(dMin.begin(), + min_index_map), + boost::on_tree_edge()))) + .color_map(make_small_color_map(mindist_g))); - auto 
max_index_map = get(vertex_index, maxdist_g); + auto max_index_map = get(vertex_index, maxdist_g); dag_shortest_paths(maxdist_g, srcVertex, - distance_map(make_iterator_property_map(dMax.begin(), - max_index_map)) - .weight_map(make_constant_property<EdgeT>(-1)) - .color_map(make_small_color_map(maxdist_g))); + distance_map(make_iterator_property_map(dMax.begin(), + max_index_map)) + .weight_map(make_constant_property<EdgeT>(-1)) + .color_map(make_small_color_map(maxdist_g))); for (size_t i = 0; i < numVerts; i++) { if (dMin[i] > DIST_UNREACHABLE) { @@ -261,14 +261,14 @@ DepthMinMax getDepths(u32 idx, const vector<int> &dMin, template<class Graph, class Output> static -void calcAndStoreDepth(const Graph &g, +void calcAndStoreDepth(const Graph &g, const typename Graph::vertex_descriptor src, const vector<bool> &deadNodes, vector<int> &dMin /* util */, vector<int> &dMax /* util */, vector<Output> &depths, DepthMinMax Output::*store) { - calcDepthFromSource(g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, src, deadNodes, dMin, dMax); for (auto v : vertices_range(g)) { u32 idx = g[v].index; @@ -278,11 +278,11 @@ void calcAndStoreDepth(const Graph &g, } } -vector<NFAVertexDepth> calcDepths(const NGHolder &g) { +vector<NFAVertexDepth> calcDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - vector<NFAVertexDepth> depths(numVertices); + vector<NFAVertexDepth> depths(numVertices); vector<int> dMin; vector<int> dMax; @@ -290,56 +290,56 @@ vector<NFAVertexDepth> calcDepths(const NGHolder &g) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - auto deadNodes = findLoopReachable(g, g.start); + auto deadNodes = findLoopReachable(g, g.start); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, - &NFAVertexDepth::fromStart); + calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, + 
&NFAVertexDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, - &NFAVertexDepth::fromStartDotStar); - - return depths; + calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, + &NFAVertexDepth::fromStartDotStar); + + return depths; } -vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g) { +vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - vector<NFAVertexRevDepth> depths(numVertices); + vector<NFAVertexRevDepth> depths(numVertices); vector<int> dMin; vector<int> dMax; /* reverse the graph before walking it */ - typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; - const RevNFAGraph rg(g); - - assert(num_vertices(g) == num_vertices(rg)); + typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; + const RevNFAGraph rg(g); + assert(num_vertices(g) == num_vertices(rg)); + /* * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - auto deadNodes = findLoopReachable(rg, g.acceptEod); + auto deadNodes = findLoopReachable(rg, g.acceptEod); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( - rg, g.accept, deadNodes, dMin, dMax, depths, + rg, g.accept, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAccept); DEBUG_PRINTF("doing accepteod\n"); deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. 
calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( - rg, g.acceptEod, deadNodes, dMin, dMax, depths, + rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAcceptEod); - - return depths; + + return depths; } -vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g) { +vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - vector<NFAVertexBidiDepth> depths(numVertices); + vector<NFAVertexBidiDepth> depths(numVertices); vector<int> dMin; vector<int> dMax; @@ -347,52 +347,52 @@ vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - auto deadNodes = findLoopReachable(g, g.start); + auto deadNodes = findLoopReachable(g, g.start); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>( - g, g.start, deadNodes, dMin, dMax, depths, + calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>( + g, g.start, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>( - g, g.startDs, deadNodes, dMin, dMax, depths, + calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>( + g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStartDotStar); /* Now go backwards */ - typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; - const RevNFAGraph rg(g); - deadNodes = findLoopReachable(rg, g.acceptEod); + typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; + const RevNFAGraph rg(g); + deadNodes = findLoopReachable(rg, g.acceptEod); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>( - rg, g.accept, deadNodes, dMin, dMax, depths, + rg, g.accept, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAccept); DEBUG_PRINTF("doing accepteod\n"); deadNodes[NODE_ACCEPT] = true; // Hide 
accept->acceptEod edge. calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>( - rg, g.acceptEod, deadNodes, dMin, dMax, depths, + rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAcceptEod); - - return depths; + + return depths; } -vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src) { +vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - auto deadNodes = findLoopReachable(g, g.start); + auto deadNodes = findLoopReachable(g, g.start); vector<int> dMin, dMax; - calcDepthFromSource(g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, src, deadNodes, dMin, dMax); - vector<DepthMinMax> depths(numVertices); + vector<DepthMinMax> depths(numVertices); for (auto v : vertices_range(g)) { - auto idx = g[v].index; + auto idx = g[v].index; depths.at(idx) = getDepths(idx, dMin, dMax); } - - return depths; + + return depths; } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h index 36cca87e84..fbec996b89 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,15 +26,15 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief NFA graph vertex depth calculations. */ -#ifndef NG_DEPTH_H -#define NG_DEPTH_H +#ifndef NG_DEPTH_H +#define NG_DEPTH_H -#include "ue2common.h" +#include "ue2common.h" #include "nfagraph/ng_holder.h" #include "util/depth.h" @@ -63,37 +63,37 @@ struct NFAVertexRevDepth { /** * \brief Encapsulates min/max depths relative to all of our special vertices. 
*/ -struct NFAVertexBidiDepth { - DepthMinMax fromStart; - DepthMinMax fromStartDotStar; - DepthMinMax toAccept; - DepthMinMax toAcceptEod; +struct NFAVertexBidiDepth { + DepthMinMax fromStart; + DepthMinMax fromStartDotStar; + DepthMinMax toAccept; + DepthMinMax toAcceptEod; }; /** - * \brief Calculate depths from start and startDs. Returns them in a vector, - * indexed by vertex index. + * \brief Calculate depths from start and startDs. Returns them in a vector, + * indexed by vertex index. */ -std::vector<NFAVertexDepth> calcDepths(const NGHolder &g); +std::vector<NFAVertexDepth> calcDepths(const NGHolder &g); /** - * \brief Calculate depths to accept and acceptEod. Returns them in a vector, - * indexed by vertex index. + * \brief Calculate depths to accept and acceptEod. Returns them in a vector, + * indexed by vertex index. */ -std::vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g); +std::vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g); /** - * \brief Calculate depths to/from all special vertices. Returns them in a - * vector, indexed by vertex index. + * \brief Calculate depths to/from all special vertices. Returns them in a + * vector, indexed by vertex index. */ -std::vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g); +std::vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g); -/** - * \brief Calculate the (min, max) depths from the given \p src to every vertex - * in the graph and return them in a vector, indexed by \p vertex_index. - */ -std::vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src); +/** + * \brief Calculate the (min, max) depths from the given \p src to every vertex + * in the graph and return them in a vector, indexed by \p vertex_index. 
+ */ +std::vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src); } // namespace ue2 -#endif // NG_DEPTH_H +#endif // NG_DEPTH_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp index d6a064d12f..157784700e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "ng_util.h" #include <boost-patched/graph/dominator_tree.hpp> // locally patched version -#include <boost-patched/graph/reverse_graph.hpp> +#include <boost-patched/graph/reverse_graph.hpp> using namespace std; using boost::make_assoc_property_map; @@ -47,47 +47,47 @@ using boost::make_iterator_property_map; namespace ue2 { template <class Graph> -unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g, - typename Graph::vertex_descriptor source) { - using Vertex = typename Graph::vertex_descriptor; +unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g, + typename Graph::vertex_descriptor source) { + using Vertex = typename Graph::vertex_descriptor; const size_t num_verts = num_vertices(g); auto index_map = get(&NFAGraphVertexProps::index, g); vector<size_t> dfnum(num_verts, 0); - vector<Vertex> parents(num_verts, Graph::null_vertex()); + vector<Vertex> parents(num_verts, Graph::null_vertex()); auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map); auto parent_map = make_iterator_property_map(parents.begin(), index_map); - vector<Vertex> vertices_by_dfnum(num_verts, Graph::null_vertex()); + vector<Vertex> vertices_by_dfnum(num_verts, Graph::null_vertex()); // Output map. 
- vector<Vertex> doms(num_verts, Graph::null_vertex()); - auto dom_map = make_iterator_property_map(doms.begin(), index_map); + vector<Vertex> doms(num_verts, Graph::null_vertex()); + auto dom_map = make_iterator_property_map(doms.begin(), index_map); boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, parent_map, vertices_by_dfnum, dom_map); - /* Translate back to an NFAVertex map */ - unordered_map<NFAVertex, NFAVertex> doms2; - doms2.reserve(num_verts); - for (auto v : vertices_range(g)) { - auto dom_of_v = doms[g[v].index]; - if (dom_of_v) { - doms2.emplace(v, dom_of_v); - } - } - return doms2; + /* Translate back to an NFAVertex map */ + unordered_map<NFAVertex, NFAVertex> doms2; + doms2.reserve(num_verts); + for (auto v : vertices_range(g)) { + auto dom_of_v = doms[g[v].index]; + if (dom_of_v) { + doms2.emplace(v, dom_of_v); + } + } + return doms2; } -unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g) { +unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - return calcDominators(g, g.start); + return calcDominators(g, g.start); } -unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g) { +unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - return calcDominators(boost::reverse_graph<NGHolder, const NGHolder &>(g), + return calcDominators(boost::reverse_graph<NGHolder, const NGHolder &>(g), g.acceptEod); } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h index f505b7e471..7b05574d7d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions are met: @@ -37,13 +37,13 @@ #include "ng_holder.h" -#include <unordered_map> - +#include <unordered_map> + namespace ue2 { -std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g); +std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g); -std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g); +std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h index 3e12d1d22e..a104772983 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,8 +37,8 @@ #include "ng_holder.h" // for graph types #include "ue2common.h" -#include <unordered_map> - +#include <unordered_map> + #ifdef DUMP_SUPPORT #include <fstream> #endif @@ -49,7 +49,7 @@ namespace ue2 { class NGHolder; class NG; -class ExpressionInfo; +class ExpressionInfo; class ReportManager; // Implementations for stubs below -- all have the suffix "Impl". 
@@ -62,8 +62,8 @@ void dumpGraphImpl(const char *name, const GraphT &g); template <typename GraphT> void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm); -void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, - const char *name, const Grey &grey); +void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, + const char *name, const Grey &grey); void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp, const Grey &grey); @@ -76,7 +76,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, // Variant that takes a region map as well. void dumpHolderImpl(const NGHolder &h, - const std::unordered_map<NFAVertex, u32> &region_map, + const std::unordered_map<NFAVertex, u32> &region_map, unsigned int stageNumber, const char *stageName, const Grey &grey); @@ -90,10 +90,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) { // Stubs which call through to dump code if compiled in. UNUSED static inline -void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr, - UNUSED const char *name, UNUSED const Grey &grey) { +void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr, + UNUSED const char *name, UNUSED const Grey &grey) { #ifdef DUMP_SUPPORT - dumpDotWrapperImpl(g, expr, name, grey); + dumpDotWrapperImpl(g, expr, name, grey); #endif } @@ -124,7 +124,7 @@ void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber, UNUSED static inline void dumpHolder(UNUSED const NGHolder &h, - UNUSED const std::unordered_map<NFAVertex, u32> &region_map, + UNUSED const std::unordered_map<NFAVertex, u32> &region_map, UNUSED unsigned int stageNumber, UNUSED const char *name, UNUSED const Grey &grey) { #ifdef DUMP_SUPPORT diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp index b8354bd42a..3a1940d912 100644 ---
a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "parser/position.h" #include "util/compile_context.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include <set> @@ -181,28 +181,28 @@ bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) { return dead_count; } -static -bool checkReportsRev(const NGHolder &g, NFAVertex v, - const set<NFAVertex> &happy) { - if (g[v].reports.empty()) { - return true; - } - - assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second); - - /* an edge to accept takes priority over eod only accept */ - NFAVertex accept = edge(v, g.accept, g).second ? g.accept : g.acceptEod; - - flat_set<ReportID> happy_reports; - for (NFAVertex u : happy) { - if (edge(u, accept, g).second) { - insert(&happy_reports, g[u].reports); - } - } - - return is_subset_of(g[v].reports, happy_reports); -} - +static +bool checkReportsRev(const NGHolder &g, NFAVertex v, + const set<NFAVertex> &happy) { + if (g[v].reports.empty()) { + return true; + } + + assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second); + + /* an edge to accept takes priority over eod only accept */ + NFAVertex accept = edge(v, g.accept, g).second ? g.accept : g.acceptEod; + + flat_set<ReportID> happy_reports; + for (NFAVertex u : happy) { + if (edge(u, accept, g).second) { + insert(&happy_reports, g[u].reports); + } + } + + return is_subset_of(g[v].reports, happy_reports); +} + /** \brief Redundant self-loop removal (reverse version). 
* * A self loop on a vertex v can be removed if: @@ -255,8 +255,8 @@ bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) { happy.insert(u); } - if (!happy.empty() && checkVerticesRev(g, sad, happy) - && checkReportsRev(g, v, happy)) { + if (!happy.empty() && checkVerticesRev(g, sad, happy) + && checkReportsRev(g, v, happy)) { dead_count++; remove_edge(v, v, g); } @@ -320,8 +320,8 @@ bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src, return false; } - DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n", - g[w].index, g[v].index, g[fixed_src].index, g[v].index); + DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n", + g[w].index, g[v].index, g[fixed_src].index, g[v].index); return true; } @@ -437,7 +437,7 @@ bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) { pred(g, u, &parents_u); done.clear(); - if (out_degree(u, g) > 1) { + if (out_degree(u, g) > 1) { checkLargeOutU(g, u, parents_u, possible_w, done, &dead); } else { checkSmallOutU(g, u, parents_u, done, &dead); @@ -482,7 +482,7 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { vector<NFAEdge> dead; for (auto v : adjacent_vertices_range(g.startDs, g)) { - DEBUG_PRINTF("checking %zu\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); if (is_special(v, g)) { continue; } @@ -492,7 +492,7 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { if (is_special(u, g)) { continue; } - DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index); + DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index); dead.push_back(e); } } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp index fba8ce7b74..7e2243ee6e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, 
with or without * modification, are permitted provided that the following conditions are met: @@ -37,13 +37,13 @@ #include "ng_holder.h" #include "ng_util.h" #include "util/compile_context.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/make_unique.h" -#include "util/unordered.h" +#include "util/make_unique.h" +#include "util/unordered.h" #include <algorithm> -#include <memory> +#include <memory> #include <set> #include <stack> #include <vector> @@ -53,7 +53,7 @@ using namespace std; namespace ue2 { enum EquivalenceType { - LEFT_EQUIVALENCE, + LEFT_EQUIVALENCE, RIGHT_EQUIVALENCE, }; @@ -66,23 +66,23 @@ struct VertexInfoPtrCmp { bool operator()(const VertexInfo *a, const VertexInfo *b) const; }; -using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>; - +using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>; + /** Precalculated (and maintained) information about a vertex. */ class VertexInfo { public: VertexInfo(NFAVertex v_in, const NGHolder &g) - : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), + : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), equivalence_class(~0), vertex_flags(g[v].assert_flags) {} - VertexInfoSet pred; //!< predecessors of this vertex - VertexInfoSet succ; //!< successors of this vertex + VertexInfoSet pred; //!< predecessors of this vertex + VertexInfoSet succ; //!< successors of this vertex NFAVertex v; - size_t vert_index; + size_t vert_index; CharReach cr; CharReach pred_cr; CharReach succ_cr; - flat_set<u32> edge_tops; /**< tops on edge from start */ + flat_set<u32> edge_tops; /**< tops on edge from start */ unsigned equivalence_class; unsigned vertex_flags; }; @@ -106,31 +106,31 @@ public: DepthMinMax d1; DepthMinMax d2; }; - ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in, + ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in, EquivalenceType eq) - : /* reports only matter for right-equiv 
*/ - rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()), - vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr), - adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr), - /* treat non-special vertices the same */ - node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {} - - bool operator==(const ClassInfo &b) const { - return node_type == b.node_type && depth.d1 == b.depth.d1 && - depth.d2 == b.depth.d2 && cr == b.cr && - adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops && - vertex_flags == b.vertex_flags && rs == b.rs; - } - - size_t hash() const { - return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1, - depth.d2); + : /* reports only matter for right-equiv */ + rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()), + vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr), + adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr), + /* treat non-special vertices the same */ + node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {} + + bool operator==(const ClassInfo &b) const { + return node_type == b.node_type && depth.d1 == b.depth.d1 && + depth.d2 == b.depth.d2 && cr == b.cr && + adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops && + vertex_flags == b.vertex_flags && rs == b.rs; + } + + size_t hash() const { + return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1, + depth.d2); } private: flat_set<ReportID> rs; /* for right equiv only */ unsigned vertex_flags; - flat_set<u32> edge_tops; + flat_set<u32> edge_tops; CharReach cr; CharReach adjacent_cr; unsigned node_type; @@ -187,7 +187,7 @@ public: return q.capacity(); } private: - unordered_set<unsigned> ids; //!< stores id's, for uniqueness + unordered_set<unsigned> ids; //!< stores id's, for uniqueness vector<unsigned> q; //!< vector of id's that we use as FILO. 
}; @@ -259,112 +259,112 @@ bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) { // populate VertexInfo table static -vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) { - const size_t num_verts = num_vertices(g); - - vector<unique_ptr<VertexInfo>> infos; - infos.reserve(num_verts * 2); - +vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) { + const size_t num_verts = num_vertices(g); + + vector<unique_ptr<VertexInfo>> infos; + infos.reserve(num_verts * 2); + vector<VertexInfo *> vertex_map; // indexed by vertex_index property - vertex_map.resize(num_verts); + vertex_map.resize(num_verts); for (auto v : vertices_range(g)) { infos.push_back(std::make_unique<VertexInfo>(v, g)); - vertex_map[g[v].index] = infos.back().get(); - } + vertex_map[g[v].index] = infos.back().get(); + } - // now, go through each vertex and populate its predecessor and successor - // lists - for (auto &vi : infos) { - assert(vi); - NFAVertex v = vi->v; + // now, go through each vertex and populate its predecessor and successor + // lists + for (auto &vi : infos) { + assert(vi); + NFAVertex v = vi->v; // find predecessors - for (const auto &e : in_edges_range(v, g)) { + for (const auto &e : in_edges_range(v, g)) { NFAVertex u = source(e, g); - VertexInfo *u_vi = vertex_map[g[u].index]; + VertexInfo *u_vi = vertex_map[g[u].index]; - vi->pred_cr |= u_vi->cr; - vi->pred.insert(u_vi); + vi->pred_cr |= u_vi->cr; + vi->pred.insert(u_vi); // also set up edge tops if (is_triggered(g) && u == g.start) { - vi->edge_tops = g[e].tops; + vi->edge_tops = g[e].tops; } } // find successors - for (auto w : adjacent_vertices_range(v, g)) { - VertexInfo *w_vi = vertex_map[g[w].index]; - vi->succ_cr |= w_vi->cr; - vi->succ.insert(w_vi); + for (auto w : adjacent_vertices_range(v, g)) { + VertexInfo *w_vi = vertex_map[g[w].index]; + vi->succ_cr |= w_vi->cr; + vi->succ.insert(w_vi); } - assert(!hasEdgeAsserts(vi->v, g)); + assert(!hasEdgeAsserts(vi->v, g)); } - - return infos; + + return 
infos; } // store equivalence class in VertexInfo for each vertex static -vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos, - WorkQueue &work_queue, const NGHolder &g, - EquivalenceType eq) { - const size_t num_verts = infos.size(); - - vector<VertexInfoSet> classes; - ue2_unordered_map<ClassInfo, unsigned> classinfomap; - - // assume we will have lots of classes, so we don't waste time resizing - // these structures. - classes.reserve(num_verts); - classinfomap.reserve(num_verts); - +vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos, + WorkQueue &work_queue, const NGHolder &g, + EquivalenceType eq) { + const size_t num_verts = infos.size(); + + vector<VertexInfoSet> classes; + ue2_unordered_map<ClassInfo, unsigned> classinfomap; + + // assume we will have lots of classes, so we don't waste time resizing + // these structures. + classes.reserve(num_verts); + classinfomap.reserve(num_verts); + // get distances from start (or accept) for all vertices // only one of them is used at a time, never both vector<NFAVertexDepth> depths; vector<NFAVertexRevDepth> rdepths; if (eq == LEFT_EQUIVALENCE) { - depths = calcDepths(g); + depths = calcDepths(g); } else { - rdepths = calcRevDepths(g); + rdepths = calcRevDepths(g); } // partition the graph based on CharReach - for (auto &vi : infos) { - assert(vi); - + for (auto &vi : infos) { + assert(vi); + ClassInfo::ClassDepth depth; if (eq == LEFT_EQUIVALENCE) { - depth = depths[vi->vert_index]; + depth = depths[vi->vert_index]; } else { - depth = rdepths[vi->vert_index]; + depth = rdepths[vi->vert_index]; } - ClassInfo ci(g, *vi, depth, eq); + ClassInfo ci(g, *vi, depth, eq); auto ii = classinfomap.find(ci); if (ii == classinfomap.end()) { - // vertex is in a new equivalence class by itself. 
- unsigned eq_class = classes.size(); - vi->equivalence_class = eq_class; - classes.push_back({vi.get()}); - classinfomap.emplace(move(ci), eq_class); + // vertex is in a new equivalence class by itself. + unsigned eq_class = classes.size(); + vi->equivalence_class = eq_class; + classes.push_back({vi.get()}); + classinfomap.emplace(move(ci), eq_class); } else { - // vertex is added to an existing class. + // vertex is added to an existing class. unsigned eq_class = ii->second; - vi->equivalence_class = eq_class; - classes.at(eq_class).insert(vi.get()); + vi->equivalence_class = eq_class; + classes.at(eq_class).insert(vi.get()); // we now know that this particular class has more than one // vertex, so we add it to the work queue work_queue.push(eq_class); } } - - DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size()); - return classes; + + DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size()); + return classes; } // generalized equivalence processing (left and right) @@ -375,7 +375,7 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos, // equivalence, predecessors for right equivalence) classes get revalidated in // case of a split. 
static -void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue, +void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue, EquivalenceType eq_type) { // now, go through the work queue until it's empty map<flat_set<unsigned>, VertexInfoSet> tentative_classmap; @@ -388,7 +388,7 @@ void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue, unsigned cur_class = work_queue.pop(); // get all vertices in current equivalence class - VertexInfoSet &cur_class_vertices = classes.at(cur_class); + VertexInfoSet &cur_class_vertices = classes.at(cur_class); if (cur_class_vertices.size() < 2) { continue; @@ -432,19 +432,19 @@ void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue, // start from the second class for (++tmi; tmi != tentative_classmap.end(); ++tmi) { const VertexInfoSet &vertices_to_split = tmi->second; - unsigned new_class = classes.size(); - VertexInfoSet new_class_vertices; + unsigned new_class = classes.size(); + VertexInfoSet new_class_vertices; for (VertexInfo *vi : vertices_to_split) { vi->equivalence_class = new_class; - // note: we cannot use the cur_class_vertices ref, as it is - // invalidated by modifications to the classes vector. - classes[cur_class].erase(vi); + // note: we cannot use the cur_class_vertices ref, as it is + // invalidated by modifications to the classes vector. 
+ classes[cur_class].erase(vi); new_class_vertices.insert(vi); } - classes.push_back(move(new_class_vertices)); - - if (contains(tmi->first, cur_class)) { + classes.push_back(move(new_class_vertices)); + + if (contains(tmi->first, cur_class)) { reval_queue.push(new_class); } } @@ -485,9 +485,9 @@ bool require_separate_eod_vertex(const VertexInfoSet &vert_infos, } static -void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, - unsigned eq_class, VertexInfoSet &cur_class_vertices, - set<NFAVertex> *toRemove) { +void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, + unsigned eq_class, VertexInfoSet &cur_class_vertices, + set<NFAVertex> *toRemove) { DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a " "single vertex.\n", cur_class_vertices.size(), eq_class); @@ -517,7 +517,7 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, // store this vertex in our global vertex list infos.push_back(std::make_unique<VertexInfo>(new_v, g)); - VertexInfo *new_vertex_info = infos.back().get(); + VertexInfo *new_vertex_info = infos.back().get(); NFAVertex new_v_eod = NGHolder::null_vertex(); VertexInfo *new_vertex_info_eod = nullptr; @@ -526,10 +526,10 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, new_v_eod = clone_vertex(g, old_v); g[new_v_eod].reports.clear(); infos.push_back(std::make_unique<VertexInfo>(new_v_eod, g)); - new_vertex_info_eod = infos.back().get(); + new_vertex_info_eod = infos.back().get(); } - const auto &edgetops = (*cur_class_vertices.begin())->edge_tops; + const auto &edgetops = (*cur_class_vertices.begin())->edge_tops; for (VertexInfo *old_vertex_info : cur_class_vertices) { assert(old_vertex_info->equivalence_class == eq_class); @@ -548,24 +548,24 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, pred_info->succ.erase(old_vertex_info); // if edge doesn't exist, create it - NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g); + NFAEdge e = 
add_edge_if_not_present(pred_info->v, new_v, g); - // put edge tops, if applicable - if (!edgetops.empty()) { - assert(g[e].tops.empty() || g[e].tops == edgetops); - g[e].tops = edgetops; + // put edge tops, if applicable + if (!edgetops.empty()) { + assert(g[e].tops.empty() || g[e].tops == edgetops); + g[e].tops = edgetops; } pred_info->succ.insert(new_vertex_info); if (new_v_eod) { NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, - g); + g); - // put edge tops, if applicable - if (!edgetops.empty()) { - assert(g[e].tops.empty() || g[e].tops == edgetops); - g[ee].tops = edgetops; + // put edge tops, if applicable + if (!edgetops.empty()) { + assert(g[e].tops.empty() || g[e].tops == edgetops); + g[ee].tops = edgetops; } pred_info->succ.insert(new_vertex_info_eod); @@ -612,16 +612,16 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g, // vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the // report behaviour with a single vertex). static -bool mergeEquivalentClasses(vector<VertexInfoSet> &classes, - vector<unique_ptr<VertexInfo>> &infos, +bool mergeEquivalentClasses(vector<VertexInfoSet> &classes, + vector<unique_ptr<VertexInfo>> &infos, NGHolder &g) { bool merged = false; set<NFAVertex> toRemove; // go through all classes and merge classes with more than one vertex - for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) { + for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) { // get all vertices in current equivalence class - VertexInfoSet &cur_class_vertices = classes[eq_class]; + VertexInfoSet &cur_class_vertices = classes[eq_class]; // we don't care for single-vertex classes if (cur_class_vertices.size() > 1) { @@ -637,32 +637,32 @@ bool mergeEquivalentClasses(vector<VertexInfoSet> &classes, return merged; } -static -bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { - // create a list of equivalence classes to check - WorkQueue work_queue(num_vertices(g)); - - 
// get information on every vertex in the graph - // new vertices are allocated here, and stored in infos - auto infos = getVertexInfos(g); - - // partition the graph - auto classes = partitionGraph(infos, work_queue, g, eq_type); - - // do equivalence processing - equivalence(classes, work_queue, eq_type); - - // replace equivalent classes with single vertices - // new vertices are (possibly) allocated here, and stored in infos - return mergeEquivalentClasses(classes, infos, g); -} - +static +bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { + // create a list of equivalence classes to check + WorkQueue work_queue(num_vertices(g)); + + // get information on every vertex in the graph + // new vertices are allocated here, and stored in infos + auto infos = getVertexInfos(g); + + // partition the graph + auto classes = partitionGraph(infos, work_queue, g, eq_type); + + // do equivalence processing + equivalence(classes, work_queue, eq_type); + + // replace equivalent classes with single vertices + // new vertices are (possibly) allocated here, and stored in infos + return mergeEquivalentClasses(classes, infos, g); +} + bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { if (!cc.grey.equivalenceEnable) { DEBUG_PRINTF("equivalence processing disabled in grey box\n"); return false; } - renumber_vertices(g); + renumber_vertices(g); // Cheap check: if all the non-special vertices have in-degree one and // out-degree one, there's no redundancy in this here graph and we can @@ -674,8 +674,8 @@ bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { // take note if we have merged any vertices bool merge = false; - merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE); - merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE); + merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE); + merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE); return merge; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp 
b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp index 9d90489471..3834de5057 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,7 +58,7 @@ namespace ue2 { struct StateInfo { StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {} - StateInfo() : vertex(NGHolder::null_vertex()) {} + StateInfo() : vertex(NGHolder::null_vertex()) {} NFAVertex vertex; CharReach reach; }; @@ -193,14 +193,14 @@ public: info(info_in), input_g(input_g_in), states(states_in), succs(vertex_count) {} - void finish_vertex(NFAVertex input_v, - const boost::reverse_graph<NGHolder, const NGHolder &> &) { + void finish_vertex(NFAVertex input_v, + const boost::reverse_graph<NGHolder, const NGHolder &> &) { if (input_v == input_g.accept) { return; } assert(input_v != input_g.acceptEod); - DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index); + DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index); /* finish vertex is called on vertex --> implies that all its parents * (in the forward graph) are also finished. Our parents will have @@ -235,7 +235,7 @@ public: /* we need to push into all our (forward) children their successors * from us. 
*/ for (auto v : adjacent_vertices_range(input_v, input_g)) { - DEBUG_PRINTF("pushing our states to pstate %zu\n", + DEBUG_PRINTF("pushing our states to pstate %zu\n", input_g[v].index); if (v == input_g.startDs) { /* no need for intra start edges */ @@ -288,7 +288,7 @@ flat_set<NFAVertex> execute_graph(const NGHolder &running_g, map<NFAVertex, boost::default_color_type> colours; /* could just a topo order, but really it is time to pull a slightly bigger * gun: DFS */ - boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag); + boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag); map<NFAVertex, dynamic_bitset<> > dfs_states; auto info = makeInfoTable(running_g); @@ -307,7 +307,7 @@ flat_set<NFAVertex> execute_graph(const NGHolder &running_g, #ifdef DEBUG DEBUG_PRINTF(" output rstates:"); for (const auto &v : states) { - printf(" %zu", running_g[v].index); + printf(" %zu", running_g[v].index); } printf("\n"); #endif @@ -323,49 +323,49 @@ flat_set<NFAVertex> execute_graph(const NGHolder &running_g, initial_states); } -static -bool can_die_early(const NGHolder &g, const vector<StateInfo> &info, - const dynamic_bitset<> &s, - map<dynamic_bitset<>, u32> &visited, u32 age_limit) { - if (contains(visited, s) && visited[s] >= age_limit) { - /* we have already (or are in the process) of visiting here with a - * looser limit. 
*/ - return false; - } - visited[s] = age_limit; - - if (s.none()) { - DEBUG_PRINTF("dead\n"); - return true; - } - - if (age_limit == 0) { - return false; - } - - dynamic_bitset<> all_succ(s.size()); - step(g, info, s, &all_succ); - all_succ.reset(NODE_START_DOTSTAR); - - for (u32 i = 0; i < N_CHARS; i++) { - dynamic_bitset<> next = all_succ; - filter_by_reach(info, &next, CharReach(i)); - if (can_die_early(g, info, next, visited, age_limit - 1)) { - return true; - } - } - - return false; -} - -bool can_die_early(const NGHolder &g, u32 age_limit) { - if (proper_out_degree(g.startDs, g)) { - return false; - } - const vector<StateInfo> &info = makeInfoTable(g); - map<dynamic_bitset<>, u32> visited; - return can_die_early(g, info, makeStateBitset(g, {g.start}), visited, - age_limit); -} - +static +bool can_die_early(const NGHolder &g, const vector<StateInfo> &info, + const dynamic_bitset<> &s, + map<dynamic_bitset<>, u32> &visited, u32 age_limit) { + if (contains(visited, s) && visited[s] >= age_limit) { + /* we have already (or are in the process) of visiting here with a + * looser limit. 
*/ + return false; + } + visited[s] = age_limit; + + if (s.none()) { + DEBUG_PRINTF("dead\n"); + return true; + } + + if (age_limit == 0) { + return false; + } + + dynamic_bitset<> all_succ(s.size()); + step(g, info, s, &all_succ); + all_succ.reset(NODE_START_DOTSTAR); + + for (u32 i = 0; i < N_CHARS; i++) { + dynamic_bitset<> next = all_succ; + filter_by_reach(info, &next, CharReach(i)); + if (can_die_early(g, info, next, visited, age_limit - 1)) { + return true; + } + } + + return false; +} + +bool can_die_early(const NGHolder &g, u32 age_limit) { + if (proper_out_degree(g.startDs, g)) { + return false; + } + const vector<StateInfo> &info = makeInfoTable(g); + map<dynamic_bitset<>, u32> visited; + return can_die_early(g, info, makeStateBitset(g, {g.start}), visited, + age_limit); +} + } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h index 32f5520d33..0dea4768c4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ #define NG_EXECUTE_H #include "ng_holder.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <vector> @@ -64,9 +64,9 @@ flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag, const flat_set<NFAVertex> &input_start_states, const flat_set<NFAVertex> &initial); -/* returns true if it is possible for the nfa to die within age_limit bytes */ -bool can_die_early(const NGHolder &g, u32 age_limit); - +/* returns true if it is possible for the nfa to die within age_limit bytes */ +bool can_die_early(const NGHolder &g, u32 age_limit); + } // namespace ue2 #endif diff --git 
a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp index f8abbd04a2..85f5933d21 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,8 +27,8 @@ */ /** \file - * \brief Code for discovering properties of an NFA graph used by - * hs_expression_info(). + * \brief Code for discovering properties of an NFA graph used by + * hs_expression_info(). */ #include "ng_expr_info.h" @@ -37,14 +37,14 @@ #include "ng_asserts.h" #include "ng_depth.h" #include "ng_edge_redundancy.h" -#include "ng_extparam.h" -#include "ng_fuzzy.h" +#include "ng_extparam.h" +#include "ng_fuzzy.h" #include "ng_holder.h" -#include "ng_prune.h" +#include "ng_prune.h" #include "ng_reports.h" #include "ng_util.h" #include "ue2common.h" -#include "compiler/expression_info.h" +#include "compiler/expression_info.h" #include "parser/position.h" // for POS flags #include "util/boundary_reports.h" #include "util/compile_context.h" @@ -62,76 +62,76 @@ namespace ue2 { /* get rid of leading \b and multiline ^ vertices */ static -void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) { +void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) { vector<NFAVertex> victims; - for (auto v : adjacent_vertices_range(root, g)) { - if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { + for (auto v : adjacent_vertices_range(root, g)) { + if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n"); victims.push_back(v); } } for (auto u : victims) { - for (auto v : adjacent_vertices_range(u, g)) { - add_edge_if_not_present(root, v, g); + for (auto v : 
adjacent_vertices_range(u, g)) { + add_edge_if_not_present(root, v, g); } } - remove_vertices(victims, g); + remove_vertices(victims, g); } static -void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, +void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, const vector<DepthMinMax> &depths, DepthMinMax &info) { - if (is_any_accept(v, g)) { + if (is_any_accept(v, g)) { return; } - if (is_any_start(v, g)) { - info.min = depth(0); + if (is_any_start(v, g)) { + info.min = depth(0); info.max = max(info.max, depth(0)); return; } - u32 idx = g[v].index; + u32 idx = g[v].index; assert(idx < depths.size()); const DepthMinMax &d = depths.at(idx); - for (ReportID report_id : g[v].reports) { - const Report &report = rm.getReport(report_id); - assert(report.type == EXTERNAL_CALLBACK); - - DepthMinMax rd = d; - - // Compute graph width to this report, taking any offset adjustment - // into account. - rd.min += report.offsetAdjust; - rd.max += report.offsetAdjust; - - // A min_length param is a lower bound for match width. - if (report.minLength && report.minLength <= depth::max_value()) { - depth min_len((u32)report.minLength); - rd.min = max(rd.min, min_len); - rd.max = max(rd.max, min_len); - } - - // A max_offset param is an upper bound for match width. - if (report.maxOffset && report.maxOffset <= depth::max_value()) { - depth max_offset((u32)report.maxOffset); - rd.min = min(rd.min, max_offset); - rd.max = min(rd.max, max_offset); - } - - DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id, - rd.str().c_str()); - - info = unionDepthMinMax(info, rd); + for (ReportID report_id : g[v].reports) { + const Report &report = rm.getReport(report_id); + assert(report.type == EXTERNAL_CALLBACK); + + DepthMinMax rd = d; + + // Compute graph width to this report, taking any offset adjustment + // into account. + rd.min += report.offsetAdjust; + rd.max += report.offsetAdjust; + + // A min_length param is a lower bound for match width. 
+ if (report.minLength && report.minLength <= depth::max_value()) { + depth min_len((u32)report.minLength); + rd.min = max(rd.min, min_len); + rd.max = max(rd.max, min_len); + } + + // A max_offset param is an upper bound for match width. + if (report.maxOffset && report.maxOffset <= depth::max_value()) { + depth max_offset((u32)report.maxOffset); + rd.min = min(rd.min, max_offset); + rd.max = min(rd.max, max_offset); + } + + DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id, + rd.str().c_str()); + + info = unionDepthMinMax(info, rd); } } static -bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { - for (const auto &report_id : all_reports(g)) { +bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { + for (const auto &report_id : all_reports(g)) { if (rm.getReport(report_id).offsetAdjust) { return true; } @@ -139,64 +139,64 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { return false; } -void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, - NGHolder &g, ExpressionInfo &expr, - hs_expr_info *info) { +void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, + NGHolder &g, ExpressionInfo &expr, + hs_expr_info *info) { assert(info); - // remove reports that aren't on vertices connected to accept. - clearReports(g); - - assert(allMatchStatesHaveReports(g)); - - /* - * Note: the following set of analysis passes / transformations should - * match those in NG::addGraph(). - */ - + // remove reports that aren't on vertices connected to accept. + clearReports(g); + + assert(allMatchStatesHaveReports(g)); + + /* + * Note: the following set of analysis passes / transformations should + * match those in NG::addGraph(). + */ + /* ensure utf8 starts at cp boundary */ - ensureCodePointStart(rm, g, expr); - - if (can_never_match(g)) { - throw CompileError(expr.index, "Pattern can never match."); - } - - bool hamming = expr.hamm_distance > 0; - u32 e_dist = hamming ? 
expr.hamm_distance : expr.edit_distance; - - // validate graph's suitability for fuzzing - validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); - - resolveAsserts(rm, g, expr); - assert(allMatchStatesHaveReports(g)); - - // fuzz graph - this must happen before any transformations are made - make_fuzzy(g, e_dist, hamming, cc.grey); - - pruneUseless(g); - pruneEmptyVertices(g); - - if (can_never_match(g)) { - throw CompileError(expr.index, "Pattern can never match."); - } - - optimiseVirtualStarts(g); - - propagateExtendedParams(g, expr, rm); - - removeLeadingVirtualVerticesFromRoot(g, g.start); - removeLeadingVirtualVerticesFromRoot(g, g.startDs); - - auto depths = calcDepthsFrom(g, g.start); - + ensureCodePointStart(rm, g, expr); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + bool hamming = expr.hamm_distance > 0; + u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance; + + // validate graph's suitability for fuzzing + validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); + + resolveAsserts(rm, g, expr); + assert(allMatchStatesHaveReports(g)); + + // fuzz graph - this must happen before any transformations are made + make_fuzzy(g, e_dist, hamming, cc.grey); + + pruneUseless(g); + pruneEmptyVertices(g); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + optimiseVirtualStarts(g); + + propagateExtendedParams(g, expr, rm); + + removeLeadingVirtualVerticesFromRoot(g, g.start); + removeLeadingVirtualVerticesFromRoot(g, g.startDs); + + auto depths = calcDepthsFrom(g, g.start); + DepthMinMax d; - for (auto u : inv_adjacent_vertices_range(g.accept, g)) { - checkVertex(rm, g, u, depths, d); + for (auto u : inv_adjacent_vertices_range(g.accept, g)) { + checkVertex(rm, g, u, depths, d); } - for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) { - checkVertex(rm, g, u, depths, d); + for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) { 
+ checkVertex(rm, g, u, depths, d); } if (d.max.is_finite()) { @@ -210,9 +210,9 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, info->min_width = UINT_MAX; } - info->unordered_matches = hasOffsetAdjust(rm, g); - info->matches_at_eod = can_match_at_eod(g); - info->matches_only_at_eod = can_only_match_at_eod(g); + info->unordered_matches = hasOffsetAdjust(rm, g); + info->matches_at_eod = can_match_at_eod(g); + info->matches_only_at_eod = can_only_match_at_eod(g); } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h index f9bd680939..aedd05069a 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,7 @@ */ /** \file - * \brief Code for discovering properties of an expression used by + * \brief Code for discovering properties of an expression used by * hs_expression_info. 
*/ @@ -38,13 +38,13 @@ struct hs_expr_info; namespace ue2 { -class ExpressionInfo; -class NGHolder; +class ExpressionInfo; +class NGHolder; class ReportManager; -struct CompileContext; +struct CompileContext; -void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, - NGHolder &g, ExpressionInfo &expr, hs_expr_info *info); +void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, + NGHolder &g, ExpressionInfo &expr, hs_expr_info *info); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp index 6eb23113f3..4be5b73f77 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,22 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Propagate extended parameters to vertex reports and reduce graph if * possible. * * This code handles the propagation of the extension parameters specified by - * the user with the \ref hs_expr_ext structure into the reports on the graph's + * the user with the \ref hs_expr_ext structure into the reports on the graph's * vertices. * * There are also some analyses that prune edges that cannot contribute to a * match given these constraints, or transform the graph in order to make a * constraint implicit. 
*/ - -#include "ng_extparam.h" - + +#include "ng_extparam.h" + #include "ng.h" #include "ng_depth.h" #include "ng_dump.h" @@ -51,7 +51,7 @@ #include "ng_width.h" #include "ng_util.h" #include "ue2common.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "parser/position.h" #include "util/compile_context.h" #include "util/compile_error.h" @@ -69,28 +69,28 @@ namespace ue2 { static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000; static const u32 MAX_MINLENGTH_TO_CONVERT = 2000; -/** True if all the given reports have the same extparam bounds. */ -template<typename Container> -bool hasSameBounds(const Container &reports, const ReportManager &rm) { - assert(!reports.empty()); - - const auto &first = rm.getReport(*reports.begin()); - for (auto id : reports) { - const auto &report = rm.getReport(id); - if (report.minOffset != first.minOffset || - report.maxOffset != first.maxOffset || - report.minLength != first.minLength) { - return false; - } - } - - return true; -} - -/** - * \brief Find the (min, max) offset adjustment for the reports on a given - * vertex. - */ +/** True if all the given reports have the same extparam bounds. */ +template<typename Container> +bool hasSameBounds(const Container &reports, const ReportManager &rm) { + assert(!reports.empty()); + + const auto &first = rm.getReport(*reports.begin()); + for (auto id : reports) { + const auto &report = rm.getReport(id); + if (report.minOffset != first.minOffset || + report.maxOffset != first.maxOffset || + report.minLength != first.minLength) { + return false; + } + } + + return true; +} + +/** + * \brief Find the (min, max) offset adjustment for the reports on a given + * vertex. 
+ */ static pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm, const NGHolder &g, NFAVertex v) { @@ -151,76 +151,76 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { return match_depths; } -template<typename Function> -void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen, - Function func) { +template<typename Function> +void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen, + Function func) { for (auto v : inv_adjacent_vertices_range(accept, g)) { if (v == g.accept) { - // Don't operate on accept: the accept->acceptEod edge is stylised. + // Don't operate on accept: the accept->acceptEod edge is stylised. assert(accept == g.acceptEod); - assert(g[v].reports.empty()); + assert(g[v].reports.empty()); continue; } - if (!seen.insert(v).second) { - continue; // We have already processed v. + if (!seen.insert(v).second) { + continue; // We have already processed v. } auto &reports = g[v].reports; - if (reports.empty()) { - continue; - } - decltype(g[v].reports) new_reports; - for (auto id : g[v].reports) { - new_reports.insert(func(v, id)); - } - reports = std::move(new_reports); - } -} - -/** - * Generic function for replacing all the reports in the graph. - * - * Pass this a function that takes a vertex and a ReportID returns another - * ReportID (or the same one) to replace it with. - */ -template<typename Function> -void replaceReports(NGHolder &g, Function func) { - flat_set<NFAVertex> seen; - replaceReports(g, g.accept, seen, func); - replaceReports(g, g.acceptEod, seen, func); -} - -/** \brief Replace the graph's reports with new reports that specify bounds. 
*/ -static -void updateReportBounds(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr) { - DEBUG_PRINTF("updating report bounds\n"); - replaceReports(g, [&](NFAVertex, ReportID id) { - Report report = rm.getReport(id); // make a copy - assert(!report.hasBounds()); - - // Note that we need to cope with offset adjustment here. - - report.minOffset = expr.min_offset - report.offsetAdjust; - if (expr.max_offset == MAX_OFFSET) { - report.maxOffset = MAX_OFFSET; - } else { - report.maxOffset = expr.max_offset - report.offsetAdjust; - } - assert(report.maxOffset >= report.minOffset); - - report.minLength = expr.min_length; - if (expr.min_length && !expr.som) { - report.quashSom = true; + if (reports.empty()) { + continue; + } + decltype(g[v].reports) new_reports; + for (auto id : g[v].reports) { + new_reports.insert(func(v, id)); + } + reports = std::move(new_reports); + } +} + +/** + * Generic function for replacing all the reports in the graph. + * + * Pass this a function that takes a vertex and a ReportID returns another + * ReportID (or the same one) to replace it with. + */ +template<typename Function> +void replaceReports(NGHolder &g, Function func) { + flat_set<NFAVertex> seen; + replaceReports(g, g.accept, seen, func); + replaceReports(g, g.acceptEod, seen, func); +} + +/** \brief Replace the graph's reports with new reports that specify bounds. */ +static +void updateReportBounds(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + DEBUG_PRINTF("updating report bounds\n"); + replaceReports(g, [&](NFAVertex, ReportID id) { + Report report = rm.getReport(id); // make a copy + assert(!report.hasBounds()); + + // Note that we need to cope with offset adjustment here. 
+ + report.minOffset = expr.min_offset - report.offsetAdjust; + if (expr.max_offset == MAX_OFFSET) { + report.maxOffset = MAX_OFFSET; + } else { + report.maxOffset = expr.max_offset - report.offsetAdjust; + } + assert(report.maxOffset >= report.minOffset); + + report.minLength = expr.min_length; + if (expr.min_length && !expr.som) { + report.quashSom = true; } - DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " - "min_length=%llu\n", id, report.minOffset, - report.maxOffset, report.minLength); - - return rm.getInternalId(report); - }); + DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " + "min_length=%llu\n", id, report.minOffset, + report.maxOffset, report.minLength); + + return rm.getInternalId(report); + }); } static @@ -233,93 +233,93 @@ bool hasVirtualStarts(const NGHolder &g) { return false; } -/** Set the min_length param for all reports to zero. */ -static -void clearMinLengthParam(NGHolder &g, ReportManager &rm) { - DEBUG_PRINTF("clearing min length\n"); - replaceReports(g, [&rm](NFAVertex, ReportID id) { - const auto &report = rm.getReport(id); - if (report.minLength) { - Report new_report = report; - new_report.minLength = 0; - return rm.getInternalId(new_report); - } - return id; - }); -} - -/** - * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for - * all reports. - */ -static -void clearOffsetParams(NGHolder &g, ReportManager &rm) { - DEBUG_PRINTF("clearing min and max offset\n"); - replaceReports(g, [&rm](NFAVertex, ReportID id) { - const auto &report = rm.getReport(id); - if (report.minLength) { - Report new_report = report; - new_report.minOffset = 0; - new_report.maxOffset = MAX_OFFSET; - return rm.getInternalId(new_report); - } - return id; - }); -} - -/** - * If the pattern is unanchored, has a max_offset and has not asked for SOM, we - * can use that knowledge to anchor it which will limit its lifespan. 
Note that - * we can't use this transformation if there's a min_length, as it's currently - * handled using "sly SOM". +/** Set the min_length param for all reports to zero. */ +static +void clearMinLengthParam(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min length\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for + * all reports. + */ +static +void clearOffsetParams(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min and max offset\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minOffset = 0; + new_report.maxOffset = MAX_OFFSET; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * If the pattern is unanchored, has a max_offset and has not asked for SOM, we + * can use that knowledge to anchor it which will limit its lifespan. Note that + * we can't use this transformation if there's a min_length, as it's currently + * handled using "sly SOM". * * Note that it is possible to handle graphs that have a combination of * anchored and unanchored paths, but it's too tricky for the moment. 
*/ static -bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { - if (!isFloating(g)) { - return false; - } - - const auto &reports = all_reports(g); - if (reports.empty()) { - return false; - } - - if (any_of_in(reports, [&](ReportID id) { - const auto &report = rm.getReport(id); - return report.maxOffset == MAX_OFFSET || report.minLength || - report.offsetAdjust; - })) { - return false; - } - - if (!hasSameBounds(reports, rm)) { - DEBUG_PRINTF("mixed report bounds\n"); - return false; - } - - const depth minWidth = findMinWidth(g); - const depth maxWidth = findMaxWidth(g); - +bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { + if (!isFloating(g)) { + return false; + } + + const auto &reports = all_reports(g); + if (reports.empty()) { + return false; + } + + if (any_of_in(reports, [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.maxOffset == MAX_OFFSET || report.minLength || + report.offsetAdjust; + })) { + return false; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const depth minWidth = findMinWidth(g); + const depth maxWidth = findMaxWidth(g); + assert(minWidth <= maxWidth); assert(maxWidth.is_reachable()); - const auto &first_report = rm.getReport(*reports.begin()); - const auto min_offset = first_report.minOffset; - const auto max_offset = first_report.maxOffset; - assert(max_offset < MAX_OFFSET); - + const auto &first_report = rm.getReport(*reports.begin()); + const auto min_offset = first_report.minOffset; + const auto max_offset = first_report.maxOffset; + assert(max_offset < MAX_OFFSET); + DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", - minWidth.str().c_str(), maxWidth.str().c_str(), - min_offset, max_offset); + minWidth.str().c_str(), maxWidth.str().c_str(), + min_offset, max_offset); - if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { + if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { return false; } - if (max_offset < 
minWidth) { + if (max_offset < minWidth) { assert(0); return false; } @@ -340,10 +340,10 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; - max_bound = max_offset - minWidth; + max_bound = max_offset - minWidth; } else { - min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; - max_bound = max_offset - minWidth; + min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; + max_bound = max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); @@ -393,44 +393,44 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { add_edge(u, v, g); } - renumber_vertices(g); - renumber_edges(g); - - if (minWidth == maxWidth) { - // For a fixed width pattern, we can retire the offsets as - // they are implicit in the graph now. - clearOffsetParams(g, rm); - } + renumber_vertices(g); + renumber_edges(g); - clearReports(g); + if (minWidth == maxWidth) { + // For a fixed width pattern, we can retire the offsets as + // they are implicit in the graph now. + clearOffsetParams(g, rm); + } + + clearReports(g); return true; } static NFAVertex findSingleCyclic(const NGHolder &g) { - NFAVertex v = NGHolder::null_vertex(); + NFAVertex v = NGHolder::null_vertex(); for (const auto &e : edges_range(g)) { if (source(e, g) == target(e, g)) { if (source(e, g) == g.startDs) { continue; } - if (v != NGHolder::null_vertex()) { + if (v != NGHolder::null_vertex()) { // More than one cyclic vertex. 
- return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } v = source(e, g); } } - if (v != NGHolder::null_vertex()) { - DEBUG_PRINTF("cyclic is %zu\n", g[v].index); + if (v != NGHolder::null_vertex()) { + DEBUG_PRINTF("cyclic is %zu\n", g[v].index); assert(!is_special(v, g)); } return v; } static -bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, +bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, int *adjust) { const auto &reports = all_reports(g); if (reports.empty()) { @@ -451,30 +451,30 @@ bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, return true; } -/** - * If the pattern has a min_length and is of "ratchet" form with one unbounded +/** + * If the pattern has a min_length and is of "ratchet" form with one unbounded * repeat, that repeat can become a bounded repeat. * * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ */ static -bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { - const auto &reports = all_reports(g); - - if (reports.empty()) { - return false; - } +bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { + const auto &reports = all_reports(g); - if (!hasSameBounds(reports, rm)) { - DEBUG_PRINTF("mixed report bounds\n"); - return false; - } - - const auto &min_length = rm.getReport(*reports.begin()).minLength; - if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) { + if (reports.empty()) { return false; } + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const auto &min_length = rm.getReport(*reports.begin()).minLength; + if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) { + return false; + } + // If the pattern has virtual starts, we probably don't want to touch it. 
if (hasVirtualStarts(g)) { DEBUG_PRINTF("virtual starts, bailing\n"); @@ -484,11 +484,11 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // The graph must contain a single cyclic vertex (other than startDs), and // that vertex can have one pred and one successor. NFAVertex cyclic = findSingleCyclic(g); - if (cyclic == NGHolder::null_vertex()) { + if (cyclic == NGHolder::null_vertex()) { return false; } - NGHolder::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; tie(ai, ae) = adjacent_vertices(g.start, g); if (*ai == g.startDs) { ++ai; @@ -504,9 +504,9 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // Walk from the start vertex to the cyclic state and ensure we have a // chain of vertices. while (v != cyclic) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - auto succ = succs(v, g); + auto succ = succs(v, g); if (contains(succ, cyclic)) { if (succ.size() == 1) { v = cyclic; @@ -534,7 +534,7 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // Check the cyclic state is A-OK. v = getSoleDestVertex(g, cyclic); - if (v == NGHolder::null_vertex()) { + if (v == NGHolder::null_vertex()) { DEBUG_PRINTF("cyclic has more than one successor\n"); return false; } @@ -542,9 +542,9 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // Walk from the cyclic state to an accept and ensure we have a chain of // vertices. 
while (!is_any_accept(v, g)) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - auto succ = succs(v, g); + auto succ = succs(v, g); if (succ.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; @@ -559,20 +559,20 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust); width += offsetAdjust; - DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, + DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); - if (width >= min_length) { + if (width >= min_length) { DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", - min_length, width); - clearMinLengthParam(g, rm); + min_length, width); + clearMinLengthParam(g, rm); return true; } vector<NFAVertex> preds; vector<NFAEdge> dead; for (auto u : inv_adjacent_vertices_range(cyclic, g)) { - DEBUG_PRINTF("pred %zu\n", g[u].index); + DEBUG_PRINTF("pred %zu\n", g[u].index); if (u == cyclic) { continue; } @@ -593,7 +593,7 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { const CharReach &cr = g[cyclic].char_reach; - for (u32 i = 0; i < min_length - width - 1; ++i) { + for (u32 i = 0; i < min_length - width - 1; ++i) { v = add_vertex(g); g[v].char_reach = cr; @@ -608,22 +608,22 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { add_edge(u, cyclic, g); } - renumber_vertices(g); - renumber_edges(g); - clearMinLengthParam(g, rm); + renumber_vertices(g); + renumber_edges(g); + clearMinLengthParam(g, rm); clearReports(g); return true; } static -bool hasExtParams(const ExpressionInfo &expr) { - if (expr.min_length != 0) { +bool hasExtParams(const ExpressionInfo &expr) { + if (expr.min_length != 0) { return true; } - if (expr.min_offset != 0) { + if (expr.min_offset != 0) { return true; } - if (expr.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { return true; } return false; @@ -650,13 +650,13 @@ const depth& minDistToAccept(const 
NFAVertexBidiDepth &d) { } static -bool isEdgePrunable(const NGHolder &g, const Report &report, +bool isEdgePrunable(const NGHolder &g, const Report &report, const vector<NFAVertexBidiDepth> &depths, const NFAEdge &e) { const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); + DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); // Leave our special-to-special edges alone. if (is_special(u, g) && is_special(v, g)) { @@ -679,29 +679,29 @@ bool isEdgePrunable(const NGHolder &g, const Report &report, const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx); - if (report.minOffset) { - depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv); - if (max_offset.is_finite() && max_offset < report.minOffset) { + if (report.minOffset) { + depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv); + if (max_offset.is_finite() && max_offset < report.minOffset) { DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); return true; } } - if (report.maxOffset != MAX_OFFSET) { + if (report.maxOffset != MAX_OFFSET) { depth min_offset = minDistFromStart(du) + minDistToAccept(dv); assert(min_offset.is_finite()); - if (min_offset > report.maxOffset) { + if (min_offset > report.maxOffset) { DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); return true; } } - if (report.minLength && is_any_accept(v, g)) { + if (report.minLength && is_any_accept(v, g)) { // Simple take on min_length. If we're an edge to accept and our max // dist from start is too small, we can be pruned. 
- const depth &width = maxDistFromInit(du); - if (width.is_finite() && width < report.minLength) { + const depth &width = maxDistFromInit(du); + if (width.is_finite() && width < report.minLength) { DEBUG_PRINTF("max width %s from start too small for min_length\n", width.str().c_str()); return true; @@ -712,25 +712,25 @@ bool isEdgePrunable(const NGHolder &g, const Report &report, } static -void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { - const auto &reports = all_reports(g); - if (reports.empty()) { - return; - } - - if (!hasSameBounds(reports, rm)) { - DEBUG_PRINTF("report bounds vary\n"); - return; - } - - const auto &report = rm.getReport(*reports.begin()); - - auto depths = calcBidiDepths(g); - +void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { + const auto &reports = all_reports(g); + if (reports.empty()) { + return; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("report bounds vary\n"); + return; + } + + const auto &report = rm.getReport(*reports.begin()); + + auto depths = calcBidiDepths(g); + vector<NFAEdge> dead; for (const auto &e : edges_range(g)) { - if (isEdgePrunable(g, report, depths, e)) { + if (isEdgePrunable(g, report, depths, e)) { DEBUG_PRINTF("pruning\n"); dead.push_back(e); } @@ -742,45 +742,45 @@ void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { remove_edges(dead, g); pruneUseless(g); - clearReports(g); + clearReports(g); } -/** - * Remove vacuous edges in graphs where the min_offset or min_length - * constraints dictate that they can never produce a match. - */ +/** + * Remove vacuous edges in graphs where the min_offset or min_length + * constraints dictate that they can never produce a match. 
+ */ static -void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { +void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { vector<NFAEdge> dead; - auto has_min_offset = [&](NFAVertex v) { - assert(!g[v].reports.empty()); // must be reporter - return all_of_in(g[v].reports, [&](ReportID id) { - return rm.getReport(id).minOffset > 0; - }); - }; - - auto has_min_length = [&](NFAVertex v) { - assert(!g[v].reports.empty()); // must be reporter - return all_of_in(g[v].reports, [&](ReportID id) { - return rm.getReport(id).minLength > 0; - }); - }; - + auto has_min_offset = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minOffset > 0; + }); + }; + + auto has_min_length = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minLength > 0; + }); + }; + for (const auto &e : edges_range(g)) { const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - // Special case: Crudely remove vacuous edges from start in graphs with - // a min_offset. - if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { + // Special case: Crudely remove vacuous edges from start in graphs with + // a min_offset. + if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); dead.push_back(e); continue; } // If a min_length is set, vacuous edges can be removed. 
- if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { + if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); dead.push_back(e); continue; @@ -791,14 +791,14 @@ void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { return; } - DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); + DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); remove_edges(dead, g); pruneUseless(g); - clearReports(g); + clearReports(g); } static -void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, +void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, const ReportManager &rm, NFAVertex accept) { vector<NFAEdge> dead; @@ -809,11 +809,11 @@ void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, continue; } - if (!hasSameBounds(g[v].reports, rm)) { - continue; - } - const auto &report = rm.getReport(*g[v].reports.begin()); - + if (!hasSameBounds(g[v].reports, rm)) { + continue; + } + const auto &report = rm.getReport(*g[v].reports.begin()); + u32 idx = g[v].index; DepthMinMax d = depths[idx]; // copy pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v); @@ -822,16 +822,16 @@ void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, d.min += adj.first; d.max += adj.second; - if (d.max.is_finite() && d.max < report.minLength) { + if (d.max.is_finite() && d.max < report.minLength) { DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", - d.max.str().c_str(), report.minLength); + d.max.str().c_str(), report.minLength); dead.push_back(e); continue; } - if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { + if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", - d.min.str().c_str(), report.maxOffset); + d.min.str().c_str(), report.maxOffset); dead.push_back(e); continue; } @@ -840,15 +840,15 @@ void pruneUnmatchable(NGHolder &g, const 
vector<DepthMinMax> &depths, remove_edges(dead, g); } -/** - * Remove edges to accepts that can never produce a match long enough to - * satisfy our min_length and max_offset constraints. - */ +/** + * Remove edges to accepts that can never produce a match long enough to + * satisfy our min_length and max_offset constraints. + */ static -void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { - if (!any_of_in(all_reports(g), [&](ReportID id) { - return rm.getReport(id).minLength > 0; - })) { +void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { + if (!any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength > 0; + })) { return; } @@ -858,19 +858,19 @@ void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { pruneUnmatchable(g, depths, rm, g.acceptEod); pruneUseless(g); - clearReports(g); + clearReports(g); } static bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { - return any_of_in(all_reports(g), [&rm](ReportID id) { - return rm.getReport(id).offsetAdjust != 0; - }); + return any_of_in(all_reports(g), [&rm](ReportID id) { + return rm.getReport(id).offsetAdjust != 0; + }); } -void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, - ReportManager &rm) { - if (!hasExtParams(expr)) { +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm) { + if (!hasExtParams(expr)) { return; } @@ -882,154 +882,154 @@ void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); - if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { + if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { ostringstream oss; oss << "Expression is anchored and cannot satisfy min_offset=" - << expr.min_offset << " as it can only produce matches of length " + << expr.min_offset << " as it can only produce matches of length " << maxWidth << " bytes 
at most."; - throw CompileError(expr.index, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (minWidth > expr.max_offset) { + if (minWidth > expr.max_offset) { ostringstream oss; - oss << "Expression has max_offset=" << expr.max_offset - << " but requires " << minWidth << " bytes to match."; - throw CompileError(expr.index, oss.str()); + oss << "Expression has max_offset=" << expr.max_offset + << " but requires " << minWidth << " bytes to match."; + throw CompileError(expr.index, oss.str()); } - if (maxWidth.is_finite() && match_depths.max < expr.min_length) { + if (maxWidth.is_finite() && match_depths.max < expr.min_length) { ostringstream oss; - oss << "Expression has min_length=" << expr.min_length << " but can " + oss << "Expression has min_length=" << expr.min_length << " but can " "only produce matches of length " << match_depths.max << " bytes at most."; - throw CompileError(expr.index, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (expr.min_length && expr.min_length <= match_depths.min) { + if (expr.min_length && expr.min_length <= match_depths.min) { DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", - expr.min_length); - expr.min_length = 0; - } - - if (!hasExtParams(expr)) { - return; + expr.min_length); + expr.min_length = 0; } - updateReportBounds(rm, g, expr); -} - -/** - * If the pattern is completely anchored and has a min_length set, this can - * be converted to a min_offset. 
- */ -static -void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { - if (has_proper_successor(g.startDs, g)) { - return; // not wholly anchored - } - - replaceReports(g, [&rm](NFAVertex, ReportID id) { - const auto &report = rm.getReport(id); - if (report.minLength) { - Report new_report = report; - u64a min_len_offset = report.minLength - report.offsetAdjust; - new_report.minOffset = max(report.minOffset, min_len_offset); - new_report.minLength = 0; - return rm.getInternalId(new_report); - } - return id; - }); -} - -/** - * Clear offset bounds on reports that are not needed because they're satisfied - * by vertex depth. - */ -static -void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { - auto depths = calcDepths(g); - - replaceReports(g, [&](NFAVertex v, ReportID id) { - const auto &d = depths.at(g[v].index); - const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min); - const depth &max_depth = maxDistFromStartOfData(d); - - DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index, - min_depth.str().c_str(), max_depth.str().c_str()); - - Report report = rm.getReport(id); // copy - bool modified = false; - if (report.minOffset && !report.offsetAdjust && - report.minOffset <= min_depth) { - report.minOffset = 0; - modified = true; - } - if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() && - report.maxOffset >= max_depth) { - report.maxOffset = MAX_OFFSET; - modified = true; - } - if (modified) { - DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n", - g[v].index, report.minOffset, report.maxOffset); - return rm.getInternalId(report); - } - - return id; - }); -} - -void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { - if (!any_of_in(all_reports(g), - [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { - DEBUG_PRINTF("no extparam bounds\n"); - return; - } - - DEBUG_PRINTF("graph has extparam bounds\n"); - - pruneVacuousEdges(g, rm); - if (can_never_match(g)) { + if 
(!hasExtParams(expr)) { return; } - pruneUnmatchable(g, rm); - if (can_never_match(g)) { - return; - } - - if (!hasOffsetAdjustments(rm, g)) { - pruneExtUnreachable(g, rm); - if (can_never_match(g)) { - return; - } - } - - replaceMinLengthWithOffset(g, rm); - if (can_never_match(g)) { + updateReportBounds(rm, g, expr); +} + +/** + * If the pattern is completely anchored and has a min_length set, this can + * be converted to a min_offset. + */ +static +void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { + if (has_proper_successor(g.startDs, g)) { + return; // not wholly anchored + } + + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + u64a min_len_offset = report.minLength - report.offsetAdjust; + new_report.minOffset = max(report.minOffset, min_len_offset); + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Clear offset bounds on reports that are not needed because they're satisfied + * by vertex depth. 
+ */ +static +void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { + auto depths = calcDepths(g); + + replaceReports(g, [&](NFAVertex v, ReportID id) { + const auto &d = depths.at(g[v].index); + const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min); + const depth &max_depth = maxDistFromStartOfData(d); + + DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index, + min_depth.str().c_str(), max_depth.str().c_str()); + + Report report = rm.getReport(id); // copy + bool modified = false; + if (report.minOffset && !report.offsetAdjust && + report.minOffset <= min_depth) { + report.minOffset = 0; + modified = true; + } + if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() && + report.maxOffset >= max_depth) { + report.maxOffset = MAX_OFFSET; + modified = true; + } + if (modified) { + DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n", + g[v].index, report.minOffset, report.maxOffset); + return rm.getInternalId(report); + } + + return id; + }); +} + +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { + if (!any_of_in(all_reports(g), + [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { + DEBUG_PRINTF("no extparam bounds\n"); + return; + } + + DEBUG_PRINTF("graph has extparam bounds\n"); + + pruneVacuousEdges(g, rm); + if (can_never_match(g)) { + return; + } + + pruneUnmatchable(g, rm); + if (can_never_match(g)) { + return; + } + + if (!hasOffsetAdjustments(rm, g)) { + pruneExtUnreachable(g, rm); + if (can_never_match(g)) { + return; + } + } + + replaceMinLengthWithOffset(g, rm); + if (can_never_match(g)) { return; } // If the pattern has a min_length and is of "ratchet" form with one // unbounded repeat, that repeat can become a bounded repeat. // e.g. 
/foo.*bar/{min_length=100} --> /foo.{94,}bar/ - transformMinLengthToRepeat(g, rm); - if (can_never_match(g)) { - return; + transformMinLengthToRepeat(g, rm); + if (can_never_match(g)) { + return; } // If the pattern is unanchored, has a max_offset and has not asked for // SOM, we can use that knowledge to anchor it which will limit its // lifespan. Note that we can't use this transformation if there's a // min_length, as it's currently handled using "sly SOM". - if (som == SOM_NONE) { - anchorPatternWithBoundedRepeat(g, rm); - if (can_never_match(g)) { - return; + if (som == SOM_NONE) { + anchorPatternWithBoundedRepeat(g, rm); + if (can_never_match(g)) { + return; } } - removeUnneededOffsetBounds(g, rm); + removeUnneededOffsetBounds(g, rm); } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h index ae818075c0..521302642a 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,31 +34,31 @@ #ifndef NG_EXTPARAM_H #define NG_EXTPARAM_H -#include "som/som.h" - +#include "som/som.h" + namespace ue2 { -class ExpressionInfo; -class NGHolder; +class ExpressionInfo; +class NGHolder; class ReportManager; -/** - * \brief Propagate extended parameter information to vertex reports. Will - * throw CompileError if this expression's extended parameters are not - * satisfiable. - * - * This will also remove extended parameter constraints that are guaranteed to - * be satisfied from ExpressionInfo. 
- */ -void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, - ReportManager &rm); - -/** - * \brief Perform graph reductions (if possible) to do with extended parameter - * constraints on reports. - */ -void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som); +/** + * \brief Propagate extended parameter information to vertex reports. Will + * throw CompileError if this expression's extended parameters are not + * satisfiable. + * + * This will also remove extended parameter constraints that are guaranteed to + * be satisfied from ExpressionInfo. + */ +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm); +/** + * \brief Perform graph reductions (if possible) to do with extended parameter + * constraints on reports. + */ +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som); + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp index 8fb264d8a9..d5ca511647 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,7 @@ namespace ue2 { static bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored, - flat_set<ReportID> *reports) { + flat_set<ReportID> *reports) { DEBUG_PRINTF("looking for a mask pattern\n"); set<NFAVertex> s_succ; insert(&s_succ, adjacent_vertices(g.start, g)); @@ -77,7 +77,7 @@ bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored, NFAVertex v = *succs.begin(); while (true) { - DEBUG_PRINTF("validating vertex %zu\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != 
g.acceptEod); @@ -117,7 +117,7 @@ bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) { return false; } - flat_set<ReportID> reports; + flat_set<ReportID> reports; bool anchored = false; vector<CharReach> mask; diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp index 78fd862919..9dd19f05de 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp @@ -1,707 +1,707 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Graph fuzzer for approximate matching - */ - -#include "ng_fuzzy.h" - -#include "ng.h" -#include "ng_depth.h" -#include "ng_util.h" - -#include <map> -#include <vector> -using namespace std; - -namespace ue2 { - -// returns all successors up to a given depth in a vector of sets, indexed by -// zero-based depth from source vertex -static -vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g, - NFAVertex src, u32 depth) { - vector<flat_set<NFAVertex>> result(depth); - flat_set<NFAVertex> cur, next; - - assert(depth > 0); - - // populate current set of successors - for (auto v : adjacent_vertices_range(src, g)) { - // ignore self-loops - if (src == v) { - continue; - } - DEBUG_PRINTF("Node %zu depth 1\n", g[v].index); - cur.insert(v); - } - result[0] = cur; - - for (unsigned d = 1; d < depth; d++) { - // collect all successors for all current level vertices - for (auto v : cur) { - // don't go past special nodes - if (is_special(v, g)) { - continue; - } - - for (auto succ : adjacent_vertices_range(v, g)) { - // ignore self-loops - if (v == succ) { - continue; - } - DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1); - next.insert(succ); - } - } - result[d] = next; - next.swap(cur); - next.clear(); - } - - return result; -} - -// returns all predecessors up to a given depth in a vector of sets, indexed by -// zero-based depth from source vertex -static -vector<flat_set<NFAVertex>> 
gatherPredecessorsByDepth(const NGHolder &g, - NFAVertex src, - u32 depth) { - vector<flat_set<NFAVertex>> result(depth); - flat_set<NFAVertex> cur, next; - - assert(depth > 0); - - // populate current set of successors - for (auto v : inv_adjacent_vertices_range(src, g)) { - // ignore self-loops - if (src == v) { - continue; - } - DEBUG_PRINTF("Node %zu depth 1\n", g[v].index); - cur.insert(v); - } - result[0] = cur; - - for (unsigned d = 1; d < depth; d++) { - // collect all successors for all current level vertices - for (auto v : cur) { - for (auto pred : inv_adjacent_vertices_range(v, g)) { - // ignore self-loops - if (v == pred) { - continue; - } - DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1); - next.insert(pred); - } - } - result[d] = next; - next.swap(cur); - next.clear(); - } - - return result; -} - -/* - * This struct produces a fuzzed graph; that is, a graph that is able to match - * the original pattern, as well as input data within a certain edit distance. - * Construct the struct, then call fuzz_graph() to transform the graph. - * - * Terminology used: - * - Shadow vertices: vertices mirroring the original graph at various edit - * distances - * - Shadow graph level: edit distance of a particular shadow graph - * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace - */ -struct ShadowGraph { - NGHolder &g; - u32 edit_distance; - bool hamming; - map<pair<NFAVertex, u32>, NFAVertex> shadow_map; - map<pair<NFAVertex, u32>, NFAVertex> helper_map; - map<NFAVertex, NFAVertex> clones; - // edge creation is deferred - vector<pair<NFAVertex, NFAVertex>> edges_to_be_added; - flat_set<NFAVertex> orig; - - ShadowGraph(NGHolder &g_in, u32 ed_in, bool hamm_in) - : g(g_in), edit_distance(ed_in), hamming(hamm_in) {} - - void fuzz_graph() { - if (edit_distance == 0) { - return; - } - - DEBUG_PRINTF("edit distance = %u hamming = %s\n", edit_distance, - hamming ? 
"true" : "false"); - - // step 1: prepare the vertices, helpers and shadows according to - // the original graph - prepare_graph(); - - // step 2: add shadow and helper nodes - build_shadow_graph(); - - // step 3: set up reports for newly created vertices (and make clones - // if necessary) - if (!hamming) { - create_reports(); - } - - // step 4: wire up shadow graph and helpers for insert/replace/remove - connect_shadow_graph(); - - // step 5: commit all the edge wirings - DEBUG_PRINTF("Committing edge wirings\n"); - for (const auto &p : edges_to_be_added) { - add_edge_if_not_present(p.first, p.second, g); - } - - DEBUG_PRINTF("Done!\n"); - } - -private: - const NFAVertex& get_clone(const NFAVertex &v) { - return contains(clones, v) ? - clones[v] : v; - } - - void connect_to_clones(const NFAVertex &u, const NFAVertex &v) { - const NFAVertex &clone_u = get_clone(u); - const NFAVertex &clone_v = get_clone(v); - - edges_to_be_added.emplace_back(u, v); - DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index); - - // do not connect clones to accepts, we do it during cloning - if (is_any_accept(clone_v, g)) { - return; - } - edges_to_be_added.emplace_back(clone_u, clone_v); - DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index, - g[clone_v].index); - } - - void prepare_graph() { - DEBUG_PRINTF("Building shadow graphs\n"); - - for (auto v : vertices_range(g)) { - // all level 0 vertices are their own helpers and their own shadows - helper_map[make_pair(v, 0)] = v; - shadow_map[make_pair(v, 0)] = v; - - // find special nodes - if (is_any_accept(v, g)) { - DEBUG_PRINTF("Node %zu is a special node\n", g[v].index); - for (unsigned edit = 1; edit <= edit_distance; edit++) { - // all accepts are their own shadows and helpers at all - // levels - shadow_map[make_pair(v, edit)] = v; - helper_map[make_pair(v, edit)] = v; - } - continue; - } - DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index); - orig.insert(v); - } - } - - void build_shadow_graph() { - for 
(auto v : orig) { - DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n", - g[v].index); - for (unsigned dist = 1; dist <= edit_distance; dist++) { - auto shadow_v = v; - - // start and startDs cannot have shadows but do have helpers - if (!is_any_start(v, g)) { - shadow_v = clone_vertex(g, v); - DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", - g[shadow_v].index, dist); - } - shadow_map[make_pair(v, dist)] = shadow_v; - - // if there's nowhere to go from this vertex, no helper needed - if (proper_out_degree(v, g) < 1) { - DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n", - g[shadow_v].index, dist); - helper_map[make_pair(v, dist)] = shadow_v; - continue; - } - - // start and startDs only have helpers for insert, so not Hamming - if (hamming && is_any_start(v, g)) { - DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n", - g[shadow_v].index, dist); - helper_map[make_pair(v, dist)] = shadow_v; - continue; - } - - auto helper_v = clone_vertex(g, v); - DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", - g[helper_v].index, dist); - - // this is a helper, so make it a dot - g[helper_v].char_reach = CharReach::dot(); - // do not copy virtual start's assert flags - if (is_virtual_start(v, g)) { - DEBUG_PRINTF("Helper node ID is virtual start: %zu (level %u)\n", - g[helper_v].index, dist); - g[helper_v].assert_flags = 0; - } - helper_map[make_pair(v, dist)] = helper_v; - } - } - } - - // wire up successors according to the original graph, wire helpers - // to shadow successors (insert/replace) - void connect_succs(NFAVertex v, u32 dist) { - DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n", - g[v].index, dist); - const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; - const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; - - // multiple insert - if (!hamming && dist > 1) { - const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)]; - connect_to_clones(prev_level_helper, cur_shadow_helper); - } - - for (auto 
orig_dst : adjacent_vertices_range(v, g)) { - const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)]; - - connect_to_clones(cur_shadow_v, shadow_dst); - - // ignore startDs for insert/replace - if (orig_dst == g.startDs) { - continue; - } - - connect_to_clones(cur_shadow_helper, shadow_dst); - } - } - - // wire up predecessors according to the original graph, wire - // predecessors to helpers (replace), wire predecessor helpers to - // helpers (multiple replace) - void connect_preds(NFAVertex v, u32 dist) { - DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n", - g[v].index, dist); - const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; - const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; - - auto orig_src_vertices = inv_adjacent_vertices_range(v, g); - for (auto orig_src : orig_src_vertices) { - // ignore edges from start to startDs - if (v == g.startDs && orig_src == g.start) { - continue; - } - // ignore self-loops for replace - if (orig_src != v) { - // do not wire a replace node for start vertices if we - // have a virtual start - if (is_virtual_start(v, g) && is_any_start(orig_src, g)) { - continue; - } - - if (dist) { - const auto &prev_level_src = - shadow_map[make_pair(orig_src, dist - 1)]; - const auto &prev_level_helper = - helper_map[make_pair(orig_src, dist - 1)]; - - connect_to_clones(prev_level_src, cur_shadow_helper); - connect_to_clones(prev_level_helper, cur_shadow_helper); - } - } - // wire predecessor according to original graph - const auto &shadow_src = shadow_map[make_pair(orig_src, dist)]; - - connect_to_clones(shadow_src, cur_shadow_v); - } - } - - // wire up previous level helper to current shadow (insert) - void connect_helpers(NFAVertex v, u32 dist) { - DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n", - g[v].index, dist); - const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; - auto prev_level_v = shadow_map[make_pair(v, dist - 1)]; - - connect_to_clones(prev_level_v, 
cur_shadow_helper); - } - - /* - * wiring edges for removal is a special case. - * - * when wiring edges for removal, as well as wiring up immediate - * predecessors to immediate successors, we also need to wire up more - * distant successors to their respective shadow graph levels. - * - * for example, consider graph start->a->b->c->d->accept. - * - * at edit distance 1, we need remove edges start->b, a->c, b->d, and - * c->accept, all going from original graph (level 0) to shadow graph - * level 1. - * - * at edit distance 2, we also need edges start->c, a->d and b->accept, - * all going from level 0 to shadow graph level 2. - * - * this is propagated to all shadow levels; that is, given edit - * distance 3, we will have edges from shadow levels 0->1, 0->2, - * 0->3, 1->2, 1->3, and 2->3. - * - * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2, - * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc. - * - * we also have to wire helpers to their removal successors, to - * accommodate for a replace followed by a remove, on all shadow levels. - * - * and finally, we also have to wire source shadows into removal - * successor helpers on a level above, to accommodate for a remove - * followed by a replace. 
- */ - void connect_removals(NFAVertex v) { - DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index); - - // vertices returned by this function don't include self-loops - auto dst_vertices_by_depth = - gatherSuccessorsByDepth(g, v, edit_distance); - auto orig_src_vertices = inv_adjacent_vertices_range(v, g); - for (auto orig_src : orig_src_vertices) { - // ignore self-loops - if (orig_src == v) { - continue; - } - for (unsigned step = 1; step <= edit_distance; step++) { - for (unsigned dist = step; dist <= edit_distance; dist++) { - auto &dst_vertices = dst_vertices_by_depth[step - 1]; - for (auto &orig_dst : dst_vertices) { - const auto &shadow_src = - shadow_map[make_pair(orig_src, dist - step)]; - const auto &shadow_helper = - helper_map[make_pair(orig_src, dist - step)]; - const auto &shadow_dst = - shadow_map[make_pair(orig_dst, dist)]; - - // removal - connect_to_clones(shadow_src, shadow_dst); - - // removal from helper vertex - connect_to_clones(shadow_helper, shadow_dst); - - // removal into helper, requires additional edit - if ((dist + 1) <= edit_distance) { - const auto &next_level_helper = - helper_map[make_pair(orig_dst, dist + 1)]; - - connect_to_clones(shadow_src, next_level_helper); - } - } - } - } - } - } - - void connect_shadow_graph() { - DEBUG_PRINTF("Wiring up the graph\n"); - - for (auto v : orig) { - - DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index); - - for (unsigned dist = 0; dist <= edit_distance; dist++) { - - // handle insert/replace - connect_succs(v, dist); - - // handle replace/multiple insert - connect_preds(v, dist); - - // handle helpers - if (!hamming && dist > 0) { - connect_helpers(v, dist); - } - } - - // handle removals - if (!hamming) { - connect_removals(v); - } - } - } - - void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) { - for (auto dst : targets) { - DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index, - g[dst].index); - edges_to_be_added.emplace_back(src, dst); - } - } 
- - // create a clone of the vertex, but overwrite its report set - void create_clone(NFAVertex v, const flat_set<ReportID> &reports, - unsigned max_edit_distance, - const flat_set<NFAVertex> &targets) { - // some vertices may have the same reports, but different successors; - // therefore, we may need to connect them multiple times, but still only - // clone once - bool needs_cloning = !contains(clones, v); - - DEBUG_PRINTF("Cloning node %zu\n", g[v].index); - // go through all shadows and helpers, including - // original vertex - for (unsigned d = 0; d < max_edit_distance; d++) { - auto shadow_v = shadow_map[make_pair(v, d)]; - auto helper_v = helper_map[make_pair(v, d)]; - - NFAVertex new_shadow_v, new_helper_v; - - // make sure we don't clone the same vertex twice - if (needs_cloning) { - new_shadow_v = clone_vertex(g, shadow_v); - DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", - g[new_shadow_v].index, d); - clones[shadow_v] = new_shadow_v; - } else { - new_shadow_v = clones[shadow_v]; - } - g[new_shadow_v].reports = reports; - - connect_to_targets(new_shadow_v, targets); - - if (shadow_v == helper_v) { - continue; - } - if (needs_cloning) { - new_helper_v = clone_vertex(g, helper_v); - DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", - g[new_helper_v].index, d); - clones[helper_v] = new_helper_v; - } else { - new_helper_v = clones[helper_v]; - } - g[new_helper_v].reports = reports; - - connect_to_targets(new_helper_v, targets); - } - } - - void write_reports(NFAVertex v, const flat_set<ReportID> &reports, - unsigned max_edit_distance, - const flat_set<NFAVertex> &targets) { - // we're overwriting reports, but we're not losing any - // information as we already cached all the different report - // sets, so vertices having different reports will be cloned and set up - // with the correct report set - - // go through all shadows and helpers, including original - // vertex - for (unsigned d = 0; d < max_edit_distance; d++) { - auto shadow_v = 
shadow_map[make_pair(v, d)]; - auto helper_v = helper_map[make_pair(v, d)]; - DEBUG_PRINTF("Setting up reports for shadow node: %zu " - "(level %u)\n", - g[shadow_v].index, d); - DEBUG_PRINTF("Setting up reports for helper node: %zu " - "(level %u)\n", - g[helper_v].index, d); - g[shadow_v].reports = reports; - g[helper_v].reports = reports; - - connect_to_targets(shadow_v, targets); - connect_to_targets(helper_v, targets); - } - } - - /* - * we may have multiple report sets per graph. that means, whenever we - * construct additional paths through the graph (alternations, removals), we - * have to account for the fact that some vertices are predecessors to - * vertices with different report sets. - * - * whenever that happens, we have to clone the paths for both report sets, - * and set up these new vertices with their respective report sets as well. - * - * in order to do that, we first have to get all the predecessors for accept - * and acceptEod vertices. then, go through them one by one, and take note - * of the report lists. the first report set we find, wins, the rest we - * clone. - * - * we also have to do this in two passes, because there may be vertices that - * are predecessors to vertices with different report sets, so to avoid - * overwriting reports we will be caching reports info instead. 
- */ - void create_reports() { - map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices; - flat_set<NFAVertex> accepts{g.accept, g.acceptEod}; - - // gather reports info from all vertices connected to accept - for (auto accept : accepts) { - for (auto src : inv_adjacent_vertices_range(accept, g)) { - // skip special vertices - if (is_special(src, g)) { - continue; - } - reports_to_vertices[g[src].reports].insert(src); - } - } - - // we expect to see at most two report sets - assert(reports_to_vertices.size() > 0 && - reports_to_vertices.size() <= 2); - - // set up all reports - bool clone = false; - for (auto &pair : reports_to_vertices) { - const auto &reports = pair.first; - const auto &vertices = pair.second; - - for (auto src : vertices) { - // get all predecessors up to edit distance - auto src_vertices_by_depth = - gatherPredecessorsByDepth(g, src, edit_distance); - - // find which accepts source vertex connects to - flat_set<NFAVertex> targets; - for (const auto &accept : accepts) { - NFAEdge e = edge(src, accept, g); - if (e) { - targets.insert(accept); - } - } - assert(targets.size()); - - for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) { - const auto &preds = src_vertices_by_depth[d]; - for (auto v : preds) { - // only clone a node if it already contains reports - if (clone && !g[v].reports.empty()) { - create_clone(v, reports, edit_distance - d, - targets); - } else { - write_reports(v, reports, edit_distance - d, - targets); - } - } - } - } - // clone vertices only if it's not our first report set - clone = true; - } - } -}; - -// check if we will edit our way into a vacuous pattern -static -bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) { - auto depths = calcRevDepths(g); - - depth min_depth = depth::infinity(); - auto idx = g[g.start].index; - - // check distance from start to accept/acceptEod - if (depths[idx].toAccept.min.is_finite()) { - min_depth = min(depths[idx].toAccept.min, min_depth); - } - if 
(depths[idx].toAcceptEod.min.is_finite()) { - min_depth = min(depths[idx].toAcceptEod.min, min_depth); - } - - idx = g[g.startDs].index; - - // check distance from startDs to accept/acceptEod - if (depths[idx].toAccept.min.is_finite()) { - min_depth = min(depths[idx].toAccept.min, min_depth); - } - if (depths[idx].toAcceptEod.min.is_finite()) { - min_depth = min(depths[idx].toAcceptEod.min, min_depth); - } - - assert(min_depth.is_finite()); - - // now, check if we can edit our way into a vacuous pattern - if (min_depth <= (u64a) edit_distance + 1) { - DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n"); - return true; - } - return false; -} - -void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming, - bool utf8, const Grey &grey) { - if (edit_distance == 0) { - return; - } - if (!grey.allowApproximateMatching) { - throw CompileError("Approximate matching is disabled."); - } - if (edit_distance > grey.maxEditDistance) { - throw CompileError("Edit distance is too big."); - } - if (utf8) { - throw CompileError("UTF-8 is disallowed for approximate matching."); - } - // graph isn't fuzzable if there are edge assertions anywhere in the graph - for (auto e : edges_range(g)) { - if (g[e].assert_flags) { - throw CompileError("Zero-width assertions are disallowed for " - "approximate matching."); - } - } - if (!hamming && will_turn_vacuous(g, edit_distance)) { - throw CompileError("Approximate matching patterns that reduce to " - "vacuous patterns are disallowed."); - } -} - -void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming, - const Grey &grey) { - if (edit_distance == 0) { - return; - } - - assert(grey.allowApproximateMatching); - assert(grey.maxEditDistance >= edit_distance); - - ShadowGraph sg(g, edit_distance, hamming); - sg.fuzz_graph(); - - // For safety, enforce limit on actual vertex count. 
- if (num_vertices(g) > grey.limitApproxMatchingVertices) { - DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g), - grey.limitApproxMatchingVertices); - throw ResourceLimitError(); - } -} - -} // namespace ue2 +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * \brief Graph fuzzer for approximate matching + */ + +#include "ng_fuzzy.h" + +#include "ng.h" +#include "ng_depth.h" +#include "ng_util.h" + +#include <map> +#include <vector> +using namespace std; + +namespace ue2 { + +// returns all successors up to a given depth in a vector of sets, indexed by +// zero-based depth from source vertex +static +vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g, + NFAVertex src, u32 depth) { + vector<flat_set<NFAVertex>> result(depth); + flat_set<NFAVertex> cur, next; + + assert(depth > 0); + + // populate current set of successors + for (auto v : adjacent_vertices_range(src, g)) { + // ignore self-loops + if (src == v) { + continue; + } + DEBUG_PRINTF("Node %zu depth 1\n", g[v].index); + cur.insert(v); + } + result[0] = cur; + + for (unsigned d = 1; d < depth; d++) { + // collect all successors for all current level vertices + for (auto v : cur) { + // don't go past special nodes + if (is_special(v, g)) { + continue; + } + + for (auto succ : adjacent_vertices_range(v, g)) { + // ignore self-loops + if (v == succ) { + continue; + } + DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1); + next.insert(succ); + } + } + result[d] = next; + next.swap(cur); + next.clear(); + } + + return result; +} + +// returns all predecessors up to a given depth in a vector of sets, indexed by +// zero-based depth from source vertex +static +vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g, + NFAVertex src, + u32 depth) { + vector<flat_set<NFAVertex>> result(depth); + flat_set<NFAVertex> cur, next; + + assert(depth > 0); + + // populate current set of successors + for (auto v : inv_adjacent_vertices_range(src, g)) { + // ignore self-loops + if (src == v) { + continue; + } + DEBUG_PRINTF("Node %zu depth 1\n", g[v].index); + cur.insert(v); + } + result[0] = cur; + + for (unsigned d = 1; d < depth; d++) { + // collect all successors for all current level vertices + for (auto v : cur) { 
+ for (auto pred : inv_adjacent_vertices_range(v, g)) { + // ignore self-loops + if (v == pred) { + continue; + } + DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1); + next.insert(pred); + } + } + result[d] = next; + next.swap(cur); + next.clear(); + } + + return result; +} + +/* + * This struct produces a fuzzed graph; that is, a graph that is able to match + * the original pattern, as well as input data within a certain edit distance. + * Construct the struct, then call fuzz_graph() to transform the graph. + * + * Terminology used: + * - Shadow vertices: vertices mirroring the original graph at various edit + * distances + * - Shadow graph level: edit distance of a particular shadow graph + * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace + */ +struct ShadowGraph { + NGHolder &g; + u32 edit_distance; + bool hamming; + map<pair<NFAVertex, u32>, NFAVertex> shadow_map; + map<pair<NFAVertex, u32>, NFAVertex> helper_map; + map<NFAVertex, NFAVertex> clones; + // edge creation is deferred + vector<pair<NFAVertex, NFAVertex>> edges_to_be_added; + flat_set<NFAVertex> orig; + + ShadowGraph(NGHolder &g_in, u32 ed_in, bool hamm_in) + : g(g_in), edit_distance(ed_in), hamming(hamm_in) {} + + void fuzz_graph() { + if (edit_distance == 0) { + return; + } + + DEBUG_PRINTF("edit distance = %u hamming = %s\n", edit_distance, + hamming ? 
"true" : "false"); + + // step 1: prepare the vertices, helpers and shadows according to + // the original graph + prepare_graph(); + + // step 2: add shadow and helper nodes + build_shadow_graph(); + + // step 3: set up reports for newly created vertices (and make clones + // if necessary) + if (!hamming) { + create_reports(); + } + + // step 4: wire up shadow graph and helpers for insert/replace/remove + connect_shadow_graph(); + + // step 5: commit all the edge wirings + DEBUG_PRINTF("Committing edge wirings\n"); + for (const auto &p : edges_to_be_added) { + add_edge_if_not_present(p.first, p.second, g); + } + + DEBUG_PRINTF("Done!\n"); + } + +private: + const NFAVertex& get_clone(const NFAVertex &v) { + return contains(clones, v) ? + clones[v] : v; + } + + void connect_to_clones(const NFAVertex &u, const NFAVertex &v) { + const NFAVertex &clone_u = get_clone(u); + const NFAVertex &clone_v = get_clone(v); + + edges_to_be_added.emplace_back(u, v); + DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index); + + // do not connect clones to accepts, we do it during cloning + if (is_any_accept(clone_v, g)) { + return; + } + edges_to_be_added.emplace_back(clone_u, clone_v); + DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index, + g[clone_v].index); + } + + void prepare_graph() { + DEBUG_PRINTF("Building shadow graphs\n"); + + for (auto v : vertices_range(g)) { + // all level 0 vertices are their own helpers and their own shadows + helper_map[make_pair(v, 0)] = v; + shadow_map[make_pair(v, 0)] = v; + + // find special nodes + if (is_any_accept(v, g)) { + DEBUG_PRINTF("Node %zu is a special node\n", g[v].index); + for (unsigned edit = 1; edit <= edit_distance; edit++) { + // all accepts are their own shadows and helpers at all + // levels + shadow_map[make_pair(v, edit)] = v; + helper_map[make_pair(v, edit)] = v; + } + continue; + } + DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index); + orig.insert(v); + } + } + + void build_shadow_graph() { + for 
(auto v : orig) { + DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n", + g[v].index); + for (unsigned dist = 1; dist <= edit_distance; dist++) { + auto shadow_v = v; + + // start and startDs cannot have shadows but do have helpers + if (!is_any_start(v, g)) { + shadow_v = clone_vertex(g, v); + DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", + g[shadow_v].index, dist); + } + shadow_map[make_pair(v, dist)] = shadow_v; + + // if there's nowhere to go from this vertex, no helper needed + if (proper_out_degree(v, g) < 1) { + DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n", + g[shadow_v].index, dist); + helper_map[make_pair(v, dist)] = shadow_v; + continue; + } + + // start and startDs only have helpers for insert, so not Hamming + if (hamming && is_any_start(v, g)) { + DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n", + g[shadow_v].index, dist); + helper_map[make_pair(v, dist)] = shadow_v; + continue; + } + + auto helper_v = clone_vertex(g, v); + DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", + g[helper_v].index, dist); + + // this is a helper, so make it a dot + g[helper_v].char_reach = CharReach::dot(); + // do not copy virtual start's assert flags + if (is_virtual_start(v, g)) { + DEBUG_PRINTF("Helper node ID is virtual start: %zu (level %u)\n", + g[helper_v].index, dist); + g[helper_v].assert_flags = 0; + } + helper_map[make_pair(v, dist)] = helper_v; + } + } + } + + // wire up successors according to the original graph, wire helpers + // to shadow successors (insert/replace) + void connect_succs(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + + // multiple insert + if (!hamming && dist > 1) { + const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)]; + connect_to_clones(prev_level_helper, cur_shadow_helper); + } + + for (auto 
orig_dst : adjacent_vertices_range(v, g)) { + const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)]; + + connect_to_clones(cur_shadow_v, shadow_dst); + + // ignore startDs for insert/replace + if (orig_dst == g.startDs) { + continue; + } + + connect_to_clones(cur_shadow_helper, shadow_dst); + } + } + + // wire up predecessors according to the original graph, wire + // predecessors to helpers (replace), wire predecessor helpers to + // helpers (multiple replace) + void connect_preds(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + + auto orig_src_vertices = inv_adjacent_vertices_range(v, g); + for (auto orig_src : orig_src_vertices) { + // ignore edges from start to startDs + if (v == g.startDs && orig_src == g.start) { + continue; + } + // ignore self-loops for replace + if (orig_src != v) { + // do not wire a replace node for start vertices if we + // have a virtual start + if (is_virtual_start(v, g) && is_any_start(orig_src, g)) { + continue; + } + + if (dist) { + const auto &prev_level_src = + shadow_map[make_pair(orig_src, dist - 1)]; + const auto &prev_level_helper = + helper_map[make_pair(orig_src, dist - 1)]; + + connect_to_clones(prev_level_src, cur_shadow_helper); + connect_to_clones(prev_level_helper, cur_shadow_helper); + } + } + // wire predecessor according to original graph + const auto &shadow_src = shadow_map[make_pair(orig_src, dist)]; + + connect_to_clones(shadow_src, cur_shadow_v); + } + } + + // wire up previous level helper to current shadow (insert) + void connect_helpers(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + auto prev_level_v = shadow_map[make_pair(v, dist - 1)]; + + connect_to_clones(prev_level_v, 
cur_shadow_helper); + } + + /* + * wiring edges for removal is a special case. + * + * when wiring edges for removal, as well as wiring up immediate + * predecessors to immediate successors, we also need to wire up more + * distant successors to their respective shadow graph levels. + * + * for example, consider graph start->a->b->c->d->accept. + * + * at edit distance 1, we need remove edges start->b, a->c, b->d, and + * c->accept, all going from original graph (level 0) to shadow graph + * level 1. + * + * at edit distance 2, we also need edges start->c, a->d and b->accept, + * all going from level 0 to shadow graph level 2. + * + * this is propagated to all shadow levels; that is, given edit + * distance 3, we will have edges from shadow levels 0->1, 0->2, + * 0->3, 1->2, 1->3, and 2->3. + * + * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2, + * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc. + * + * we also have to wire helpers to their removal successors, to + * accommodate for a replace followed by a remove, on all shadow levels. + * + * and finally, we also have to wire source shadows into removal + * successor helpers on a level above, to accommodate for a remove + * followed by a replace. 
+ */ + void connect_removals(NFAVertex v) { + DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index); + + // vertices returned by this function don't include self-loops + auto dst_vertices_by_depth = + gatherSuccessorsByDepth(g, v, edit_distance); + auto orig_src_vertices = inv_adjacent_vertices_range(v, g); + for (auto orig_src : orig_src_vertices) { + // ignore self-loops + if (orig_src == v) { + continue; + } + for (unsigned step = 1; step <= edit_distance; step++) { + for (unsigned dist = step; dist <= edit_distance; dist++) { + auto &dst_vertices = dst_vertices_by_depth[step - 1]; + for (auto &orig_dst : dst_vertices) { + const auto &shadow_src = + shadow_map[make_pair(orig_src, dist - step)]; + const auto &shadow_helper = + helper_map[make_pair(orig_src, dist - step)]; + const auto &shadow_dst = + shadow_map[make_pair(orig_dst, dist)]; + + // removal + connect_to_clones(shadow_src, shadow_dst); + + // removal from helper vertex + connect_to_clones(shadow_helper, shadow_dst); + + // removal into helper, requires additional edit + if ((dist + 1) <= edit_distance) { + const auto &next_level_helper = + helper_map[make_pair(orig_dst, dist + 1)]; + + connect_to_clones(shadow_src, next_level_helper); + } + } + } + } + } + } + + void connect_shadow_graph() { + DEBUG_PRINTF("Wiring up the graph\n"); + + for (auto v : orig) { + + DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index); + + for (unsigned dist = 0; dist <= edit_distance; dist++) { + + // handle insert/replace + connect_succs(v, dist); + + // handle replace/multiple insert + connect_preds(v, dist); + + // handle helpers + if (!hamming && dist > 0) { + connect_helpers(v, dist); + } + } + + // handle removals + if (!hamming) { + connect_removals(v); + } + } + } + + void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) { + for (auto dst : targets) { + DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index, + g[dst].index); + edges_to_be_added.emplace_back(src, dst); + } + } 
+ + // create a clone of the vertex, but overwrite its report set + void create_clone(NFAVertex v, const flat_set<ReportID> &reports, + unsigned max_edit_distance, + const flat_set<NFAVertex> &targets) { + // some vertices may have the same reports, but different successors; + // therefore, we may need to connect them multiple times, but still only + // clone once + bool needs_cloning = !contains(clones, v); + + DEBUG_PRINTF("Cloning node %zu\n", g[v].index); + // go through all shadows and helpers, including + // original vertex + for (unsigned d = 0; d < max_edit_distance; d++) { + auto shadow_v = shadow_map[make_pair(v, d)]; + auto helper_v = helper_map[make_pair(v, d)]; + + NFAVertex new_shadow_v, new_helper_v; + + // make sure we don't clone the same vertex twice + if (needs_cloning) { + new_shadow_v = clone_vertex(g, shadow_v); + DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", + g[new_shadow_v].index, d); + clones[shadow_v] = new_shadow_v; + } else { + new_shadow_v = clones[shadow_v]; + } + g[new_shadow_v].reports = reports; + + connect_to_targets(new_shadow_v, targets); + + if (shadow_v == helper_v) { + continue; + } + if (needs_cloning) { + new_helper_v = clone_vertex(g, helper_v); + DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", + g[new_helper_v].index, d); + clones[helper_v] = new_helper_v; + } else { + new_helper_v = clones[helper_v]; + } + g[new_helper_v].reports = reports; + + connect_to_targets(new_helper_v, targets); + } + } + + void write_reports(NFAVertex v, const flat_set<ReportID> &reports, + unsigned max_edit_distance, + const flat_set<NFAVertex> &targets) { + // we're overwriting reports, but we're not losing any + // information as we already cached all the different report + // sets, so vertices having different reports will be cloned and set up + // with the correct report set + + // go through all shadows and helpers, including original + // vertex + for (unsigned d = 0; d < max_edit_distance; d++) { + auto shadow_v = 
shadow_map[make_pair(v, d)]; + auto helper_v = helper_map[make_pair(v, d)]; + DEBUG_PRINTF("Setting up reports for shadow node: %zu " + "(level %u)\n", + g[shadow_v].index, d); + DEBUG_PRINTF("Setting up reports for helper node: %zu " + "(level %u)\n", + g[helper_v].index, d); + g[shadow_v].reports = reports; + g[helper_v].reports = reports; + + connect_to_targets(shadow_v, targets); + connect_to_targets(helper_v, targets); + } + } + + /* + * we may have multiple report sets per graph. that means, whenever we + * construct additional paths through the graph (alternations, removals), we + * have to account for the fact that some vertices are predecessors to + * vertices with different report sets. + * + * whenever that happens, we have to clone the paths for both report sets, + * and set up these new vertices with their respective report sets as well. + * + * in order to do that, we first have to get all the predecessors for accept + * and acceptEod vertices. then, go through them one by one, and take note + * of the report lists. the first report set we find, wins, the rest we + * clone. + * + * we also have to do this in two passes, because there may be vertices that + * are predecessors to vertices with different report sets, so to avoid + * overwriting reports we will be caching reports info instead. 
+ */ + void create_reports() { + map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices; + flat_set<NFAVertex> accepts{g.accept, g.acceptEod}; + + // gather reports info from all vertices connected to accept + for (auto accept : accepts) { + for (auto src : inv_adjacent_vertices_range(accept, g)) { + // skip special vertices + if (is_special(src, g)) { + continue; + } + reports_to_vertices[g[src].reports].insert(src); + } + } + + // we expect to see at most two report sets + assert(reports_to_vertices.size() > 0 && + reports_to_vertices.size() <= 2); + + // set up all reports + bool clone = false; + for (auto &pair : reports_to_vertices) { + const auto &reports = pair.first; + const auto &vertices = pair.second; + + for (auto src : vertices) { + // get all predecessors up to edit distance + auto src_vertices_by_depth = + gatherPredecessorsByDepth(g, src, edit_distance); + + // find which accepts source vertex connects to + flat_set<NFAVertex> targets; + for (const auto &accept : accepts) { + NFAEdge e = edge(src, accept, g); + if (e) { + targets.insert(accept); + } + } + assert(targets.size()); + + for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) { + const auto &preds = src_vertices_by_depth[d]; + for (auto v : preds) { + // only clone a node if it already contains reports + if (clone && !g[v].reports.empty()) { + create_clone(v, reports, edit_distance - d, + targets); + } else { + write_reports(v, reports, edit_distance - d, + targets); + } + } + } + } + // clone vertices only if it's not our first report set + clone = true; + } + } +}; + +// check if we will edit our way into a vacuous pattern +static +bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) { + auto depths = calcRevDepths(g); + + depth min_depth = depth::infinity(); + auto idx = g[g.start].index; + + // check distance from start to accept/acceptEod + if (depths[idx].toAccept.min.is_finite()) { + min_depth = min(depths[idx].toAccept.min, min_depth); + } + if 
(depths[idx].toAcceptEod.min.is_finite()) { + min_depth = min(depths[idx].toAcceptEod.min, min_depth); + } + + idx = g[g.startDs].index; + + // check distance from startDs to accept/acceptEod + if (depths[idx].toAccept.min.is_finite()) { + min_depth = min(depths[idx].toAccept.min, min_depth); + } + if (depths[idx].toAcceptEod.min.is_finite()) { + min_depth = min(depths[idx].toAcceptEod.min, min_depth); + } + + assert(min_depth.is_finite()); + + // now, check if we can edit our way into a vacuous pattern + if (min_depth <= (u64a) edit_distance + 1) { + DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n"); + return true; + } + return false; +} + +void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming, + bool utf8, const Grey &grey) { + if (edit_distance == 0) { + return; + } + if (!grey.allowApproximateMatching) { + throw CompileError("Approximate matching is disabled."); + } + if (edit_distance > grey.maxEditDistance) { + throw CompileError("Edit distance is too big."); + } + if (utf8) { + throw CompileError("UTF-8 is disallowed for approximate matching."); + } + // graph isn't fuzzable if there are edge assertions anywhere in the graph + for (auto e : edges_range(g)) { + if (g[e].assert_flags) { + throw CompileError("Zero-width assertions are disallowed for " + "approximate matching."); + } + } + if (!hamming && will_turn_vacuous(g, edit_distance)) { + throw CompileError("Approximate matching patterns that reduce to " + "vacuous patterns are disallowed."); + } +} + +void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming, + const Grey &grey) { + if (edit_distance == 0) { + return; + } + + assert(grey.allowApproximateMatching); + assert(grey.maxEditDistance >= edit_distance); + + ShadowGraph sg(g, edit_distance, hamming); + sg.fuzz_graph(); + + // For safety, enforce limit on actual vertex count. 
+ if (num_vertices(g) > grey.limitApproxMatchingVertices) { + DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g), + grey.limitApproxMatchingVertices); + throw ResourceLimitError(); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.h b/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.h index a99767d87c..4093768cd6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.h @@ -1,49 +1,49 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Graph fuzzer for approximate matching - */ - -#ifndef NG_FUZZY_H -#define NG_FUZZY_H - -#include "ue2common.h" - -namespace ue2 { -struct Grey; -class NGHolder; -class ReportManager; - -void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming, - bool utf8, const Grey &grey); - -void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming, const Grey &grey); -} - -#endif // NG_FUZZY_H +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Graph fuzzer for approximate matching + */ + +#ifndef NG_FUZZY_H +#define NG_FUZZY_H + +#include "ue2common.h" + +namespace ue2 { +struct Grey; +class NGHolder; +class ReportManager; + +void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming, + bool utf8, const Grey &grey); + +void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming, const Grey &grey); +} + +#endif // NG_FUZZY_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp index 8054544772..26889f8946 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp @@ -40,12 +40,12 @@ #include "util/bitfield.h" #include "util/container.h" #include "util/determinise.h" -#include "util/flat_containers.h" -#include "util/graph.h" +#include "util/flat_containers.h" +#include "util/graph.h" #include "util/graph_range.h" -#include "util/hash_dynamic_bitset.h" +#include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" -#include "util/unordered.h" +#include "util/unordered.h" #include <algorithm> #include <functional> @@ -70,15 +70,15 @@ struct 
haig_too_wide { template<typename stateset> static -void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, +void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, stateset *init, stateset *initDS, vector<NFAVertex> *v_by_index) { DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str()); for (auto v : vertices_range(g)) { - if (contains(unused, v)) { + if (contains(unused, v)) { continue; } - u32 v_index = g[v].index; + u32 v_index = g[v].index; if (is_any_start(v, g)) { init->set(v_index); if (hasSelfLoop(v, g) || is_triggered(g)) { @@ -90,11 +90,11 @@ void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, } v_by_index->clear(); - v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); + v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); for (auto v : vertices_range(g)) { u32 v_index = g[v].index; - assert((*v_by_index)[v_index] == NGHolder::null_vertex()); + assert((*v_by_index)[v_index] == NGHolder::null_vertex()); (*v_by_index)[v_index] = v; } } @@ -112,29 +112,29 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { } } -template<typename Automaton_Traits> +template<typename Automaton_Traits> class Automaton_Base { -public: - using StateSet = typename Automaton_Traits::StateSet; - using StateMap = typename Automaton_Traits::StateMap; - +public: + using StateSet = typename Automaton_Traits::StateSet; + using StateMap = typename Automaton_Traits::StateMap; + protected: - Automaton_Base(const NGHolder &graph_in, som_type som, - const vector<vector<CharReach>> &triggers, - bool unordered_som) - : graph(graph_in), numStates(num_vertices(graph)), - unused(getRedundantStarts(graph_in)), - init(Automaton_Traits::init_states(numStates)), - initDS(Automaton_Traits::init_states(numStates)), - squash(Automaton_Traits::init_states(numStates)), - accept(Automaton_Traits::init_states(numStates)), - acceptEod(Automaton_Traits::init_states(numStates)), - 
toppable(Automaton_Traits::init_states(numStates)), - dead(Automaton_Traits::init_states(numStates)) { + Automaton_Base(const NGHolder &graph_in, som_type som, + const vector<vector<CharReach>> &triggers, + bool unordered_som) + : graph(graph_in), numStates(num_vertices(graph)), + unused(getRedundantStarts(graph_in)), + init(Automaton_Traits::init_states(numStates)), + initDS(Automaton_Traits::init_states(numStates)), + squash(Automaton_Traits::init_states(numStates)), + accept(Automaton_Traits::init_states(numStates)), + acceptEod(Automaton_Traits::init_states(numStates)), + toppable(Automaton_Traits::init_states(numStates)), + dead(Automaton_Traits::init_states(numStates)) { calculateAlphabet(graph, alpha, unalpha, &alphasize); assert(alphasize <= ALPHABET_SIZE); - populateInit(graph, unused, &init, &initDS, &v_by_index); + populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; @@ -146,8 +146,8 @@ protected: start_floating = DEAD_STATE; } - cr_by_index = populateCR(graph, v_by_index, alpha); - + cr_by_index = populateCR(graph, v_by_index, alpha); + if (!unordered_som) { for (const auto &sq : findSquashers(graph, som)) { NFAVertex v = sq.first; @@ -158,16 +158,16 @@ protected: } if (is_triggered(graph)) { - dynamic_bitset<> temp(numStates); - markToppableStarts(graph, unused, false, triggers, &temp); - toppable = Automaton_Traits::copy_states(temp, numStates); + dynamic_bitset<> temp(numStates); + markToppableStarts(graph, unused, false, triggers, &temp); + toppable = Automaton_Traits::copy_states(temp, numStates); } } private: // Convert an NFAStateSet (as used by the squash code) into a StateSet. 
StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out = Automaton_Traits::init_states(numStates); + StateSet out = Automaton_Traits::init_states(numStates); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); @@ -175,24 +175,24 @@ private: return out; } - void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) { - StateSet acc = in & (eod ? acceptEod : accept); - for (size_t i = acc.find_first(); i != StateSet::npos; - i = acc.find_next(i)) { - NFAVertex v = v_by_index[i]; - DEBUG_PRINTF("marking report\n"); - const auto &my_reports = graph[v].reports; - rv.insert(my_reports.begin(), my_reports.end()); - } - } - + void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) { + StateSet acc = in & (eod ? acceptEod : accept); + for (size_t i = acc.find_first(); i != StateSet::npos; + i = acc.find_next(i)) { + NFAVertex v = v_by_index[i]; + DEBUG_PRINTF("marking report\n"); + const auto &my_reports = graph[v].reports; + rv.insert(my_reports.begin(), my_reports.end()); + } + } + public: void transition(const StateSet &in, StateSet *next) { transition_graph(*this, v_by_index, in, next); } const vector<StateSet> initial() { - vector<StateSet> rv = {init}; + vector<StateSet> rv = {init}; if (start_floating != DEAD_STATE && start_floating != start_anchored) { rv.push_back(initDS); } @@ -202,27 +202,27 @@ public: void reports(const StateSet &in, flat_set<ReportID> &rv) { reports_i(in, false, rv); } - + void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { reports_i(in, true, rv); } - static bool canPrune(const flat_set<ReportID> &) { return false; } - - const NGHolder &graph; - const u32 numStates; - const flat_set<NFAVertex> unused; - - array<u16, ALPHABET_SIZE> alpha; - array<u16, ALPHABET_SIZE> unalpha; - u16 alphasize; - - set<dstate_id_t> done_a; - set<dstate_id_t> done_b; - - u16 start_anchored; - u16 start_floating; - + static bool canPrune(const flat_set<ReportID> &) { 
return false; } + + const NGHolder &graph; + const u32 numStates; + const flat_set<NFAVertex> unused; + + array<u16, ALPHABET_SIZE> alpha; + array<u16, ALPHABET_SIZE> unalpha; + u16 alphasize; + + set<dstate_id_t> done_a; + set<dstate_id_t> done_b; + + u16 start_anchored; + u16 start_floating; + vector<NFAVertex> v_by_index; vector<CharReach> cr_by_index; /* pre alpha'ed */ StateSet init; @@ -236,58 +236,58 @@ public: StateSet dead; }; -struct Big_Traits { - using StateSet = dynamic_bitset<>; - using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; - - static StateSet init_states(u32 num) { - return StateSet(num); - } - - static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { - assert(in.size() == num); - return in; - } -}; - -class Automaton_Big : public Automaton_Base<Big_Traits> { -public: - Automaton_Big(const NGHolder &graph_in, som_type som, - const vector<vector<CharReach>> &triggers, bool unordered_som) - : Automaton_Base(graph_in, som, triggers, unordered_som) {} -}; - -struct Graph_Traits { - using StateSet = bitfield<NFA_STATE_LIMIT>; - using StateMap = unordered_map<StateSet, dstate_id_t>; - - static StateSet init_states(UNUSED u32 num) { - assert(num <= NFA_STATE_LIMIT); - return StateSet(); - } - - static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { - StateSet out = init_states(num); +struct Big_Traits { + using StateSet = dynamic_bitset<>; + using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; + + static StateSet init_states(u32 num) { + return StateSet(num); + } + + static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { + assert(in.size() == num); + return in; + } +}; + +class Automaton_Big : public Automaton_Base<Big_Traits> { +public: + Automaton_Big(const NGHolder &graph_in, som_type som, + const vector<vector<CharReach>> &triggers, bool unordered_som) + : Automaton_Base(graph_in, som, triggers, unordered_som) {} +}; + +struct Graph_Traits { + using 
StateSet = bitfield<NFA_STATE_LIMIT>; + using StateMap = unordered_map<StateSet, dstate_id_t>; + + static StateSet init_states(UNUSED u32 num) { + assert(num <= NFA_STATE_LIMIT); + return StateSet(); + } + + static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { + StateSet out = init_states(num); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); } return out; } -}; +}; -class Automaton_Graph : public Automaton_Base<Graph_Traits> { +class Automaton_Graph : public Automaton_Base<Graph_Traits> { public: - Automaton_Graph(const NGHolder &graph_in, som_type som, - const vector<vector<CharReach>> &triggers, - bool unordered_som) - : Automaton_Base(graph_in, som, triggers, unordered_som) {} + Automaton_Graph(const NGHolder &graph_in, som_type som, + const vector<vector<CharReach>> &triggers, + bool unordered_som) + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; class Automaton_Haig_Merge { public: - using StateSet = vector<u16>; - using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; + using StateSet = vector<u16>; + using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in) : nfas(in.begin(), in.end()), dead(in.size()) { @@ -430,10 +430,10 @@ bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) { } static -s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused, +s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused, NFAVertex v) { if (is_triggered(g) && v == g.start) { - assert(!contains(unused, v)); + assert(!contains(unused, v)); } else if (is_any_start_inc_virtual(v, g)) { return CREATE_NEW_SOM; } @@ -451,7 +451,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states, NFAVertex v = state_mapping[i]; s32 slot_id = g[v].index; - DEBUG_PRINTF("d vertex %zu\n", g[v].index); + DEBUG_PRINTF("d vertex %zu\n", g[v].index); vector<u32> &out_map = preds[slot_id]; for 
(auto u : inv_adjacent_vertices_range(v, g)) { out_map.push_back(g[u].index); @@ -464,7 +464,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states, template<typename stateset> static -void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused, +void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused, NFAVertex accept_v, const stateset &source_nfa_states, const vector<NFAVertex> &state_mapping, set<som_report> &out) { @@ -475,7 +475,7 @@ void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused, continue; } for (ReportID report_id : g[v].reports) { - out.insert(som_report(report_id, getSlotID(g, unused, v))); + out.insert(som_report(report_id, getSlotID(g, unused, v))); } } } @@ -492,7 +492,7 @@ void haig_note_starts(const NGHolder &g, map<u32, u32> *out) { for (auto v : vertices_range(g)) { if (is_any_start_inc_virtual(v, g)) { - DEBUG_PRINTF("%zu creates new som value\n", g[v].index); + DEBUG_PRINTF("%zu creates new som value\n", g[v].index); out->emplace(g[v].index, 0U); continue; } @@ -503,7 +503,7 @@ void haig_note_starts(const NGHolder &g, map<u32, u32> *out) { const DepthMinMax &d = depths[g[v].index]; if (d.min == d.max && d.min.is_finite()) { - DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min); + DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min); out->emplace(g[v].index, d.min); } } @@ -511,16 +511,16 @@ void haig_note_starts(const NGHolder &g, map<u32, u32> *out) { template<class Auto> static -bool doHaig(const NGHolder &g, som_type som, - const vector<vector<CharReach>> &triggers, bool unordered_som, - raw_som_dfa *rdfa) { +bool doHaig(const NGHolder &g, som_type som, + const vector<vector<CharReach>> &triggers, bool unordered_som, + raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ - using StateSet = typename Auto::StateSet; + using StateSet = typename Auto::StateSet; vector<StateSet> nfa_state_map; - Auto n(g, 
som, triggers, unordered_som); + Auto n(g, som, triggers, unordered_som); try { - if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { + if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); return false; } @@ -548,9 +548,9 @@ bool doHaig(const NGHolder &g, som_type som, haig_do_preds(g, source_states, n.v_by_index, rdfa->state_som.back().preds); - haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, rdfa->state_som.back().reports); - haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, rdfa->state_som.back().reports_eod); } @@ -559,10 +559,10 @@ bool doHaig(const NGHolder &g, som_type som, return true; } -unique_ptr<raw_som_dfa> -attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, - const vector<vector<CharReach>> &triggers, const Grey &grey, - bool unordered_som) { +unique_ptr<raw_som_dfa> +attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, + const vector<vector<CharReach>> &triggers, const Grey &grey, + bool unordered_som) { assert(is_triggered(g) != triggers.empty()); assert(!unordered_som || is_triggered(g)); @@ -588,11 +588,11 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, bool rv; if (numStates <= NFA_STATE_LIMIT) { /* fast path */ - rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som, + rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som, rdfa.get()); } else { /* not the fast path */ - rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get()); + rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get()); } if (!rv) { @@ -722,14 +722,14 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df } } - using StateSet = Automaton_Haig_Merge::StateSet; + using StateSet = 
Automaton_Haig_Merge::StateSet; vector<StateSet> nfa_state_map; auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som, NODE_START, dfas[0]->stream_som_loc_width); - if (!determinise(n, rdfa->states, limit, &nfa_state_map)) { - DEBUG_PRINTF("state limit (%u) exceeded\n", limit); + if (!determinise(n, rdfa->states, limit, &nfa_state_map)) { + DEBUG_PRINTF("state limit (%u) exceeded\n", limit); return nullptr; /* over state limit */ } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h index baff2f5866..a299ddb377 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h @@ -54,10 +54,10 @@ struct raw_som_dfa; * between) */ -std::unique_ptr<raw_som_dfa> -attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, - const std::vector<std::vector<CharReach>> &triggers, - const Grey &grey, bool unordered_som_triggers = false); +std::unique_ptr<raw_som_dfa> +attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, + const std::vector<std::vector<CharReach>> &triggers, + const Grey &grey, bool unordered_som_triggers = false); std::unique_ptr<raw_som_dfa> attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas, diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp index a2fbb28863..85d3c03f0e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,27 +36,27 @@ using namespace std; namespace ue2 { // internal use only -static NFAVertex addSpecialVertex(NGHolder &g, SpecialNodes id) { - NFAVertex v(add_vertex(g)); +static NFAVertex 
addSpecialVertex(NGHolder &g, SpecialNodes id) { + NFAVertex v(add_vertex(g)); g[v].index = id; return v; } NGHolder::NGHolder(nfa_kind k) - : kind (k), + : kind (k), // add initial special nodes - start(addSpecialVertex(*this, NODE_START)), - startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)), - accept(addSpecialVertex(*this, NODE_ACCEPT)), - acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) { + start(addSpecialVertex(*this, NODE_START)), + startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)), + accept(addSpecialVertex(*this, NODE_ACCEPT)), + acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) { // wire up some fake edges for the stylized bits of the NFA add_edge(start, startDs, *this); add_edge(startDs, startDs, *this); add_edge(accept, acceptEod, *this); - (*this)[start].char_reach.setall(); - (*this)[startDs].char_reach.setall(); + (*this)[start].char_reach.setall(); + (*this)[startDs].char_reach.setall(); } NGHolder::~NGHolder(void) { @@ -64,7 +64,7 @@ NGHolder::~NGHolder(void) { } void clear_graph(NGHolder &h) { - NGHolder::vertex_iterator vi, ve; + NGHolder::vertex_iterator vi, ve; for (tie(vi, ve) = vertices(h); vi != ve;) { NFAVertex v = *vi; ++vi; @@ -76,8 +76,8 @@ void clear_graph(NGHolder &h) { } assert(num_vertices(h) == N_SPECIALS); - renumber_vertices(h); /* ensure that we reset our next allocated index */ - renumber_edges(h); + renumber_vertices(h); /* ensure that we reset our next allocated index */ + renumber_edges(h); // Recreate special stylised edges. 
add_edge(h.start, h.startDs, h); @@ -87,11 +87,11 @@ void clear_graph(NGHolder &h) { NFAVertex NGHolder::getSpecialVertex(u32 id) const { switch (id) { - case NODE_START: return start; - case NODE_START_DOTSTAR: return startDs; - case NODE_ACCEPT: return accept; - case NODE_ACCEPT_EOD: return acceptEod; - default: return null_vertex(); + case NODE_START: return start; + case NODE_START_DOTSTAR: return startDs; + case NODE_ACCEPT: return accept; + case NODE_ACCEPT_EOD: return acceptEod; + default: return null_vertex(); } } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h index 36cf62447b..9281a76f30 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,75 +26,75 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Definition of the NGHolder type used for to represent general nfa - * graphs as well as all associated types (vertex and edge properties, etc). - * - * The NGHolder also contains the special vertices used to represents starts and - * accepts. - */ - +/** \file + * \brief Definition of the NGHolder type used for to represent general nfa + * graphs as well as all associated types (vertex and edge properties, etc). + * + * The NGHolder also contains the special vertices used to represents starts and + * accepts. + */ + #ifndef NG_HOLDER_H #define NG_HOLDER_H #include "ue2common.h" #include "nfa/nfa_kind.h" -#include "util/charreach.h" -#include "util/flat_containers.h" -#include "util/ue2_graph.h" +#include "util/charreach.h" +#include "util/flat_containers.h" +#include "util/ue2_graph.h" namespace ue2 { -/** \brief Properties associated with each vertex in an NFAGraph. 
*/ -struct NFAGraphVertexProps { - /** \brief Set of characters on which this vertex is reachable. */ - CharReach char_reach; - - /** \brief Set of reports raised by this vertex. */ - flat_set<ReportID> reports; - - /** \brief Unique index for this vertex, used for BGL algorithms. */ - size_t index = 0; - - /** \brief Flags associated with assertions. */ - u32 assert_flags = 0; -}; - -/** \brief Properties associated with each edge in an NFAGraph. */ -struct NFAGraphEdgeProps { - /** \brief Unique index for this edge, used for BGL algorithms. */ - size_t index = 0; - - /** \brief For graphs that will be implemented as multi-top engines, this - * specifies the top events. Only used on edges from the start vertex. */ - flat_set<u32> tops; - - /** \brief Flags associated with assertions. */ - u32 assert_flags = 0; -}; - -/** \brief vertex_index values for special nodes in the NFAGraph. */ -enum SpecialNodes { - /** \brief Anchored start vertex. WARNING: this may be triggered at various - * locations (not just zero) for triggered graphs. */ - NODE_START, - - /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a - * proper self-loop. */ - NODE_START_DOTSTAR, - - /** \brief Accept vertex. All vertices that can match at arbitrary offsets - * must have an edge to this vertex. */ - NODE_ACCEPT, - - /** \brief Accept-EOD vertex. Vertices that must raise a match at EOD only - * must have an edge to this vertex. */ - NODE_ACCEPT_EOD, - - /** \brief Sentinel, number of special vertices. */ - N_SPECIALS -}; - +/** \brief Properties associated with each vertex in an NFAGraph. */ +struct NFAGraphVertexProps { + /** \brief Set of characters on which this vertex is reachable. */ + CharReach char_reach; + + /** \brief Set of reports raised by this vertex. */ + flat_set<ReportID> reports; + + /** \brief Unique index for this vertex, used for BGL algorithms. */ + size_t index = 0; + + /** \brief Flags associated with assertions. 
*/ + u32 assert_flags = 0; +}; + +/** \brief Properties associated with each edge in an NFAGraph. */ +struct NFAGraphEdgeProps { + /** \brief Unique index for this edge, used for BGL algorithms. */ + size_t index = 0; + + /** \brief For graphs that will be implemented as multi-top engines, this + * specifies the top events. Only used on edges from the start vertex. */ + flat_set<u32> tops; + + /** \brief Flags associated with assertions. */ + u32 assert_flags = 0; +}; + +/** \brief vertex_index values for special nodes in the NFAGraph. */ +enum SpecialNodes { + /** \brief Anchored start vertex. WARNING: this may be triggered at various + * locations (not just zero) for triggered graphs. */ + NODE_START, + + /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a + * proper self-loop. */ + NODE_START_DOTSTAR, + + /** \brief Accept vertex. All vertices that can match at arbitrary offsets + * must have an edge to this vertex. */ + NODE_ACCEPT, + + /** \brief Accept-EOD vertex. Vertices that must raise a match at EOD only + * must have an edge to this vertex. */ + NODE_ACCEPT_EOD, + + /** \brief Sentinel, number of special vertices. */ + N_SPECIALS +}; + /** \brief Encapsulates an NFAGraph, stores special vertices and other * metadata. * @@ -105,31 +105,31 @@ enum SpecialNodes { * - (startDs, startDs) (self-loop) * - (accept, acceptEod) */ -class NGHolder : public ue2_graph<NGHolder, NFAGraphVertexProps, - NFAGraphEdgeProps> { +class NGHolder : public ue2_graph<NGHolder, NFAGraphVertexProps, + NFAGraphEdgeProps> { public: explicit NGHolder(nfa_kind kind); - NGHolder(void) : NGHolder(NFA_OUTFIX) {}; + NGHolder(void) : NGHolder(NFA_OUTFIX) {}; virtual ~NGHolder(void); - nfa_kind kind; /* Role that this plays in Rose */ + nfa_kind kind; /* Role that this plays in Rose */ - static const size_t N_SPECIAL_VERTICES = N_SPECIALS; -public: - const vertex_descriptor start; //!< Anchored start vertex. 
- const vertex_descriptor startDs; //!< Unanchored start-dotstar vertex. - const vertex_descriptor accept; //!< Accept vertex. - const vertex_descriptor acceptEod; //!< Accept at EOD vertex. + static const size_t N_SPECIAL_VERTICES = N_SPECIALS; +public: + const vertex_descriptor start; //!< Anchored start vertex. + const vertex_descriptor startDs; //!< Unanchored start-dotstar vertex. + const vertex_descriptor accept; //!< Accept vertex. + const vertex_descriptor acceptEod; //!< Accept at EOD vertex. - vertex_descriptor getSpecialVertex(u32 id) const; -}; + vertex_descriptor getSpecialVertex(u32 id) const; +}; -typedef NGHolder::vertex_descriptor NFAVertex; -typedef NGHolder::edge_descriptor NFAEdge; +typedef NGHolder::vertex_descriptor NFAVertex; +typedef NGHolder::edge_descriptor NFAEdge; /** \brief True if the vertex \p v is one of our special vertices. */ template <typename GraphT> -bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) { +bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) { return g[v].index < N_SPECIALS; } @@ -167,8 +167,8 @@ void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } if (renumber) { - renumber_edges(h); - renumber_vertices(h); + renumber_edges(h); + renumber_vertices(h); } } @@ -203,12 +203,12 @@ void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } if (renumber) { - renumber_edges(h); + renumber_edges(h); } } -#define DEFAULT_TOP 0U - +#define DEFAULT_TOP 0U + /** \brief Clear and remove all of the edges pointed to by the edge descriptors * in the given container. 
* @@ -219,26 +219,26 @@ void remove_edges(const Container &c, NGHolder &h, bool renumber = true) { remove_edges(c.begin(), c.end(), h, renumber); } -inline +inline bool is_triggered(const NGHolder &g) { return is_triggered(g.kind); } -inline +inline bool generates_callbacks(const NGHolder &g) { return generates_callbacks(g.kind); } - -inline -bool has_managed_reports(const NGHolder &g) { - return has_managed_reports(g.kind); -} - -inline -bool inspects_states_for_accepts(const NGHolder &g) { - return inspects_states_for_accepts(g.kind); -} - + +inline +bool has_managed_reports(const NGHolder &g) { + return has_managed_reports(g.kind); +} + +inline +bool inspects_states_for_accepts(const NGHolder &g) { + return inspects_states_for_accepts(g.kind); +} + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp index 35a09d0ea2..72b26b6f80 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,7 +39,7 @@ #include "ng_util.h" #include "ue2common.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" @@ -73,28 +73,28 @@ private: ReportID a_rep; ReportID b_rep; }; - -/** Comparison functor used to sort by vertex_index. 
*/ -template<typename Graph> -struct VertexIndexOrdering { - explicit VertexIndexOrdering(const Graph &g_in) : g(g_in) {} - bool operator()(typename Graph::vertex_descriptor a, - typename Graph::vertex_descriptor b) const { - assert(a == b || g[a].index != g[b].index); - return g[a].index < g[b].index; - } -private: - const Graph &g; -}; - -template<typename Graph> -static -VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) { - return VertexIndexOrdering<Graph>(g); -} - + +/** Comparison functor used to sort by vertex_index. */ +template<typename Graph> +struct VertexIndexOrdering { + explicit VertexIndexOrdering(const Graph &g_in) : g(g_in) {} + bool operator()(typename Graph::vertex_descriptor a, + typename Graph::vertex_descriptor b) const { + assert(a == b || g[a].index != g[b].index); + return g[a].index < g[b].index; + } +private: + const Graph &g; +}; + +template<typename Graph> +static +VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) { + return VertexIndexOrdering<Graph>(g); } +} + static bool is_equal_i(const NGHolder &a, const NGHolder &b, const check_report &check_rep) { @@ -125,7 +125,7 @@ bool is_equal_i(const NGHolder &a, const NGHolder &b, for (size_t i = 0; i < vert_a.size(); i++) { NFAVertex va = vert_a[i]; NFAVertex vb = vert_b[i]; - DEBUG_PRINTF("vertex %zu\n", a[va].index); + DEBUG_PRINTF("vertex %zu\n", a[va].index); // Vertex index must be the same. 
if (a[va].index != b[vb].index) { @@ -169,14 +169,14 @@ bool is_equal_i(const NGHolder &a, const NGHolder &b, } /* check top for edges out of start */ - vector<pair<u32, flat_set<u32>>> top_a; - vector<pair<u32, flat_set<u32>>> top_b; + vector<pair<u32, flat_set<u32>>> top_a; + vector<pair<u32, flat_set<u32>>> top_b; for (const auto &e : out_edges_range(a.start, a)) { - top_a.emplace_back(a[target(e, a)].index, a[e].tops); + top_a.emplace_back(a[target(e, a)].index, a[e].tops); } for (const auto &e : out_edges_range(b.start, b)) { - top_b.emplace_back(b[target(e, b)].index, b[e].tops); + top_b.emplace_back(b[target(e, b)].index, b[e].tops); } sort(top_a.begin(), top_a.end()); @@ -196,11 +196,11 @@ u64a hash_holder(const NGHolder &g) { size_t rv = 0; for (auto v : vertices_range(g)) { - hash_combine(rv, g[v].index); - hash_combine(rv, g[v].char_reach); + hash_combine(rv, g[v].index); + hash_combine(rv, g[v].char_reach); for (auto w : adjacent_vertices_range(v, g)) { - hash_combine(rv, g[w].index); + hash_combine(rv, g[w].index); } } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h index d8046270ff..4dd4fd34b7 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp index d8ba503ce6..b511e5f290 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or 
without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Large Bounded Repeat (LBR) engine build code. */ @@ -37,19 +37,19 @@ #include "ng_holder.h" #include "ng_repeat.h" #include "ng_reports.h" -#include "nfa/castlecompile.h" +#include "nfa/castlecompile.h" #include "nfa/lbr_internal.h" #include "nfa/nfa_internal.h" #include "nfa/repeatcompile.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" #include "util/alloc.h" #include "util/bitutils.h" // for lg2 #include "util/compile_context.h" #include "util/container.h" #include "util/depth.h" #include "util/dump_charclass.h" -#include "util/report_manager.h" +#include "util/report_manager.h" #include "util/verify_types.h" using namespace std; @@ -129,31 +129,31 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, } template <class LbrStruct> static -bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, - const depth &repeatMax) { +bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, + const depth &repeatMax) { size_t tableLen = 0; if (rtype == REPEAT_SPARSE_OPTIMAL_P) { tableLen = sizeof(u64a) * (repeatMax + 1); } size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + tableLen + sizeof(u64a); - auto nfa = make_zeroed_bytecode_ptr<NFA>(len); + auto nfa = make_zeroed_bytecode_ptr<NFA>(len); nfa->type = verify_u8(nfa_type); nfa->length = verify_u32(len); return nfa; } static -bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { if (!cr.all()) { return nullptr; } enum RepeatType rtype = 
chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax); + auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax); struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get()); fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax, @@ -164,9 +164,9 @@ bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin, } static -bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { const CharReach escapes(~cr); if (escapes.count() != 1) { @@ -175,7 +175,7 @@ bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax); + auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax); struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); lv->c = escapes.find_first(); @@ -187,9 +187,9 @@ bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin, } static -bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { const CharReach escapes(cr); if (escapes.count() != 1) { @@ -198,7 +198,7 @@ bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax); + auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, 
repeatMax); struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); lv->c = escapes.find_first(); @@ -210,18 +210,18 @@ bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin, } static -bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax); + auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax); struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get()); fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax, minPeriod, rtype); - if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { + if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { return nullptr; } @@ -230,27 +230,27 @@ bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin, } static -bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax); + auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax); struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get()); fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax, minPeriod, rtype); - truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2); + truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 
*)&lc->mask2); DEBUG_PRINTF("built truffle lbr\n"); return nfa; } static -bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n", repeatMin.str().c_str(), repeatMax.str().c_str(), describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(), @@ -258,8 +258,8 @@ bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin, assert(repeatMin <= repeatMax); assert(repeatMax.is_reachable()); - auto nfa = - buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); + auto nfa = + buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); if (!nfa) { nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, @@ -286,19 +286,19 @@ bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin, return nfa; } -bytecode_ptr<NFA> constructLBR(const CastleProto &proto, - const vector<vector<CharReach>> &triggers, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr<NFA> constructLBR(const CastleProto &proto, + const vector<vector<CharReach>> &triggers, + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } - if (proto.repeats.size() != 1) { - return nullptr; - } - - const PureRepeat &repeat = proto.repeats.begin()->second; + if (proto.repeats.size() != 1) { + return nullptr; + } + + const PureRepeat &repeat = proto.repeats.begin()->second; assert(!repeat.reach.none()); if (repeat.reports.size() != 1) { @@ -315,9 +315,9 @@ bytecode_ptr<NFA> constructLBR(const CastleProto &proto, } ReportID report = *repeat.reports.begin(); - if (has_managed_reports(proto.kind)) { - report = rm.getProgramOffset(report); - } + if (has_managed_reports(proto.kind)) { + report = 
rm.getProgramOffset(report); + } DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str()); return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max, @@ -325,10 +325,10 @@ bytecode_ptr<NFA> constructLBR(const CastleProto &proto, } /** \brief Construct an LBR engine from the given graph \p g. */ -bytecode_ptr<NFA> constructLBR(const NGHolder &g, - const vector<vector<CharReach>> &triggers, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr<NFA> constructLBR(const NGHolder &g, + const vector<vector<CharReach>> &triggers, + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } @@ -339,11 +339,11 @@ bytecode_ptr<NFA> constructLBR(const NGHolder &g, } if (repeat.reports.size() != 1) { DEBUG_PRINTF("too many reports\n"); - return nullptr; + return nullptr; } - CastleProto proto(g.kind, repeat); - return constructLBR(proto, triggers, cc, rm); + CastleProto proto(g.kind, repeat); + return constructLBR(proto, triggers, cc, rm); } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h index c181dbb9e7..3fa67dd918 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Large Bounded Repeat (LBR) engine build code. 
*/ @@ -35,7 +35,7 @@ #define NG_LBR_H #include "ue2common.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include <memory> #include <vector> @@ -47,24 +47,24 @@ namespace ue2 { class CharReach; class NGHolder; class ReportManager; -struct CastleProto; +struct CastleProto; struct CompileContext; struct Grey; /** \brief Construct an LBR engine from the given graph \p g. */ -bytecode_ptr<NFA> +bytecode_ptr<NFA> constructLBR(const NGHolder &g, const std::vector<std::vector<CharReach>> &triggers, - const CompileContext &cc, const ReportManager &rm); + const CompileContext &cc, const ReportManager &rm); -/** - * \brief Construct an LBR engine from the given CastleProto, which should - * contain only one repeat. - */ -bytecode_ptr<NFA> -constructLBR(const CastleProto &proto, +/** + * \brief Construct an LBR engine from the given CastleProto, which should + * contain only one repeat. + */ +bytecode_ptr<NFA> +constructLBR(const CastleProto &proto, const std::vector<std::vector<CharReach>> &triggers, - const CompileContext &cc, const ReportManager &rm); + const CompileContext &cc, const ReportManager &rm); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp index 2f0a55eab9..87756df493 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp @@ -26,11 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Limex NFA construction code. 
*/ - + #include "ng_limex.h" #include "grey.h" @@ -52,21 +52,21 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/report_manager.h" -#include "util/flat_containers.h" +#include "util/report_manager.h" +#include "util/flat_containers.h" #include "util/verify_types.h" -#include <algorithm> +#include <algorithm> #include <map> -#include <unordered_map> -#include <unordered_set> +#include <unordered_map> +#include <unordered_set> #include <vector> -#include <boost/range/adaptor/map.hpp> - +#include <boost/range/adaptor/map.hpp> + using namespace std; -using boost::adaptors::map_values; -using boost::adaptors::map_keys; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { @@ -75,39 +75,39 @@ namespace ue2 { // Only used in assertions. static bool sanityCheckGraph(const NGHolder &g, - const unordered_map<NFAVertex, u32> &state_ids) { - unordered_set<u32> seen_states; + const unordered_map<NFAVertex, u32> &state_ids) { + unordered_set<u32> seen_states; for (auto v : vertices_range(g)) { // Non-specials should have non-empty reachability. if (!is_special(v, g)) { if (g[v].char_reach.none()) { - DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index); + DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index); return false; } } - // Vertices with edges to accept or acceptEod must have reports and - // other vertices must not have them. + // Vertices with edges to accept or acceptEod must have reports and + // other vertices must not have them. 
if (is_match_vertex(v, g) && v != g.accept) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index); + DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index); return false; } - } else if (!g[v].reports.empty()) { - DEBUG_PRINTF("vertex %zu has reports but no accept edge\n", - g[v].index); - return false; + } else if (!g[v].reports.empty()) { + DEBUG_PRINTF("vertex %zu has reports but no accept edge\n", + g[v].index); + return false; } // Participant vertices should have distinct state indices. if (!contains(state_ids, v)) { - DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index); + DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index); return false; } u32 s = state_ids.at(v); if (s != NO_STATE && !seen_states.insert(s).second) { - DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s); + DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s); return false; } } @@ -117,20 +117,20 @@ bool sanityCheckGraph(const NGHolder &g, #endif static -unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g, - const vector<BoundedRepeatData> &repeats) { - auto squashMap = findSquashers(g); +unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g, + const vector<BoundedRepeatData> &repeats) { + auto squashMap = findSquashers(g); filterSquashers(g, squashMap); /* We also filter out the cyclic states representing bounded repeats, as - * they are not really cyclic -- they may turn off unexpectedly. */ + * they are not really cyclic -- they may turn off unexpectedly. 
*/ for (const auto &br : repeats) { - if (br.repeatMax.is_finite()) { - squashMap.erase(br.cyclic); - } + if (br.repeatMax.is_finite()) { + squashMap.erase(br.cyclic); + } } - - return squashMap; + + return squashMap; } /** @@ -153,315 +153,315 @@ void dropRedundantStartEdges(NGHolder &g) { } static -CharReach calcTopVertexReach(const flat_set<u32> &tops, - const map<u32, CharReach> &top_reach) { - CharReach top_cr; - for (u32 t : tops) { +CharReach calcTopVertexReach(const flat_set<u32> &tops, + const map<u32, CharReach> &top_reach) { + CharReach top_cr; + for (u32 t : tops) { if (contains(top_reach, t)) { - top_cr |= top_reach.at(t); + top_cr |= top_reach.at(t); } else { top_cr = CharReach::dot(); - break; - } - } - return top_cr; -} - -static -NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops, - const flat_set<NFAVertex> &succs, - const map<u32, CharReach> &top_reach) { - assert(!succs.empty()); - assert(!tops.empty()); - - bool reporter = false; - - NFAVertex u = add_vertex(g[g.start], g); - CharReach top_cr = calcTopVertexReach(tops, top_reach); - g[u].char_reach = top_cr; - - for (auto v : succs) { - if (v == g.accept || v == g.acceptEod) { - reporter = true; + break; } - add_edge(u, v, g); - } - - // Only retain reports (which we copied on add_vertex above) for new top - // vertices connected to accepts. 
- if (!reporter) { - g[u].reports.clear(); - } - - return u; -} - -static -void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs, - const map<NFAVertex, flat_set<u32>> &succ_tops, - flat_set<u32> *picked_tops, - flat_set<NFAVertex> *picked_succs) { - /* pick top or vertex we want to handle */ - if (top_succs.size() < succ_tops.size()) { - auto best = top_succs.end(); - for (auto it = top_succs.begin(); it != top_succs.end(); ++it) { - if (best == top_succs.end() - || it->second.size() < best->second.size()) { - best = it; + } + return top_cr; +} + +static +NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops, + const flat_set<NFAVertex> &succs, + const map<u32, CharReach> &top_reach) { + assert(!succs.empty()); + assert(!tops.empty()); + + bool reporter = false; + + NFAVertex u = add_vertex(g[g.start], g); + CharReach top_cr = calcTopVertexReach(tops, top_reach); + g[u].char_reach = top_cr; + + for (auto v : succs) { + if (v == g.accept || v == g.acceptEod) { + reporter = true; + } + add_edge(u, v, g); + } + + // Only retain reports (which we copied on add_vertex above) for new top + // vertices connected to accepts. 
+ if (!reporter) { + g[u].reports.clear(); + } + + return u; +} + +static +void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs, + const map<NFAVertex, flat_set<u32>> &succ_tops, + flat_set<u32> *picked_tops, + flat_set<NFAVertex> *picked_succs) { + /* pick top or vertex we want to handle */ + if (top_succs.size() < succ_tops.size()) { + auto best = top_succs.end(); + for (auto it = top_succs.begin(); it != top_succs.end(); ++it) { + if (best == top_succs.end() + || it->second.size() < best->second.size()) { + best = it; } } - assert(best != top_succs.end()); - assert(!best->second.empty()); /* should already been pruned */ - - *picked_tops = { best->first }; - *picked_succs = best->second; - } else { - auto best = succ_tops.end(); - for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) { - /* have to worry about determinism for this one */ - if (best == succ_tops.end() - || it->second.size() < best->second.size() - || (it->second.size() == best->second.size() - && it->second < best->second)) { - best = it; + assert(best != top_succs.end()); + assert(!best->second.empty()); /* should already been pruned */ + + *picked_tops = { best->first }; + *picked_succs = best->second; + } else { + auto best = succ_tops.end(); + for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) { + /* have to worry about determinism for this one */ + if (best == succ_tops.end() + || it->second.size() < best->second.size() + || (it->second.size() == best->second.size() + && it->second < best->second)) { + best = it; } } - assert(best != succ_tops.end()); - assert(!best->second.empty()); /* should already been pruned */ - - *picked_succs = { best->first }; - *picked_tops = best->second; - } -} - -static -void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, - const map<u32, flat_set<NFAVertex>> &top_succs, - const map<NFAVertex, flat_set<u32>> &succ_tops, - flat_set<u32> &picked_tops, - flat_set<NFAVertex> &picked_succs) { - 
NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */ - const auto &cand_tops = succ_tops.at(v); - - for (u32 t : cand_tops) { - if (!contains(unhandled_top_succs, t)) { - continue; - } - if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) { - continue; /* not adding any useful work that hasn't already been - * done */ - } - if (!is_subset_of(picked_succs, top_succs.at(t))) { - continue; /* will not form a cbs */ - } - picked_tops.insert(t); - } -} - -static -void expandCbsBySuccs(const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, - const map<u32, flat_set<NFAVertex>> &top_succs, - const map<NFAVertex, flat_set<u32>> &succ_tops, - flat_set<u32> &picked_tops, - flat_set<NFAVertex> &picked_succs) { - u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */ - const auto &cand_succs = top_succs.at(t); - - for (NFAVertex v : cand_succs) { - if (!contains(unhandled_succ_tops, v)) { - continue; - } - if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) { - continue; /* not adding any useful work that hasn't already been - * done */ - } - if (!is_subset_of(picked_tops, succ_tops.at(v))) { - continue; /* will not form a cbs */ - } - picked_succs.insert(v); - } -} - -/* See if we can expand the complete bipartite subgraph (cbs) specified by the - * picked tops/succs by adding more to either of the tops or succs. 
- */ -static -void expandTopSuccCbs(const map<u32, flat_set<NFAVertex>> &top_succs, - const map<NFAVertex, flat_set<u32>> &succ_tops, - const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, - const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, - flat_set<u32> &picked_tops, - flat_set<NFAVertex> &picked_succs) { - /* Note: all picked (tops|succs) are equivalent */ - - /* Try to expand first (as we are more likely to succeed) on the side - * with fewest remaining things to be handled */ - - if (unhandled_top_succs.size() < unhandled_succ_tops.size()) { - expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, - picked_tops, picked_succs); - expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, - picked_tops, picked_succs); - } else { - expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, - picked_tops, picked_succs); - expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, - picked_tops, picked_succs); - } -} - -static -void markTopSuccAsHandled(NFAVertex start_v, - const flat_set<u32> &handled_tops, - const flat_set<NFAVertex> &handled_succs, - map<u32, set<NFAVertex>> &tops_out, - map<u32, flat_set<NFAVertex>> &unhandled_top_succs, - map<NFAVertex, flat_set<u32>> &unhandled_succ_tops) { - for (u32 t : handled_tops) { - tops_out[t].insert(start_v); - assert(contains(unhandled_top_succs, t)); - erase_all(&unhandled_top_succs[t], handled_succs); - if (unhandled_top_succs[t].empty()) { - unhandled_top_succs.erase(t); - } - } - - for (NFAVertex v : handled_succs) { - assert(contains(unhandled_succ_tops, v)); - erase_all(&unhandled_succ_tops[v], handled_tops); - if (unhandled_succ_tops[v].empty()) { - unhandled_succ_tops.erase(v); - } - } -} - -static -void attemptToUseAsStart(const NGHolder &g, NFAVertex u, - const map<u32, CharReach> &top_reach, - map<u32, flat_set<NFAVertex>> &unhandled_top_succs, - map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, - map<u32, set<NFAVertex>> &tops_out) { - flat_set<u32> top_inter = 
unhandled_succ_tops.at(u); - flat_set<NFAVertex> succs; - for (NFAVertex v : adjacent_vertices_range(u, g)) { - if (!contains(unhandled_succ_tops, v)) { - return; - } - /* if it has vacuous reports we need to make sure that the report sets - * are the same */ - if ((v == g.accept || v == g.acceptEod) - && g[g.start].reports != g[u].reports) { - DEBUG_PRINTF("different report behaviour\n"); - return; - } - const flat_set<u32> &v_tops = unhandled_succ_tops.at(v); - flat_set<u32> new_inter; - auto ni_inserter = inserter(new_inter, new_inter.end()); - set_intersection(top_inter.begin(), top_inter.end(), - v_tops.begin(), v_tops.end(), ni_inserter); - top_inter = std::move(new_inter); - succs.insert(v); - } - - if (top_inter.empty()) { - return; - } - - auto top_cr = calcTopVertexReach(top_inter, top_reach); - if (!top_cr.isSubsetOf(g[u].char_reach)) { - return; - } - - DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index); - markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, - unhandled_succ_tops); -} - -/* We may have cases where a top triggers something that starts with a .* (or - * similar state). In these cases we can make use of that state as a start - * state. 
- */ -static -void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach, - map<u32, flat_set<NFAVertex>> &unhandled_top_succs, - map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, - map<u32, set<NFAVertex>> &tops_out) { - /* create list of candidates first, to avoid issues of iter invalidation */ - DEBUG_PRINTF("attempting to reuse vertices for top starts\n"); - vector<NFAVertex> cand_starts; - for (NFAVertex u : unhandled_succ_tops | map_keys) { - if (hasSelfLoop(u, g)) { - cand_starts.push_back(u); - } - } - - for (NFAVertex u : cand_starts) { - if (!contains(unhandled_succ_tops, u)) { - continue; - } - attemptToUseAsStart(g, u, top_reach, unhandled_top_succs, - unhandled_succ_tops, tops_out); - } -} - -static -void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out, - const map<u32, CharReach> &top_reach) { - /* Ideally, we want to add the smallest number of states to the graph for - * tops to turn on so that they can accurately trigger their successors. - * - * The relationships between tops and their successors forms a bipartite - * graph. Finding the optimal number of start states to add is equivalent to - * finding a minimal biclique coverings. Unfortunately, this is known to be - * NP-complete. - * - * Given this, we will just do something simple to avoid creating something - * truly wasteful: - * 1) Try to find any cyclic states which can act as their own start states - * 2) Pick a top or a succ to create a start state for and then try to find - * the largest complete bipartite subgraph that it is part of. 
- */ - - map<u32, flat_set<NFAVertex>> top_succs; - map<NFAVertex, flat_set<u32>> succ_tops; - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - for (u32 t : g[e].tops) { - top_succs[t].insert(v); - succ_tops[v].insert(t); - } - } - - auto unhandled_top_succs = top_succs; - auto unhandled_succ_tops = succ_tops; - - reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops, - tops_out); - - /* Note: there may be successors which are equivalent (in terms of - top-triggering), it may be more efficient to discover this and treat them - as a unit. TODO */ - - while (!unhandled_succ_tops.empty()) { - assert(!unhandled_top_succs.empty()); - DEBUG_PRINTF("creating top start vertex\n"); - flat_set<u32> u_tops; - flat_set<NFAVertex> u_succs; - pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops, - &u_tops, &u_succs); - - expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs, - unhandled_succ_tops, u_tops, u_succs); - - /* create start vertex to handle this top/succ combination */ - NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach); - - /* update maps */ - markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs, - unhandled_succ_tops); - } - assert(unhandled_top_succs.empty()); - + assert(best != succ_tops.end()); + assert(!best->second.empty()); /* should already been pruned */ + + *picked_succs = { best->first }; + *picked_tops = best->second; + } +} + +static +void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, + const map<u32, flat_set<NFAVertex>> &top_succs, + const map<NFAVertex, flat_set<u32>> &succ_tops, + flat_set<u32> &picked_tops, + flat_set<NFAVertex> &picked_succs) { + NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */ + const auto &cand_tops = succ_tops.at(v); + + for (u32 t : cand_tops) { + if (!contains(unhandled_top_succs, t)) { + continue; + } + if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) { + 
continue; /* not adding any useful work that hasn't already been + * done */ + } + if (!is_subset_of(picked_succs, top_succs.at(t))) { + continue; /* will not form a cbs */ + } + picked_tops.insert(t); + } +} + +static +void expandCbsBySuccs(const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, + const map<u32, flat_set<NFAVertex>> &top_succs, + const map<NFAVertex, flat_set<u32>> &succ_tops, + flat_set<u32> &picked_tops, + flat_set<NFAVertex> &picked_succs) { + u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */ + const auto &cand_succs = top_succs.at(t); + + for (NFAVertex v : cand_succs) { + if (!contains(unhandled_succ_tops, v)) { + continue; + } + if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) { + continue; /* not adding any useful work that hasn't already been + * done */ + } + if (!is_subset_of(picked_tops, succ_tops.at(v))) { + continue; /* will not form a cbs */ + } + picked_succs.insert(v); + } +} + +/* See if we can expand the complete bipartite subgraph (cbs) specified by the + * picked tops/succs by adding more to either of the tops or succs. 
+ */ +static +void expandTopSuccCbs(const map<u32, flat_set<NFAVertex>> &top_succs, + const map<NFAVertex, flat_set<u32>> &succ_tops, + const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, + const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, + flat_set<u32> &picked_tops, + flat_set<NFAVertex> &picked_succs) { + /* Note: all picked (tops|succs) are equivalent */ + + /* Try to expand first (as we are more likely to succeed) on the side + * with fewest remaining things to be handled */ + + if (unhandled_top_succs.size() < unhandled_succ_tops.size()) { + expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, + picked_tops, picked_succs); + expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, + picked_tops, picked_succs); + } else { + expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, + picked_tops, picked_succs); + expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, + picked_tops, picked_succs); + } +} + +static +void markTopSuccAsHandled(NFAVertex start_v, + const flat_set<u32> &handled_tops, + const flat_set<NFAVertex> &handled_succs, + map<u32, set<NFAVertex>> &tops_out, + map<u32, flat_set<NFAVertex>> &unhandled_top_succs, + map<NFAVertex, flat_set<u32>> &unhandled_succ_tops) { + for (u32 t : handled_tops) { + tops_out[t].insert(start_v); + assert(contains(unhandled_top_succs, t)); + erase_all(&unhandled_top_succs[t], handled_succs); + if (unhandled_top_succs[t].empty()) { + unhandled_top_succs.erase(t); + } + } + + for (NFAVertex v : handled_succs) { + assert(contains(unhandled_succ_tops, v)); + erase_all(&unhandled_succ_tops[v], handled_tops); + if (unhandled_succ_tops[v].empty()) { + unhandled_succ_tops.erase(v); + } + } +} + +static +void attemptToUseAsStart(const NGHolder &g, NFAVertex u, + const map<u32, CharReach> &top_reach, + map<u32, flat_set<NFAVertex>> &unhandled_top_succs, + map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, + map<u32, set<NFAVertex>> &tops_out) { + flat_set<u32> top_inter = 
unhandled_succ_tops.at(u); + flat_set<NFAVertex> succs; + for (NFAVertex v : adjacent_vertices_range(u, g)) { + if (!contains(unhandled_succ_tops, v)) { + return; + } + /* if it has vacuous reports we need to make sure that the report sets + * are the same */ + if ((v == g.accept || v == g.acceptEod) + && g[g.start].reports != g[u].reports) { + DEBUG_PRINTF("different report behaviour\n"); + return; + } + const flat_set<u32> &v_tops = unhandled_succ_tops.at(v); + flat_set<u32> new_inter; + auto ni_inserter = inserter(new_inter, new_inter.end()); + set_intersection(top_inter.begin(), top_inter.end(), + v_tops.begin(), v_tops.end(), ni_inserter); + top_inter = std::move(new_inter); + succs.insert(v); + } + + if (top_inter.empty()) { + return; + } + + auto top_cr = calcTopVertexReach(top_inter, top_reach); + if (!top_cr.isSubsetOf(g[u].char_reach)) { + return; + } + + DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index); + markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, + unhandled_succ_tops); +} + +/* We may have cases where a top triggers something that starts with a .* (or + * similar state). In these cases we can make use of that state as a start + * state. 
+ */ +static +void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach, + map<u32, flat_set<NFAVertex>> &unhandled_top_succs, + map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, + map<u32, set<NFAVertex>> &tops_out) { + /* create list of candidates first, to avoid issues of iter invalidation */ + DEBUG_PRINTF("attempting to reuse vertices for top starts\n"); + vector<NFAVertex> cand_starts; + for (NFAVertex u : unhandled_succ_tops | map_keys) { + if (hasSelfLoop(u, g)) { + cand_starts.push_back(u); + } + } + + for (NFAVertex u : cand_starts) { + if (!contains(unhandled_succ_tops, u)) { + continue; + } + attemptToUseAsStart(g, u, top_reach, unhandled_top_succs, + unhandled_succ_tops, tops_out); + } +} + +static +void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out, + const map<u32, CharReach> &top_reach) { + /* Ideally, we want to add the smallest number of states to the graph for + * tops to turn on so that they can accurately trigger their successors. + * + * The relationships between tops and their successors forms a bipartite + * graph. Finding the optimal number of start states to add is equivalent to + * finding a minimal biclique coverings. Unfortunately, this is known to be + * NP-complete. + * + * Given this, we will just do something simple to avoid creating something + * truly wasteful: + * 1) Try to find any cyclic states which can act as their own start states + * 2) Pick a top or a succ to create a start state for and then try to find + * the largest complete bipartite subgraph that it is part of. 
+ */ + + map<u32, flat_set<NFAVertex>> top_succs; + map<NFAVertex, flat_set<u32>> succ_tops; + for (const auto &e : out_edges_range(g.start, g)) { + NFAVertex v = target(e, g); + for (u32 t : g[e].tops) { + top_succs[t].insert(v); + succ_tops[v].insert(t); + } + } + + auto unhandled_top_succs = top_succs; + auto unhandled_succ_tops = succ_tops; + + reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops, + tops_out); + + /* Note: there may be successors which are equivalent (in terms of + top-triggering), it may be more efficient to discover this and treat them + as a unit. TODO */ + + while (!unhandled_succ_tops.empty()) { + assert(!unhandled_top_succs.empty()); + DEBUG_PRINTF("creating top start vertex\n"); + flat_set<u32> u_tops; + flat_set<NFAVertex> u_succs; + pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops, + &u_tops, &u_succs); + + expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs, + unhandled_succ_tops, u_tops, u_succs); + + /* create start vertex to handle this top/succ combination */ + NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach); + + /* update maps */ + markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs, + unhandled_succ_tops); + } + assert(unhandled_top_succs.empty()); + // We are completely replacing the start vertex, so clear its reports. 
clear_out_edges(g.start, g); add_edge(g.start, g.startDs, g); @@ -471,7 +471,7 @@ void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out, static set<NFAVertex> findZombies(const NGHolder &h, const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - const unordered_map<NFAVertex, u32> &state_ids, + const unordered_map<NFAVertex, u32> &state_ids, const CompileContext &cc) { set<NFAVertex> zombies; if (!cc.grey.allowZombies) { @@ -484,7 +484,7 @@ set<NFAVertex> findZombies(const NGHolder &h, } if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) { - DEBUG_PRINTF("cannot be made undead - bad reports\n"); + DEBUG_PRINTF("cannot be made undead - bad reports\n"); return zombies; } @@ -519,7 +519,7 @@ set<NFAVertex> findZombies(const NGHolder &h, } static -void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) { +void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) { vector<NFAVertex> ordering; for (auto &e : state_ids) { if (e.second == NO_STATE) { @@ -572,9 +572,9 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, const map<u32, u32> &fixed_depth_tops, const map<u32, vector<vector<CharReach>>> &triggers, bool impl_test_only, const CompileContext &cc, - unordered_map<NFAVertex, u32> &state_ids, - vector<BoundedRepeatData> &repeats, - map<u32, set<NFAVertex>> &tops) { + unordered_map<NFAVertex, u32> &state_ids, + vector<BoundedRepeatData> &repeats, + map<u32, set<NFAVertex>> &tops) { assert(is_triggered(h_in) || fixed_depth_tops.empty()); unique_ptr<NGHolder> h = cloneHolder(h_in); @@ -584,19 +584,19 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, impl_test_only, cc.grey); // If we're building a rose/suffix, do the top dance. 
- flat_set<NFAVertex> topVerts; + flat_set<NFAVertex> topVerts; if (is_triggered(*h)) { makeTopStates(*h, tops, findTopReach(triggers)); - - for (const auto &vv : tops | map_values) { - insert(&topVerts, vv); - } + + for (const auto &vv : tops | map_values) { + insert(&topVerts, vv); + } } dropRedundantStartEdges(*h); // Do state numbering - state_ids = numberStates(*h, topVerts); + state_ids = numberStates(*h, topVerts); // In debugging, we sometimes like to reverse the state numbering to stress // the NFA construction code. @@ -609,47 +609,47 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, } static -void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { - for (const auto &v : vertices_range(h)) { - auto &reports = h[v].reports; - if (reports.empty()) { - continue; - } - auto old_reports = reports; - reports.clear(); - for (const ReportID &id : old_reports) { - u32 program = rm.getProgramOffset(id); - reports.insert(program); - } - DEBUG_PRINTF("vertex %zu: remapped reports {%s} to programs {%s}\n", - h[v].index, as_string_list(old_reports).c_str(), - as_string_list(reports).c_str()); - } -} - -static -bytecode_ptr<NFA> +void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { + for (const auto &v : vertices_range(h)) { + auto &reports = h[v].reports; + if (reports.empty()) { + continue; + } + auto old_reports = reports; + reports.clear(); + for (const ReportID &id : old_reports) { + u32 program = rm.getProgramOffset(id); + reports.insert(program); + } + DEBUG_PRINTF("vertex %zu: remapped reports {%s} to programs {%s}\n", + h[v].index, as_string_list(old_reports).c_str(), + as_string_list(reports).c_str()); + } +} + +static +bytecode_ptr<NFA> constructNFA(const NGHolder &h_in, const ReportManager *rm, const map<u32, u32> &fixed_depth_tops, const map<u32, vector<vector<CharReach>>> &triggers, bool compress_state, bool do_accel, bool impl_test_only, bool &fast, u32 hint, const CompileContext &cc) { - if 
(!has_managed_reports(h_in)) { + if (!has_managed_reports(h_in)) { rm = nullptr; } else { assert(rm); } - unordered_map<NFAVertex, u32> state_ids; + unordered_map<NFAVertex, u32> state_ids; vector<BoundedRepeatData> repeats; - map<u32, set<NFAVertex>> tops; + map<u32, set<NFAVertex>> tops; unique_ptr<NGHolder> h = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. - u32 numStates = countStates(state_ids); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -660,12 +660,12 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); } - unordered_map<NFAVertex, NFAStateSet> reportSquashMap; - unordered_map<NFAVertex, NFAStateSet> squashMap; + unordered_map<NFAVertex, NFAStateSet> reportSquashMap; + unordered_map<NFAVertex, NFAStateSet> squashMap; // build map of squashed and squashers if (cc.grey.squashNFA) { - squashMap = findSquashStates(*h, repeats); + squashMap = findSquashStates(*h, repeats); if (rm && cc.grey.highlanderSquash) { reportSquashMap = findHighlanderSquashers(*h, *rm); @@ -674,11 +674,11 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc); - if (has_managed_reports(*h)) { - assert(rm); - remapReportsToPrograms(*h, *rm); - } - + if (has_managed_reports(*h)) { + assert(rm); + remapReportsToPrograms(*h, *rm); + } + if (!cc.streaming || !cc.grey.compressNFAState) { compress_state = false; } @@ -687,7 +687,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, zombies, do_accel, compress_state, fast, hint, cc); } -bytecode_ptr<NFA> +bytecode_ptr<NFA> constructNFA(const NGHolder &h_in, const ReportManager *rm, 
const map<u32, u32> &fixed_depth_tops, const map<u32, vector<vector<CharReach>>> &triggers, @@ -701,7 +701,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, #ifndef RELEASE_BUILD // Variant that allows a hint to be specified. -bytecode_ptr<NFA> +bytecode_ptr<NFA> constructNFA(const NGHolder &h_in, const ReportManager *rm, const map<u32, u32> &fixed_depth_tops, const map<u32, vector<vector<CharReach>>> &triggers, @@ -714,19 +714,19 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, #endif // RELEASE_BUILD static -bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint, - const CompileContext &cc) { +bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint, + const CompileContext &cc) { // Make a mutable copy of the graph that we can renumber etc. NGHolder h; cloneHolder(h, h_in); assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ // Do state numbering. - auto state_ids = numberStates(h, {}); + auto state_ids = numberStates(h, {}); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. 
- u32 numStates = countStates(state_ids); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -734,47 +734,47 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(sanityCheckGraph(h, state_ids)); - map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */ + map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */ set<NFAVertex> zombies; vector<BoundedRepeatData> repeats; - unordered_map<NFAVertex, NFAStateSet> reportSquashMap; - unordered_map<NFAVertex, NFAStateSet> squashMap; + unordered_map<NFAVertex, NFAStateSet> reportSquashMap; + unordered_map<NFAVertex, NFAStateSet> squashMap; UNUSED bool fast = false; return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, false, false, fast, hint, cc); } -bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, - const CompileContext &cc) { +bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, + const CompileContext &cc) { u32 hint = INVALID_NFA; // no hint return constructReversedNFA_i(h_in, hint, cc); } #ifndef RELEASE_BUILD // Variant that allows a hint to be specified. -bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint, - const CompileContext &cc) { +bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint, + const CompileContext &cc) { return constructReversedNFA_i(h_in, hint, cc); } #endif // RELEASE_BUILD u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, const CompileContext &cc) { - if (!cc.grey.allowLimExNFA) { - return false; - } - - assert(!can_never_match(g)); - + if (!cc.grey.allowLimExNFA) { + return false; + } + + assert(!can_never_match(g)); + // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. Note that top masks can generate extra states, so we account for // those here too. 
- if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { + if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { return true; } - if (!has_managed_reports(g)) { + if (!has_managed_reports(g)) { rm = nullptr; } else { assert(rm); @@ -789,14 +789,14 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can * implement it as an NFA. */ - unordered_map<NFAVertex, u32> state_ids; + unordered_map<NFAVertex, u32> state_ids; vector<BoundedRepeatData> repeats; - map<u32, set<NFAVertex>> tops; + map<u32, set<NFAVertex>> tops; unique_ptr<NGHolder> h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); assert(h); - u32 numStates = countStates(state_ids); + u32 numStates = countStates(state_ids); if (numStates <= NFA_MAX_STATES) { return numStates; } @@ -813,7 +813,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm removeRedundancy(g, som); - if (rm && has_managed_reports(g)) { + if (rm && has_managed_reports(g)) { pruneHighlanderDominated(g, *rm); } @@ -826,7 +826,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const CompileContext &cc) { - if (!has_managed_reports(g)) { + if (!has_managed_reports(g)) { rm = nullptr; } else { assert(rm); @@ -836,14 +836,14 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const map<u32, u32> fixed_depth_tops; // empty const map<u32, vector<vector<CharReach>>> triggers; // empty - unordered_map<NFAVertex, u32> state_ids; + unordered_map<NFAVertex, u32> state_ids; vector<BoundedRepeatData> repeats; - map<u32, set<NFAVertex>> tops; + map<u32, set<NFAVertex>> tops; unique_ptr<NGHolder> h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); - if (!h || countStates(state_ids) > NFA_MAX_STATES) { + if (!h || 
countStates(state_ids) > NFA_MAX_STATES) { DEBUG_PRINTF("not constructible\n"); return NFA_MAX_ACCEL_STATES + 1; } @@ -852,8 +852,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, // Should have no bearing on accel calculation, so we leave these empty. const set<NFAVertex> zombies; - unordered_map<NFAVertex, NFAStateSet> reportSquashMap; - unordered_map<NFAVertex, NFAStateSet> squashMap; + unordered_map<NFAVertex, NFAStateSet> reportSquashMap; + unordered_map<NFAVertex, NFAStateSet> squashMap; return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, cc); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h index 7eba2eff06..7c9de044d0 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Limex NFA construction code. */ @@ -36,7 +36,7 @@ #include "ue2common.h" #include "som/som.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include <map> #include <memory> @@ -52,8 +52,8 @@ class NGHolder; class ReportManager; struct CompileContext; -/** - * \brief Determine if the given graph is implementable as an NFA. +/** + * \brief Determine if the given graph is implementable as an NFA. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -64,14 +64,14 @@ struct CompileContext; u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, const CompileContext &cc); -/** - * \brief Late-stage graph reductions. +/** + * \brief Late-stage graph reductions. * * This will call \ref removeRedundancy and apply its changes to the given - * holder only if it is implementable afterwards. 
- */ -void reduceImplementableGraph(NGHolder &g, som_type som, - const ReportManager *rm, + * holder only if it is implementable afterwards. + */ +void reduceImplementableGraph(NGHolder &g, som_type som, + const ReportManager *rm, const CompileContext &cc); /** @@ -84,8 +84,8 @@ void reduceImplementableGraph(NGHolder &g, som_type som, u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const CompileContext &cc); -/** - * \brief Construct an NFA from the given graph. +/** + * \brief Construct an NFA from the given graph. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -96,25 +96,25 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, * Note: this variant of the function allows a model to be specified with the * \a hint parameter. */ -bytecode_ptr<NFA> +bytecode_ptr<NFA> constructNFA(const NGHolder &g, const ReportManager *rm, const std::map<u32, u32> &fixed_depth_tops, const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, bool compress_state, bool &fast, const CompileContext &cc); -/** - * \brief Build a reverse NFA from the graph given, which should have already +/** + * \brief Build a reverse NFA from the graph given, which should have already * been reversed. * * Used for reverse NFAs used in SOM mode. */ -bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, - const CompileContext &cc); +bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, + const CompileContext &cc); #ifndef RELEASE_BUILD -/** - * \brief Construct an NFA (with model type hint) from the given graph. +/** + * \brief Construct an NFA (with model type hint) from the given graph. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. 
@@ -125,20 +125,20 @@ bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, * Note: this variant of the function allows a model to be specified with the * \a hint parameter. */ -bytecode_ptr<NFA> +bytecode_ptr<NFA> constructNFA(const NGHolder &g, const ReportManager *rm, const std::map<u32, u32> &fixed_depth_tops, const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, bool compress_state, bool &fast, u32 hint, const CompileContext &cc); -/** - * \brief Build a reverse NFA (with model type hint) from the graph given, +/** + * \brief Build a reverse NFA (with model type hint) from the graph given, * which should have already been reversed. * * Used for reverse NFAs used in SOM mode. */ -bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, u32 hint, - const CompileContext &cc); +bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, u32 hint, + const CompileContext &cc); #endif // RELEASE_BUILD diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp index f1f829f2c1..271e14fb9c 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,20 +40,20 @@ #include "util/bitutils.h" // for CASE_CLEAR #include "util/charreach.h" -#include "util/compile_context.h" +#include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/small_vector.h" -#include "util/target_info.h" +#include "util/small_vector.h" +#include "util/target_info.h" #include <algorithm> #include <map> -#include <boost/range/adaptor/map.hpp> - +#include <boost/range/adaptor/map.hpp> + using namespace std; 
-using boost::adaptors::map_keys; +using boost::adaptors::map_keys; namespace ue2 { @@ -72,7 +72,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, } const CharReach &acr = g[v].char_reach; - DEBUG_PRINTF("checking %zu\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) { DEBUG_PRINTF("bad reach %zu\n", acr.count()); @@ -89,7 +89,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, next_preds->insert(v); insert(next_cands, adjacent_vertices(v, g)); - DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index); + DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index); friends->insert(v); next_cand:; } @@ -136,321 +136,321 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, } static -void findPaths(const NGHolder &g, NFAVertex v, - const vector<CharReach> &refined_cr, - vector<vector<CharReach>> *paths, - const flat_set<NFAVertex> &forbidden, u32 depth) { - static const u32 MAGIC_TOO_WIDE_NUMBER = 16; - if (!depth) { - paths->push_back({}); - return; - } - if (v == g.accept || v == g.acceptEod) { - paths->push_back({}); - if (!generates_callbacks(g) || v == g.acceptEod) { - paths->back().push_back(CharReach()); /* red tape options */ - } - return; - } - - /* for the escape 'literals' we want to use the minimal cr so we - * can be more selective */ - const CharReach &cr = refined_cr[g[v].index]; - - if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER - || hasSelfLoop(v, g)) { - /* give up on pushing past this point */ - paths->push_back({cr}); - return; - } - - vector<vector<CharReach>> curr; +void findPaths(const NGHolder &g, NFAVertex v, + const vector<CharReach> &refined_cr, + vector<vector<CharReach>> *paths, + const flat_set<NFAVertex> &forbidden, u32 depth) { + static const u32 MAGIC_TOO_WIDE_NUMBER = 16; + if (!depth) { + paths->push_back({}); + return; + } + if (v == g.accept || v == g.acceptEod) { + paths->push_back({}); + if (!generates_callbacks(g) || 
v == g.acceptEod) { + paths->back().push_back(CharReach()); /* red tape options */ + } + return; + } + + /* for the escape 'literals' we want to use the minimal cr so we + * can be more selective */ + const CharReach &cr = refined_cr[g[v].index]; + + if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER + || hasSelfLoop(v, g)) { + /* give up on pushing past this point */ + paths->push_back({cr}); + return; + } + + vector<vector<CharReach>> curr; for (auto w : adjacent_vertices_range(v, g)) { - if (contains(forbidden, w)) { - /* path has looped back to one of the active+boring acceleration - * states. We can ignore this path if we have sufficient back- - * off. */ + if (contains(forbidden, w)) { + /* path has looped back to one of the active+boring acceleration + * states. We can ignore this path if we have sufficient back- + * off. */ paths->push_back({cr}); continue; } - - u32 new_depth = depth - 1; - do { - curr.clear(); - findPaths(g, w, refined_cr, &curr, forbidden, new_depth); - } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER); - - for (auto &c : curr) { - c.push_back(cr); - paths->push_back(std::move(c)); + + u32 new_depth = depth - 1; + do { + curr.clear(); + findPaths(g, w, refined_cr, &curr, forbidden, new_depth); + } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER); + + for (auto &c : curr) { + c.push_back(cr); + paths->push_back(std::move(c)); } } } -namespace { -struct SAccelScheme { - SAccelScheme(CharReach cr_in, u32 offset_in) - : cr(std::move(cr_in)), offset(offset_in) { - assert(offset <= MAX_ACCEL_DEPTH); +namespace { +struct SAccelScheme { + SAccelScheme(CharReach cr_in, u32 offset_in) + : cr(std::move(cr_in)), offset(offset_in) { + assert(offset <= MAX_ACCEL_DEPTH); } - SAccelScheme() {} - - bool operator<(const SAccelScheme &b) const { - const SAccelScheme &a = *this; + SAccelScheme() {} - const size_t a_count = cr.count(), b_count = b.cr.count(); - if (a_count != b_count) { - return a_count < b_count; + bool operator<(const 
SAccelScheme &b) const { + const SAccelScheme &a = *this; + + const size_t a_count = cr.count(), b_count = b.cr.count(); + if (a_count != b_count) { + return a_count < b_count; } - /* TODO: give bonus if one is a 'caseless' character */ - ORDER_CHECK(offset); - ORDER_CHECK(cr); + /* TODO: give bonus if one is a 'caseless' character */ + ORDER_CHECK(offset); + ORDER_CHECK(cr); return false; } - CharReach cr = CharReach::dot(); - u32 offset = MAX_ACCEL_DEPTH + 1; -}; + CharReach cr = CharReach::dot(); + u32 offset = MAX_ACCEL_DEPTH + 1; +}; } -/** - * \brief Limit on the number of (recursive) calls to findBestInternal(). - */ -static constexpr size_t MAX_FINDBEST_CALLS = 1000000; - +/** + * \brief Limit on the number of (recursive) calls to findBestInternal(). + */ +static constexpr size_t MAX_FINDBEST_CALLS = 1000000; + static -void findBestInternal(vector<vector<CharReach>>::const_iterator pb, - vector<vector<CharReach>>::const_iterator pe, - size_t *num_calls, const SAccelScheme &curr, - SAccelScheme *best) { - assert(curr.offset <= MAX_ACCEL_DEPTH); - - if (++(*num_calls) > MAX_FINDBEST_CALLS) { - DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls); - return; - } - - DEBUG_PRINTF("paths left %zu\n", pe - pb); - if (pb == pe) { - if (curr < *best) { - *best = curr; - DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n", - best->cr.count(), describeClass(best->cr).c_str(), - best->offset); - } - return; - } - - DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - - small_vector<SAccelScheme, 10> priority_path; - priority_path.reserve(pb->size()); - u32 i = 0; - for (auto p = pb->begin(); p != pb->end(); ++p, i++) { - SAccelScheme as(*p | curr.cr, max(i, curr.offset)); - if (*best < as) { - DEBUG_PRINTF("worse\n"); - continue; - } - priority_path.push_back(move(as)); - } - - sort(priority_path.begin(), priority_path.end()); - for (auto it = priority_path.begin(); it != priority_path.end(); ++it) { - auto jt = next(it); - for (; jt != priority_path.end(); 
++jt) { - if (!it->cr.isSubsetOf(jt->cr)) { - break; - } - } - priority_path.erase(next(it), jt); - DEBUG_PRINTF("||%zu\n", it->cr.count()); - } - DEBUG_PRINTF("---\n"); - - for (const SAccelScheme &in : priority_path) { - DEBUG_PRINTF("in: count %zu\n", in.cr.count()); - if (*best < in) { - DEBUG_PRINTF("worse\n"); - continue; - } - findBestInternal(pb + 1, pe, num_calls, in, best); - - if (curr.cr == best->cr) { - return; /* could only get better by offset */ - } +void findBestInternal(vector<vector<CharReach>>::const_iterator pb, + vector<vector<CharReach>>::const_iterator pe, + size_t *num_calls, const SAccelScheme &curr, + SAccelScheme *best) { + assert(curr.offset <= MAX_ACCEL_DEPTH); + + if (++(*num_calls) > MAX_FINDBEST_CALLS) { + DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls); + return; + } + + DEBUG_PRINTF("paths left %zu\n", pe - pb); + if (pb == pe) { + if (curr < *best) { + *best = curr; + DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n", + best->cr.count(), describeClass(best->cr).c_str(), + best->offset); + } + return; + } + + DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); + + small_vector<SAccelScheme, 10> priority_path; + priority_path.reserve(pb->size()); + u32 i = 0; + for (auto p = pb->begin(); p != pb->end(); ++p, i++) { + SAccelScheme as(*p | curr.cr, max(i, curr.offset)); + if (*best < as) { + DEBUG_PRINTF("worse\n"); + continue; + } + priority_path.push_back(move(as)); + } + + sort(priority_path.begin(), priority_path.end()); + for (auto it = priority_path.begin(); it != priority_path.end(); ++it) { + auto jt = next(it); + for (; jt != priority_path.end(); ++jt) { + if (!it->cr.isSubsetOf(jt->cr)) { + break; + } + } + priority_path.erase(next(it), jt); + DEBUG_PRINTF("||%zu\n", it->cr.count()); + } + DEBUG_PRINTF("---\n"); + + for (const SAccelScheme &in : priority_path) { + DEBUG_PRINTF("in: count %zu\n", in.cr.count()); + if (*best < in) { + DEBUG_PRINTF("worse\n"); + continue; + } + findBestInternal(pb + 1, pe, 
num_calls, in, best); + + if (curr.cr == best->cr) { + return; /* could only get better by offset */ + } } } static -SAccelScheme findBest(const vector<vector<CharReach>> &paths, - const CharReach &terminating) { - SAccelScheme curr(terminating, 0U); - SAccelScheme best; - size_t num_calls = 0; - findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best); - DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls); - DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n", - best.cr.count(), describeClass(best.cr).c_str(), best.offset); - return best; -} - -namespace { -struct DAccelScheme { - DAccelScheme(CharReach cr_in, u32 offset_in) - : double_cr(std::move(cr_in)), double_offset(offset_in) { - assert(double_offset <= MAX_ACCEL_DEPTH); - } - - bool operator<(const DAccelScheme &b) const { - const DAccelScheme &a = *this; - - size_t a_dcount = a.double_cr.count(); - size_t b_dcount = b.double_cr.count(); - - assert(!a.double_byte.empty() || a_dcount || a.double_offset); - assert(!b.double_byte.empty() || b_dcount || b.double_offset); - - if (a_dcount != b_dcount) { - return a_dcount < b_dcount; - } - - if (!a_dcount) { - bool cd_a = buildDvermMask(a.double_byte); - bool cd_b = buildDvermMask(b.double_byte); - if (cd_a != cd_b) { - return cd_a > cd_b; +SAccelScheme findBest(const vector<vector<CharReach>> &paths, + const CharReach &terminating) { + SAccelScheme curr(terminating, 0U); + SAccelScheme best; + size_t num_calls = 0; + findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best); + DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls); + DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n", + best.cr.count(), describeClass(best.cr).c_str(), best.offset); + return best; +} + +namespace { +struct DAccelScheme { + DAccelScheme(CharReach cr_in, u32 offset_in) + : double_cr(std::move(cr_in)), double_offset(offset_in) { + assert(double_offset <= MAX_ACCEL_DEPTH); + } + + bool operator<(const DAccelScheme 
&b) const { + const DAccelScheme &a = *this; + + size_t a_dcount = a.double_cr.count(); + size_t b_dcount = b.double_cr.count(); + + assert(!a.double_byte.empty() || a_dcount || a.double_offset); + assert(!b.double_byte.empty() || b_dcount || b.double_offset); + + if (a_dcount != b_dcount) { + return a_dcount < b_dcount; + } + + if (!a_dcount) { + bool cd_a = buildDvermMask(a.double_byte); + bool cd_b = buildDvermMask(b.double_byte); + if (cd_a != cd_b) { + return cd_a > cd_b; } } - ORDER_CHECK(double_byte.size()); - ORDER_CHECK(double_offset); + ORDER_CHECK(double_byte.size()); + ORDER_CHECK(double_offset); - /* TODO: give bonus if one is a 'caseless' character */ - ORDER_CHECK(double_byte); - ORDER_CHECK(double_cr); + /* TODO: give bonus if one is a 'caseless' character */ + ORDER_CHECK(double_byte); + ORDER_CHECK(double_cr); - return false; + return false; } - flat_set<pair<u8, u8>> double_byte; - CharReach double_cr; - u32 double_offset = 0; -}; + flat_set<pair<u8, u8>> double_byte; + CharReach double_cr; + u32 double_offset = 0; +}; } static -DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, - const CharReach &cr_2_in, u32 offset_in) { - cr_1 &= ~as.double_cr; - CharReach cr_2 = cr_2_in & ~as.double_cr; - u32 offset = offset_in; - - if (cr_1.none()) { - DEBUG_PRINTF("empty first element\n"); - ENSURE_AT_LEAST(&as.double_offset, offset); - return as; - } - - if (cr_2_in != cr_2 || cr_2.none()) { - offset = offset_in + 1; - } - - size_t two_count = cr_1.count() * cr_2.count(); - - DEBUG_PRINTF("will generate raw %zu pairs\n", two_count); - - if (!two_count) { - DEBUG_PRINTF("empty element\n"); - ENSURE_AT_LEAST(&as.double_offset, offset); - return as; - } - - if (two_count > DOUBLE_SHUFTI_LIMIT) { - if (cr_2.count() < cr_1.count()) { - as.double_cr |= cr_2; - offset = offset_in + 1; - } else { - as.double_cr |= cr_1; - } - } else { - for (auto i = cr_1.find_first(); i != CharReach::npos; - i = cr_1.find_next(i)) { - for (auto j = cr_2.find_first(); 
j != CharReach::npos; - j = cr_2.find_next(j)) { - as.double_byte.emplace(i, j); - } - } - } - - ENSURE_AT_LEAST(&as.double_offset, offset); - DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", - as.double_byte.size(), as.double_cr.count(), as.double_offset); - return as; +DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, + const CharReach &cr_2_in, u32 offset_in) { + cr_1 &= ~as.double_cr; + CharReach cr_2 = cr_2_in & ~as.double_cr; + u32 offset = offset_in; + + if (cr_1.none()) { + DEBUG_PRINTF("empty first element\n"); + ENSURE_AT_LEAST(&as.double_offset, offset); + return as; + } + + if (cr_2_in != cr_2 || cr_2.none()) { + offset = offset_in + 1; + } + + size_t two_count = cr_1.count() * cr_2.count(); + + DEBUG_PRINTF("will generate raw %zu pairs\n", two_count); + + if (!two_count) { + DEBUG_PRINTF("empty element\n"); + ENSURE_AT_LEAST(&as.double_offset, offset); + return as; + } + + if (two_count > DOUBLE_SHUFTI_LIMIT) { + if (cr_2.count() < cr_1.count()) { + as.double_cr |= cr_2; + offset = offset_in + 1; + } else { + as.double_cr |= cr_1; + } + } else { + for (auto i = cr_1.find_first(); i != CharReach::npos; + i = cr_1.find_next(i)) { + for (auto j = cr_2.find_first(); j != CharReach::npos; + j = cr_2.find_next(j)) { + as.double_byte.emplace(i, j); + } + } + } + + ENSURE_AT_LEAST(&as.double_offset, offset); + DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", + as.double_byte.size(), as.double_cr.count(), as.double_offset); + return as; } static -void findDoubleBest(vector<vector<CharReach> >::const_iterator pb, +void findDoubleBest(vector<vector<CharReach> >::const_iterator pb, vector<vector<CharReach> >::const_iterator pe, - const DAccelScheme &curr, DAccelScheme *best) { - assert(curr.double_offset <= MAX_ACCEL_DEPTH); + const DAccelScheme &curr, DAccelScheme *best) { + assert(curr.double_offset <= MAX_ACCEL_DEPTH); DEBUG_PRINTF("paths left %zu\n", pe - pb); - DEBUG_PRINTF("current base: %zu pairs, %zu singles, 
offset %u\n", - curr.double_byte.size(), curr.double_cr.count(), - curr.double_offset); + DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n", + curr.double_byte.size(), curr.double_cr.count(), + curr.double_offset); if (pb == pe) { - if (curr < *best) { - *best = curr; - DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", - best->double_byte.size(), best->double_cr.count(), - best->double_offset); - } + if (curr < *best) { + *best = curr; + DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + best->double_offset); + } return; } DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - small_vector<DAccelScheme, 10> priority_path; - priority_path.reserve(pb->size()); + small_vector<DAccelScheme, 10> priority_path; + priority_path.reserve(pb->size()); u32 i = 0; - for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end(); + for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end(); ++p, i++) { - DAccelScheme as = make_double_accel(curr, *p, *next(p), i); - if (*best < as) { - DEBUG_PRINTF("worse\n"); - continue; - } - priority_path.push_back(move(as)); + DAccelScheme as = make_double_accel(curr, *p, *next(p), i); + if (*best < as) { + DEBUG_PRINTF("worse\n"); + continue; + } + priority_path.push_back(move(as)); } sort(priority_path.begin(), priority_path.end()); - DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size()); - DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", - best->double_byte.size(), best->double_cr.count(), - best->double_offset); - - for (const DAccelScheme &in : priority_path) { - DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", - in.double_byte.size(), in.double_cr.count(), - in.double_offset); - if (*best < in) { + DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size()); + DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + 
best->double_offset); + + for (const DAccelScheme &in : priority_path) { + DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", + in.double_byte.size(), in.double_cr.count(), + in.double_offset); + if (*best < in) { DEBUG_PRINTF("worse\n"); continue; } - findDoubleBest(pb + 1, pe, in, best); + findDoubleBest(pb + 1, pe, in, best); } } #ifdef DEBUG static -void dumpPaths(const vector<vector<CharReach>> &paths) { - for (const auto &path : paths) { +void dumpPaths(const vector<vector<CharReach>> &paths) { + for (const auto &path : paths) { DEBUG_PRINTF("path: ["); - for (const auto &cr : path) { + for (const auto &cr : path) { printf(" ["); - describeClass(stdout, cr, 20, CC_OUT_TEXT); + describeClass(stdout, cr, 20, CC_OUT_TEXT); printf("]"); } printf(" ]\n"); @@ -459,13 +459,13 @@ void dumpPaths(const vector<vector<CharReach>> &paths) { #endif static -void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) { +void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) { /* paths segments which are a superset of an earlier segment should never be * picked as an acceleration segment -> to improve processing just replace * with dot */ - for (auto &p : paths) { - for (auto it = p.begin(); it != p.end(); ++it) { - for (auto jt = next(it); jt != p.end(); ++jt) { + for (auto &p : paths) { + for (auto it = p.begin(); it != p.end(); ++it) { + for (auto jt = next(it); jt != p.end(); ++jt) { if (it->isSubsetOf(*jt)) { *jt = CharReach::dot(); } @@ -475,10 +475,10 @@ void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) { } static -void unifyPathsLastSegment(vector<vector<CharReach> > &paths) { +void unifyPathsLastSegment(vector<vector<CharReach> > &paths) { /* try to unify paths which only differ in the last segment */ - for (vector<vector<CharReach> >::iterator p = paths.begin(); - p != paths.end() && p + 1 != paths.end();) { + for (vector<vector<CharReach> >::iterator p = paths.begin(); + p != paths.end() && p + 1 != paths.end();) { 
vector<CharReach> &a = *p; vector<CharReach> &b = *(p + 1); @@ -496,7 +496,7 @@ void unifyPathsLastSegment(vector<vector<CharReach> > &paths) { if (i == a.size() - 1) { /* we can unify these paths */ a[i] |= b[i]; - paths.erase(p + 1); + paths.erase(p + 1); } else { ++p; } @@ -504,117 +504,117 @@ void unifyPathsLastSegment(vector<vector<CharReach> > &paths) { } static -void improvePaths(vector<vector<CharReach> > &paths) { +void improvePaths(vector<vector<CharReach> > &paths) { #ifdef DEBUG DEBUG_PRINTF("orig paths\n"); - dumpPaths(paths); + dumpPaths(paths); #endif blowoutPathsLessStrictSegment(paths); - sort(paths.begin(), paths.end()); + sort(paths.begin(), paths.end()); unifyPathsLastSegment(paths); #ifdef DEBUG DEBUG_PRINTF("opt paths\n"); - dumpPaths(paths); + dumpPaths(paths); #endif } -#define MAX_DOUBLE_ACCEL_PATHS 10 - -static -DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths, - const CharReach &terminating) { - DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n", - terminating.count()); - unifyPathsLastSegment(paths); - -#ifdef DEBUG - DEBUG_PRINTF("paths:\n"); - dumpPaths(paths); -#endif - - /* if there are too many paths, shorten the paths to reduce the number of - * distinct paths we have to consider */ - while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) { - for (auto &p : paths) { - if (p.empty()) { - return DAccelScheme(terminating, 0U); - } - p.pop_back(); - } - unifyPathsLastSegment(paths); - } - - if (paths.empty()) { - return DAccelScheme(terminating, 0U); - } - - DAccelScheme curr(terminating, 0U); - DAccelScheme best(CharReach::dot(), 0U); - findDoubleBest(paths.begin(), paths.end(), curr, &best); - DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(), - best.double_cr.count()); - return best; -} - -#define MAX_EXPLORE_PATHS 40 - -AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths, - const CharReach &terminating, - bool look_for_double_byte) { - AccelScheme rv; - if 
(look_for_double_byte) { - DAccelScheme da = findBestDoubleAccelScheme(paths, terminating); - if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) { - rv.double_byte = std::move(da.double_byte); - rv.double_cr = move(da.double_cr); - rv.double_offset = da.double_offset; - } - } - - improvePaths(paths); - - DEBUG_PRINTF("we have %zu paths\n", paths.size()); - if (paths.size() > MAX_EXPLORE_PATHS) { - return rv; /* too many paths to explore */ - } - - /* if we were smart we would do something netflowy on the paths to find the - * best cut. But we aren't, so we will just brute force it. - */ - SAccelScheme best = findBest(paths, terminating); - - /* find best is a bit lazy in terms of minimising the offset, see if we can - * make it better. need to find the min max offset that we need.*/ - u32 offset = 0; - for (const auto &path : paths) { - u32 i = 0; - for (const auto &cr : path) { - if (cr.isSubsetOf(best.cr)) { - break; - } - i++; - } - offset = MAX(offset, i); - } - assert(offset <= best.offset); - best.offset = offset; - - rv.offset = best.offset; - rv.cr = best.cr; - if (rv.cr.count() < rv.double_cr.count()) { - rv.double_byte.clear(); - } - - return rv; -} - +#define MAX_DOUBLE_ACCEL_PATHS 10 + +static +DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths, + const CharReach &terminating) { + DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n", + terminating.count()); + unifyPathsLastSegment(paths); + +#ifdef DEBUG + DEBUG_PRINTF("paths:\n"); + dumpPaths(paths); +#endif + + /* if there are too many paths, shorten the paths to reduce the number of + * distinct paths we have to consider */ + while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) { + for (auto &p : paths) { + if (p.empty()) { + return DAccelScheme(terminating, 0U); + } + p.pop_back(); + } + unifyPathsLastSegment(paths); + } + + if (paths.empty()) { + return DAccelScheme(terminating, 0U); + } + + DAccelScheme curr(terminating, 0U); + DAccelScheme best(CharReach::dot(), 0U); + 
findDoubleBest(paths.begin(), paths.end(), curr, &best); + DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(), + best.double_cr.count()); + return best; +} + +#define MAX_EXPLORE_PATHS 40 + +AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths, + const CharReach &terminating, + bool look_for_double_byte) { + AccelScheme rv; + if (look_for_double_byte) { + DAccelScheme da = findBestDoubleAccelScheme(paths, terminating); + if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) { + rv.double_byte = std::move(da.double_byte); + rv.double_cr = move(da.double_cr); + rv.double_offset = da.double_offset; + } + } + + improvePaths(paths); + + DEBUG_PRINTF("we have %zu paths\n", paths.size()); + if (paths.size() > MAX_EXPLORE_PATHS) { + return rv; /* too many paths to explore */ + } + + /* if we were smart we would do something netflowy on the paths to find the + * best cut. But we aren't, so we will just brute force it. + */ + SAccelScheme best = findBest(paths, terminating); + + /* find best is a bit lazy in terms of minimising the offset, see if we can + * make it better. 
need to find the min max offset that we need.*/ + u32 offset = 0; + for (const auto &path : paths) { + u32 i = 0; + for (const auto &cr : path) { + if (cr.isSubsetOf(best.cr)) { + break; + } + i++; + } + offset = MAX(offset, i); + } + assert(offset <= best.offset); + best.offset = offset; + + rv.offset = best.offset; + rv.cr = best.cr; + if (rv.cr.count() < rv.double_cr.count()) { + rv.double_byte.clear(); + } + + return rv; +} + AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts, const vector<CharReach> &refined_cr, const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - bool allow_wide, bool look_for_double_byte) { + bool allow_wide, bool look_for_double_byte) { CharReach terminating; for (auto v : verts) { if (!hasSelfLoop(v, g)) { @@ -633,15 +633,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts, return AccelScheme(); /* invalid scheme */ } - vector<vector<CharReach>> paths; + vector<vector<CharReach>> paths; flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end()); /* Note: we can not in general (TODO: ignore when possible) ignore entries * into the bounded repeat cyclic states as that is when the magic happens */ - for (auto v : br_cyclic | map_keys) { + for (auto v : br_cyclic | map_keys) { /* TODO: can allow if repeatMin <= 1 ? 
*/ - ignore_vert_set.erase(v); + ignore_vert_set.erase(v); } for (auto v : verts) { @@ -654,12 +654,12 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts, } /* paths built wrong: reverse them */ - for (auto &path : paths) { - reverse(path.begin(), path.end()); + for (auto &path : paths) { + reverse(path.begin(), path.end()); } - return findBestAccelScheme(std::move(paths), terminating, - look_for_double_byte); + return findBestAccelScheme(std::move(paths), terminating, + look_for_double_byte); } NFAVertex get_sds_or_proxy(const NGHolder &g) { @@ -668,7 +668,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { return g.startDs; } - NFAVertex v = NGHolder::null_vertex(); + NFAVertex v = NGHolder::null_vertex(); for (auto w : adjacent_vertices_range(g.start, g)) { if (w != g.startDs) { if (!v) { @@ -685,7 +685,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { while (true) { if (hasSelfLoop(v, g)) { - DEBUG_PRINTF("woot %zu\n", g[v].index); + DEBUG_PRINTF("woot %zu\n", g[v].index); return v; } if (out_degree(v, g) != 1) { @@ -719,7 +719,7 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, CharReach terminating = g[v].char_reach; terminating.flip(); - DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n", + DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n", g[v].index, terminating.count(), allow_wide ? 
" (w)" : ""); @@ -789,9 +789,9 @@ depth_done: for (unsigned int i = 0; i < depth; i++) { if (depthReach[i].none()) { DEBUG_PRINTF("red tape acceleration engine depth %u\n", i); - *as = AccelScheme(); - as->offset = i; - as->cr = CharReach(); + *as = AccelScheme(); + as->offset = i; + as->cr = CharReach(); return true; } } @@ -806,8 +806,8 @@ depth_done: || (cra.count() == 2 && crb.count() == 2 && cra.isBit5Insensitive() && crb.isBit5Insensitive())) { DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i); - *as = AccelScheme(); - as->offset = i; + *as = AccelScheme(); + as->offset = i; return true; } } @@ -817,19 +817,19 @@ depth_done: // literals) if (depth > 1) { for (unsigned int i = 0; i < (depth - 1); i++) { - if (depthReach[i].count() * depthReach[i+1].count() - <= DOUBLE_SHUFTI_LIMIT) { + if (depthReach[i].count() * depthReach[i+1].count() + <= DOUBLE_SHUFTI_LIMIT) { DEBUG_PRINTF("two-byte shufti, depth %u\n", i); - *as = AccelScheme(); - as->offset = i; + *as = AccelScheme(); + as->offset = i; return true; } } } - // Look for offset accel schemes verm/shufti; + // Look for offset accel schemes verm/shufti; vector<NFAVertex> verts(1, v); - *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true); + *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true); DEBUG_PRINTF("as width %zu\n", as->cr.count()); return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h index f6f7f1b3cb..c6eed0d168 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,10 +36,10 @@ #include "ng_holder.h" 
#include "ng_misc_opt.h" #include "ue2common.h" -#include "nfa/accelcompile.h" -#include "util/accel_scheme.h" +#include "nfa/accelcompile.h" +#include "util/accel_scheme.h" #include "util/charreach.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/order_check.h" #include <map> @@ -52,35 +52,35 @@ namespace ue2 { #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ -// forward-declaration of CompileContext -struct CompileContext; - +// forward-declaration of CompileContext +struct CompileContext; + void findAccelFriends(const NGHolder &g, NFAVertex v, const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - u32 offset, flat_set<NFAVertex> *friends); + u32 offset, flat_set<NFAVertex> *friends); -#define DOUBLE_SHUFTI_LIMIT 20 +#define DOUBLE_SHUFTI_LIMIT 20 NFAVertex get_sds_or_proxy(const NGHolder &g); AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts, const std::vector<CharReach> &refined_cr, const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - bool allow_wide, bool look_for_double_byte = false); - -AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths, - const CharReach &terminating, - bool look_for_double_byte = false); - -/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). If a - * single byte accel scheme is found it is placed into *as - */ + bool allow_wide, bool look_for_double_byte = false); + +AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths, + const CharReach &terminating, + bool look_for_double_byte = false); + +/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). 
If a + * single byte accel scheme is found it is placed into *as + */ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::vector<CharReach> &refined_cr, const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, AccelScheme *as, bool allow_wide); - + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp index d25ac43e87..e7b9db416f 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,8 @@ #include "util/depth.h" #include "util/graph.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" -#include "util/ue2_graph.h" +#include "util/graph_small_color_map.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include <algorithm> @@ -66,31 +66,31 @@ namespace { * compressAndScore. 
*/ struct LitGraphVertexProps { - LitGraphVertexProps() = default; - explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} + LitGraphVertexProps() = default; + explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} ue2_literal::elem c; // string element (char + bool) size_t index = 0; // managed by ue2_graph }; struct LitGraphEdgeProps { - LitGraphEdgeProps() = default; + LitGraphEdgeProps() = default; explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} u64a score = NO_LITERAL_AT_EDGE_SCORE; size_t index = 0; // managed by ue2_graph }; -struct LitGraph - : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> { - - LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} - - const vertex_descriptor root; - const vertex_descriptor sink; -}; - -typedef LitGraph::vertex_descriptor LitVertex; -typedef LitGraph::edge_descriptor LitEdge; - +struct LitGraph + : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> { + + LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} + + const vertex_descriptor root; + const vertex_descriptor sink; +}; + +typedef LitGraph::vertex_descriptor LitVertex; +typedef LitGraph::edge_descriptor LitEdge; + typedef pair<LitVertex, NFAVertex> VertexPair; typedef std::queue<VertexPair> LitVertexQ; @@ -100,16 +100,16 @@ typedef std::queue<VertexPair> LitVertexQ; /** \brief Dump the literal graph in Graphviz format. 
*/ static UNUSED -void dumpGraph(const char *filename, const LitGraph &lg) { +void dumpGraph(const char *filename, const LitGraph &lg) { ofstream fout(filename); fout << "digraph G {" << endl; for (auto v : vertices_range(lg)) { - fout << lg[v].index; - if (v == lg.root) { + fout << lg[v].index; + if (v == lg.root) { fout << "[label=\"ROOT\"];"; - } else if (v == lg.sink) { + } else if (v == lg.sink) { fout << "[label=\"SINK\"];"; } else { ue2_literal s; @@ -121,9 +121,9 @@ void dumpGraph(const char *filename, const LitGraph &lg) { for (const auto &e : edges_range(lg)) { LitVertex u = source(e, lg), v = target(e, lg); - fout << lg[u].index << " -> " << lg[v].index << "[label=\"" - << lg[e].score << "\"]" - << ";" << endl; + fout << lg[u].index << " -> " << lg[v].index << "[label=\"" + << lg[e].score << "\"]" + << ";" << endl; } fout << "}" << endl; @@ -145,11 +145,11 @@ bool allowExpand(size_t numItems, size_t totalPathsSoFar) { } static -LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, - const ue2_literal::elem &c) { +LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, + const ue2_literal::elem &c) { // Check if we already have this in the graph. 
for (auto v : adjacent_vertices_range(pred, lg)) { - if (v == lg.sink) { + if (v == lg.sink) { continue; } if (lg[v].c == c) { @@ -163,10 +163,10 @@ LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, } static -void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, - const CharReach &cr, NFAVertex v) { - for (size_t i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { +void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, + const CharReach &cr, NFAVertex v) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -174,14 +174,14 @@ void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lv = addToLitGraph(lg, pred, c); + LitVertex lv = addToLitGraph(lg, pred, c); workQ.push(VertexPair(lv, v)); } } static -void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, - const NFAEdge &e) { +void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, + const NFAEdge &e) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); const CharReach &cr = g[v].char_reach; @@ -190,7 +190,7 @@ void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, return; } - addToQueue(workQ, lg, lg.root, cr, u); + addToQueue(workQ, lg, lg.root, cr, u); } static @@ -202,8 +202,8 @@ u32 crCardinality(const CharReach &cr) { } u32 rv = 0; - for (size_t i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -218,10 +218,10 @@ u32 crCardinality(const CharReach &cr) { * identifying vertices connected to the sink and removing their other * out-edges. 
*/ static -void filterLitGraph(LitGraph &lg) { - for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { - remove_out_edge_if(v, [&lg](const LitEdge &e) { - return target(e, lg) != lg.sink; +void filterLitGraph(LitGraph &lg) { + for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { + remove_out_edge_if(v, [&lg](const LitEdge &e) { + return target(e, lg) != lg.sink; }, lg); } @@ -234,12 +234,12 @@ void filterLitGraph(LitGraph &lg) { * from each predecessor of the sink (note: it's a suffix tree except for this * convenience) towards the source, storing each string as we go. */ static -void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) { +void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) { ue2_literal lit; - for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { + for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { lit.clear(); - while (u != lg.root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) <= 1); LitGraph::inv_adjacency_iterator ai2, ae2; @@ -283,7 +283,7 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, LitGraph lg; LitVertexQ workQ; - initWorkQueue(workQ, lg, g, e); + initWorkQueue(workQ, lg, g, e); while (!workQ.empty()) { const LitVertex lv = workQ.front().first; @@ -292,18 +292,18 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, u32 cr_card = crCardinality(cr); size_t numItems = cr_card * in_degree(t, g); - size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; + size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; if (g[t].index == NODE_START) { // reached start, add to literal set - add_edge_if_not_present(lv, lg.sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); goto next_work_elem; } // Expand next vertex if (allowExpand(numItems, committed_count)) { for (auto u : inv_adjacent_vertices_range(t, g)) { - addToQueue(workQ, lg, lv, cr, u); + addToQueue(workQ, lg, lv, cr, u); } goto next_work_elem; } @@ -319,35 +319,35 @@ void 
processWorkQueue(const NGHolder &g, const NFAEdge &e, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lt = addToLitGraph(lg, lv, c); - add_edge_if_not_present(lt, lg.sink, lg); + LitVertex lt = addToLitGraph(lg, lv, c); + add_edge_if_not_present(lt, lg.sink, lg); } goto next_work_elem; } // add to literal set - add_edge_if_not_present(lv, lg.sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); next_work_elem: workQ.pop(); } - filterLitGraph(lg); - //dumpGraph("litgraph.dot", lg); - extractLiterals(lg, s); + filterLitGraph(lg); + //dumpGraph("litgraph.dot", lg); + extractLiterals(lg, s); // Our literal set should contain no literal that is a suffix of another. assert(!hasSuffixLiterals(s)); - DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, + DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, g[source(e, g)].index, g[target(e, g)].index, s.size()); } -bool bad_mixed_sensitivity(const ue2_literal &s) { - /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end, - * we should be able to handle it */ - return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH; -} - +bool bad_mixed_sensitivity(const ue2_literal &s) { + /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end, + * we should be able to handle it */ + return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH; +} + static u64a litUniqueness(const string &s) { CharReach seen(s); @@ -412,15 +412,15 @@ u64a calculateScore(const ue2_literal &s) { /** Adds a literal in reverse order, building up a suffix tree. 
*/ static -void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { +void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); ue2_literal suffix; - LitVertex v = lg.root; + LitVertex v = lg.root; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { suffix.push_back(*it); LitVertex w; for (auto v2 : adjacent_vertices_range(v, lg)) { - if (v2 != lg.sink && lg[v2].c == *it) { + if (v2 != lg.sink && lg[v2].c == *it) { w = v2; goto next_char; } @@ -432,18 +432,18 @@ next_char: } // Wire the last vertex to the sink. - add_edge(v, lg.sink, lg); + add_edge(v, lg.sink, lg); } static void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg, - set<ue2_literal> &s) { + set<ue2_literal> &s) { for (const auto &e : cutset) { - LitVertex u = source(e, lg); - LitVertex v = target(e, lg); + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); ue2_literal lit; lit.push_back(lg[v].c); - while (u != lg.root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) == 1); LitGraph::inv_adjacency_iterator ai, ae; @@ -463,13 +463,13 @@ next_literal: #ifdef DEBUG static UNUSED -const char *describeColor(small_color c) { +const char *describeColor(small_color c) { switch (c) { - case small_color::white: + case small_color::white: return "white"; - case small_color::gray: + case small_color::gray: return "gray"; - case small_color::black: + case small_color::black: return "black"; default: return "unknown"; @@ -479,90 +479,90 @@ const char *describeColor(small_color c) { /** * The BGL's boykov_kolmogorov_max_flow requires that all edges have their - * reverse edge in the graph. This function adds them, returning a vector - * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there - * should be no existing reverse_edges. + * reverse edge in the graph. This function adds them, returning a vector + * mapping edge index to reverse edge. 
Note: LitGraph should be a DAG so there + * should be no existing reverse_edges. */ static -vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) { - const size_t edge_count = num_edges(lg); - vector<LitEdge> fwd_edges; - fwd_edges.reserve(edge_count); - for (const auto &e : edges_range(lg)) { - fwd_edges.push_back(e); - } +vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) { + const size_t edge_count = num_edges(lg); + vector<LitEdge> fwd_edges; + fwd_edges.reserve(edge_count); + for (const auto &e : edges_range(lg)) { + fwd_edges.push_back(e); + } - vector<LitEdge> rev_map(2 * edge_count); + vector<LitEdge> rev_map(2 * edge_count); - for (const auto &e : fwd_edges) { - LitVertex u = source(e, lg); - LitVertex v = target(e, lg); + for (const auto &e : fwd_edges) { + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); - assert(!edge(v, u, lg).second); + assert(!edge(v, u, lg).second); - LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; - rev_map[lg[e].index] = rev; - rev_map[lg[rev].index] = e; + LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; + rev_map[lg[e].index] = rev; + rev_map[lg[rev].index] = e; } - return rev_map; + return rev_map; } static -void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) { +void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) { cutset.clear(); - //dumpGraph("litgraph.dot", lg); + //dumpGraph("litgraph.dot", lg); - assert(!in_degree(lg.root, lg)); - assert(!out_degree(lg.sink, lg)); - size_t num_real_edges = num_edges(lg); + assert(!in_degree(lg.root, lg)); + assert(!out_degree(lg.sink, lg)); + size_t num_real_edges = num_edges(lg); // Add reverse edges for the convenience of the BGL's max flow algorithm. 
- vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg); + vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg); - const auto v_index_map = get(&LitGraphVertexProps::index, lg); - const auto e_index_map = get(&LitGraphEdgeProps::index, lg); + const auto v_index_map = get(&LitGraphVertexProps::index, lg); + const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); - auto colors = make_small_color_map(lg); + auto colors = make_small_color_map(lg); vector<s32> distances(num_verts); vector<LitEdge> predecessors(num_verts); - vector<u64a> residuals(num_edges(lg)); + vector<u64a> residuals(num_edges(lg)); UNUSED u64a flow = boykov_kolmogorov_max_flow(lg, get(&LitGraphEdgeProps::score, lg), - make_iterator_property_map(residuals.begin(), e_index_map), - make_iterator_property_map(rev_edges.begin(), e_index_map), + make_iterator_property_map(residuals.begin(), e_index_map), + make_iterator_property_map(rev_edges.begin(), e_index_map), make_iterator_property_map(predecessors.begin(), v_index_map), - colors, + colors, make_iterator_property_map(distances.begin(), v_index_map), - v_index_map, lg.root, lg.sink); + v_index_map, lg.root, lg.sink); DEBUG_PRINTF("done, flow = %llu\n", flow); - /* remove reverse edges */ - remove_edge_if([&](const LitEdge &e) { - return lg[e].index >= num_real_edges; - }, lg); + /* remove reverse edges */ + remove_edge_if([&](const LitEdge &e) { + return lg[e].index >= num_real_edges; + }, lg); vector<LitEdge> white_cut, black_cut; u64a white_flow = 0, black_flow = 0; for (const auto &e : edges_range(lg)) { const LitVertex u = source(e, lg), v = target(e, lg); - const auto ucolor = get(colors, u); - const auto vcolor = get(colors, v); + const auto ucolor = get(colors, u); + const auto vcolor = get(colors, v); - DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, - describeColor(ucolor), lg[v].index, describeColor(vcolor), + DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", 
lg[u].index, + describeColor(ucolor), lg[v].index, describeColor(vcolor), lg[e].score); - if (ucolor != small_color::white && vcolor == small_color::white) { - assert(v != lg.sink); + if (ucolor != small_color::white && vcolor == small_color::white) { + assert(v != lg.sink); white_cut.push_back(e); white_flow += lg[e].score; } - if (ucolor == small_color::black && vcolor != small_color::black) { - assert(v != lg.sink); + if (ucolor == small_color::black && vcolor != small_color::black) { + assert(v != lg.sink); black_cut.push_back(e); black_flow += lg[e].score; } @@ -604,17 +604,17 @@ u64a compressAndScore(set<ue2_literal> &s) { LitGraph lg; for (const auto &lit : s) { - addReversedLiteral(lit, lg); + addReversedLiteral(lit, lg); } DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", num_vertices(lg), num_edges(lg)); vector<LitEdge> cutset; - findMinCut(lg, cutset); + findMinCut(lg, cutset); s.clear(); - extractLiterals(cutset, lg, s); + extractLiterals(cutset, lg, s); u64a score = scoreSet(s); DEBUG_PRINTF("compressed score is %llu\n", score); @@ -622,48 +622,48 @@ u64a compressAndScore(set<ue2_literal> &s) { return score; } -/* like compressAndScore, but replaces long mixed sensitivity literals with - * something weaker. */ -u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) { - const size_t maxExploded = 8; // only case-explode this far - - /* TODO: the whole compression thing could be made better by systematically - * considering replacing literal sets not just by common suffixes but also - * by nocase literals. */ - - vector<ue2_literal> replacements; - - for (auto it = lits.begin(); it != lits.end();) { - auto jt = it; - ++it; - - if (!bad_mixed_sensitivity(*jt)) { - continue; - } - - /* we have to replace *jt with something... 
*/ - ue2_literal s = *jt; - lits.erase(jt); - - vector<ue2_literal> exploded; - for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) { - exploded.emplace_back(*cit, false); - if (exploded.size() > maxExploded) { - goto dont_explode; - } - } - insert(&replacements, replacements.end(), exploded); - - continue; - dont_explode: - make_nocase(&s); - replacements.push_back(s); - } - - insert(&lits, replacements); - return compressAndScore(lits); -} - +/* like compressAndScore, but replaces long mixed sensitivity literals with + * something weaker. */ +u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) { + const size_t maxExploded = 8; // only case-explode this far + + /* TODO: the whole compression thing could be made better by systematically + * considering replacing literal sets not just by common suffixes but also + * by nocase literals. */ + + vector<ue2_literal> replacements; + + for (auto it = lits.begin(); it != lits.end();) { + auto jt = it; + ++it; + + if (!bad_mixed_sensitivity(*jt)) { + continue; + } + + /* we have to replace *jt with something... 
*/ + ue2_literal s = *jt; + lits.erase(jt); + + vector<ue2_literal> exploded; + for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) { + exploded.emplace_back(*cit, false); + if (exploded.size() > maxExploded) { + goto dont_explode; + } + } + insert(&replacements, replacements.end(), exploded); + + continue; + dont_explode: + make_nocase(&s); + replacements.push_back(s); + } + + insert(&lits, replacements); + return compressAndScore(lits); +} + u64a scoreSet(const set<ue2_literal> &s) { if (s.empty()) { return NO_LITERAL_AT_EDGE_SCORE; @@ -714,7 +714,7 @@ set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, return s; } -vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { +vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { assert(hasCorrectlyNumberedEdges(g)); vector<u64a> scores(num_edges(g)); @@ -722,43 +722,43 @@ vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { for (const auto &e : edges_range(g)) { u32 eidx = g[e].index; assert(eidx < scores.size()); - if (contains(known_bad, e)) { - scores[eidx] = NO_LITERAL_AT_EDGE_SCORE; - } else { - set<ue2_literal> ls = getLiteralSet(g, e); - scores[eidx] = compressAndScore(ls); - } + if (contains(known_bad, e)) { + scores[eidx] = NO_LITERAL_AT_EDGE_SCORE; + } else { + set<ue2_literal> ls = getLiteralSet(g, e); + scores[eidx] = compressAndScore(ls); + } } return scores; } -bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs) { - DEBUG_PRINTF("looking for leading floating literal\n"); - set<NFAVertex> s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); +bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, + NGHolder *rhs) { + DEBUG_PRINTF("looking for leading floating literal\n"); + set<NFAVertex> s_succ; + insert(&s_succ, adjacent_vertices(g.start, g)); - set<NFAVertex> sds_succ; - insert(&sds_succ, adjacent_vertices(g.startDs, g)); + 
set<NFAVertex> sds_succ; + insert(&sds_succ, adjacent_vertices(g.startDs, g)); - bool floating = is_subset_of(s_succ, sds_succ); - if (!floating) { - DEBUG_PRINTF("not floating\n"); - return false; - } + bool floating = is_subset_of(s_succ, sds_succ); + if (!floating) { + DEBUG_PRINTF("not floating\n"); + return false; + } - sds_succ.erase(g.startDs); - if (sds_succ.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } + sds_succ.erase(g.startDs); + if (sds_succ.size() != 1) { + DEBUG_PRINTF("branchy root\n"); + return false; + } - NFAVertex u = g.startDs; - NFAVertex v = *sds_succ.begin(); + NFAVertex u = g.startDs; + NFAVertex v = *sds_succ.begin(); while (true) { - DEBUG_PRINTF("validating vertex %zu\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != g.acceptEod && v != g.accept); @@ -811,8 +811,8 @@ bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, } assert(u != g.startDs); - unordered_map<NFAVertex, NFAVertex> rhs_map; - vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g)); + unordered_map<NFAVertex, NFAVertex> rhs_map; + vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g)); splitRHS(g, pivots, rhs, &rhs_map); DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(), @@ -849,49 +849,49 @@ bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { return true; } -bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { - NFAVertex v = g.accept; - - for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { - NGHolder::inv_adjacency_iterator ai, ae; - tie(ai, ae) = inv_adjacent_vertices(v, g); - if (ai == ae) { - assert(0); // no predecessors? 
- return false; - } - v = *ai++; - if (ai != ae) { - DEBUG_PRINTF("branch, fail\n"); - return false; - } - - if (is_special(v, g)) { - DEBUG_PRINTF("special found, fail\n"); - return false; - } - - const CharReach &cr_g = g[v].char_reach; - const CharReach &cr_l = *it; - - if (!cr_l.isSubsetOf(cr_g)) { - /* running over the prefix is needed to prevent false postives */ - DEBUG_PRINTF("reach fail\n"); - return false; - } - } - - // Our last value for v should have only start states for predecessors. - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_any_start(u, g)) { - DEBUG_PRINTF("pred is not start\n"); - return false; - } - } - - assert(num_vertices(g) == lit.length() + N_SPECIALS); - - DEBUG_PRINTF("ok\n"); - return true; -} - +bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { + NFAVertex v = g.accept; + + for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { + NGHolder::inv_adjacency_iterator ai, ae; + tie(ai, ae) = inv_adjacent_vertices(v, g); + if (ai == ae) { + assert(0); // no predecessors? + return false; + } + v = *ai++; + if (ai != ae) { + DEBUG_PRINTF("branch, fail\n"); + return false; + } + + if (is_special(v, g)) { + DEBUG_PRINTF("special found, fail\n"); + return false; + } + + const CharReach &cr_g = g[v].char_reach; + const CharReach &cr_l = *it; + + if (!cr_l.isSubsetOf(cr_g)) { + /* running over the prefix is needed to prevent false postives */ + DEBUG_PRINTF("reach fail\n"); + return false; + } + } + + // Our last value for v should have only start states for predecessors. 
+ for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_any_start(u, g)) { + DEBUG_PRINTF("pred is not start\n"); + return false; + } + } + + assert(num_vertices(g) == lit.length() + N_SPECIALS); + + DEBUG_PRINTF("ok\n"); + return true; +} + } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h index 6bb8755610..96206a352d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,7 @@ namespace ue2 { #define NO_LITERAL_AT_EDGE_SCORE 10000000ULL -#define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */ +#define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */ class NGHolder; @@ -57,20 +57,20 @@ std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, bool only_first_encounter = true); std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e); -/** - * Returns true if we are unable to use a mixed sensitivity literal in rose (as - * our literal matchers are generally either case sensitive or not). - * - * Shortish mixed sensitivity literals can be handled by confirm checks in rose - * and are not flagged as bad. - */ -bool bad_mixed_sensitivity(const ue2_literal &s); - -/** - * Score all the edges in the given graph, returning them in \p scores indexed +/** + * Returns true if we are unable to use a mixed sensitivity literal in rose (as + * our literal matchers are generally either case sensitive or not). + * + * Shortish mixed sensitivity literals can be handled by confirm checks in rose + * and are not flagged as bad. 
+ */ +bool bad_mixed_sensitivity(const ue2_literal &s); + +/** + * Score all the edges in the given graph, returning them in \p scores indexed * by edge_index. */ -std::vector<u64a> scoreEdges(const NGHolder &h, - const flat_set<NFAEdge> &known_bad = {}); +std::vector<u64a> scoreEdges(const NGHolder &h, + const flat_set<NFAEdge> &known_bad = {}); /** Returns a score for a literal set. Lower scores are better. */ u64a scoreSet(const std::set<ue2_literal> &s); @@ -78,21 +78,21 @@ u64a scoreSet(const std::set<ue2_literal> &s); /** Compress a literal set to fewer literals. */ u64a compressAndScore(std::set<ue2_literal> &s); -/** - * Compress a literal set to fewer literals and replace any long mixed - * sensitivity literals with supported literals. - */ -u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s); - +/** + * Compress a literal set to fewer literals and replace any long mixed + * sensitivity literals with supported literals. + */ +u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s); + bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, NGHolder *rhs); bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); -/** \brief Returns true if the given literal is the only thing in the graph, - * from (start or startDs) to accept. */ -bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit); - +/** \brief Returns true if the given literal is the only thing in the graph, + * from (start or startDs) to accept. 
*/ +bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit); + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp index 4d3965dfe2..6dbd8d263c 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,30 +30,30 @@ * \brief Literal Component Splitting. Identifies literals that span the * graph and moves them into Rose. */ - -#include "ng_literal_component.h" - + +#include "ng_literal_component.h" + #include "grey.h" #include "ng.h" #include "ng_prune.h" #include "ng_util.h" #include "ue2common.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "rose/rose_build.h" #include "util/container.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/ue2string.h" -#include <unordered_set> - +#include <unordered_set> + using namespace std; namespace ue2 { static -bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase, - bool &casefixed) { +bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase, + bool &casefixed) { const CharReach &cr = g[v].char_reach; const size_t num = cr.count(); if (num > 2) { @@ -98,9 +98,9 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { } static -bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, +bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, set<NFAVertex> &dead) { - DEBUG_PRINTF("examine vertex %zu\n", g[v].index); + DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; assert(!is_special(v, g)); @@ -114,7 +114,7 @@ 
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, assert(edge(g.start, v, g).second); assert(edge(g.startDs, v, g).second); } - if (in_degree(v, g) > reqInDegree) { + if (in_degree(v, g) > reqInDegree) { DEBUG_PRINTF("extra in-edges\n"); return false; } @@ -139,7 +139,7 @@ bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, u = v; // previous vertex v = *(adjacent_vertices(v, g).first); - DEBUG_PRINTF("loop, v=%zu\n", g[v].index); + DEBUG_PRINTF("loop, v=%zu\n", g[v].index); if (is_special(v, g)) { if (v == g.accept || v == g.acceptEod) { @@ -190,15 +190,15 @@ bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, } /** \brief Split off literals. True if any changes were made to the graph. */ -bool splitOffLiterals(NG &ng, NGHolder &g) { - if (!ng.cc.grey.allowLiteral) { +bool splitOffLiterals(NG &ng, NGHolder &g) { + if (!ng.cc.grey.allowLiteral) { return false; } bool changed = false; set<NFAVertex> dead; - unordered_set<NFAVertex> unanchored; // for faster lookup. + unordered_set<NFAVertex> unanchored; // for faster lookup. insert(&unanchored, adjacent_vertices(g.startDs, g)); // Anchored literals. diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h index 1f284ce367..cc7ea38807 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,10 +37,10 @@ namespace ue2 { class NG; -class NGHolder; +class NGHolder; /** \brief Split off literals. True if any changes were made to the graph. 
*/ -bool splitOffLiterals(NG &ng, NGHolder &g); +bool splitOffLiterals(NG &ng, NGHolder &g); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp index 61a31dbf34..fe46e60d55 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -75,7 +75,7 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) { read_count[g[v].index] = out_degree(v, g); - DEBUG_PRINTF("setting read_count to %zu for %zu\n", + DEBUG_PRINTF("setting read_count to %zu for %zu\n", read_count[g[v].index], g[v].index); if (v == g.start || v == g.startDs) { @@ -115,7 +115,7 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) { read_count[g[u].index]--; if (!read_count[g[u].index]) { - DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); + DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); built[g[u].index].clear(); built[g[u].index].shrink_to_fit(); } @@ -136,9 +136,9 @@ bool hasLargeDegreeVertex(const NGHolder &g) { if (is_special(v, g)) { // specials can have large degree continue; } - if (degree(v, g) > MAX_VERTEX_DEGREE) { - DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index, - degree(v, g)); + if (degree(v, g) > MAX_VERTEX_DEGREE) { + DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index, + degree(v, g)); return true; } } @@ -186,13 +186,13 @@ struct PathMask { } // Reports are attached to the second-to-last vertex. 
- NFAVertex u = *std::next(path.rbegin()); - reports = g[u].reports; + NFAVertex u = *std::next(path.rbegin()); + reports = g[u].reports; assert(!reports.empty()); } vector<CharReach> mask; - flat_set<ReportID> reports; + flat_set<ReportID> reports; bool is_anchored; bool is_eod; }; @@ -208,11 +208,11 @@ bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, return false; } - if (!hasNarrowReachVertex(g)) { - DEBUG_PRINTF("no narrow reach vertices\n"); - return false; - } - + if (!hasNarrowReachVertex(g)) { + DEBUG_PRINTF("no narrow reach vertices\n"); + return false; + } + if (hasLargeDegreeVertex(g)) { DEBUG_PRINTF("large degree\n"); return false; diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp index 4ce5dc153b..5a821a99f6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp @@ -41,10 +41,10 @@ #include "ue2common.h" #include "util/bitfield.h" #include "util/determinise.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/hash.h" -#include "util/hash_dynamic_bitset.h" +#include "util/hash.h" +#include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" #include "util/report_manager.h" @@ -52,7 +52,7 @@ #include <functional> #include <map> #include <set> -#include <unordered_map> +#include <unordered_map> #include <vector> #include <boost/dynamic_bitset.hpp> @@ -154,11 +154,11 @@ void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state, template<typename stateset> static -void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, +void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, stateset *init, stateset *init_deep, vector<NFAVertex> *v_by_index) { for (auto v : vertices_range(g)) { - if (contains(unused, v)) { + if (contains(unused, v)) { continue; } @@ -175,11 +175,11 @@ void 
populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, } v_by_index->clear(); - v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); + v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); for (auto v : vertices_range(g)) { u32 vert_id = g[v].index; - assert((*v_by_index)[vert_id] == NGHolder::null_vertex()); + assert((*v_by_index)[vert_id] == NGHolder::null_vertex()); (*v_by_index)[vert_id] = v; } @@ -189,22 +189,22 @@ void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, } template<typename StateSet> -void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused, +void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused, StateSet *accept, StateSet *acceptEod) { for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (contains(unused, v)) { - continue; + if (contains(unused, v)) { + continue; } - accept->set(g[v].index); + accept->set(g[v].index); } for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { if (v == g.accept) { continue; } - if (contains(unused, v)) { - continue; + if (contains(unused, v)) { + continue; } - acceptEod->set(g[v].index); + acceptEod->set(g[v].index); } } @@ -284,8 +284,8 @@ static bool triggerAllowed(const NGHolder &g, const NFAVertex v, const vector<vector<CharReach> > &all_triggers, const vector<CharReach> &trigger) { - flat_set<NFAVertex> curr({v}); - flat_set<NFAVertex> next; + flat_set<NFAVertex> curr({v}); + flat_set<NFAVertex> next; for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) { next.clear(); @@ -315,7 +315,7 @@ bool triggerAllowed(const NGHolder &g, const NFAVertex v, return true; } -void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, +void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, bool single_trigger, const vector<vector<CharReach>> &triggers, dynamic_bitset<> *out) { @@ -324,13 +324,13 @@ void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, } 
for (auto v : vertices_range(g)) { - if (contains(unused, v)) { + if (contains(unused, v)) { continue; } for (const auto &trigger : triggers) { if (triggerAllowed(g, v, triggers, trigger)) { - DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index); - out->set(g[v].index); + DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index); + out->set(g[v].index); break; } } @@ -341,27 +341,27 @@ void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, namespace { -template<typename Automaton_Traits> -class Automaton_Base { +template<typename Automaton_Traits> +class Automaton_Base { public: - using StateSet = typename Automaton_Traits::StateSet; - using StateMap = typename Automaton_Traits::StateMap; - - Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, - bool single_trigger, - const vector<vector<CharReach>> &triggers, bool prunable_in) - : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), - unused(getRedundantStarts(graph_in)), - init(Automaton_Traits::init_states(numStates)), - initDS(Automaton_Traits::init_states(numStates)), - squash(Automaton_Traits::init_states(numStates)), - accept(Automaton_Traits::init_states(numStates)), - acceptEod(Automaton_Traits::init_states(numStates)), - toppable(Automaton_Traits::init_states(numStates)), - dead(Automaton_Traits::init_states(numStates)), - prunable(prunable_in) { - populateInit(graph, unused, &init, &initDS, &v_by_index); - populateAccepts(graph, unused, &accept, &acceptEod); + using StateSet = typename Automaton_Traits::StateSet; + using StateMap = typename Automaton_Traits::StateMap; + + Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, + bool single_trigger, + const vector<vector<CharReach>> &triggers, bool prunable_in) + : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), + unused(getRedundantStarts(graph_in)), + init(Automaton_Traits::init_states(numStates)), + initDS(Automaton_Traits::init_states(numStates)), + 
squash(Automaton_Traits::init_states(numStates)), + accept(Automaton_Traits::init_states(numStates)), + acceptEod(Automaton_Traits::init_states(numStates)), + toppable(Automaton_Traits::init_states(numStates)), + dead(Automaton_Traits::init_states(numStates)), + prunable(prunable_in) { + populateInit(graph, unused, &init, &initDS, &v_by_index); + populateAccepts(graph, unused, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; if (initDS == init) { @@ -378,18 +378,18 @@ public: NFAVertex v = sq.first; u32 vert_id = graph[v].index; squash.set(vert_id); - squash_mask[vert_id] - = Automaton_Traits::copy_states(std::move(sq.second), - numStates); + squash_mask[vert_id] + = Automaton_Traits::copy_states(std::move(sq.second), + numStates); } cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { - dynamic_bitset<> temp(numStates); - markToppableStarts(graph, unused, single_trigger, triggers, - &temp); - toppable = Automaton_Traits::copy_states(std::move(temp), - numStates); + dynamic_bitset<> temp(numStates); + markToppableStarts(graph, unused, single_trigger, triggers, + &temp); + toppable = Automaton_Traits::copy_states(std::move(temp), + numStates); } } @@ -399,7 +399,7 @@ public: } const vector<StateSet> initial() { - vector<StateSet> rv = {init}; + vector<StateSet> rv = {init}; if (start_floating != DEAD_STATE && start_floating != start_anchored) { rv.push_back(initDS); } @@ -432,13 +432,13 @@ public: } return allExternalReports(*rm, test_reports); } - + private: const ReportManager *rm; public: const NGHolder &graph; u32 numStates; - const flat_set<NFAVertex> unused; + const flat_set<NFAVertex> unused; vector<NFAVertex> v_by_index; vector<CharReach> cr_by_index; /* pre alpha'ed */ StateSet init; @@ -448,7 +448,7 @@ public: StateSet acceptEod; StateSet toppable; /* states which are allowed to be on when a top arrives, * triggered dfas only */ - StateSet dead; + StateSet dead; map<u32, StateSet> squash_mask; bool prunable; array<u16, 
ALPHABET_SIZE> alpha; @@ -459,83 +459,83 @@ public: u16 start_floating; }; -struct Big_Traits { - using StateSet = dynamic_bitset<>; - using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; +struct Big_Traits { + using StateSet = dynamic_bitset<>; + using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; - static StateSet init_states(u32 num) { - return StateSet(num); - } + static StateSet init_states(u32 num) { + return StateSet(num); + } - static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) { - assert(in.size() == num); - return in; - } -}; + static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) { + assert(in.size() == num); + return in; + } +}; -class Automaton_Big : public Automaton_Base<Big_Traits> { -public: - Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - bool single_trigger, - const vector<vector<CharReach>> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, single_trigger, triggers, - prunable_in) {} -}; +class Automaton_Big : public Automaton_Base<Big_Traits> { +public: + Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, + bool single_trigger, + const vector<vector<CharReach>> &triggers, bool prunable_in) + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, + prunable_in) {} +}; -struct Graph_Traits { - using StateSet = bitfield<NFA_STATE_LIMIT>; - using StateMap = unordered_map<StateSet, dstate_id_t>; +struct Graph_Traits { + using StateSet = bitfield<NFA_STATE_LIMIT>; + using StateMap = unordered_map<StateSet, dstate_id_t>; - static StateSet init_states(UNUSED u32 num) { - assert(num <= NFA_STATE_LIMIT); - return StateSet(); + static StateSet init_states(UNUSED u32 num) { + assert(num <= NFA_STATE_LIMIT); + return StateSet(); } - static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { - StateSet out = init_states(num); + static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { + StateSet out = 
init_states(num); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); } return out; } -}; +}; -class Automaton_Graph : public Automaton_Base<Graph_Traits> { +class Automaton_Graph : public Automaton_Base<Graph_Traits> { public: - Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - bool single_trigger, - const vector<vector<CharReach>> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, single_trigger, triggers, - prunable_in) {} -}; - -} // namespace - -static -bool startIsRedundant(const NGHolder &g) { - set<NFAVertex> start; - set<NFAVertex> startDs; - - insert(&start, adjacent_vertices(g.start, g)); - insert(&startDs, adjacent_vertices(g.startDs, g)); - - return start == startDs; -} - -flat_set<NFAVertex> getRedundantStarts(const NGHolder &g) { - flat_set<NFAVertex> dead; - if (startIsRedundant(g)) { - dead.insert(g.start); - } - if (proper_out_degree(g.startDs, g) == 0) { - dead.insert(g.startDs); - } - return dead; -} - -unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, - const ReportManager *rm, bool single_trigger, + Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, + bool single_trigger, + const vector<vector<CharReach>> &triggers, bool prunable_in) + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, + prunable_in) {} +}; + +} // namespace + +static +bool startIsRedundant(const NGHolder &g) { + set<NFAVertex> start; + set<NFAVertex> startDs; + + insert(&start, adjacent_vertices(g.start, g)); + insert(&startDs, adjacent_vertices(g.startDs, g)); + + return start == startDs; +} + +flat_set<NFAVertex> getRedundantStarts(const NGHolder &g) { + flat_set<NFAVertex> dead; + if (startIsRedundant(g)) { + dead.insert(g.start); + } + if (proper_out_degree(g.startDs, g) == 0) { + dead.insert(g.startDs); + } + return dead; +} + +unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, + const ReportManager *rm, bool single_trigger, const 
vector<vector<CharReach>> &triggers, const Grey &grey, bool finalChance) { if (!grey.allowMcClellan) { @@ -546,9 +546,9 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, to_string(graph.kind).c_str()); assert(allMatchStatesHaveReports(graph)); - bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph); - assert(rm || !has_managed_reports(graph)); - if (!has_managed_reports(graph)) { + bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph); + assert(rm || !has_managed_reports(graph)); + if (!has_managed_reports(graph)) { rm = nullptr; } @@ -563,18 +563,18 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, const u32 numStates = num_vertices(graph); DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); - if (numStates > FINAL_DFA_STATE_LIMIT) { - DEBUG_PRINTF("rejecting nfa as too many vertices\n"); - return nullptr; - } - - auto rdfa = ue2::make_unique<raw_dfa>(graph.kind); - + if (numStates > FINAL_DFA_STATE_LIMIT) { + DEBUG_PRINTF("rejecting nfa as too many vertices\n"); + return nullptr; + } + + auto rdfa = ue2::make_unique<raw_dfa>(graph.kind); + if (numStates <= NFA_STATE_LIMIT) { /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ - Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); - if (!determinise(n, rdfa->states, state_limit)) { + Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); + if (!determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ } @@ -585,8 +585,8 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph, rdfa->alpha_remap = n.alpha; } else { /* Slow path. Too many states to use Automaton_Graph. 
*/ - Automaton_Big n(rm, graph, single_trigger, triggers, prunable); - if (!determinise(n, rdfa->states, state_limit)) { + Automaton_Big n(rm, graph, single_trigger, triggers, prunable); + if (!determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h index f069d7336f..a83e6b0b88 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "nfagraph/ng_holder.h" #include "util/charreach.h" #include "util/graph_range.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <boost/dynamic_bitset.hpp> @@ -63,25 +63,25 @@ void getFullTransitionFromState(const raw_dfa &n, u16 state, u16 *out_table); /** produce a map of states on which it is valid to receive tops */ -void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, +void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused, bool single_trigger, const std::vector<std::vector<CharReach>> &triggers, boost::dynamic_bitset<> *out); -/** - * \brief Returns a set of start vertices that will not participate in an - * implementation of this graph. These are either starts with no successors or - * starts which are redundant with startDs. - */ -flat_set<NFAVertex> getRedundantStarts(const NGHolder &g); - +/** + * \brief Returns a set of start vertices that will not participate in an + * implementation of this graph. 
These are either starts with no successors or + * starts which are redundant with startDs. + */ +flat_set<NFAVertex> getRedundantStarts(const NGHolder &g); + template<typename autom> void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId, const typename autom::StateSet &in, typename autom::StateSet *next) { typedef typename autom::StateSet StateSet; const NGHolder &graph = nfa.graph; - const auto &unused = nfa.unused; + const auto &unused = nfa.unused; const auto &alpha = nfa.alpha; const StateSet &squash = nfa.squash; const std::map<u32, StateSet> &squash_mask = nfa.squash_mask; @@ -99,7 +99,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId, NFAVertex u = vByStateId[i]; for (const auto &v : adjacent_vertices_range(u, graph)) { - if (contains(unused, v)) { + if (contains(unused, v)) { continue; } succ.set(graph[v].index); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp index 8aaaf99fde..e51307d296 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,20 +69,20 @@ #include "util/charreach.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" -#include "util/flat_containers.h" +#include "util/graph_small_color_map.h" +#include "util/flat_containers.h" #include "ue2common.h" -#include <boost/dynamic_bitset.hpp> -#include <boost/graph/depth_first_search.hpp> -#include <boost/graph/filtered_graph.hpp> - +#include <boost/dynamic_bitset.hpp> +#include <boost/graph/depth_first_search.hpp> +#include <boost/graph/filtered_graph.hpp> + #include <map> #include <set> #include 
<vector> using namespace std; -using boost::make_filtered_graph; +using boost::make_filtered_graph; namespace ue2 { @@ -101,8 +101,8 @@ void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. - auto succ_v = succs(v, g); - flat_set<NFAVertex> succ_u; + auto succ_v = succs(v, g); + flat_set<NFAVertex> succ_u; for (auto u : inv_adjacent_vertices_range(v, g)) { succ_u.clear(); @@ -111,7 +111,7 @@ void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering, goto next_cand; } } - DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); + DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); cand->push_back(v); next_cand:; } @@ -132,8 +132,8 @@ void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. - auto pred_v = preds(v, g); - flat_set<NFAVertex> pred_u; + auto pred_v = preds(v, g); + flat_set<NFAVertex> pred_u; for (auto u : adjacent_vertices_range(v, g)) { pred_u.clear(); @@ -142,7 +142,7 @@ void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering, goto next_cand; } } - DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); + DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); cand->push_back(v); next_cand:; } @@ -179,7 +179,7 @@ void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { static set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p, bool ignore_starts, const CharReach &new_cr) { - auto cand = preds<set<NFAVertex>>(p, g); + auto cand = preds<set<NFAVertex>>(p, g); if (ignore_starts) { cand.erase(g.startDs); } @@ -215,7 +215,7 @@ set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p, static set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p, const CharReach &new_cr) { - auto cand = succs<set<NFAVertex>>(p, g); + auto cand = succs<set<NFAVertex>>(p, g); /* remove 
elements from cand until the sustain set property holds */ bool changed; do { @@ -245,7 +245,7 @@ set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p, static bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { - DEBUG_PRINTF("considering vertex %zu\n", g[v].index); + DEBUG_PRINTF("considering vertex %zu\n", g[v].index); const CharReach &v_cr = g[v].char_reach; CharReach add; @@ -264,7 +264,7 @@ bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { if (p == v) { continue; } - DEBUG_PRINTF("looking at pred %zu\n", g[p].index); + DEBUG_PRINTF("looking at pred %zu\n", g[p].index); bool ignore_sds = som; /* if we are tracking som, entries into a state from sds are significant. */ @@ -294,13 +294,13 @@ bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { /* the cr can be increased */ g[v].char_reach = add; - DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); + DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); return true; } static bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { - DEBUG_PRINTF("considering vertex %zu\n", g[v].index); + DEBUG_PRINTF("considering vertex %zu\n", g[v].index); const CharReach &v_cr = g[v].char_reach; CharReach add; @@ -319,7 +319,7 @@ bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { if (p == v) { continue; } - DEBUG_PRINTF("looking at succ %zu\n", g[p].index); + DEBUG_PRINTF("looking at succ %zu\n", g[p].index); set<NFAVertex> sustain = findSustainSet_rev(g, p, add); DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); @@ -344,7 +344,7 @@ bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { /* the cr can be increased */ g[v].char_reach = add; - DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); + DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); return true; } @@ -393,7 +393,7 @@ bool improveGraph(NGHolder &g, som_type som) { * enlargeCyclicCR. 
*/ CharReach reduced_cr(NFAVertex v, const NGHolder &g, const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) { - DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index); + DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index); CharReach v_cr = g[v].char_reach; if (proper_in_degree(v, g) != 1) { return v_cr; @@ -551,178 +551,178 @@ bool mergeCyclicDotStars(NGHolder &g) { return true; } -struct PrunePathsInfo { - explicit PrunePathsInfo(const NGHolder &g) - : color_map(make_small_color_map(g)), bad(num_vertices(g)) {} - - void clear() { - no_explore.clear(); - color_map.fill(small_color::white); - bad.reset(); - } - - flat_set<NFAEdge> no_explore; - using color_map_type = decltype(make_small_color_map(NGHolder())); - color_map_type color_map; - boost::dynamic_bitset<> bad; -}; - -/** - * Finds the set of vertices that cannot be on if v is not on, setting their - * indices in bitset PrunePathsInfo::bad. - */ -static -void findDependentVertices(const NGHolder &g, PrunePathsInfo &info, - NFAVertex v) { - /* We need to exclude any vertex that may be reached on a path which is - * incompatible with the vertex v being on. */ - - /* A vertex u is bad if: - * 1) its reach may be incompatible with v (not a subset) - * 2) it if there is an edge from a bad vertex b and there is either not an - * edge v->u or not an edge b->v. - * Note: 2) means v is never bad as it has a selfloop - * - * Can do this with a DFS from all the initial bad states with a conditional - * check down edges. Alternately can just filter these edges out of the - * graph first. - */ - for (NFAVertex t : adjacent_vertices_range(v, g)) { - for (NFAEdge e : in_edges_range(t, g)) { - NFAVertex s = source(e, g); - if (edge(s, v, g).second) { - info.no_explore.insert(e); - } - } - } - - auto filtered_g = - make_filtered_graph(g, make_bad_edge_filter(&info.no_explore)); - - // We use a bitset to track bad vertices, rather than filling a (potentially - // very large) set structure. 
- auto recorder = make_vertex_index_bitset_recorder(info.bad); - - for (NFAVertex b : vertices_range(g)) { - if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { - continue; - } - boost::depth_first_visit(filtered_g, b, recorder, info.color_map); - } -} - -static -bool willBeEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, - const NGHolder &g) { - return is_subset_of(preds(main_cyclic, g), preds(v, g)); -} - -static -bool sometimesEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, - const NGHolder &g) { - return has_intersection(preds(main_cyclic, g), preds(v, g)); -} - -static -bool pruneUsingSuccessors(NGHolder &g, PrunePathsInfo &info, NFAVertex u, - som_type som) { - if (som && (is_virtual_start(u, g) || u == g.startDs)) { - return false; - } - - bool changed = false; - DEBUG_PRINTF("using cyclic %zu as base\n", g[u].index); - info.clear(); - findDependentVertices(g, info, u); - vector<NFAVertex> u_succs; - for (NFAVertex v : adjacent_vertices_range(u, g)) { - if (som && is_virtual_start(v, g)) { - /* as v is virtual start, its som has been reset so can not override - * existing in progress matches. 
*/ - continue; - } - u_succs.push_back(v); - } - - stable_sort(u_succs.begin(), u_succs.end(), - [&](NFAVertex a, NFAVertex b) { - return g[a].char_reach.count() > g[b].char_reach.count(); - }); - - flat_set<NFAEdge> dead; - - for (NFAVertex v : u_succs) { - DEBUG_PRINTF(" using %zu as killer\n", g[v].index); - /* Need to distinguish between vertices that are switched on after the - * cyclic vs vertices that are switched on concurrently with the cyclic - * if (subject to a suitable reach) */ - bool v_peer_of_cyclic = willBeEnabledConcurrently(u, v, g); - for (NFAVertex s : adjacent_vertices_range(v, g)) { - DEBUG_PRINTF(" looking at preds of %zu\n", g[s].index); - for (NFAEdge e : in_edges_range(s, g)) { - NFAVertex p = source(e, g); - if (info.bad.test(g[p].index) || p == v || p == u - || p == g.accept) { - DEBUG_PRINTF("%zu not a cand\n", g[p].index); - continue; - } - if (is_any_accept(s, g) && g[p].reports != g[v].reports) { - DEBUG_PRINTF("%zu bad reports\n", g[p].index); - continue; - } - /* the out-edges of a vertex that may be enabled on the same - * byte as the cyclic can only be killed by the out-edges of a - * peer vertex which will be enabled with the cyclic (a non-peer - * may not be switched on until another byte is processed). 
*/ - if (!v_peer_of_cyclic - && sometimesEnabledConcurrently(u, p, g)) { - DEBUG_PRINTF("%zu can only be squashed by a proper peer\n", - g[p].index); - continue; - } - - if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { - dead.insert(e); - changed = true; - DEBUG_PRINTF("removing edge %zu->%zu\n", g[p].index, - g[s].index); - } else if (is_subset_of(succs(p, g), succs(u, g))) { - if (is_match_vertex(p, g) - && !is_subset_of(g[p].reports, g[v].reports)) { - continue; - } - DEBUG_PRINTF("updating reach on %zu\n", g[p].index); - changed |= (g[p].char_reach & g[v].char_reach).any(); - g[p].char_reach &= ~g[v].char_reach; - } - - } - } - remove_edges(dead, g); - dead.clear(); - } - - DEBUG_PRINTF("changed %d\n", (int)changed); - return changed; -} - -bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { - /* TODO: the reverse form of this is also possible */ - bool changed = false; - PrunePathsInfo info(g); - - for (NFAVertex v : vertices_range(g)) { - if (hasSelfLoop(v, g) && g[v].char_reach.all()) { - changed |= pruneUsingSuccessors(g, info, v, som); - } - } - - if (changed) { - pruneUseless(g); - clearReports(g); - } - - return changed; -} - +struct PrunePathsInfo { + explicit PrunePathsInfo(const NGHolder &g) + : color_map(make_small_color_map(g)), bad(num_vertices(g)) {} + + void clear() { + no_explore.clear(); + color_map.fill(small_color::white); + bad.reset(); + } + + flat_set<NFAEdge> no_explore; + using color_map_type = decltype(make_small_color_map(NGHolder())); + color_map_type color_map; + boost::dynamic_bitset<> bad; +}; + +/** + * Finds the set of vertices that cannot be on if v is not on, setting their + * indices in bitset PrunePathsInfo::bad. + */ +static +void findDependentVertices(const NGHolder &g, PrunePathsInfo &info, + NFAVertex v) { + /* We need to exclude any vertex that may be reached on a path which is + * incompatible with the vertex v being on. 
*/ + + /* A vertex u is bad if: + * 1) its reach may be incompatible with v (not a subset) + * 2) it if there is an edge from a bad vertex b and there is either not an + * edge v->u or not an edge b->v. + * Note: 2) means v is never bad as it has a selfloop + * + * Can do this with a DFS from all the initial bad states with a conditional + * check down edges. Alternately can just filter these edges out of the + * graph first. + */ + for (NFAVertex t : adjacent_vertices_range(v, g)) { + for (NFAEdge e : in_edges_range(t, g)) { + NFAVertex s = source(e, g); + if (edge(s, v, g).second) { + info.no_explore.insert(e); + } + } + } + + auto filtered_g = + make_filtered_graph(g, make_bad_edge_filter(&info.no_explore)); + + // We use a bitset to track bad vertices, rather than filling a (potentially + // very large) set structure. + auto recorder = make_vertex_index_bitset_recorder(info.bad); + + for (NFAVertex b : vertices_range(g)) { + if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { + continue; + } + boost::depth_first_visit(filtered_g, b, recorder, info.color_map); + } +} + +static +bool willBeEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, + const NGHolder &g) { + return is_subset_of(preds(main_cyclic, g), preds(v, g)); +} + +static +bool sometimesEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, + const NGHolder &g) { + return has_intersection(preds(main_cyclic, g), preds(v, g)); +} + +static +bool pruneUsingSuccessors(NGHolder &g, PrunePathsInfo &info, NFAVertex u, + som_type som) { + if (som && (is_virtual_start(u, g) || u == g.startDs)) { + return false; + } + + bool changed = false; + DEBUG_PRINTF("using cyclic %zu as base\n", g[u].index); + info.clear(); + findDependentVertices(g, info, u); + vector<NFAVertex> u_succs; + for (NFAVertex v : adjacent_vertices_range(u, g)) { + if (som && is_virtual_start(v, g)) { + /* as v is virtual start, its som has been reset so can not override + * existing in progress matches. 
*/ + continue; + } + u_succs.push_back(v); + } + + stable_sort(u_succs.begin(), u_succs.end(), + [&](NFAVertex a, NFAVertex b) { + return g[a].char_reach.count() > g[b].char_reach.count(); + }); + + flat_set<NFAEdge> dead; + + for (NFAVertex v : u_succs) { + DEBUG_PRINTF(" using %zu as killer\n", g[v].index); + /* Need to distinguish between vertices that are switched on after the + * cyclic vs vertices that are switched on concurrently with the cyclic + * if (subject to a suitable reach) */ + bool v_peer_of_cyclic = willBeEnabledConcurrently(u, v, g); + for (NFAVertex s : adjacent_vertices_range(v, g)) { + DEBUG_PRINTF(" looking at preds of %zu\n", g[s].index); + for (NFAEdge e : in_edges_range(s, g)) { + NFAVertex p = source(e, g); + if (info.bad.test(g[p].index) || p == v || p == u + || p == g.accept) { + DEBUG_PRINTF("%zu not a cand\n", g[p].index); + continue; + } + if (is_any_accept(s, g) && g[p].reports != g[v].reports) { + DEBUG_PRINTF("%zu bad reports\n", g[p].index); + continue; + } + /* the out-edges of a vertex that may be enabled on the same + * byte as the cyclic can only be killed by the out-edges of a + * peer vertex which will be enabled with the cyclic (a non-peer + * may not be switched on until another byte is processed). 
*/ + if (!v_peer_of_cyclic + && sometimesEnabledConcurrently(u, p, g)) { + DEBUG_PRINTF("%zu can only be squashed by a proper peer\n", + g[p].index); + continue; + } + + if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { + dead.insert(e); + changed = true; + DEBUG_PRINTF("removing edge %zu->%zu\n", g[p].index, + g[s].index); + } else if (is_subset_of(succs(p, g), succs(u, g))) { + if (is_match_vertex(p, g) + && !is_subset_of(g[p].reports, g[v].reports)) { + continue; + } + DEBUG_PRINTF("updating reach on %zu\n", g[p].index); + changed |= (g[p].char_reach & g[v].char_reach).any(); + g[p].char_reach &= ~g[v].char_reach; + } + + } + } + remove_edges(dead, g); + dead.clear(); + } + + DEBUG_PRINTF("changed %d\n", (int)changed); + return changed; +} + +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { + /* TODO: the reverse form of this is also possible */ + bool changed = false; + PrunePathsInfo info(g); + + for (NFAVertex v : vertices_range(g)) { + if (hasSelfLoop(v, g) && g[v].char_reach.all()) { + changed |= pruneUsingSuccessors(g, info, v, som); + } + } + + if (changed) { + pruneUseless(g); + clearReports(g); + } + + return changed; +} + } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h index 5ed089dc05..9d89a87ede 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,8 +60,8 @@ struct BoundedRepeatSummary { bool improveGraph(NGHolder &g, som_type som); /** Sometimes the reach of a vertex is greater than it needs to be to reduce - * stop chars for the benefit of the rest of our code base (accel, etc). 
In - * these circumstances, we can treat the reach as the smaller one as + * stop chars for the benefit of the rest of our code base (accel, etc). In + * these circumstances, we can treat the reach as the smaller one as * the graphs are equivalent. */ CharReach reduced_cr(NFAVertex v, const NGHolder &g, const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic); @@ -72,13 +72,13 @@ std::vector<CharReach> reduced_cr(const NGHolder &g, /** Remove cyclic stars connected to start */ bool mergeCyclicDotStars(NGHolder &g); -/** - * Given a cyclic state 'c' with a broad reach and a later state 'v' that is - * only reachable if c is still on, then any edges to a successor of a direct - * successor of c with reach a superset of v are redundant. - */ -bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som); - +/** + * Given a cyclic state 'c' with a broad reach and a later state 'v' that is + * only reachable if c is still on, then any edges to a successor of a direct + * successor of c with reach a superset of v are redundant. 
+ */ +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som); + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp index 780a319f5d..a3631f6d85 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "ue2common.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include <algorithm> #include <boost/graph/boykov_kolmogorov_max_flow.hpp> @@ -93,7 +93,7 @@ void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge, if (it == allEdges.end()) { // No reverse edge, add one. NFAVertex u = source(fwd, g), v = target(fwd, g); - NFAEdge rev = add_edge(v, u, g); + NFAEdge rev = add_edge(v, u, g); it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; // Add to capacity map. u32 revIndex = g[rev].index; @@ -112,14 +112,14 @@ static void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) { remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g); capacityMap.resize(idx); - renumber_edges(g); + renumber_edges(g); } /** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and * colour map (from which we can find the min cut). 
*/ static u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in, - decltype(make_small_color_map(NGHolder())) &colorMap) { + decltype(make_small_color_map(NGHolder())) &colorMap) { vector<u64a> capacityMap = capacityMap_in; NFAVertex src = h.start; NFAVertex sink = h.acceptEod; @@ -143,22 +143,22 @@ u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in, vector<NFAEdge> predecessors(numVertices); vector<s32> distances(numVertices); - auto v_index_map = get(vertex_index, h); - auto e_index_map = get(edge_index, h); + auto v_index_map = get(vertex_index, h); + auto e_index_map = get(edge_index, h); - u64a flow = boykov_kolmogorov_max_flow(h, + u64a flow = boykov_kolmogorov_max_flow(h, make_iterator_property_map(capacityMap.begin(), e_index_map), make_iterator_property_map(edgeResiduals.begin(), e_index_map), make_iterator_property_map(reverseEdges.begin(), e_index_map), make_iterator_property_map(predecessors.begin(), v_index_map), - colorMap, + colorMap, make_iterator_property_map(distances.begin(), v_index_map), v_index_map, src, sink); // Remove reverse edges from graph. 
removeEdgesFromIndex(h, capacityMap, numRealEdges); - assert(num_edges(h) == numRealEdges); + assert(num_edges(h) == numRealEdges); DEBUG_PRINTF("flow = %llu\n", flow); return flow; @@ -169,8 +169,8 @@ vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) { assert(hasCorrectlyNumberedEdges(h)); assert(hasCorrectlyNumberedVertices(h)); - auto colors = make_small_color_map(h); - u64a flow = getMaxFlow(h, scores, colors); + auto colors = make_small_color_map(h); + u64a flow = getMaxFlow(h, scores, colors); vector<NFAEdge> picked_white; vector<NFAEdge> picked_black; @@ -185,19 +185,19 @@ vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) { continue; // skips, among other things, reverse edges } - auto fromColor = get(colors, from); - auto toColor = get(colors, to); + auto fromColor = get(colors, from); + auto toColor = get(colors, to); - if (fromColor != small_color::white && toColor == small_color::white) { + if (fromColor != small_color::white && toColor == small_color::white) { assert(ec <= INVALID_EDGE_CAP); - DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n", + DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_white_flow += ec; picked_white.push_back(e); } - if (fromColor == small_color::black && toColor != small_color::black) { + if (fromColor == small_color::black && toColor != small_color::black) { assert(ec <= INVALID_EDGE_CAP); - DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n", + DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_black_flow += ec; picked_black.push_back(e); @@ -206,7 +206,7 @@ vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) { DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow, observed_black_flow, observed_white_flow); - if (min(observed_white_flow, observed_black_flow) != flow) { + if (min(observed_white_flow, observed_black_flow) != flow) { DEBUG_PRINTF("bad cut\n"); 
} diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp index 04611872a4..849fa09ad4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Prefilter Reductions. * * This file contains routines for reducing the size of an NFA graph that we @@ -58,8 +58,8 @@ #include "util/graph_range.h" #include <queue> -#include <unordered_map> -#include <unordered_set> +#include <unordered_map> +#include <unordered_set> #include <boost/range/adaptor/map.hpp> @@ -82,10 +82,10 @@ static const size_t BOUNDED_REPEAT_COUNT = 4; /** Scoring penalty for boundary regions. */ static const size_t PENALTY_BOUNDARY = 32; -/** Regions with max bounds greater than this value will have their max bound - * replaced with inf. */ -static const size_t MAX_REPLACE_BOUND = 10000; - +/** Regions with max bounds greater than this value will have their max bound + * replaced with inf. */ +static const size_t MAX_REPLACE_BOUND = 10000; + namespace { /** Information describing a region. */ @@ -94,13 +94,13 @@ struct RegionInfo { u32 id; //!< region id deque<NFAVertex> vertices; //!< vertices in the region CharReach reach; //!< union of region reach - depth minWidth{0}; //!< min width of region subgraph - depth maxWidth{depth::infinity()}; //!< max width of region subgraph + depth minWidth{0}; //!< min width of region subgraph + depth maxWidth{depth::infinity()}; //!< max width of region subgraph bool atBoundary = false; //!< region is next to an accept // Bigger score is better. 
size_t score() const { - // TODO: charreach should be a signal? + // TODO: charreach should be a signal? size_t numVertices = vertices.size(); if (atBoundary) { return numVertices - min(PENALTY_BOUNDARY, numVertices); @@ -128,16 +128,16 @@ struct RegionInfoQueueComp { static void findWidths(const NGHolder &g, - const unordered_map<NFAVertex, u32> &region_map, + const unordered_map<NFAVertex, u32> &region_map, RegionInfo &ri) { NGHolder rg; - unordered_map<NFAVertex, NFAVertex> mapping; + unordered_map<NFAVertex, NFAVertex> mapping; fillHolder(&rg, g, ri.vertices, &mapping); // Wire our entries to start and our exits to accept. for (auto v : ri.vertices) { NFAVertex v_new = mapping[v]; - assert(v_new != NGHolder::null_vertex()); + assert(v_new != NGHolder::null_vertex()); if (isRegionEntry(g, v, region_map) && !edge(rg.start, v_new, rg).second) { @@ -156,7 +156,7 @@ void findWidths(const NGHolder &g, // acc can be either h.accept or h.acceptEod. static void markBoundaryRegions(const NGHolder &h, - const unordered_map<NFAVertex, u32> &region_map, + const unordered_map<NFAVertex, u32> &region_map, map<u32, RegionInfo> &regions, NFAVertex acc) { for (auto v : inv_adjacent_vertices_range(acc, h)) { if (is_special(v, h)) { @@ -164,7 +164,7 @@ void markBoundaryRegions(const NGHolder &h, } u32 id = region_map.at(v); - auto ri = regions.find(id); + auto ri = regions.find(id); if (ri == regions.end()) { continue; // Not tracking this region as it's too small. 
} @@ -175,21 +175,21 @@ void markBoundaryRegions(const NGHolder &h, static map<u32, RegionInfo> findRegionInfo(const NGHolder &h, - const unordered_map<NFAVertex, u32> &region_map) { + const unordered_map<NFAVertex, u32> &region_map) { map<u32, RegionInfo> regions; for (auto v : vertices_range(h)) { if (is_special(v, h)) { continue; } u32 id = region_map.at(v); - RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second; + RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second; ri.vertices.push_back(v); ri.reach |= h[v].char_reach; } // There's no point tracking more information about regions that we won't // consider replacing, so we remove them from the region map. - for (auto it = regions.begin(); it != regions.end();) { + for (auto it = regions.begin(); it != regions.end();) { if (it->second.vertices.size() < MIN_REPLACE_VERTICES) { regions.erase(it++); } else { @@ -214,15 +214,15 @@ map<u32, RegionInfo> findRegionInfo(const NGHolder &h, } static -void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to) { +void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to) { for (const auto &e : in_edges_range(from, g)) { NFAVertex u = source(e, g); - add_edge_if_not_present(u, to, g[e], g); + add_edge_if_not_present(u, to, g[e], g); } } static -void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) { +void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) { for (const auto &e : out_edges_range(from, g)) { NFAVertex t = target(e, g); add_edge_if_not_present(to, t, g[e], g); @@ -235,48 +235,48 @@ void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) { } static -void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) { - // Set of vertices in region, for quick lookups. 
- const unordered_set<NFAVertex> rverts(ri.vertices.begin(), - ri.vertices.end()); - - auto is_interior_in_edge = [&](const NFAEdge &e) { - return contains(rverts, source(e, g)); - }; - - for (auto v : ri.vertices) { - remove_in_edge_if(v, is_interior_in_edge, g); - } -} - -static +void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) { + // Set of vertices in region, for quick lookups. + const unordered_set<NFAVertex> rverts(ri.vertices.begin(), + ri.vertices.end()); + + auto is_interior_in_edge = [&](const NFAEdge &e) { + return contains(rverts, source(e, g)); + }; + + for (auto v : ri.vertices) { + remove_in_edge_if(v, is_interior_in_edge, g); + } +} + +static void replaceRegion(NGHolder &g, const RegionInfo &ri, size_t *verticesAdded, size_t *verticesRemoved) { // TODO: more complex replacements. assert(ri.vertices.size() >= MIN_REPLACE_VERTICES); assert(ri.minWidth.is_finite()); - depth minWidth = ri.minWidth; - depth maxWidth = ri.maxWidth; - - if (maxWidth > depth(MAX_REPLACE_BOUND)) { - DEBUG_PRINTF("using inf instead of large bound %s\n", - maxWidth.str().c_str()); - maxWidth = depth::infinity(); - } - + depth minWidth = ri.minWidth; + depth maxWidth = ri.maxWidth; + + if (maxWidth > depth(MAX_REPLACE_BOUND)) { + DEBUG_PRINTF("using inf instead of large bound %s\n", + maxWidth.str().c_str()); + maxWidth = depth::infinity(); + } + size_t replacementSize; - if (minWidth == maxWidth || maxWidth.is_infinite()) { - replacementSize = minWidth; // {N} or {N,} + if (minWidth == maxWidth || maxWidth.is_infinite()) { + replacementSize = minWidth; // {N} or {N,} } else { - replacementSize = maxWidth; // {N,M} case + replacementSize = maxWidth; // {N,M} case } DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(), replacementSize); - vector<NFAVertex> verts; - verts.reserve(replacementSize); + vector<NFAVertex> verts; + verts.reserve(replacementSize); for (size_t i = 0; i < replacementSize; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = 
ri.reach; @@ -286,21 +286,21 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri, verts.push_back(v); } - if (maxWidth.is_infinite()) { + if (maxWidth.is_infinite()) { add_edge(verts.back(), verts.back(), g); } - removeInteriorEdges(g, ri); + removeInteriorEdges(g, ri); for (size_t i = 0; i < replacementSize; i++) { NFAVertex v_new = verts[i]; for (auto v_old : ri.vertices) { if (i == 0) { - copyInEdges(g, v_old, v_new); + copyInEdges(g, v_old, v_new); } if (i + 1 >= ri.minWidth) { - copyOutEdges(g, v_old, v_new); + copyOutEdges(g, v_old, v_new); } } } @@ -360,7 +360,7 @@ void reduceRegions(NGHolder &h) { // We may have vertices that have edges to both accept and acceptEod: in // this case, we can optimize for performance by removing the acceptEod // edges. - remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h); + remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h); } void prefilterReductions(NGHolder &h, const CompileContext &cc) { @@ -374,20 +374,20 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { return; } - DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", - num_vertices(h), num_edges(h)); + DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", + num_vertices(h), num_edges(h)); - renumber_vertices(h); - renumber_edges(h); + renumber_vertices(h); + renumber_edges(h); reduceRegions(h); - renumber_vertices(h); - renumber_edges(h); - - DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", - num_vertices(h), num_edges(h)); - + renumber_vertices(h); + renumber_edges(h); + + DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", + num_vertices(h), num_edges(h)); + } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp index adda70312f..88b499950b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel 
Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "util/container.h" #include "util/graph.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include "util/report_manager.h" #include <deque> @@ -58,8 +58,8 @@ namespace ue2 { void pruneUnreachable(NGHolder &g) { deque<NFAVertex> dead; - if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g) - && edge(g.accept, g.acceptEod, g).second) { + if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g) + && edge(g.accept, g.acceptEod, g).second) { // Trivial case: there are no in-edges to our accepts (other than // accept->acceptEod), so all non-specials are unreachable. for (auto v : vertices_range(g)) { @@ -70,10 +70,10 @@ void pruneUnreachable(NGHolder &g) { } else { // Walk a reverse graph from acceptEod with Boost's depth_first_visit // call. - typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph; - RevNFAGraph revg(g); + typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph; + RevNFAGraph revg(g); - map<RevNFAGraph::vertex_descriptor, default_color_type> colours; + map<RevNFAGraph::vertex_descriptor, default_color_type> colours; depth_first_visit(revg, g.acceptEod, make_dfs_visitor(boost::null_visitor()), @@ -104,23 +104,23 @@ void pruneUnreachable(NGHolder &g) { template<class nfag_t> static -bool pruneForwardUseless(NGHolder &h, const nfag_t &g, - typename nfag_t::vertex_descriptor s, - decltype(make_small_color_map(NGHolder())) &colors) { +bool pruneForwardUseless(NGHolder &h, const nfag_t &g, + typename nfag_t::vertex_descriptor s, + decltype(make_small_color_map(NGHolder())) &colors) { // Begin with all vertices set to white, as DFV only marks visited // vertices. 
- colors.fill(small_color::white); + colors.fill(small_color::white); - depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors); + depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors); vector<NFAVertex> dead; // All non-special vertices that are still white can be removed. for (auto v : vertices_range(g)) { - if (!is_special(v, g) && get(colors, v) == small_color::white) { - DEBUG_PRINTF("vertex %zu is unreachable from %zu\n", + if (!is_special(v, g) && get(colors, v) == small_color::white) { + DEBUG_PRINTF("vertex %zu is unreachable from %zu\n", g[v].index, g[s].index); - dead.push_back(NFAVertex(v)); + dead.push_back(NFAVertex(v)); } } @@ -139,19 +139,19 @@ bool pruneForwardUseless(NGHolder &h, const nfag_t &g, void pruneUseless(NGHolder &g, bool renumber) { DEBUG_PRINTF("pruning useless vertices\n"); assert(hasCorrectlyNumberedVertices(g)); - auto colors = make_small_color_map(g); + auto colors = make_small_color_map(g); - bool work_done = pruneForwardUseless(g, g, g.start, colors); - work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g), - g.acceptEod, colors); + bool work_done = pruneForwardUseless(g, g, g.start, colors); + work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g), + g.acceptEod, colors); if (!work_done) { return; } if (renumber) { - renumber_edges(g); - renumber_vertices(g); + renumber_edges(g); + renumber_vertices(g); } } @@ -168,7 +168,7 @@ void pruneEmptyVertices(NGHolder &g) { const CharReach &cr = g[v].char_reach; if (cr.none()) { - DEBUG_PRINTF("empty: %zu\n", g[v].index); + DEBUG_PRINTF("empty: %zu\n", g[v].index); dead.push_back(v); } } @@ -223,14 +223,14 @@ void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) { static bool isDominatedByReporter(const NGHolder &g, - const unordered_map<NFAVertex, NFAVertex> &dom, + const unordered_map<NFAVertex, NFAVertex> &dom, NFAVertex v, ReportID report_id) { for (auto it = dom.find(v); it != end(dom); it = 
dom.find(v)) { NFAVertex u = it->second; // Note: reporters with edges only to acceptEod are not considered to // dominate. if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) { - DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n", + DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n", g[v].index, g[u].index, report_id); return true; } @@ -292,7 +292,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { } - sort(begin(reporters), end(reporters)); + sort(begin(reporters), end(reporters)); reporters.erase(unique(begin(reporters), end(reporters)), end(reporters)); DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n", @@ -311,14 +311,14 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { continue; } if (isDominatedByReporter(g, dom, v, report_id)) { - DEBUG_PRINTF("removed dominated report %u from vertex %zu\n", + DEBUG_PRINTF("removed dominated report %u from vertex %zu\n", report_id, g[v].index); g[v].reports.erase(report_id); } } if (g[v].reports.empty()) { - DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n", + DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n", g[v].index); remove_edge(v, g.accept, g); remove_edge(v, g.acceptEod, g); @@ -333,7 +333,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) { remove_edge(v, v, g); modified = true; - DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index); + DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index); } } @@ -345,7 +345,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { // We may have only removed self-loops, in which case pruneUseless wouldn't // renumber, so we do edge renumbering explicitly here. 
- renumber_edges(g); + renumber_edges(g); } /** Removes the given Report ID from vertices connected to accept, and then @@ -384,8 +384,8 @@ void pruneReport(NGHolder &g, ReportID report) { remove_edges(dead, g); pruneUnreachable(g); - renumber_vertices(g); - renumber_edges(g); + renumber_vertices(g); + renumber_edges(g); } /** Removes all Report IDs bar the given one from vertices connected to accept, @@ -427,8 +427,8 @@ void pruneAllOtherReports(NGHolder &g, ReportID report) { remove_edges(dead, g); pruneUnreachable(g); - renumber_vertices(g); - renumber_edges(g); + renumber_vertices(g); + renumber_edges(g); } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp index 984518b0fc..76996b6da8 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -59,7 +59,7 @@ static size_t countChain(const NGHolder &g, NFAVertex v) { size_t count = 0; while (v) { - DEBUG_PRINTF("counting vertex %zu\n", g[v].index); + DEBUG_PRINTF("counting vertex %zu\n", g[v].index); if (is_special(v, g)) { break; } @@ -79,7 +79,7 @@ void wireNewAccepts(NGHolder &g, NFAVertex head, continue; } - DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index); + DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index); assert(!edge(u, g.accept, g).second); assert(!edge(u, g.acceptEod, g).second); add_edge(u, g.accept, g); @@ -94,7 +94,7 @@ void wireNewAccepts(NGHolder &g, NFAVertex head, static bool isFixedDepth(const NGHolder &g, NFAVertex v) { // If the vertex is reachable from startDs, it can't be fixed depth. 
- auto depthFromStartDs = calcDepthsFrom(g, g.startDs); + auto depthFromStartDs = calcDepthsFrom(g, g.startDs); u32 idx = g[v].index; const DepthMinMax &ds = depthFromStartDs.at(idx); @@ -103,7 +103,7 @@ bool isFixedDepth(const NGHolder &g, NFAVertex v) { return false; } - auto depthFromStart = calcDepthsFrom(g, g.start); + auto depthFromStart = calcDepthsFrom(g, g.start); /* we can still consider the head of a puff chain as at fixed depth if * it has a self-loop: so we look at all the preds of v (other than v @@ -134,13 +134,13 @@ bool singleStart(const NGHolder &g) { for (auto v : adjacent_vertices_range(g.start, g)) { if (!is_special(v, g)) { - DEBUG_PRINTF("saw %zu\n", g[v].index); + DEBUG_PRINTF("saw %zu\n", g[v].index); seen.insert(v); } } for (auto v : adjacent_vertices_range(g.startDs, g)) { if (!is_special(v, g)) { - DEBUG_PRINTF("saw %zu\n", g[v].index); + DEBUG_PRINTF("saw %zu\n", g[v].index); seen.insert(v); } } @@ -156,7 +156,7 @@ bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { for (auto u : inv_adjacent_vertices_range(head, g)) { if (!g[u].char_reach.isSubsetOf(puff_escapes)) { - DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index, + DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index, g[head].index); return false; } @@ -170,7 +170,7 @@ bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { * */ static bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { - DEBUG_PRINTF("head = %zu\n", g[head].index); + DEBUG_PRINTF("head = %zu\n", g[head].index); const CharReach &puff_cr = g[head].char_reach; @@ -184,14 +184,14 @@ bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { if (proper_in_degree(head, g) == 1 && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) { head = getSoleSourceVertex(g, head); - DEBUG_PRINTF("temp new head = %zu\n", g[head].index); + DEBUG_PRINTF("temp new head = %zu\n", g[head].index); } for (auto s : inv_adjacent_vertices_range(head, g)) { - DEBUG_PRINTF("s = %zu\n", g[s].index); + 
DEBUG_PRINTF("s = %zu\n", g[s].index); if (!puff_cr.isSubsetOf(g[s].char_reach)) { - DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index, - g[head].index); + DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index, + g[head].index); return false; } @@ -266,18 +266,18 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, RoseBuild &rose, ReportManager &rm, flat_set<ReportID> &chain_reports, bool prefilter) { DEBUG_PRINTF("constructing Puff for report %u\n", report); - DEBUG_PRINTF("a = %zu\n", g[a].index); - - const Report &puff_report = rm.getReport(report); - const bool simple_exhaust = isSimpleExhaustible(puff_report); + DEBUG_PRINTF("a = %zu\n", g[a].index); + const Report &puff_report = rm.getReport(report); + const bool simple_exhaust = isSimpleExhaustible(puff_report); + const bool pureAnchored = a == g.start && singleStart(g); if (!pureAnchored) { if (a == g.startDs || a == g.start) { DEBUG_PRINTF("add outfix ar(false)\n"); - raw_puff rp(width, unbounded, report, cr, auto_restart, - simple_exhaust); + raw_puff rp(width, unbounded, report, cr, auto_restart, + simple_exhaust); rose.addOutfix(rp); return; } @@ -291,7 +291,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, u32 squashDistance = allowedSquashDistance(cr, width, g, puffv, prefilter); - Report ir = makeMpvTrigger(event, squashDistance); + Report ir = makeMpvTrigger(event, squashDistance); /* only need to trigger once if floatingUnboundedDot */ bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth; if (floatingUnboundedDot) { @@ -302,7 +302,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, } else { DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart); assert(!auto_restart || unbounded); - raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); + raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); rose.addOutfix(rp); } } @@ -347,7 +347,7 @@ bool 
doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, } nodes.push_back(a); - DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index, + DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index, in_degree(a, g)); a = getSoleSourceVertex(g, a); @@ -385,10 +385,10 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, bool auto_restart = false; - DEBUG_PRINTF("a = %zu\n", g[a].index); + DEBUG_PRINTF("a = %zu\n", g[a].index); if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) { - DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index); + DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index); if (nodes.size() < MIN_PUFF_LENGTH) { return false; } else { @@ -470,7 +470,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, } NFAVertex puffv = nodes.back(); - assert(puffv != NGHolder::null_vertex()); + assert(puffv != NGHolder::null_vertex()); u32 width = countChain(g, nodes.back()); flat_set<ReportID> chain_reports; diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp index 06b9daeeca..9f475b5345 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,7 +78,7 @@ #include "ng_util.h" #include "ue2common.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include <algorithm> @@ -158,7 +158,7 @@ void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) { static void inplaceIntersection(vector<NFAVertex> &vset1, const flat_set<NFAVertex> &vset2) { - const NFAVertex GONE = NGHolder::null_vertex(); + 
const NFAVertex GONE = NGHolder::null_vertex(); vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end(); flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end(); @@ -307,8 +307,8 @@ void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, static bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { - NFAEdge e = edge(g.start, v, g); - return e && !g[e].tops.empty(); + NFAEdge e = edge(g.start, v, g); + return e && !g[e].tops.empty(); } /** Transform (1), removal of redundant vertices. */ @@ -342,7 +342,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, } if (info.pred.empty() || info.succ.empty()) { - DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index); + DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index); assert(0); // non-special states should always have succ/pred lists continue; } @@ -441,7 +441,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, CharReach &otherReach = g[t].char_reach; if (currReach.isSubsetOf(otherReach)) { - DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", + DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", g[v].index, g[t].index); markForRemoval(v, infoMap, removable); changed = true; @@ -568,8 +568,8 @@ bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, CharReach &otherReach = g[t].char_reach; otherReach |= currReach; // v can be removed - DEBUG_PRINTF("removing redundant vertex %zu and merging " - "reachability with vertex %zu\n", + DEBUG_PRINTF("removing redundant vertex %zu and merging " + "reachability with vertex %zu\n", g[v].index, g[t].index); markForRemoval(v, infoMap, removable); changed = true; @@ -635,14 +635,14 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = source(e, g); - using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>; + using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>; 
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor; // Walk the graph backwards from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { - depth_first_visit(RevGraph(g), start, + depth_first_visit(RevGraph(g), start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom)); @@ -664,15 +664,15 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = target(e, g); - map<NFAVertex, boost::default_color_type> vertexColor; + map<NFAVertex, boost::default_color_type> vertexColor; // Walk the graph forward from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { - depth_first_visit(g, start, ReachSubsetVisitor(domReach), + depth_first_visit(g, start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), - VertexIs<NGHolder, NFAVertex>(dom)); + VertexIs<NGHolder, NFAVertex>(dom)); } catch(ReachMismatch&) { return false; } @@ -735,9 +735,9 @@ u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) { for (auto v : vertices_range(g)) { assert(g[v].index < cyclic.size()); - if (hasSelfLoop(v, g)) { + if (hasSelfLoop(v, g)) { count++; - cyclic[g[v].index] = true; + cyclic[g[v].index] = true; } } @@ -747,7 +747,7 @@ u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) { static void findCyclicDom(NGHolder &g, vector<bool> &cyclic, set<NFAEdge> &dead, som_type som) { - auto dominators = findDominators(g); + auto dominators = findDominators(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { @@ -763,8 +763,8 @@ void findCyclicDom(NGHolder &g, vector<bool> &cyclic, continue; } - DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic " - "vertex %zu\n", g[v].index, g[dom].index); + DEBUG_PRINTF("vertex %zu is dominated by 
directly-connected cyclic " + "vertex %zu\n", g[v].index, g[dom].index); // iff all paths through in-edge e of v involve vertices whose // reachability is a subset of reach(dom), we can delete edge e. @@ -774,8 +774,8 @@ void findCyclicDom(NGHolder &g, vector<bool> &cyclic, } if (reversePathReachSubset(e, dom, g)) { - DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " - "paths share dom reach\n", + DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " + "paths share dom reach\n", g[source(e, g)].index, g[target(e, g)].index); dead.insert(e); if (source(e, g) == v) { @@ -791,7 +791,7 @@ void findCyclicDom(NGHolder &g, vector<bool> &cyclic, static void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, set<NFAEdge> &dead) { - auto postdominators = findPostDominators(g); + auto postdominators = findPostDominators(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { @@ -800,9 +800,9 @@ void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, // Path out through a post-dominator (e.g. a?.+foobar') NFAVertex postdom = postdominators[v]; - if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { - DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " - "cyclic vertex %zu\n", g[v].index, g[postdom].index); + if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { + DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " + "cyclic vertex %zu\n", g[v].index, g[postdom].index); // iff all paths through in-edge e of v involve vertices whose // reachability is a subset of reach(dom), we can delete edge e. 
@@ -812,8 +812,8 @@ void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, } if (forwardPathReachSubset(e, postdom, g)) { - DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " - "paths share postdom reach\n", + DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " + "paths share postdom reach\n", g[source(e, g)].index, g[target(e, g)].index); if (target(e, g) == v) { cyclic[g[v].index] = false; @@ -828,7 +828,7 @@ void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, bool removeRedundancy(NGHolder &g, som_type som) { DEBUG_PRINTF("rr som = %d\n", (int)som); - renumber_vertices(g); + renumber_vertices(g); // Cheap check: if all the non-special vertices have in-degree one and // out-degree one, there's no redundancy in this here graph and we can diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp index 2675be643f..e025bccda3 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,9 +56,9 @@ #include "ng_util.h" #include "ue2common.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include <set> #include <utility> @@ -71,61 +71,61 @@ using namespace std; namespace ue2 { -using BackEdgeSet = unordered_set<NFAEdge>; -using AcyclicGraph = - boost::filtered_graph<NGHolder, bad_edge_filter<BackEdgeSet>>; +using BackEdgeSet = unordered_set<NFAEdge>; +using AcyclicGraph = + boost::filtered_graph<NGHolder, bad_edge_filter<BackEdgeSet>>; namespace { struct exit_info { explicit exit_info(NFAVertex v) : exit(v) {} 
NFAVertex exit; - flat_set<NFAVertex> open; + flat_set<NFAVertex> open; }; } static void checkAndAddExitCandidate(const AcyclicGraph &g, - const unordered_set<NFAVertex> &r, NFAVertex v, - vector<exit_info> &exits) { - exit_info v_exit(v); - auto &open = v_exit.open; + const unordered_set<NFAVertex> &r, NFAVertex v, + vector<exit_info> &exits) { + exit_info v_exit(v); + auto &open = v_exit.open; /* find the set of vertices reachable from v which are not in r */ for (auto w : adjacent_vertices_range(v, g)) { if (!contains(r, w)) { - open.insert(w); + open.insert(w); } } - if (!open.empty()) { - DEBUG_PRINTF("exit %zu\n", g[v].index); - exits.push_back(move(v_exit)); + if (!open.empty()) { + DEBUG_PRINTF("exit %zu\n", g[v].index); + exits.push_back(move(v_exit)); } } static -void findExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, - vector<exit_info> &exits) { - exits.clear(); +void findExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, + vector<exit_info> &exits) { + exits.clear(); for (auto v : r) { checkAndAddExitCandidate(g, r, v, exits); } } static -void refineExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, - NFAVertex new_v, vector<exit_info> &exits) { - /* new_v is no long an open edge */ - for (auto &exit : exits) { - exit.open.erase(new_v); +void refineExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r, + NFAVertex new_v, vector<exit_info> &exits) { + /* new_v is no long an open edge */ + for (auto &exit : exits) { + exit.open.erase(new_v); } - /* no open edges: no longer an exit */ - exits.erase(remove_if(exits.begin(), exits.end(), - [&](const exit_info &exit) { return exit.open.empty(); }), - exits.end()); - + /* no open edges: no longer an exit */ + exits.erase(remove_if(exits.begin(), exits.end(), + [&](const exit_info &exit) { return exit.open.empty(); }), + exits.end()); + checkAndAddExitCandidate(g, r, new_v, exits); } @@ -133,12 +133,12 @@ void refineExits(const AcyclicGraph &g, const 
unordered_set<NFAVertex> &r, */ static bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits, - const flat_set<NFAVertex> &open_jumps) { + const flat_set<NFAVertex> &open_jumps) { if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) { return true; } if (exits.size() == 1 && open_jumps.size() == 1) { - DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index, + DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index, g[exits[0].exit].index); if (*open_jumps.begin() == exits[0].exit) { return true; @@ -162,8 +162,8 @@ bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits, } static -void setRegion(const unordered_set<NFAVertex> &r, u32 rid, - unordered_map<NFAVertex, u32> &regions) { +void setRegion(const unordered_set<NFAVertex> &r, u32 rid, + unordered_map<NFAVertex, u32> &regions) { for (auto v : r) { regions[v] = rid; } @@ -173,36 +173,36 @@ static void buildInitialCandidate(const AcyclicGraph &g, vector<NFAVertex>::const_reverse_iterator &it, const vector<NFAVertex>::const_reverse_iterator &ite, - unordered_set<NFAVertex> &candidate, + unordered_set<NFAVertex> &candidate, /* in exits of prev region; * out exits from candidate */ - vector<exit_info> &exits, - flat_set<NFAVertex> &open_jumps) { + vector<exit_info> &exits, + flat_set<NFAVertex> &open_jumps) { if (it == ite) { - candidate.clear(); - exits.clear(); + candidate.clear(); + exits.clear(); return; } - if (exits.empty()) { + if (exits.empty()) { DEBUG_PRINTF("odd\n"); - candidate.clear(); - DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); - candidate.insert(*it); - open_jumps.erase(*it); - checkAndAddExitCandidate(g, candidate, *it, exits); + candidate.clear(); + DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); + candidate.insert(*it); + open_jumps.erase(*it); + checkAndAddExitCandidate(g, candidate, *it, exits); ++it; return; } - // Note: findExits() will clear exits, so it's safe to mutate/move its - // elements here.
- auto &enters = exits.front().open; - candidate.clear(); + // Note: findExits() will clear exits, so it's safe to mutate/move its + // elements here. + auto &enters = exits.front().open; + candidate.clear(); for (; it != ite; ++it) { - DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); - candidate.insert(*it); + DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); + candidate.insert(*it); if (contains(enters, *it)) { break; } @@ -210,35 +210,35 @@ void buildInitialCandidate(const AcyclicGraph &g, if (it != ite) { enters.erase(*it); - open_jumps = move(enters); - DEBUG_PRINTF("oj size = %zu\n", open_jumps.size()); + open_jumps = move(enters); + DEBUG_PRINTF("oj size = %zu\n", open_jumps.size()); ++it; } else { - open_jumps.clear(); + open_jumps.clear(); } - findExits(g, candidate, exits); + findExits(g, candidate, exits); } static void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, const vector<NFAVertex> &topo, - unordered_map<NFAVertex, u32> &regions) { + unordered_map<NFAVertex, u32> &regions) { assert(!topo.empty()); u32 curr_id = 0; - auto t_it = topo.rbegin(); - unordered_set<NFAVertex> candidate; - flat_set<NFAVertex> open_jumps; - DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); + auto t_it = topo.rbegin(); + unordered_set<NFAVertex> candidate; + flat_set<NFAVertex> open_jumps; + DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); - DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); + DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); - vector<exit_info> exits; - findExits(g, candidate, exits); - + vector<exit_info> exits; + findExits(g, candidate, exits); + while (t_it != topo.rend()) { assert(!candidate.empty()); @@ -253,14 +253,14 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, DEBUG_PRINTF("setting region %u\n", curr_id); } setRegion(candidate, curr_id++, regions); - buildInitialCandidate(g,
t_it, topo.rend(), candidate, exits, - open_jumps); + buildInitialCandidate(g, t_it, topo.rend(), candidate, exits, + open_jumps); } else { NFAVertex curr = *t_it; - DEBUG_PRINTF("adding %zu to current\n", g[curr].index); + DEBUG_PRINTF("adding %zu to current\n", g[curr].index); candidate.insert(curr); open_jumps.erase(curr); - refineExits(g, candidate, *t_it, exits); + refineExits(g, candidate, *t_it, exits); DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(), exits.size()); ++t_it; @@ -273,7 +273,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, static void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo, const BackEdgeSet &backEdges, - unordered_map<NFAVertex, u32> &regions) { + unordered_map<NFAVertex, u32> &regions) { for (const auto &e : backEdges) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); @@ -284,7 +284,7 @@ void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo, continue; } - DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv, + DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv, g[u].index, ru); assert(rv < ru); @@ -343,15 +343,15 @@ void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g, static void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) { - unordered_set<NFAVertex> sinks; + unordered_set<NFAVertex> sinks; for (auto v : vertices_range(acyclic_g)) { if (is_special(v, acyclic_g)) { continue; } if (isLeafNode(v, acyclic_g)) { - DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); - sinks.insert(NFAVertex(v)); + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); + sinks.insert(NFAVertex(v)); } } @@ -365,18 +365,18 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) { DEBUG_PRINTF("look\n"); changed = false; for (auto v : vertices_range(acyclic_g)) { - if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) { + if (is_special(v, acyclic_g) || contains(sinks,
NFAVertex(v))) { continue; } for (auto w : adjacent_vertices_range(v, acyclic_g)) { - if (!contains(sinks, NFAVertex(w))) { + if (!contains(sinks, NFAVertex(w))) { goto next; } } - DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); - sinks.insert(NFAVertex(v)); + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); + sinks.insert(NFAVertex(v)); changed = true; next:; } @@ -387,10 +387,10 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) { continue; } NFAVertex s = *ri; - DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index); - unordered_set<NFAVertex> parents; + DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index); + unordered_set<NFAVertex> parents; for (const auto &e : in_edges_range(s, acyclic_g)) { - parents.insert(NFAVertex(source(e, acyclic_g))); + parents.insert(NFAVertex(source(e, acyclic_g))); } /* vertex has no children not reachable on a back edge, bubble the @@ -408,20 +408,20 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) { } } -using ColorMap = decltype(make_small_color_map(NGHolder())); - +using ColorMap = decltype(make_small_color_map(NGHolder())); + /** Build a reverse topo ordering (with only the specials that are in use). We * also want to ensure vertices which only lead to back edges are placed near * their parents. 
*/ static vector<NFAVertex> buildTopoOrder(const NGHolder &w, const AcyclicGraph &acyclic_g, - ColorMap &colours) { + ColorMap &colours) { vector<NFAVertex> topoOrder; - topoOrder.reserve(num_vertices(w)); + topoOrder.reserve(num_vertices(w)); - topological_sort(acyclic_g, back_inserter(topoOrder), - color_map(colours)); + topological_sort(acyclic_g, back_inserter(topoOrder), + color_map(colours)); reorderSpecials(w, acyclic_g, topoOrder); @@ -433,35 +433,35 @@ vector<NFAVertex> buildTopoOrder(const NGHolder &w, DEBUG_PRINTF("TOPO ORDER\n"); for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) { - DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index); + DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index); } DEBUG_PRINTF("----------\n"); return topoOrder; } -unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) { +unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const u32 numVertices = num_vertices(g); DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices); - auto colours = make_small_color_map(g); + auto colours = make_small_color_map(g); // Build an acyclic graph for this NGHolder. BackEdgeSet deadEdges; - depth_first_search(g, - visitor(BackEdges<BackEdgeSet>(deadEdges)) - .root_vertex(g.start) - .color_map(colours)); + depth_first_search(g, + visitor(BackEdges<BackEdgeSet>(deadEdges)) + .root_vertex(g.start) + .color_map(colours)); - auto af = make_bad_edge_filter(&deadEdges); - AcyclicGraph acyclic_g(g, af); + auto af = make_bad_edge_filter(&deadEdges); + AcyclicGraph acyclic_g(g, af); // Build a (reverse) topological ordering. vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours); // Everybody starts in region 0. 
- unordered_map<NFAVertex, u32> regions; + unordered_map<NFAVertex, u32> regions; regions.reserve(numVertices); for (auto v : vertices_range(g)) { regions.emplace(v, 0); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.h b/contrib/libs/hyperscan/src/nfagraph/ng_region.h index a4708a582e..27572492e1 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,18 +37,18 @@ #include "util/container.h" #include "util/graph_range.h" -#include <unordered_map> +#include <unordered_map> #include <vector> namespace ue2 { /** \brief Assign a region ID to every vertex in the graph. */ -std::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g); +std::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g); /** \brief True if vertices \p a and \p b are in the same region. */ template <class Graph> bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { assert(contains(region_map, a) && contains(region_map, b)); return region_map.at(a) == region_map.at(b) && @@ -58,7 +58,7 @@ bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b, /** \brief True if vertex \p b is in a later region than vertex \p a. 
*/ template <class Graph> bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { assert(contains(region_map, a) && contains(region_map, b)); u32 aa = g[a].index; @@ -85,7 +85,7 @@ bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b, /** \brief True if vertex \p b is in an earlier region than vertex \p a. */ template <class Graph> bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { assert(contains(region_map, a) && contains(region_map, b)); u32 aa = g[a].index; @@ -112,7 +112,7 @@ bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b, /** \brief True if vertex \p v is an entry vertex for its region. */ template <class Graph> bool isRegionEntry(const Graph &g, NFAVertex v, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { // Note that some graph types do not have inv_adjacent_vertices, so we must // use in_edges here. for (const auto &e : in_edges_range(v, g)) { @@ -127,7 +127,7 @@ bool isRegionEntry(const Graph &g, NFAVertex v, /** \brief True if vertex \p v is an exit vertex for its region. */ template <class Graph> bool isRegionExit(const Graph &g, NFAVertex v, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { for (auto w : adjacent_vertices_range(v, g)) { if (!inSameRegion(g, v, w, region_map)) { return true; @@ -140,7 +140,7 @@ bool isRegionExit(const Graph &g, NFAVertex v, /** \brief True if vertex \p v is in a region all on its own. 
*/ template <class Graph> bool isSingletonRegion(const Graph &g, NFAVertex v, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { for (const auto &e : in_edges_range(v, g)) { auto u = source(e, g); if (u != v && inSameRegion(g, v, u, region_map)) { @@ -178,10 +178,10 @@ bool isSingletonRegion(const Graph &g, NFAVertex v, */ template <class Graph> bool isOptionalRegion(const Graph &g, NFAVertex v, - const std::unordered_map<NFAVertex, u32> &region_map) { + const std::unordered_map<NFAVertex, u32> &region_map) { assert(isRegionEntry(g, v, region_map)); - DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n", + DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n", region_map.at(v), g[v].index); // Region zero is never optional. @@ -198,12 +198,12 @@ bool isOptionalRegion(const Graph &g, NFAVertex v, if (inSameRegion(g, v, u, region_map)) { continue; } - DEBUG_PRINTF(" searching from u=%zu\n", g[u].index); + DEBUG_PRINTF(" searching from u=%zu\n", g[u].index); assert(inEarlierRegion(g, v, u, region_map)); for (auto w : adjacent_vertices_range(u, g)) { - DEBUG_PRINTF(" searching to w=%zu\n", g[w].index); + DEBUG_PRINTF(" searching to w=%zu\n", g[w].index); if (inLaterRegion(g, v, w, region_map)) { return true; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp index 1126d4d6c9..3ea73e78ab 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,7 +60,7 @@ struct RegionInfo { static bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, const flat_set<ReportID> 
&expected_reports, - const unordered_map<NFAVertex, u32> &region_map) { + const unordered_map<NFAVertex, u32> &region_map) { /* TODO: only check vertices connected to accept/acceptEOD */ for (auto v : vertices_range(g)) { if (region != region_map.at(v)) { @@ -84,13 +84,13 @@ bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, static void processCyclicStateForward(NGHolder &h, NFAVertex cyc, const map<u32, RegionInfo> &info, - const unordered_map<NFAVertex, u32> &region_map, + const unordered_map<NFAVertex, u32> &region_map, set<u32> &deadRegions) { u32 region = region_map.at(cyc); CharReach cr = h[cyc].char_reach; auto reports = h[cyc].reports; - DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index, + DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index, region); map<u32, RegionInfo>::const_iterator it; @@ -98,7 +98,7 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, NFAVertex v = it->second.entry; const CharReach &region_cr = it->second.cr; assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); - DEBUG_PRINTF("checking %zu\n", h[v].index); + DEBUG_PRINTF("checking %zu\n", h[v].index); if (!region_cr.isSubsetOf(cr)) { DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); @@ -107,8 +107,8 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, if (isOptionalRegion(h, v, region_map) && !regionHasUnexpectedAccept(h, region, reports, region_map)) { - DEBUG_PRINTF("cyclic state %zu leads to optional region leader" - " %zu\n", h[cyc].index, h[v].index); + DEBUG_PRINTF("cyclic state %zu leads to optional region leader" + " %zu\n", h[cyc].index, h[v].index); deadRegions.insert(region); } else if (isSingletonRegion(h, v, region_map)) { /* we can use this region as straw and suck in optional regions on @@ -130,20 +130,20 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, static void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, const map<u32, RegionInfo> &info, - const unordered_map<NFAVertex, u32> &region_map, + 
const unordered_map<NFAVertex, u32> &region_map, set<u32> &deadRegions) { u32 region = region_map.at(cyc); CharReach cr = h[cyc].char_reach; auto reports = h[cyc].reports; - DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region); + DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region); map<u32, RegionInfo>::const_iterator it; while ((it = info.find(--region)) != info.end()) { NFAVertex v = it->second.entry; const CharReach &region_cr = it->second.cr; assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); - DEBUG_PRINTF("checking %zu\n", h[v].index); + DEBUG_PRINTF("checking %zu\n", h[v].index); if (!region_cr.isSubsetOf(cr)) { DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); @@ -152,7 +152,7 @@ void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, if (isOptionalRegion(h, v, region_map) && !regionHasUnexpectedAccept(h, region, reports, region_map)) { - DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n", + DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n", h[cyc].index, h[v].index); deadRegions.insert(region); } else if (isSingletonRegion(h, v, region_map)) { @@ -179,7 +179,7 @@ void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, static map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g, - const unordered_map<NFAVertex, u32> &region_map) { + const unordered_map<NFAVertex, u32> &region_map) { map<u32, RegionInfo> info; for (auto v : vertices_range(g)) { diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp index 1f63ad3c6f..95d52e855b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp @@ -46,16 +46,16 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include "util/graph_undirected.h" #include "util/report_manager.h" -#include 
"util/unordered.h" +#include "util/unordered.h" #include <algorithm> #include <map> #include <queue> -#include <unordered_map> -#include <unordered_set> +#include <unordered_map> +#include <unordered_set> #include <boost/graph/connected_components.hpp> #include <boost/graph/depth_first_search.hpp> @@ -65,9 +65,9 @@ #include <boost/icl/interval_set.hpp> using namespace std; -using boost::depth_first_search; -using boost::depth_first_visit; -using boost::make_assoc_property_map; +using boost::depth_first_search; +using boost::depth_first_visit; +using boost::make_assoc_property_map; namespace ue2 { @@ -111,8 +111,8 @@ using RepeatGraph = boost::filtered_graph<NGHolder, ReachFilter<NGHolder>, struct ReachSubgraph { vector<NFAVertex> vertices; - depth repeatMin{0}; - depth repeatMax{0}; + depth repeatMin{0}; + depth repeatMax{0}; u32 minPeriod = 1; bool is_reset = false; enum RepeatType historyType = REPEAT_RING; @@ -123,59 +123,59 @@ struct ReachSubgraph { static void findInitDepths(const NGHolder &g, - unordered_map<NFAVertex, NFAVertexDepth> &depths) { - auto d = calcDepths(g); + unordered_map<NFAVertex, NFAVertexDepth> &depths) { + auto d = calcDepths(g); for (auto v : vertices_range(g)) { - size_t idx = g[v].index; + size_t idx = g[v].index; assert(idx < d.size()); - depths.emplace(v, d[idx]); + depths.emplace(v, d[idx]); } } static -vector<NFAVertex> buildTopoOrder(const RepeatGraph &g) { - /* Note: RepeatGraph is a filtered version of NGHolder and still has - * NFAVertex as its vertex descriptor */ - - typedef unordered_set<NFAEdge> EdgeSet; +vector<NFAVertex> buildTopoOrder(const RepeatGraph &g) { + /* Note: RepeatGraph is a filtered version of NGHolder and still has + * NFAVertex as its vertex descriptor */ + + typedef unordered_set<NFAEdge> EdgeSet; EdgeSet deadEdges; // We don't have indices spanning [0,N] on our filtered graph, so we // provide a colour map. 
- unordered_map<NFAVertex, boost::default_color_type> colours; + unordered_map<NFAVertex, boost::default_color_type> colours; depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)). color_map(make_assoc_property_map(colours))); - auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); - vector<NFAVertex> topoOrder; + vector<NFAVertex> topoOrder; topological_sort(acyclic_g, back_inserter(topoOrder), color_map(make_assoc_property_map(colours))); reverse(topoOrder.begin(), topoOrder.end()); - - return topoOrder; + + return topoOrder; } static void proper_pred(const NGHolder &g, NFAVertex v, - unordered_set<NFAVertex> &p) { + unordered_set<NFAVertex> &p) { pred(g, v, &p); p.erase(v); // self-loops } static void proper_succ(const NGHolder &g, NFAVertex v, - unordered_set<NFAVertex> &s) { + unordered_set<NFAVertex> &s) { succ(g, v, &s); s.erase(v); // self-loops } static bool roguePredecessor(const NGHolder &g, NFAVertex v, - const unordered_set<NFAVertex> &involved, - const unordered_set<NFAVertex> &pred) { + const unordered_set<NFAVertex> &involved, + const unordered_set<NFAVertex> &pred) { u32 seen = 0; for (auto u : inv_adjacent_vertices_range(v, g)) { @@ -183,7 +183,7 @@ bool roguePredecessor(const NGHolder &g, NFAVertex v, continue; } if (!contains(pred, u)) { - DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index); + DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index); return true; } @@ -200,8 +200,8 @@ bool roguePredecessor(const NGHolder &g, NFAVertex v, static bool rogueSuccessor(const NGHolder &g, NFAVertex v, - const unordered_set<NFAVertex> &involved, - const unordered_set<NFAVertex> &succ) { + const unordered_set<NFAVertex> &involved, + const unordered_set<NFAVertex> &succ) { u32 seen = 0; for (auto w : adjacent_vertices_range(v, g)) { if (contains(involved, w)) { @@ -209,7 +209,7 @@ bool rogueSuccessor(const NGHolder &g, NFAVertex v, } if (!contains(succ, w)) { 
- DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index); + DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index); return true; } @@ -226,8 +226,8 @@ bool rogueSuccessor(const NGHolder &g, NFAVertex v, static bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) { - /* TODO: check that we need this now that we allow multiple tops */ - const flat_set<u32> *tops = nullptr; + /* TODO: check that we need this now that we allow multiple tops */ + const flat_set<u32> *tops = nullptr; for (auto v : verts) { for (const auto &e : in_edges_range(v, g)) { @@ -235,12 +235,12 @@ bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) { if (u != g.start && u != g.startDs) { continue; // Only edges from starts have valid top properties. } - DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index, - g[v].index, g[e].tops.size()); - if (!tops) { - tops = &g[e].tops; - } else if (g[e].tops != *tops) { - return true; // More than one set of tops. + DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index, + g[v].index, g[e].tops.size()); + if (!tops) { + tops = &g[e].tops; + } else if (g[e].tops != *tops) { + return true; // More than one set of tops. } } } @@ -250,19 +250,19 @@ bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) { static bool vertexIsBad(const NGHolder &g, NFAVertex v, - const unordered_set<NFAVertex> &involved, - const unordered_set<NFAVertex> &tail, - const unordered_set<NFAVertex> &pred, - const unordered_set<NFAVertex> &succ, + const unordered_set<NFAVertex> &involved, + const unordered_set<NFAVertex> &tail, + const unordered_set<NFAVertex> &pred, + const unordered_set<NFAVertex> &succ, const flat_set<ReportID> &reports) { - DEBUG_PRINTF("check vertex %zu\n", g[v].index); + DEBUG_PRINTF("check vertex %zu\n", g[v].index); // We must drop any vertex that is the target of a back-edge within // our subgraph. The tail set contains all vertices that are after v in a // topo ordering. 
for (auto u : inv_adjacent_vertices_range(v, g)) { if (contains(tail, u)) { - DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n", + DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n", g[u].index, g[v].index); return true; } @@ -272,18 +272,18 @@ bool vertexIsBad(const NGHolder &g, NFAVertex v, // edges from *all* the vertices in pred and no other external entries. // Similarly for exits. if (roguePredecessor(g, v, involved, pred)) { - DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index); + DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index); return true; } if (rogueSuccessor(g, v, involved, succ)) { - DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index); + DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index); return true; } // All reporting vertices should have the same reports. if (is_match_vertex(v, g) && reports != g[v].reports) { - DEBUG_PRINTF("report mismatch to %zu\n", g[v].index); + DEBUG_PRINTF("report mismatch to %zu\n", g[v].index); return true; } @@ -298,7 +298,7 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts, // We construct a copy of the graph using just the vertices we want, rather // than using a filtered_graph -- this way is faster. NGHolder verts_g; - unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g + unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g fillHolder(&verts_g, g, verts, &verts_map); const auto ug = make_undirected_graph(verts_g); @@ -388,10 +388,10 @@ void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, continue; } - unordered_set<NFAVertex> involved(rsi.vertices.begin(), - rsi.vertices.end()); - unordered_set<NFAVertex> tail(involved); // to look for back-edges. - unordered_set<NFAVertex> pred, succ; + unordered_set<NFAVertex> involved(rsi.vertices.begin(), + rsi.vertices.end()); + unordered_set<NFAVertex> tail(involved); // to look for back-edges. 
+ unordered_set<NFAVertex> pred, succ; proper_pred(g, rsi.vertices.front(), pred); proper_succ(g, rsi.vertices.back(), succ); @@ -525,7 +525,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, NFAVertex first = rsi.vertices.front(); NFAVertex last = rsi.vertices.back(); - typedef unordered_map<NFAVertex, DistanceSet> DistanceMap; + typedef unordered_map<NFAVertex, DistanceSet> DistanceMap; DistanceMap dist; // Initial distance sets. @@ -533,7 +533,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, if (u == first) { continue; // no self-loops } - DEBUG_PRINTF("pred vertex %zu\n", g[u].index); + DEBUG_PRINTF("pred vertex %zu\n", g[u].index); dist[u].insert(0); } @@ -597,8 +597,8 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, range.first, range.second); return false; } - rsi.repeatMin = depth(range.first); - rsi.repeatMax = depth(range.second); + rsi.repeatMin = depth(range.first); + rsi.repeatMax = depth(range.second); // If we've got a self-loop anywhere, we've got inf max. if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) { @@ -619,7 +619,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, static bool allPredsInSubgraph(NFAVertex v, const NGHolder &g, - const unordered_set<NFAVertex> &involved) { + const unordered_set<NFAVertex> &involved) { for (auto u : inv_adjacent_vertices_range(v, g)) { if (!contains(involved, u)) { return false; @@ -630,12 +630,12 @@ bool allPredsInSubgraph(NFAVertex v, const NGHolder &g, static void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, - const unordered_set<NFAVertex> &involved, - unordered_map<NFAVertex, NFAVertexDepth> &depths, + const unordered_set<NFAVertex> &involved, + unordered_map<NFAVertex, NFAVertexDepth> &depths, vector<NFAVertex> &tugs) { if (allPredsInSubgraph(v, g, involved)) { // We can transform this vertex into a tug trigger in-place. 
- DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n", + DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n", g[v].index); add_edge(cyclic, v, g); tugs.push_back(v); @@ -647,7 +647,7 @@ void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, NFAVertex t = clone_vertex(g, v); depths[t] = depths[v]; - DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n", + DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n", g[t].index, g[v].index); tugs.push_back(t); @@ -664,7 +664,7 @@ NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) { NFAVertex cyclic = clone_vertex(g, last); add_edge(cyclic, cyclic, g); - DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index); + DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index); return cyclic; } @@ -675,7 +675,7 @@ NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) { g[pos].char_reach = g[first].char_reach; - DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index); + DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index); return pos; } @@ -710,7 +710,7 @@ u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) { static void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, - unordered_map<NFAVertex, NFAVertexDepth> &depths, + unordered_map<NFAVertex, NFAVertexDepth> &depths, vector<NFAVertex> *succs) { u32 unpeel = unpeelAmount(g, rsi); DEBUG_PRINTF("unpeeling %u vertices\n", unpeel); @@ -721,7 +721,7 @@ void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, NFAVertex d = clone_vertex(g, last); depths[d] = depths[last]; - DEBUG_PRINTF("created vertex %zu\n", g[d].index); + DEBUG_PRINTF("created vertex %zu\n", g[d].index); for (auto v : *succs) { add_edge(d, v, g); @@ -769,24 +769,24 @@ void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi, * NFA graph and replace it with a cyclic state. 
*/ static void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, - vector<BoundedRepeatData> *repeats, - unordered_map<NFAVertex, NFAVertexDepth> &depths, - unordered_set<NFAVertex> &created) { + vector<BoundedRepeatData> *repeats, + unordered_map<NFAVertex, NFAVertexDepth> &depths, + unordered_set<NFAVertex> &created) { assert(!rsi.bad); - /* As we may need to unpeel 2 vertices, we need the width to be more than 2. - * This should only happen if the graph did not have redundancy pass - * performed on as vertex count checks would be prevent us reaching here. - */ - if (rsi.repeatMax <= depth(2)) { - return; - } + /* As we may need to unpeel 2 vertices, we need the width to be more than 2. + * This should only happen if the graph did not have redundancy pass + * performed on as vertex count checks would be prevent us reaching here. + */ + if (rsi.repeatMax <= depth(2)) { + return; + } assert(rsi.repeatMin > depth(0)); assert(rsi.repeatMax >= rsi.repeatMin); - assert(rsi.repeatMax > depth(2)); + assert(rsi.repeatMax > depth(2)); DEBUG_PRINTF("entry\n"); - const unordered_set<NFAVertex> involved(rsi.vertices.begin(), + const unordered_set<NFAVertex> involved(rsi.vertices.begin(), rsi.vertices.end()); vector<NFAVertex> succs; getSuccessors(g, rsi, &succs); @@ -847,16 +847,16 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, static void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi, vector<BoundedRepeatData> *repeats, - unordered_map<NFAVertex, NFAVertexDepth> &depths, - unordered_set<NFAVertex> &created) { + unordered_map<NFAVertex, NFAVertexDepth> &depths, + unordered_set<NFAVertex> &created) { assert(!rsi.bad); assert(rsi.repeatMin); assert(rsi.repeatMax >= rsi.repeatMin); DEBUG_PRINTF("entry\n"); - const unordered_set<NFAVertex> involved(rsi.vertices.begin(), - rsi.vertices.end()); + const unordered_set<NFAVertex> involved(rsi.vertices.begin(), + rsi.vertices.end()); vector<NFAVertex> succs; getSuccessors(g, rsi, &succs); 
@@ -950,7 +950,7 @@ void reprocessSubgraph(const NGHolder &h, const Grey &grey, * involved in other repeats as a result of earlier repeat transformations. */ static bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, - const unordered_set<NFAVertex> &created) { + const unordered_set<NFAVertex> &created) { assert(!rsi.bad); if (created.empty()) { @@ -969,7 +969,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, zap = it; break; } else { - DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); + DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); } } DEBUG_PRINTF("peeling %zu vertices from front\n", @@ -986,7 +986,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, zap = it.base(); // Note: erases everything after it. break; } else { - DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); + DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); } } DEBUG_PRINTF("peeling %zu vertices from back\n", @@ -997,7 +997,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, // no-no. for (auto v : rsi.vertices) { if (contains(created, v)) { - DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index); + DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index); return false; } } @@ -1012,15 +1012,15 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, * idea to extend to cyclic states, too. 
*/ static void peelStartDotStar(const NGHolder &g, - const unordered_map<NFAVertex, NFAVertexDepth> &depths, - const Grey &grey, ReachSubgraph &rsi) { + const unordered_map<NFAVertex, NFAVertexDepth> &depths, + const Grey &grey, ReachSubgraph &rsi) { if (rsi.vertices.size() < 1) { return; } NFAVertex first = rsi.vertices.front(); if (depths.at(first).fromStartDotStar.min == depth(1)) { - DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index); + DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index); rsi.vertices.erase(rsi.vertices.begin()); reprocessSubgraph(g, grey, rsi); } @@ -1029,7 +1029,7 @@ void peelStartDotStar(const NGHolder &g, static void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, const u32 minNumVertices) { - const ReachFilter<NGHolder> fil(&g); + const ReachFilter<NGHolder> fil(&g); const RepeatGraph rg(g, fil, fil); if (!isCompBigEnough(rg, minNumVertices)) { @@ -1046,7 +1046,7 @@ void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, DEBUG_PRINTF("found %u connected repeat components\n", num); // Now, we build a set of topo-ordered ReachSubgraphs. 
- vector<NFAVertex> topoOrder = buildTopoOrder(rg); + vector<NFAVertex> topoOrder = buildTopoOrder(rg); rs.resize(num); @@ -1089,14 +1089,14 @@ bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) { /* depth info is valid as calculated at entry */ static bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, - const unordered_map<NFAVertex, NFAVertexDepth> &depths, - const unordered_set<NFAVertex> &reached_by_fixed_tops) { + const unordered_map<NFAVertex, NFAVertexDepth> &depths, + const unordered_set<NFAVertex> &reached_by_fixed_tops) { DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n", reached_by_fixed_tops.size()); if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) { /* can't do this for infix/suffixes unless we know trigger literals * can only occur at one offset */ - DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index); + DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index); return false; } @@ -1116,8 +1116,8 @@ bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, for (auto u : inv_adjacent_vertices_range(v, g)) { const depth &u_max_depth = depths.at(u).fromStart.max; - DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index, - u_max_depth.str().c_str()); + DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index, + u_max_depth.str().c_str()); if (u_max_depth != first - depth(1)) { return false; } @@ -1135,12 +1135,12 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger, g[v].char_reach = cr; add_edge(u, v, g); if (u == g.start) { - g[edge(u, v, g)].tops.insert(top); + g[edge(u, v, g)].tops.insert(top); } u = v; } - DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index); + DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index); return u; } @@ -1165,21 +1165,21 @@ void addTriggers(NGHolder &g, continue; } - const auto &tops = g[e].tops; + const auto &tops = g[e].tops; // The caller may not have given us complete trigger information. 
If we // don't have any triggers for a particular top, we should just leave // it alone. - for (u32 top : tops) { - if (!contains(triggers, top)) { - DEBUG_PRINTF("no triggers for top %u\n", top); - goto next_edge; - } - - starts_by_top[top].push_back(v); + for (u32 top : tops) { + if (!contains(triggers, top)) { + DEBUG_PRINTF("no triggers for top %u\n", top); + goto next_edge; + } + + starts_by_top[top].push_back(v); } dead.push_back(e); - next_edge:; + next_edge:; } remove_edges(dead, g); @@ -1216,12 +1216,12 @@ CharReach predReach(const NGHolder &g, NFAVertex v) { */ static void filterMap(const NGHolder &subg, - unordered_map<NFAVertex, NFAVertex> &vmap) { - NGHolder::vertex_iterator vi, ve; + unordered_map<NFAVertex, NFAVertex> &vmap) { + NGHolder::vertex_iterator vi, ve; tie(vi, ve) = vertices(subg); - const unordered_set<NFAVertex> remaining_verts(vi, ve); + const unordered_set<NFAVertex> remaining_verts(vi, ve); - unordered_map<NFAVertex, NFAVertex> fmap; // filtered map + unordered_map<NFAVertex, NFAVertex> fmap; // filtered map for (const auto &m : vmap) { if (contains(remaining_verts, m.second)) { @@ -1236,7 +1236,7 @@ void filterMap(const NGHolder &subg, * the bounded repeat. */ static void buildRepeatGraph(NGHolder &rg, - unordered_map<NFAVertex, NFAVertex> &rg_map, + unordered_map<NFAVertex, NFAVertex> &rg_map, const NGHolder &g, const ReachSubgraph &rsi, const map<u32, vector<vector<CharReach>>> &triggers) { cloneHolder(rg, g, &rg_map); @@ -1247,7 +1247,7 @@ void buildRepeatGraph(NGHolder &rg, add_edge(rg.accept, rg.acceptEod, rg); // Find the set of vertices in rg involved in the repeat. 
- unordered_set<NFAVertex> rg_involved; + unordered_set<NFAVertex> rg_involved; for (const auto &v : rsi.vertices) { assert(contains(rg_map, v)); rg_involved.insert(rg_map.at(v)); @@ -1270,7 +1270,7 @@ void buildRepeatGraph(NGHolder &rg, if (is_triggered(rg)) { // Add vertices for all our triggers addTriggers(rg, triggers); - renumber_vertices(rg); + renumber_vertices(rg); // We don't know anything about how often this graph is triggered, so we // make the start vertex cyclic for the purposes of this analysis ONLY. @@ -1289,29 +1289,29 @@ void buildRepeatGraph(NGHolder &rg, */ static void buildInputGraph(NGHolder &lhs, - unordered_map<NFAVertex, NFAVertex> &lhs_map, + unordered_map<NFAVertex, NFAVertex> &lhs_map, const NGHolder &g, const NFAVertex first, const map<u32, vector<vector<CharReach>>> &triggers) { - DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index); + DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index); cloneHolder(lhs, g, &lhs_map); assert(g.kind == lhs.kind); addTriggers(lhs, triggers); - renumber_vertices(lhs); + renumber_vertices(lhs); // Replace each back-edge (u,v) with an edge (startDs,v), which will // generate entries at at least the rate of the loop created by that // back-edge. set<NFAEdge> dead; BackEdges<set<NFAEdge> > backEdgeVisitor(dead); - depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start)); + depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start)); for (const auto &e : dead) { const NFAVertex u = source(e, lhs), v = target(e, lhs); if (u == v) { continue; // Self-loops are OK. 
} - DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n", - lhs[u].index, lhs[v].index, lhs[v].index); + DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n", + lhs[u].index, lhs[v].index, lhs[v].index); add_edge_if_not_present(lhs.startDs, v, lhs); remove_edge(e, lhs); @@ -1343,8 +1343,8 @@ static const size_t MAX_SOLE_ENTRY_VERTICES = 10000; * single offset at runtime. See UE-1361. */ static bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, - const unordered_map<NFAVertex, NFAVertexDepth> &depths, - const unordered_set<NFAVertex> &reached_by_fixed_tops, + const unordered_map<NFAVertex, NFAVertexDepth> &depths, + const unordered_set<NFAVertex> &reached_by_fixed_tops, const map<u32, vector<vector<CharReach>>> &triggers) { DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); @@ -1374,12 +1374,12 @@ bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, } NGHolder rg; - unordered_map<NFAVertex, NFAVertex> rg_map; + unordered_map<NFAVertex, NFAVertex> rg_map; buildRepeatGraph(rg, rg_map, g, rsi, triggers); assert(rg.kind == g.kind); NGHolder lhs; - unordered_map<NFAVertex, NFAVertex> lhs_map; + unordered_map<NFAVertex, NFAVertex> lhs_map; buildInputGraph(lhs, lhs_map, g, first, triggers); assert(lhs.kind == g.kind); @@ -1393,18 +1393,18 @@ bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, // are in one region, vertices in the bounded repeat are in another. 
const u32 lhs_region = 1; const u32 repeat_region = 2; - unordered_map<NFAVertex, u32> region_map; + unordered_map<NFAVertex, u32> region_map; for (const auto &v : rsi.vertices) { assert(!is_special(v, g)); // no specials in repeats assert(contains(rg_map, v)); - DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index); + DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index); region_map.emplace(rg_map.at(v), repeat_region); } for (const auto &v : vertices_range(rg)) { if (!contains(region_map, v)) { - DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index); + DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index); region_map.emplace(v, lhs_region); } } @@ -1446,7 +1446,7 @@ struct StrawWalker { if (next == v) { // Ignore self loop. ++ai; if (ai == ae) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } next = *ai; } @@ -1461,7 +1461,7 @@ struct StrawWalker { succs.erase(v); for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { next = *ai; - DEBUG_PRINTF("checking %zu\n", g[next].index); + DEBUG_PRINTF("checking %zu\n", g[next].index); if (next == v) { continue; } @@ -1482,31 +1482,31 @@ struct StrawWalker { return next; } DEBUG_PRINTF("bailing\n"); - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } return next; } NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const { - DEBUG_PRINTF("walk from %zu\n", g[v].index); - unordered_set<NFAVertex> visited; + DEBUG_PRINTF("walk from %zu\n", g[v].index); + unordered_set<NFAVertex> visited; straw.clear(); while (!is_special(v, g)) { - DEBUG_PRINTF("checking %zu\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); NFAVertex next = step(v); - if (next == NGHolder::null_vertex()) { + if (next == NGHolder::null_vertex()) { break; } if (!visited.insert(next).second) { - DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index); + DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index); break; /* don't want to get stuck in 
any complicated loops */ } const CharReach &reach_v = g[v].char_reach; const CharReach &reach_next = g[next].char_reach; if (!reach_v.isSubsetOf(reach_next)) { - DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n", + DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n", g[next].index, g[v].index); break; } @@ -1514,7 +1514,7 @@ struct StrawWalker { // If this is cyclic with the right reach, we're done. Note that // startDs fulfils this requirement. if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) { - DEBUG_PRINTF("found cyclic %zu\n", g[next].index); + DEBUG_PRINTF("found cyclic %zu\n", g[next].index); return next; } @@ -1523,7 +1523,7 @@ struct StrawWalker { } straw.clear(); - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } private: @@ -1538,8 +1538,8 @@ static NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v, const vector<BoundedRepeatData> &all_repeats, vector<NFAVertex> &straw) { - typedef boost::reverse_graph<NGHolder, const NGHolder &> RevGraph; - const RevGraph revg(g); + typedef boost::reverse_graph<NGHolder, const NGHolder &> RevGraph; + const RevGraph revg(g); auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw); reverse(begin(straw), end(straw)); // path comes from cyclic @@ -1550,7 +1550,7 @@ static NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v, const vector<BoundedRepeatData> &all_repeats, vector<NFAVertex> &straw) { - return StrawWalker<NGHolder>(g, g, all_repeats).walk(v, straw); + return StrawWalker<NGHolder>(g, g, all_repeats).walk(v, straw); } /** True if entries to this subgraph must pass through a cyclic state with @@ -1566,7 +1566,7 @@ bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi, // until we encounter our cyclic, all of which must have superset reach. 
vector<NFAVertex> straw; return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) != - NGHolder::null_vertex(); + NGHolder::null_vertex(); } static @@ -1574,7 +1574,7 @@ bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi, const vector<BoundedRepeatData> &all_repeats) { vector<NFAVertex> straw; return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) != - NGHolder::null_vertex(); + NGHolder::null_vertex(); } static @@ -1610,7 +1610,7 @@ vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) { vector<CharReach> trigger; - flat_set<NFAVertex> curr, next; + flat_set<NFAVertex> curr, next; insert(&curr, inv_adjacent_vertices(v, g)); if (contains(curr, g.start)) { @@ -1711,7 +1711,7 @@ vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g, assert(!done.empty()); // Convert our path list into a set of unique triggers. - ue2_unordered_set<vector<CharReach>> unique_triggers; + ue2_unordered_set<vector<CharReach>> unique_triggers; for (const auto &path : done) { vector<CharReach> reach_path; for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) { @@ -1759,8 +1759,8 @@ static void selectHistoryScheme(const NGHolder &g, const ReportManager *rm, ReachSubgraph &rsi, - const unordered_map<NFAVertex, NFAVertexDepth> &depths, - const unordered_set<NFAVertex> &reached_by_fixed_tops, + const unordered_map<NFAVertex, NFAVertexDepth> &depths, + const unordered_set<NFAVertex> &reached_by_fixed_tops, const map<u32, vector<vector<CharReach>>> &triggers, const vector<BoundedRepeatData> &all_repeats, const bool simple_model_selection) { @@ -1828,7 +1828,7 @@ selectHistoryScheme(const NGHolder &g, const ReportManager *rm, static void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, - unordered_set<NFAVertex> &created, + unordered_set<NFAVertex> &created, const vector<NFAVertex> &straw) { if (!g[rd.cyclic].char_reach.all()) { // Create another cyclic feeder state with flipped reach. 
It has an @@ -1857,7 +1857,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, add_edge(u, feeder, g); } - DEBUG_PRINTF("added feeder %zu\n", g[feeder].index); + DEBUG_PRINTF("added feeder %zu\n", g[feeder].index); } else { // No neg trigger means feeder is empty, and unnecessary. assert(g[rd.pos_trigger].char_reach.all()); @@ -1875,7 +1875,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, */ static bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, - unordered_set<NFAVertex> &created, + unordered_set<NFAVertex> &created, const vector<BoundedRepeatData> &all_repeats) { assert(edge(g.startDs, g.startDs, g).second); @@ -1905,13 +1905,13 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, // This transformation is only safe if the straw path from startDs that // we've discovered can *only* lead to this repeat, since we're going to // remove the self-loop on startDs. - if (proper_out_degree(g.startDs, g) > 1) { + if (proper_out_degree(g.startDs, g) > 1) { DEBUG_PRINTF("startDs has other successors\n"); return false; } for (const auto &v : straw) { if (proper_out_degree(v, g) != 1) { - DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n", + DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n", g[v].index); return false; } @@ -1979,7 +1979,7 @@ vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, */ static bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, - unordered_set<NFAVertex> &created, + unordered_set<NFAVertex> &created, const vector<BoundedRepeatData> &all_repeats) { assert(g.kind == NFA_OUTFIX); @@ -2077,12 +2077,12 @@ bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) { namespace { class pfti_visitor : public boost::default_dfs_visitor { public: - pfti_visitor(unordered_map<NFAVertex, depth> &top_depths_in, + pfti_visitor(unordered_map<NFAVertex, depth> &top_depths_in, const depth &our_depth_in) : top_depths(top_depths_in), 
our_depth(our_depth_in) {} - void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) { - DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index, + void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) { + DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index, our_depth.str().c_str()); auto it = top_depths.find(v); @@ -2093,7 +2093,7 @@ public: top_depths[v] = our_depth; } } - unordered_map<NFAVertex, depth> &top_depths; + unordered_map<NFAVertex, depth> &top_depths; const depth &our_depth; }; } // namespace @@ -2101,51 +2101,51 @@ public: static void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops, const NGHolder &g, - unordered_set<NFAVertex> *reached_by_fixed_tops) { + unordered_set<NFAVertex> *reached_by_fixed_tops) { if (fixed_depth_tops.empty()) { return; /* we will never find anything */ } assert(!proper_out_degree(g.startDs, g)); - unordered_map<NFAVertex, depth> top_depths; - auto colours = make_small_color_map(g); + unordered_map<NFAVertex, depth> top_depths; + auto colours = make_small_color_map(g); for (const auto &e : out_edges_range(g.start, g)) { NFAVertex v = target(e, g); if (v == g.startDs) { continue; } - + depth td = depth::infinity(); - for (u32 top : g[e].tops) { - if (!contains(fixed_depth_tops, top)) { - td = depth::infinity(); - break; - } - depth td_t(fixed_depth_tops.at(top)); - if (td == td_t) { - continue; - } else if (td == depth::infinity()) { - td = td_t; - } else { - td = depth::infinity(); - break; - } - } - - DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index, - td.str().c_str()); + for (u32 top : g[e].tops) { + if (!contains(fixed_depth_tops, top)) { + td = depth::infinity(); + break; + } + depth td_t(fixed_depth_tops.at(top)); + if (td == td_t) { + continue; + } else if (td == depth::infinity()) { + td = td_t; + } else { + td = depth::infinity(); + break; + } + } + + DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index, + td.str().c_str()); /* for each vertex reachable from v update its map to 
reflect that it is * reachable from a top of depth td. */ - depth_first_visit(g, v, pfti_visitor(top_depths, td), colours); + depth_first_visit(g, v, pfti_visitor(top_depths, td), colours); } for (const auto &v_depth : top_depths) { const NFAVertex v = v_depth.first; const depth &d = v_depth.second; if (d.is_finite()) { - DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n", + DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n", g[v].index, d.str().c_str()); reached_by_fixed_tops->insert(v); } @@ -2158,20 +2158,20 @@ void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops, static bool hasOverlappingRepeats(UNUSED const NGHolder &g, const vector<BoundedRepeatData> &repeats) { - unordered_set<NFAVertex> involved; + unordered_set<NFAVertex> involved; for (const auto &br : repeats) { if (contains(involved, br.cyclic)) { - DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index); + DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index); return true; } if (contains(involved, br.pos_trigger)) { - DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index); + DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index); return true; } for (auto v : br.tug_triggers) { if (contains(involved, v)) { - DEBUG_PRINTF("already seen tug %zu\n", g[v].index); + DEBUG_PRINTF("already seen tug %zu\n", g[v].index); return true; } } @@ -2193,7 +2193,7 @@ bool hasOverlappingRepeats(UNUSED const NGHolder &g, */ static bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi, - const unordered_map<NFAVertex, NFAVertexDepth> &depths) { + const unordered_map<NFAVertex, NFAVertexDepth> &depths) { if (num_vertices(g) > NFA_MAX_STATES) { // We may have no choice but to implement this repeat to get the graph // down to a tractable number of vertices. @@ -2246,13 +2246,13 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, #ifndef NDEBUG // So we can assert that the number of tops hasn't changed at the end of // this analysis. 
- const flat_set<u32> allTops = getTops(g); + const flat_set<u32> allTops = getTops(g); #endif // Later on, we're (a little bit) dependent on depth information for // unpeeling and so forth. Note that these depths MUST be maintained when // new vertices are added. - unordered_map<NFAVertex, NFAVertexDepth> depths; + unordered_map<NFAVertex, NFAVertexDepth> depths; findInitDepths(g, depths); // Construct our list of subgraphs with the same reach using BGL magic. @@ -2309,15 +2309,15 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // could make this unnecessary? const unique_ptr<const NGHolder> orig_g(cloneHolder(g)); - unordered_set<NFAVertex> reached_by_fixed_tops; + unordered_set<NFAVertex> reached_by_fixed_tops; if (is_triggered(g)) { populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops); } // Go to town on the remaining acceptable subgraphs. - unordered_set<NFAVertex> created; + unordered_set<NFAVertex> created; for (auto &rsi : rs) { - DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n", + DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n", g[rsi.vertices.front()].index, rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); @@ -2350,7 +2350,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // Some of our analyses require correctly numbered vertices, so we // renumber after changes. - renumber_vertices(g); + renumber_vertices(g); } bool modified_start_ds = false; @@ -2391,8 +2391,8 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // We have modified the graph, so we need to ensure that our edges // and vertices are correctly numbered. - renumber_vertices(g); - renumber_edges(g); + renumber_vertices(g); + renumber_edges(g); // Remove stray report IDs. clearReports(g); } @@ -2431,20 +2431,20 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { // Must be start anchored. 
assert(edge(g.startDs, g.startDs, g).second); - if (out_degree(g.startDs, g) > 1) { + if (out_degree(g.startDs, g) > 1) { DEBUG_PRINTF("Unanchored\n"); return false; } // Must not be EOD-anchored. assert(edge(g.accept, g.acceptEod, g).second); - if (in_degree(g.acceptEod, g) > 1) { + if (in_degree(g.acceptEod, g) > 1) { DEBUG_PRINTF("EOD anchored\n"); return false; } // Must have precisely one top. - if (is_triggered(g) && !onlyOneTop(g)) { + if (is_triggered(g) && !onlyOneTop(g)) { DEBUG_PRINTF("Too many tops\n"); return false; } @@ -2493,7 +2493,7 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { // have the same report set as the vertices in the repeat. if (repeat.bounds.min == depth(1) && g[g.start].reports == g[v].reports) { - repeat.bounds.min = depth(0); + repeat.bounds.min = depth(0); DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str()); } else { DEBUG_PRINTF("not a supported repeat\n"); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h index cfd804b7ef..7e04edf571 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "ue2common.h" #include "nfa/repeat_internal.h" #include "util/depth.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <map> #include <vector> @@ -122,7 +122,7 @@ void findRepeats(const NGHolder &h, u32 minRepeatVertices, struct PureRepeat { CharReach reach; DepthMinMax bounds; - flat_set<ReportID> reports; + flat_set<ReportID> reports; bool operator==(const PureRepeat &a) const { return reach == a.reach && bounds == a.bounds && reports == a.reports; diff --git 
a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp index 4e9b498df0..bb88aa0d88 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,26 +65,26 @@ bool can_exhaust(const NGHolder &g, const ReportManager &rm) { return true; } -void set_report(NGHolder &g, ReportID internal_report) { - // First, wipe the report IDs on all vertices. - for (auto v : vertices_range(g)) { - g[v].reports.clear(); - } - - // Any predecessors of accept get our id. - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - g[v].reports.insert(internal_report); - } - - // Same for preds of acceptEod, except accept itself. - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } - g[v].reports.insert(internal_report); - } -} - +void set_report(NGHolder &g, ReportID internal_report) { + // First, wipe the report IDs on all vertices. + for (auto v : vertices_range(g)) { + g[v].reports.clear(); + } + + // Any predecessors of accept get our id. + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + g[v].reports.insert(internal_report); + } + + // Same for preds of acceptEod, except accept itself. + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } + g[v].reports.insert(internal_report); + } +} + /** Derive a maximum offset for the graph from the max_offset values of its * reports. Returns MAX_OFFSET for inf. 
*/ u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) { diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h index 31c9530880..49570c1191 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,10 +48,10 @@ std::set<ReportID> all_reports(const NGHolder &g); /** True if *all* reports in the graph are exhaustible. */ bool can_exhaust(const NGHolder &g, const ReportManager &rm); -/** Replaces all existing reports on the holder with the provided internal - * report id. */ -void set_report(NGHolder &g, ReportID internal_report); - +/** Replaces all existing reports on the holder with the provided internal + * report id. */ +void set_report(NGHolder &g, ReportID internal_report); + /** Derive a maximum offset for the graph from the max_offset values of its * reports. Returns MAX_OFFSET for inf. */ u64a findMaxOffset(const NGHolder &g, const ReportManager &rm); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp index 704697e57f..151814200b 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,71 +49,71 @@ namespace ue2 { /** Connect the start vertex to each of the vertices in \p tops. 
This is useful * temporarily for when we need to run a graph algorithm that expects a single * source vertex. */ -static -void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops, - vector<NFAEdge> &tempEdges) { - for (NFAVertex v : tops) { +static +void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops, + vector<NFAEdge> &tempEdges) { + for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); - const NFAEdge &e = add_edge(g.start, v, g); - tempEdges.push_back(e); - } -} - -/** - * Returns true if start's successors (aside from startDs) are subset of - * startDs's proper successors or if start has no successors other than startDs. - */ -static -bool startIsRedundant(const NGHolder &g) { - /* We ignore startDs as the self-loop may have been stripped as an - * optimisation for repeats (improveLeadingRepeats()). */ - set<NFAVertex> start; - insert(&start, adjacent_vertices_range(g.start, g)); - start.erase(g.startDs); - - // Trivial case: start has no successors other than startDs. - if (start.empty()) { - DEBUG_PRINTF("start has no out-edges other than to startDs\n"); - return true; - } - - set<NFAVertex> startDs; - insert(&startDs, adjacent_vertices_range(g.startDs, g)); - startDs.erase(g.startDs); - - if (!is_subset_of(start, startDs)) { - DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); - return false; + const NFAEdge &e = add_edge(g.start, v, g); + tempEdges.push_back(e); } - - return true; } +/** + * Returns true if start's successors (aside from startDs) are subset of + * startDs's proper successors or if start has no successors other than startDs. + */ static -void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops, +bool startIsRedundant(const NGHolder &g) { + /* We ignore startDs as the self-loop may have been stripped as an + * optimisation for repeats (improveLeadingRepeats()). 
*/ + set<NFAVertex> start; + insert(&start, adjacent_vertices_range(g.start, g)); + start.erase(g.startDs); + + // Trivial case: start has no successors other than startDs. + if (start.empty()) { + DEBUG_PRINTF("start has no out-edges other than to startDs\n"); + return true; + } + + set<NFAVertex> startDs; + insert(&startDs, adjacent_vertices_range(g.startDs, g)); + startDs.erase(g.startDs); + + if (!is_subset_of(start, startDs)) { + DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); + return false; + } + + return true; +} + +static +void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops, vector<NFAVertex> &ordering) { // First, wire up our "tops" to start so that we have a single source, // which will give a nicer topo order. - vector<NFAEdge> tempEdges; - wireStartToTops(g, tops, tempEdges); + vector<NFAEdge> tempEdges; + wireStartToTops(g, tops, tempEdges); - renumber_vertices(g); + renumber_vertices(g); vector<NFAVertex> temp = getTopoOrdering(g); - remove_edges(tempEdges, g); + remove_edges(tempEdges, g); // Move {start, startDs} to the end, so they'll be first when we reverse - // the ordering (if they are required). + // the ordering (if they are required). temp.erase(remove(temp.begin(), temp.end(), g.startDs)); temp.erase(remove(temp.begin(), temp.end(), g.start)); - if (proper_out_degree(g.startDs, g)) { - temp.push_back(g.startDs); - } - if (!startIsRedundant(g)) { - temp.push_back(g.start); - } + if (proper_out_degree(g.startDs, g)) { + temp.push_back(g.startDs); + } + if (!startIsRedundant(g)) { + temp.push_back(g.start); + } // Walk ordering, remove vertices that shouldn't be participating in state // numbering, such as accepts. @@ -131,16 +131,16 @@ void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops, // Returns the number of states. 
static -unordered_map<NFAVertex, u32> +unordered_map<NFAVertex, u32> getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) { - unordered_map<NFAVertex, u32> states; + unordered_map<NFAVertex, u32> states; for (const auto &v : vertices_range(h)) { states[v] = NO_STATE; } u32 stateNum = 0; for (auto v : ordering) { - DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum, + DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum, h[v].index); states[v] = stateNum++; } @@ -183,15 +183,15 @@ void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) { continue; } - DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index); + DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index); ordering.erase(v_it); ordering.insert(++u_it, v); } } -unordered_map<NFAVertex, u32> -numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) { +unordered_map<NFAVertex, u32> +numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) { DEBUG_PRINTF("numbering states for holder %p\n", &h); vector<NFAVertex> ordering; @@ -199,10 +199,10 @@ numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) { optimiseTightLoops(h, ordering); - return getStateIndices(h, ordering); + return getStateIndices(h, ordering); } -u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) { +u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) { if (state_ids.empty()) { return 0; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h index 75d19c6294..0ed4acb6e2 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ 
-28,16 +28,16 @@ /** \file * \brief State numbering and late graph restructuring code. - */ + */ #ifndef NG_RESTRUCTURING_H #define NG_RESTRUCTURING_H #include "ng_holder.h" #include "ue2common.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" -#include <unordered_map> +#include <unordered_map> namespace ue2 { @@ -50,14 +50,14 @@ static constexpr u32 NO_STATE = ~0; /** * \brief Gives each participating vertex in the graph a unique state index. */ -std::unordered_map<NFAVertex, u32> -numberStates(NGHolder &h, const flat_set<NFAVertex> &tops); +std::unordered_map<NFAVertex, u32> +numberStates(NGHolder &h, const flat_set<NFAVertex> &tops); /** * \brief Counts the number of states (vertices with state indices) in the * graph. */ -u32 countStates(const std::unordered_map<NFAVertex, u32> &state_ids); +u32 countStates(const std::unordered_map<NFAVertex, u32> &state_ids); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp index 0f932668c9..af85f01b9f 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,8 @@ #include "util/charreach.h" #include "util/graph_range.h" -#include <set> - +#include <set> + using namespace std; namespace ue2 { diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp index 9c2d9ba38d..1bca34eff6 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, 
Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,7 +33,7 @@ #include "ng_small_literal_set.h" #include "grey.h" -#include "ng_holder.h" +#include "ng_holder.h" #include "ng_util.h" #include "rose/rose_build.h" #include "util/compare.h" @@ -100,7 +100,7 @@ bool operator<(const sls_literal &a, const sls_literal &b) { static bool checkLongMixedSensitivityLiterals( - const map<sls_literal, flat_set<ReportID>> &literals) { + const map<sls_literal, flat_set<ReportID>> &literals) { const size_t len = MAX_MASK2_WIDTH; for (const sls_literal &lit : literals | map_keys) { @@ -114,7 +114,7 @@ bool checkLongMixedSensitivityLiterals( static bool findLiterals(const NGHolder &g, - map<sls_literal, flat_set<ReportID>> *literals) { + map<sls_literal, flat_set<ReportID>> *literals) { vector<NFAVertex> order = getTopoOrdering(g); vector<set<sls_literal>> built(num_vertices(g)); @@ -125,7 +125,7 @@ bool findLiterals(const NGHolder &g, set<sls_literal> &out = built[g[v].index]; read_count[g[v].index] = out_degree(v, g); - DEBUG_PRINTF("setting read_count to %zu for %zu\n", + DEBUG_PRINTF("setting read_count to %zu for %zu\n", read_count[g[v].index], g[v].index); assert(out.empty()); @@ -154,7 +154,7 @@ bool findLiterals(const NGHolder &g, } set<sls_literal> &in = built[g[u].index]; - DEBUG_PRINTF("getting from %zu (%zu reads to go)\n", + DEBUG_PRINTF("getting from %zu (%zu reads to go)\n", g[u].index, read_count[g[u].index]); assert(!in.empty()); assert(read_count[g[u].index]); @@ -188,7 +188,7 @@ bool findLiterals(const NGHolder &g, read_count[g[u].index]--; if (!read_count[g[u].index]) { - DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); + DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); in.clear(); } } @@ -198,7 +198,7 @@ bool findLiterals(const NGHolder &g, } static -size_t min_period(const map<sls_literal, flat_set<ReportID>> 
&literals) { +size_t min_period(const map<sls_literal, flat_set<ReportID>> &literals) { size_t rv = SIZE_MAX; for (const sls_literal &lit : literals | map_keys) { @@ -222,14 +222,14 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g, return false; } - if (!hasNarrowReachVertex(g, MAX_LITERAL_SET_SIZE * 2 + 1)) { - DEBUG_PRINTF("vertex with wide reach found\n"); - return false; - } - + if (!hasNarrowReachVertex(g, MAX_LITERAL_SET_SIZE * 2 + 1)) { + DEBUG_PRINTF("vertex with wide reach found\n"); + return false; + } + DEBUG_PRINTF("looking for literals\n"); - map<sls_literal, flat_set<ReportID>> literals; + map<sls_literal, flat_set<ReportID>> literals; if (!findLiterals(g, &literals)) { DEBUG_PRINTF(":(\n"); return false; diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp index d23ac408b0..90942def3e 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,13 +26,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief SOM ("Start of Match") analysis. 
*/ - -#include "ng_som.h" - + +#include "ng_som.h" + #include "ng.h" #include "ng_dump.h" #include "ng_equivalence.h" @@ -48,11 +48,11 @@ #include "ng_som_util.h" #include "ng_split.h" #include "ng_util.h" -#include "ng_violet.h" +#include "ng_violet.h" #include "ng_width.h" #include "grey.h" #include "ue2common.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "parser/position.h" @@ -69,8 +69,8 @@ #include <algorithm> #include <map> -#include <unordered_map> -#include <unordered_set> +#include <unordered_map> +#include <unordered_set> #include <vector> using namespace std; @@ -105,7 +105,7 @@ struct som_plan { static bool regionCanEstablishSom(const NGHolder &g, - const unordered_map<NFAVertex, u32> &regions, + const unordered_map<NFAVertex, u32> &regions, const u32 region, const vector<NFAVertex> &r_exits, const vector<DepthMinMax> &depths) { if (region == regions.at(g.accept) || @@ -116,7 +116,7 @@ bool regionCanEstablishSom(const NGHolder &g, DEBUG_PRINTF("region %u\n", region); for (UNUSED auto v : r_exits) { - DEBUG_PRINTF(" exit %zu\n", g[v].index); + DEBUG_PRINTF(" exit %zu\n", g[v].index); } /* simple if each region exit is at fixed distance from SOM. 
Note SOM does @@ -125,12 +125,12 @@ bool regionCanEstablishSom(const NGHolder &g, assert(regions.at(v) == region); const DepthMinMax &d = depths.at(g[v].index); if (d.min != d.max) { - DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index, + DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index, d.min.str().c_str(), d.max.str().c_str()); return false; } } - DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]), + DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]), g[r_exits[0]].index); return true; @@ -151,7 +151,7 @@ struct region_info { static void buildRegionMapping(const NGHolder &g, - const unordered_map<NFAVertex, u32> &regions, + const unordered_map<NFAVertex, u32> &regions, map<u32, region_info> &info, bool include_region_0 = false) { for (auto v : vertices_range(g)) { @@ -184,7 +184,7 @@ void buildRegionMapping(const NGHolder &g, set<NFAEdge> be; BackEdges<set<NFAEdge> > backEdgeVisitor(be); - boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); for (const auto &e : be) { NFAVertex u = source(e, g); @@ -211,17 +211,17 @@ void buildRegionMapping(const NGHolder &g, r_i.optional ? 
" (optional)" : ""); DEBUG_PRINTF(" enters:"); for (u32 i = 0; i < r_i.enters.size(); i++) { - printf(" %zu", g[r_i.enters[i]].index); + printf(" %zu", g[r_i.enters[i]].index); } printf("\n"); DEBUG_PRINTF(" exits:"); for (u32 i = 0; i < r_i.exits.size(); i++) { - printf(" %zu", g[r_i.exits[i]].index); + printf(" %zu", g[r_i.exits[i]].index); } printf("\n"); DEBUG_PRINTF(" all:"); for (u32 i = 0; i < r_i.full.size(); i++) { - printf(" %zu", g[r_i.full[i]].index); + printf(" %zu", g[r_i.full[i]].index); } printf("\n"); } @@ -230,7 +230,7 @@ void buildRegionMapping(const NGHolder &g, static bool validateXSL(const NGHolder &g, - const unordered_map<NFAVertex, u32> &regions, + const unordered_map<NFAVertex, u32> &regions, const u32 region, const CharReach &escapes, u32 *bad_region) { /* need to check that the escapes escape all of the graph past region */ u32 first_bad_region = ~0U; @@ -238,7 +238,7 @@ bool validateXSL(const NGHolder &g, u32 v_region = regions.at(v); if (!is_special(v, g) && v_region > region && (escapes & g[v].char_reach).any()) { - DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index); + DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index); first_bad_region = MIN(first_bad_region, v_region); } } @@ -253,7 +253,7 @@ bool validateXSL(const NGHolder &g, static bool validateEXSL(const NGHolder &g, - const unordered_map<NFAVertex, u32> &regions, + const unordered_map<NFAVertex, u32> &regions, const u32 region, const CharReach &escapes, const NGHolder &prefix, u32 *bad_region) { /* EXSL: To be a valid EXSL with escapes e, we require that all states @@ -267,7 +267,7 @@ bool validateEXSL(const NGHolder &g, const vector<CharReach> escapes_vec(1, escapes); const vector<CharReach> notescapes_vec(1, ~escapes); - flat_set<NFAVertex> states; + flat_set<NFAVertex> states; /* turn on all states past the prefix */ DEBUG_PRINTF("region %u is cutover\n", region); for (auto v : vertices_range(g)) { @@ -280,7 +280,7 @@ bool validateEXSL(const NGHolder &g, states = 
execute_graph(g, escapes_vec, states); /* flood with any number of not escapes */ - flat_set<NFAVertex> prev_states; + flat_set<NFAVertex> prev_states; while (prev_states != states) { prev_states = states; states = execute_graph(g, notescapes_vec, states); @@ -290,7 +290,7 @@ bool validateEXSL(const NGHolder &g, /* find input starts to use for when we are running the prefix through as * when the escape character arrives we may be in matching the prefix * already */ - flat_set<NFAVertex> prefix_start_states; + flat_set<NFAVertex> prefix_start_states; for (auto v : vertices_range(prefix)) { if (v != prefix.accept && v != prefix.acceptEod /* and as we have already made it past the prefix once */ @@ -355,7 +355,7 @@ bool isPossibleLock(const NGHolder &g, static unique_ptr<NGHolder> -makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, +makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, const region_info &curr, const region_info &next, bool renumber = true) { const vector<NFAVertex> &curr_exits = curr.exits; @@ -370,12 +370,12 @@ makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, deque<NFAVertex> lhs_verts; insert(&lhs_verts, lhs_verts.end(), vertices(g)); - unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix + unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix fillHolder(&prefix, g, lhs_verts, &lhs_map); prefix.kind = NFA_OUTFIX; // We need a reverse mapping to track regions. 
- unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g + unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g for (const auto &e : lhs_map) { rev_map.emplace(e.second, e.first); } @@ -385,7 +385,7 @@ makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, add_edge(prefix.accept, prefix.acceptEod, prefix); assert(!next_enters.empty()); - assert(next_enters.front() != NGHolder::null_vertex()); + assert(next_enters.front() != NGHolder::null_vertex()); u32 dead_region = regions.at(next_enters.front()); DEBUG_PRINTF("curr_region %u, dead_region %u\n", regions.at(curr_exits.front()), dead_region); @@ -404,7 +404,7 @@ makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, vector<NFAVertex> to_clear; assert(contains(lhs_map, curr_exits.front())); NFAVertex p_u = lhs_map[curr_exits.front()]; - DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index); + DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index); for (auto p_v : adjacent_vertices_range(p_u, prefix)) { auto v = rev_map.at(p_v); if (p_v == prefix.accept || regions.at(v) < dead_region) { @@ -414,7 +414,7 @@ makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, } for (auto v : to_clear) { - DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index); + DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index); clear_in_edges(v, prefix); } @@ -446,9 +446,9 @@ void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) { } static -void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type, - u32 som_loc, const vector<DepthMinMax> &depths, - bool prefix_by_rev) { +void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type, + u32 som_loc, const vector<DepthMinMax> &depths, + bool prefix_by_rev) { Report ir = makeCallback(0U, 0); ir.type = ir_type; ir.onmatch = som_loc; @@ -472,7 +472,7 @@ void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type, } static -void updatePrefixReports(ReportManager &rm, NGHolder &g, 
ReportType ir_type) { +void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) { /* update the som action on the prefix report */ for (auto v : inv_adjacent_vertices_range(g.accept, g)) { auto &reports = g[v].reports; @@ -543,7 +543,7 @@ void setMidfixReports(ReportManager &rm, const som_plan &item, static bool finalRegion(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, NFAVertex v) { u32 region = regions.at(v); for (auto w : adjacent_vertices_range(v, g)) { @@ -557,8 +557,8 @@ bool finalRegion(const NGHolder &g, static void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, - NFAVertex v, ReportType ir_type, - u64a param) { + NFAVertex v, ReportType ir_type, + u64a param) { assert(!g[v].reports.empty()); flat_set<ReportID> r_new; @@ -577,7 +577,7 @@ void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, ir.somDistance = param; ReportID rep = rm.getInternalId(ir); - DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n", + DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n", g[v].index, report_id, rep, ir_type); r_new.insert(rep); } @@ -691,7 +691,7 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, map<u32, region_info>::const_iterator picked) { /* NOTE: This is appropriate for firstMatchIsFirst */ DEBUG_PRINTF("prepping for lock check\n"); - + NGHolder &midfix = *out; map<NFAVertex, NFAVertex> v_map; @@ -699,18 +699,18 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, v_map[g.startDs] = midfix.startDs; /* include the lock region */ - assert(picked != info.end()); - auto graph_last = next(picked); - - assert(!graph_last->second.dag); - assert(graph_last->second.full.size() == 1); + assert(picked != info.end()); + auto graph_last = next(picked); - for (auto jt = graph_last; ; --jt) { + assert(!graph_last->second.dag); + assert(graph_last->second.full.size() == 1); + + for (auto jt = graph_last; ; 
--jt) { DEBUG_PRINTF("adding r %u to midfix\n", jt->first); /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { - DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); if (contains(v_map, v)) { continue; } @@ -742,38 +742,38 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, } } - if (jt == info.begin()) { - break; - } - } - - /* add edges from startds to the enters of all the initial optional - * regions and the first mandatory region. */ - for (auto jt = info.begin(); ; ++jt) { + if (jt == info.begin()) { + break; + } + } + + /* add edges from startds to the enters of all the initial optional + * regions and the first mandatory region. */ + for (auto jt = info.begin(); ; ++jt) { for (auto enter : jt->second.enters) { assert(contains(v_map, enter)); NFAVertex v = v_map[enter]; add_edge_if_not_present(midfix.startDs, v, midfix); } - if (!jt->second.optional) { - break; - } - - if (jt == graph_last) { - /* all regions are optional - add a direct edge to accept */ - add_edge_if_not_present(midfix.startDs, midfix.accept, midfix); + if (!jt->second.optional) { break; } + + if (jt == graph_last) { + /* all regions are optional - add a direct edge to accept */ + add_edge_if_not_present(midfix.startDs, midfix.accept, midfix); + break; + } } assert(in_degree(midfix.accept, midfix)); - renumber_vertices(midfix); + renumber_vertices(midfix); } static void fillRoughMidfix(NGHolder *out, const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, map<u32, region_info>::const_iterator picked) { /* as we are not the first prefix, we are probably not acyclic. 
We need to @@ -795,7 +795,7 @@ void fillRoughMidfix(NGHolder *out, const NGHolder &g, /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { - DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); NFAVertex vnew = add_vertex(g[v], midfix); v_map[v] = vnew; } @@ -835,7 +835,7 @@ void fillRoughMidfix(NGHolder *out, const NGHolder &g, do { for (auto v : jt->second.exits) { - DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); NFAVertex vnew = add_vertex(g[v], midfix); v_map[v] = vnew; @@ -943,7 +943,7 @@ bool isMandRegionBetween(map<u32, region_info>::const_iterator a, // (woot!); updates picked, plan and bad_region. static bool advancePlan(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const NGHolder &prefix, bool stuck, map<u32, region_info>::const_iterator &picked, const map<u32, region_info>::const_iterator furthest, @@ -1022,7 +1022,7 @@ bool addPlan(vector<som_plan> &plan, u32 parent) { // Fetches all preds of {accept, acceptEod} for this graph. 
static void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) { - set<NFAVertex> tmp; + set<NFAVertex> tmp; insert(&tmp, inv_adjacent_vertices(g.accept, g)); insert(&tmp, inv_adjacent_vertices(g.acceptEod, g)); tmp.erase(g.accept); @@ -1030,7 +1030,7 @@ void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) { #ifdef DEBUG DEBUG_PRINTF("add reporters:"); for (UNUSED auto v : tmp) { - printf(" %zu", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); #endif @@ -1044,7 +1044,7 @@ void addReporterVertices(const region_info &r, const NGHolder &g, vector<NFAVertex> &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("add reporter %zu\n", g[v].index); + DEBUG_PRINTF("add reporter %zu\n", g[v].index); reporters.push_back(v); } } @@ -1053,12 +1053,12 @@ void addReporterVertices(const region_info &r, const NGHolder &g, // Fetches the mappings of all preds of {accept, acceptEod} in this region. static void addMappedReporterVertices(const region_info &r, const NGHolder &g, - const unordered_map<NFAVertex, NFAVertex> &mapping, + const unordered_map<NFAVertex, NFAVertex> &mapping, vector<NFAVertex> &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("adding v=%zu\n", g[v].index); - auto it = mapping.find(v); + DEBUG_PRINTF("adding v=%zu\n", g[v].index); + auto it = mapping.find(v); assert(it != mapping.end()); reporters.push_back(it->second); } @@ -1069,9 +1069,9 @@ void addMappedReporterVertices(const region_info &r, const NGHolder &g, // from earlier regions. 
static void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, NFAVertex entry, const vector<NFAVertex> &enters, - unordered_map<NFAVertex, NFAVertex> &orig_to_copy) { + unordered_map<NFAVertex, NFAVertex> &orig_to_copy) { orig_to_copy.clear(); cloneHolder(out, g, &orig_to_copy); @@ -1096,7 +1096,7 @@ void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, } static -void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, +void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, vector<NFAVertex> &enters) { assert(!enters.empty()); const u32 split_region = regions.at(enters.front()); @@ -1113,7 +1113,7 @@ void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, } for (auto enter : enters) { - DEBUG_PRINTF("processing enter %zu\n", g[enter].index); + DEBUG_PRINTF("processing enter %zu\n", g[enter].index); map<NFAVertex, NFAVertex> orig_to_copy; // Make a copy of all of the tail vertices, storing region info along @@ -1163,7 +1163,7 @@ void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, [&](const NFAEdge &e) { NFAVertex u = source(e, g); return regions.at(u) < split_region; - }, g); + }, g); } new_enters.push_back(orig_to_copy[enter]); @@ -1179,11 +1179,11 @@ void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, static bool doTreePlanningIntl(NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, map<u32, region_info>::const_iterator picked, u32 bad_region, u32 parent_plan, - const unordered_map<NFAVertex, NFAVertex> ©_to_orig, + const unordered_map<NFAVertex, NFAVertex> ©_to_orig, vector<som_plan> &plan, const Grey &grey) { assert(picked != info.end()); @@ -1335,14 +1335,14 @@ bool doTreePlanning(NGHolder &g, dumpHolder(g, g_regions, 14, "som_expandedtree", grey); for (auto v : enters) { - DEBUG_PRINTF("enter %zu\n", 
g[v].index); + DEBUG_PRINTF("enter %zu\n", g[v].index); // For this entry vertex, construct a version of the graph without the // other entries in this region (g_path), and calculate its depths and // regions. NGHolder g_path; - unordered_map<NFAVertex, NFAVertex> orig_to_copy; + unordered_map<NFAVertex, NFAVertex> orig_to_copy; cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy); auto regions = assignRegions(g_path); dumpHolder(g_path, regions, 14, "som_treepath", grey); @@ -1376,7 +1376,7 @@ bool doTreePlanning(NGHolder &g, } // Construct reverse mapping from vertices in g_path to g. - unordered_map<NFAVertex, NFAVertex> copy_to_orig; + unordered_map<NFAVertex, NFAVertex> copy_to_orig; for (const auto &m : orig_to_copy) { copy_to_orig.insert(make_pair(m.second, m.first)); } @@ -1399,7 +1399,7 @@ enum dsp_behaviour { static bool doSomPlanning(NGHolder &g, bool stuck_in, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, map<u32, region_info>::const_iterator picked, vector<som_plan> &plan, @@ -1570,12 +1570,12 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, p.is_reset, p.parent); printf(" reporters:"); for (auto v : p.reporters) { - printf(" %zu", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); printf(" reporters_in:"); for (auto v : p.reporters_in) { - printf(" %zu", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); #endif @@ -1589,9 +1589,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, * implement the full pattern. 
*/ static -void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, - NGHolder &g, vector<som_plan> &plan, - const u32 first_som_slot) { +void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, + NGHolder &g, vector<som_plan> &plan, + const u32 first_som_slot) { ReportManager &rm = ng.rm; SomSlotManager &ssm = ng.ssm; @@ -1604,14 +1604,14 @@ void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, // Root plan, which already has a SOM slot assigned (first_som_slot). dumpSomPlan(g, plan.front(), 0); - dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0, - ng.cc.grey); + dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0, + ng.cc.grey); assert(plan.front().prefix); if (plan.front().escapes.any() && !plan.front().is_reset) { /* setup escaper for first som location */ if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, first_som_slot)) { - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -1623,7 +1623,7 @@ void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, for (++it; it != plan.end(); ++it) { const u32 plan_num = it - plan.begin(); dumpSomPlan(g, *it, plan_num); - dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id, + dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id, plan_num, ng.cc.grey); assert(it->parent < plan_num); @@ -1634,7 +1634,7 @@ void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, assert(!it->no_implement); if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); @@ -1642,10 +1642,10 @@ void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, /* create 
prefix to set the som_loc */ if (!plan.front().no_implement) { - renumber_vertices(*plan.front().prefix); + renumber_vertices(*plan.front().prefix); assert(plan.front().prefix->kind == NFA_OUTFIX); if (!ng.addHolder(*plan.front().prefix)) { - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } } @@ -1733,17 +1733,17 @@ void clearProperInEdges(NGHolder &g, const NFAVertex sink) { namespace { struct SomRevNfa { - SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n) + SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n) : sink(s), report(r), nfa(move(n)) {} NFAVertex sink; ReportID report; - bytecode_ptr<NFA> nfa; + bytecode_ptr<NFA> nfa; }; } static -bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g, - const CompileContext &cc) { +bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g, + const CompileContext &cc) { // Create a reversed anchored version of this NFA which fires a zero report // ID on accept. NGHolder g_rev; @@ -1752,14 +1752,14 @@ bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g, setZeroReports(g_rev); // Prep for actual construction. - renumber_vertices(g_rev); + renumber_vertices(g_rev); g_rev.kind = NFA_REV_PREFIX; reduceGraphEquivalences(g_rev, cc); removeRedundancy(g_rev, SOM_NONE); DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev)); - auto nfa = constructReversedNFA(g_rev, cc); + auto nfa = constructReversedNFA(g_rev, cc); if (!nfa) { return nfa; } @@ -1792,9 +1792,9 @@ bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g, return true; } - renumber_vertices(g2); // for findMinWidth, findMaxWidth. + renumber_vertices(g2); // for findMinWidth, findMaxWidth. 
- auto nfa = makeBareSomRevNfa(g2, cc); + auto nfa = makeBareSomRevNfa(g2, cc); if (!nfa) { DEBUG_PRINTF("couldn't build rev nfa\n"); return false; @@ -1856,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { } static -u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, +u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, const CompileContext &cc) { depth maxWidth = findMaxWidth(g); @@ -1865,7 +1865,7 @@ u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, auto nfa = makeBareSomRevNfa(g, cc); if (!nfa) { - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } if (ng.cc.streaming) { @@ -1939,7 +1939,7 @@ map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g, static bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, map<u32, region_info>::const_iterator picked, const Grey &grey, @@ -2013,7 +2013,7 @@ void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) { static bool tryHaig(RoseBuild &rose, NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, som_type som, u32 somPrecision, map<u32, region_info>::const_iterator picked, shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix, @@ -2059,9 +2059,9 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix, } static -sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, - u32 comp_id, som_type som, - const unordered_map<NFAVertex, u32> ®ions, +sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, map<u32, region_info>::const_iterator lower_bound) { DEBUG_PRINTF("entry\n"); @@ -2070,18 +2070,18 @@ 
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, ReportManager &rm = ng.rm; SomSlotManager &ssm = ng.ssm; - if (!cc.grey.allowHaigLit) { + if (!cc.grey.allowHaigLit) { return SOMBE_FAIL; } const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ u32 som_loc = ssm.getPrivateSomSlot(); - if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { + if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { // This is an optimisation: if we can't build a Haig from a portion of // the graph, then we won't be able to manage it as an outfix either // when we fall back. - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } while (1) { @@ -2156,7 +2156,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, goto next_try; } - implementSomPlan(ng, expr, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); Report ir = makeCallback(0U, 0); assert(!plan.empty()); @@ -2227,7 +2227,7 @@ bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits, for (const auto &m : curr) { const NFAVertex u = m.first; const vector<ue2_literal> &base = m.second; - DEBUG_PRINTF("expanding from %zu\n", g[u].index); + DEBUG_PRINTF("expanding from %zu\n", g[u].index); for (auto v : adjacent_vertices_range(u, g)) { if (v == g.startDs) { continue; @@ -2240,7 +2240,7 @@ bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits, DEBUG_PRINTF("match\n"); goto skip_to_next_terminal; } - if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) { + if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) { DEBUG_PRINTF("wide\n"); goto skip_to_next_terminal; } @@ -2256,8 +2256,8 @@ bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits, CharReach cr = g[v].char_reach; vector<ue2_literal> &out = next[v]; - DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index, - cr.count()); + DEBUG_PRINTF("expanding to %zu (|| = 
%zu)\n", g[v].index, + cr.count()); for (size_t c = cr.find_first(); c != CharReach::npos; c = cr.find_next(c)) { bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) @@ -2333,7 +2333,7 @@ bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out, set<NFAVertex> adj_term1; insert(&adj_term1, adjacent_vertices(*terms.begin(), g)); for (auto v : terms) { - DEBUG_PRINTF("term %zu\n", g[v].index); + DEBUG_PRINTF("term %zu\n", g[v].index); set<NFAVertex> temp; insert(&temp, adjacent_vertices(v, g)); if (temp != adj_term1) { @@ -2342,7 +2342,7 @@ bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out, } } - unordered_map<NFAVertex, NFAVertex> rhs_map; + unordered_map<NFAVertex, NFAVertex> rhs_map; vector<NFAVertex> pivots; insert(&pivots, pivots.end(), adj_term1); splitRHS(g, pivots, rhs, &rhs_map); @@ -2353,14 +2353,14 @@ bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out, static void findBestLiteral(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, ue2_literal *lit_out, NFAVertex *v, const CompileContext &cc) { map<u32, region_info> info; buildRegionMapping(g, regions, info, false); ue2_literal best; - NFAVertex best_v = NGHolder::null_vertex(); + NFAVertex best_v = NGHolder::null_vertex(); map<u32, region_info>::const_iterator lit = info.begin(); while (1) { @@ -2393,10 +2393,10 @@ void findBestLiteral(const NGHolder &g, static bool splitOffBestLiteral(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, const CompileContext &cc) { - NFAVertex v = NGHolder::null_vertex(); + NFAVertex v = NGHolder::null_vertex(); findBestLiteral(g, regions, lit_out, &v, cc); if (lit_out->empty()) { @@ -2405,43 +2405,43 @@ bool splitOffBestLiteral(const NGHolder &g, DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str()); - unordered_map<NFAVertex, 
NFAVertex> lhs_map; - unordered_map<NFAVertex, NFAVertex> rhs_map; + unordered_map<NFAVertex, NFAVertex> lhs_map; + unordered_map<NFAVertex, NFAVertex> rhs_map; splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); - DEBUG_PRINTF("v = %zu\n", g[v].index); + DEBUG_PRINTF("v = %zu\n", g[v].index); return true; } -/** - * Replace the given graph's EXTERNAL_CALLBACK reports with - * EXTERNAL_CALLBACK_SOM_PASS reports. - */ -void makeReportsSomPass(ReportManager &rm, NGHolder &g) { - for (const auto &v : vertices_range(g)) { - const auto &reports = g[v].reports; - if (reports.empty()) { - continue; - } - - flat_set<ReportID> new_reports; - for (const ReportID &id : reports) { - const Report &report = rm.getReport(id); - if (report.type != EXTERNAL_CALLBACK) { - new_reports.insert(id); - continue; - } - Report report2 = report; - report2.type = EXTERNAL_CALLBACK_SOM_PASS; - new_reports.insert(rm.getInternalId(report2)); - } - - g[v].reports = new_reports; - } -} - +/** + * Replace the given graph's EXTERNAL_CALLBACK reports with + * EXTERNAL_CALLBACK_SOM_PASS reports. 
+ */ +void makeReportsSomPass(ReportManager &rm, NGHolder &g) { + for (const auto &v : vertices_range(g)) { + const auto &reports = g[v].reports; + if (reports.empty()) { + continue; + } + + flat_set<ReportID> new_reports; + for (const ReportID &id : reports) { + const Report &report = rm.getReport(id); + if (report.type != EXTERNAL_CALLBACK) { + new_reports.insert(id); + continue; + } + Report report2 = report; + report2.type = EXTERNAL_CALLBACK_SOM_PASS; + new_reports.insert(rm.getInternalId(report2)); + } + + g[v].reports = new_reports; + } +} + static bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { ue2_literal lit; @@ -2464,8 +2464,8 @@ bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - makeReportsSomPass(ng.rm, *rhs); - + makeReportsSomPass(ng.rm, *rhs); + dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); vector<vector<CharReach> > triggers; @@ -2497,7 +2497,7 @@ bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { static bool doHaigLitHaigSom(NG &ng, NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, som_type som) { if (!ng.cc.grey.allowLitHaig) { return false; @@ -2528,8 +2528,8 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, return false; /* TODO: handle */ } - makeReportsSomPass(ng.rm, *rhs); - + makeReportsSomPass(ng.rm, *rhs); + dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey); dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey); @@ -2541,7 +2541,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); bool lhs_all_vac = true; - NGHolder::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs); ai != ae && lhs_all_vac; ++ai) { if (!is_special(*ai, *lhs)) { @@ -2630,7 +2630,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, } } else { DEBUG_PRINTF("has start->accept edge\n"); - if 
(in_degree(g.acceptEod, g) > 1) { + if (in_degree(g.acceptEod, g) > 1) { DEBUG_PRINTF("also has a path to EOD\n"); return false; } @@ -2665,8 +2665,8 @@ bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { return false; } - makeReportsSomPass(ng.rm, *rhs); - + makeReportsSomPass(ng.rm, *rhs); + dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); vector<vector<CharReach>> triggers; @@ -2731,7 +2731,7 @@ bool trySombe(NG &ng, NGHolder &g, som_type som) { static map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, const vector<DepthMinMax> &depths) { map<u32, region_info>::const_iterator picked = info.end(); @@ -2756,7 +2756,7 @@ map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g, static map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, const vector<DepthMinMax> &depths, const map<u32, region_info>::const_iterator &orig, @@ -2831,7 +2831,7 @@ map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g, reverseHolder(*prefix, g_rev); anchorStarts(g_rev); - renumber_vertices(g_rev); + renumber_vertices(g_rev); g_rev.kind = NFA_REV_PREFIX; reduceGraphEquivalences(g_rev, cc); removeRedundancy(g_rev, SOM_NONE); @@ -2848,7 +2848,7 @@ map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g, static unique_ptr<NGHolder> makePrefixForChain(NGHolder &g, - const unordered_map<NFAVertex, u32> ®ions, + const unordered_map<NFAVertex, u32> ®ions, const map<u32, region_info> &info, const map<u32, region_info>::const_iterator &picked, vector<DepthMinMax> *depths, bool prefix_by_rev, @@ -2875,13 +2875,13 @@ unique_ptr<NGHolder> makePrefixForChain(NGHolder &g, } depths->clear(); /* renumbering invalidates depths */ - 
renumber_vertices(*prefix); + renumber_vertices(*prefix); DEBUG_PRINTF("done\n"); return prefix; } -sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som hello\n"); @@ -2891,7 +2891,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, // Special case: if g is completely anchored or begins with a dot-star, we // know that we have an absolute SOM of zero all the time. - if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) { + if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) { makeSomAbsReports(rm, g, g.accept); makeSomAbsReports(rm, g, g.acceptEod); return SOMBE_HANDLED_INTERNAL; @@ -3005,10 +3005,10 @@ sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, /* create prefix to set the som_loc */ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); if (prefix_by_rev) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - renumber_vertices(*prefix); + renumber_vertices(*prefix); if (!ng.addHolder(*prefix)) { DEBUG_PRINTF("failed to add holder\n"); clear_graph(g); @@ -3088,18 +3088,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); } if (prefix_by_rev && !plan.front().no_implement) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - implementSomPlan(ng, expr, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); DEBUG_PRINTF("success\n"); return SOMBE_HANDLED_INTERNAL; } -sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, - u32 comp_id, som_type som) 
{ +sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som+haig hello\n"); @@ -3136,7 +3136,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, buildRegionMapping(g, regions, info, true); sombe_rv rv = - doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); + doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); if (rv == SOMBE_FAIL) { clear_graph(g); cloneHolder(g, g_pristine); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.h b/contrib/libs/hyperscan/src/nfagraph/ng_som.h index ecae4c67fb..31631c7458 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,14 +34,14 @@ #define NG_SOM_H #include "som/som.h" -#include "ue2common.h" +#include "ue2common.h" namespace ue2 { -class ExpressionInfo; +class ExpressionInfo; class NG; class NGHolder; -class ReportManager; +class ReportManager; struct Grey; enum sombe_rv { @@ -65,17 +65,17 @@ enum sombe_rv { * May throw a "Pattern too large" exception if prefixes of the * pattern are too large to compile. */ -sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, som_type som); /** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. * May also throw pattern too large if prefixes of the pattern are too large to * compile. 
*/ -sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr, - u32 comp_id, som_type som); - -void makeReportsSomPass(ReportManager &rm, NGHolder &g); +sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr, + u32 comp_id, som_type som); +void makeReportsSomPass(ReportManager &rm, NGHolder &g); + } // namespace ue2 #endif // NG_SOM_H diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp index 33544ec173..ba7a2c40c8 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -155,13 +155,13 @@ bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) { if (is_special(v, g)) { continue; } - if (!in_degree(v, g)) { + if (!in_degree(v, g)) { continue; // unreachable, probably killed } const DepthMinMax &d = getDepth(v, g, depths); - DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index, + DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index, d.str().c_str()); if (d.min == d.max) { diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp index 1e7a41bb0c..d6ccc24aa0 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,7 +54,7 @@ vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) { // We operate on a 
temporary copy of the original graph here, so we don't // have to mutate the original. NGHolder g; - unordered_map<NFAVertex, NFAVertex> vmap; // vertex in g_orig to vertex in g + unordered_map<NFAVertex, NFAVertex> vmap; // vertex in g_orig to vertex in g cloneHolder(g, g_orig, &vmap); vector<NFAVertex> vstarts; @@ -76,10 +76,10 @@ vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) { clear_in_edges(v, g); } - //dumpGraph("som_depth.dot", g); + //dumpGraph("som_depth.dot", g); - // Find depths, indexed by vertex index in g - auto temp_depths = calcDepthsFrom(g, g.start); + // Find depths, indexed by vertex index in g + auto temp_depths = calcDepthsFrom(g, g.start); // Transfer depths, indexed by vertex index in g_orig. vector<DepthMinMax> depths(num_vertices(g_orig)); @@ -94,7 +94,7 @@ vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) { if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) { // StartDs and virtual starts always have zero depth. - d = DepthMinMax(depth(0), depth(0)); + d = DepthMinMax(depth(0), depth(0)); } else { u32 new_idx = g[v_new].index; d = temp_depths.at(new_idx); @@ -136,14 +136,14 @@ bool firstMatchIsFirst(const NGHolder &p) { return false; } - flat_set<NFAVertex> states; + flat_set<NFAVertex> states; /* turn on all states (except starts - avoid suffix matches) */ /* If we were doing (1) we would also except states leading to accepts - avoid prefix matches */ for (auto v : vertices_range(p)) { assert(!is_virtual_start(v, p)); if (!is_special(v, p)) { - DEBUG_PRINTF("turning on %zu\n", p[v].index); + DEBUG_PRINTF("turning on %zu\n", p[v].index); states.insert(v); } } @@ -154,9 +154,9 @@ bool firstMatchIsFirst(const NGHolder &p) { for (auto v : states) { /* need to check if this vertex may represent an infix match - ie * it does not have an edge to accept. 
*/ - DEBUG_PRINTF("check %zu\n", p[v].index); + DEBUG_PRINTF("check %zu\n", p[v].index); if (!edge(v, p.accept, p).second) { - DEBUG_PRINTF("fail %zu\n", p[v].index); + DEBUG_PRINTF("fail %zu\n", p[v].index); return false; } } @@ -166,7 +166,7 @@ bool firstMatchIsFirst(const NGHolder &p) { } bool somMayGoBackwards(NFAVertex u, const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ion_map, + const unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { /* Need to ensure all matches of the graph g up to u contain no infixes * which are also matches of the graph to u. @@ -186,11 +186,11 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, return cache.smgb[u]; } - DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index); + DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index); set<NFAEdge> be; BackEdges<set<NFAEdge>> backEdgeVisitor(be); - boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); bool rv; if (0) { @@ -207,7 +207,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, NFAVertex s = source(e, g); NFAVertex t = target(e, g); /* only need to worry about big cycles including/before u */ - DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index); + DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index); if (s != t && region_map.at(s) <= u_region) { DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ @@ -215,11 +215,11 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, } } - unordered_map<NFAVertex, NFAVertex> orig_to_copy; + unordered_map<NFAVertex, NFAVertex> orig_to_copy; NGHolder c_g; cloneHolder(c_g, g, &orig_to_copy); - /* treat virtual starts as unconditional - wire to startDs instead */ + /* treat virtual starts as unconditional - wire to startDs instead */ for (NFAVertex v : vertices_range(g)) { if (!is_virtual_start(v, g)) { continue; @@ -232,7 +232,7 @@ bool 
somMayGoBackwards(NFAVertex u, const NGHolder &g, clear_vertex(c_v, c_g); } - /* treat u as the only accept state */ + /* treat u as the only accept state */ NFAVertex c_u = orig_to_copy[u]; clear_in_edges(c_g.acceptEod, c_g); add_edge(c_g.accept, c_g.acceptEod, c_g); @@ -253,9 +253,9 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, } for (auto v : adjacent_vertices_range(t, g)) { if (contains(u_succ, v)) { - /* due to virtual starts being aliased with normal starts in the - * copy of the graph, we may have already added the edges. */ - add_edge_if_not_present(orig_to_copy[t], c_g.accept, c_g); + /* due to virtual starts being aliased with normal starts in the + * copy of the graph, we may have already added the edges. */ + add_edge_if_not_present(orig_to_copy[t], c_g.accept, c_g); break; } } @@ -264,13 +264,13 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, pruneUseless(c_g); be.clear(); - boost::depth_first_search(c_g, visitor(backEdgeVisitor) - .root_vertex(c_g.start)); + boost::depth_first_search(c_g, visitor(backEdgeVisitor) + .root_vertex(c_g.start)); for (const auto &e : be) { NFAVertex s = source(e, c_g); NFAVertex t = target(e, c_g); - DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index); + DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index); if (s != t) { assert(0); DEBUG_PRINTF("eek big cycle\n"); @@ -287,7 +287,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, } bool sentClearsTail(const NGHolder &g, - const unordered_map<NFAVertex, u32> ®ion_map, + const unordered_map<NFAVertex, u32> ®ion_map, const NGHolder &sent, u32 last_head_region, u32 *bad_region) { /* if a subsequent match from the prefix clears the rest of the pattern @@ -312,7 +312,7 @@ bool sentClearsTail(const NGHolder &g, */ u32 first_bad_region = ~0U; - flat_set<NFAVertex> states; + flat_set<NFAVertex> states; /* turn on all states */ DEBUG_PRINTF("region %u is cutover\n", last_head_region); for (auto v : vertices_range(g)) { @@ -322,7 
+322,7 @@ bool sentClearsTail(const NGHolder &g, } for (UNUSED auto v : states) { - DEBUG_PRINTF("start state: %zu\n", g[v].index); + DEBUG_PRINTF("start state: %zu\n", g[v].index); } /* run the prefix the main graph */ @@ -334,7 +334,7 @@ bool sentClearsTail(const NGHolder &g, continue; /* not in tail */ } - DEBUG_PRINTF("v %zu is still on\n", g[v].index); + DEBUG_PRINTF("v %zu is still on\n", g[v].index); assert(v != g.accept && v != g.acceptEod); /* no cr */ assert(contains(region_map, v)); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h index e2d38642c4..c06260fb9f 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "util/depth.h" #include <map> -#include <unordered_map> +#include <unordered_map> #include <vector> namespace ue2 { @@ -61,7 +61,7 @@ struct smgb_cache : public mbsb_cache { }; bool somMayGoBackwards(NFAVertex u, const NGHolder &g, - const std::unordered_map<NFAVertex, u32> ®ion_map, + const std::unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache); /** @@ -75,7 +75,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, * region ID associated with a tail state that is still on. 
*/ bool sentClearsTail(const NGHolder &g, - const std::unordered_map<NFAVertex, u32> ®ion_map, + const std::unordered_map<NFAVertex, u32> ®ion_map, const NGHolder &sent, u32 last_head_region, u32 *bad_region); diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp index 91a099fc38..ac1531881c 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,13 +62,13 @@ void clearAccepts(NGHolder &g) { } static -void filterSplitMap(const NGHolder &g, - unordered_map<NFAVertex, NFAVertex> *out_map) { - unordered_set<NFAVertex> verts; +void filterSplitMap(const NGHolder &g, + unordered_map<NFAVertex, NFAVertex> *out_map) { + unordered_set<NFAVertex> verts; insert(&verts, vertices(g)); - auto it = out_map->begin(); + auto it = out_map->begin(); while (it != out_map->end()) { - auto jt = it; + auto jt = it; ++it; if (!contains(verts, jt->second)) { out_map->erase(jt); @@ -78,8 +78,8 @@ void filterSplitMap(const NGHolder &g, static void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots, - const vector<NFAVertex> &rhs_pivots, NGHolder *lhs, - unordered_map<NFAVertex, NFAVertex> *lhs_map) { + const vector<NFAVertex> &rhs_pivots, NGHolder *lhs, + unordered_map<NFAVertex, NFAVertex> *lhs_map) { assert(lhs && lhs_map); cloneHolder(*lhs, base, lhs_map); @@ -87,7 +87,7 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots, clearAccepts(*lhs); for (auto pivot : pivots) { - DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index, + DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index, num_vertices(*lhs), lhs_map->size()); assert(contains(*lhs_map, pivot)); @@ 
-100,12 +100,12 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots, add_edge((*lhs_map)[pivot], lhs->accept, *lhs); } - /* should do the renumbering unconditionally as we know edges are already - * misnumbered */ - pruneUseless(*lhs, false); - renumber_edges(*lhs); - renumber_vertices(*lhs); - + /* should do the renumbering unconditionally as we know edges are already + * misnumbered */ + pruneUseless(*lhs, false); + renumber_edges(*lhs); + renumber_vertices(*lhs); + filterSplitMap(*lhs, lhs_map); switch (base.kind) { @@ -117,21 +117,21 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots, case NFA_SUFFIX: lhs->kind = NFA_INFIX; break; - case NFA_EAGER_PREFIX: - /* Current code should not be assigning eager until well after all the - * splitting is done. */ - assert(0); - lhs->kind = NFA_EAGER_PREFIX; - break; + case NFA_EAGER_PREFIX: + /* Current code should not be assigning eager until well after all the + * splitting is done. */ + assert(0); + lhs->kind = NFA_EAGER_PREFIX; + break; case NFA_REV_PREFIX: - case NFA_OUTFIX_RAW: + case NFA_OUTFIX_RAW: assert(0); break; } } void splitLHS(const NGHolder &base, NFAVertex pivot, - NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map) { + NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map) { vector<NFAVertex> pivots(1, pivot); vector<NFAVertex> rhs_pivots; insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base)); @@ -139,7 +139,7 @@ void splitLHS(const NGHolder &base, NFAVertex pivot, } void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots, - NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { + NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { assert(rhs && rhs_map); cloneHolder(*rhs, base, rhs_map); @@ -151,15 +151,15 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots, for (auto pivot : pivots) { assert(contains(*rhs_map, pivot)); - NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); - 
(*rhs)[e].tops.insert(DEFAULT_TOP); + NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); + (*rhs)[e].tops.insert(DEFAULT_TOP); } - - /* should do the renumbering unconditionally as we know edges are already - * misnumbered */ - pruneUseless(*rhs, false); - renumber_edges(*rhs); - renumber_vertices(*rhs); + + /* should do the renumbering unconditionally as we know edges are already + * misnumbered */ + pruneUseless(*rhs, false); + renumber_edges(*rhs); + renumber_vertices(*rhs); filterSplitMap(*rhs, rhs_map); switch (base.kind) { @@ -171,14 +171,14 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots, case NFA_OUTFIX: rhs->kind = NFA_SUFFIX; break; - case NFA_EAGER_PREFIX: - /* Current code should not be assigning eager until well after all the - * splitting is done. */ - assert(0); - rhs->kind = NFA_INFIX; - break; + case NFA_EAGER_PREFIX: + /* Current code should not be assigning eager until well after all the + * splitting is done. */ + assert(0); + rhs->kind = NFA_INFIX; + break; case NFA_REV_PREFIX: - case NFA_OUTFIX_RAW: + case NFA_OUTFIX_RAW: assert(0); break; } @@ -191,8 +191,8 @@ void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots, vector<NFAVertex> &succ) { assert(!pivots.empty()); - set<NFAVertex> adj; - set<NFAVertex> adj_temp; + set<NFAVertex> adj; + set<NFAVertex> adj_temp; insert(&adj, adjacent_vertices(pivots.at(0), g)); @@ -211,12 +211,12 @@ void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots, } void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots, - NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map, - NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { + NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map, + NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size()); assert(!has_parallel_edge(base)); - assert(isCorrectlyTopped(base)); + 
assert(isCorrectlyTopped(base)); /* RHS pivots are built from the common set of successors of pivots. */ vector<NFAVertex> rhs_pivots; @@ -230,13 +230,13 @@ void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots, assert(!has_parallel_edge(*lhs)); assert(!has_parallel_edge(*rhs)); - assert(isCorrectlyTopped(*lhs)); - assert(isCorrectlyTopped(*rhs)); + assert(isCorrectlyTopped(*lhs)); + assert(isCorrectlyTopped(*rhs)); } void splitGraph(const NGHolder &base, NFAVertex pivot, - NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map, - NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { + NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map, + NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) { vector<NFAVertex> pivots(1, pivot); splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map); } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.h b/contrib/libs/hyperscan/src/nfagraph/ng_split.h index 9ddc033257..3762bca170 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_split.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,9 +33,9 @@ #ifndef NG_SPLIT_H #define NG_SPLIT_H -#include "ng_holder.h" - -#include <unordered_map> +#include "ng_holder.h" + +#include <unordered_map> #include <vector> namespace ue2 { @@ -47,29 +47,29 @@ class NGHolder; * is in the lhs if it is reachable from start without going through the * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS. 
* - * Note: The RHS is setup to be triggered by TOP 0 - * + * Note: The RHS is setup to be triggered by TOP 0 + * * When multiple split vertices are provided: * - RHS contains all vertices reachable from every pivot * - LHS contains all vertices which are reachable from start ignoring any * vertices which have an edge to every pivot */ void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, - std::unordered_map<NFAVertex, NFAVertex> *lhs_map, + std::unordered_map<NFAVertex, NFAVertex> *lhs_map, NGHolder *rhs, - std::unordered_map<NFAVertex, NFAVertex> *rhs_map); + std::unordered_map<NFAVertex, NFAVertex> *rhs_map); void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots, NGHolder *lhs, - std::unordered_map<NFAVertex, NFAVertex> *lhs_map, + std::unordered_map<NFAVertex, NFAVertex> *lhs_map, NGHolder *rhs, - std::unordered_map<NFAVertex, NFAVertex> *rhs_map); + std::unordered_map<NFAVertex, NFAVertex> *rhs_map); void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, - std::unordered_map<NFAVertex, NFAVertex> *lhs_map); + std::unordered_map<NFAVertex, NFAVertex> *lhs_map); void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots, - NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map); + NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp index 03495d1441..c288415c01 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -111,8 +111,8 @@ #include <deque> #include <map> -#include <unordered_map> -#include <unordered_set> 
+#include <unordered_map> +#include <unordered_set> #include <boost/graph/depth_first_search.hpp> #include <boost/graph/reverse_graph.hpp> @@ -121,26 +121,26 @@ using namespace std; namespace ue2 { -using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>; +using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>; static -PostDomTree buildPDomTree(const NGHolder &g) { - PostDomTree tree; - tree.reserve(num_vertices(g)); - - auto postdominators = findPostDominators(g); +PostDomTree buildPDomTree(const NGHolder &g) { + PostDomTree tree; + tree.reserve(num_vertices(g)); + auto postdominators = findPostDominators(g); + for (auto v : vertices_range(g)) { if (is_special(v, g)) { continue; } NFAVertex pdom = postdominators[v]; if (pdom) { - DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index); + DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index); tree[pdom].insert(v); } } - return tree; + return tree; } /** @@ -153,13 +153,13 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, const CharReach &cr, const NFAStateSet &init, const vector<NFAVertex> &vByIndex, const PostDomTree &tree, som_type som, const vector<DepthMinMax> &som_depths, - const unordered_map<NFAVertex, u32> ®ion_map, + const unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { - DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index); + DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index); vector<NFAVertex> q; - auto it = tree.find(v); + auto it = tree.find(v); if (it != tree.end()) { q.insert(q.end(), it->second.begin(), it->second.end()); } @@ -275,9 +275,9 @@ void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) { static void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, const PostDomTree &pdom_tree, const NFAStateSet &init, - unordered_map<NFAVertex, NFAStateSet> *squash, - som_type som, const vector<DepthMinMax> &som_depths, - const 
unordered_map<NFAVertex, u32> ®ion_map, + unordered_map<NFAVertex, NFAStateSet> *squash, + som_type som, const vector<DepthMinMax> &som_depths, + const unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { deque<NFAVertex> remaining; for (const auto &m : *squash) { @@ -302,7 +302,7 @@ void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, } NFAStateSet u_squash(init.size()); - size_t u_index = g[u].index; + size_t u_index = g[u].index; buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, pdom_tree, som, som_depths, region_map, cache); @@ -310,7 +310,7 @@ void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, u_squash.set(u_index); /* never clear ourselves */ if ((~u_squash).any()) { // i.e. some bits unset in mask - DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index, + DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index, g[v].index); (*squash)[u] = u_squash; remaining.push_back(u); @@ -319,61 +319,61 @@ void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, } } -/* If there are redundant states in the graph, it may be possible for two - * sibling .* states to try to squash each other -- which should be prevented. - * - * Note: this situation should only happen if ng_equivalence has not been run. 
- */ -static -void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, - unordered_map<NFAVertex, NFAStateSet> &squash) { - for (auto it = squash.begin(); it != squash.end();) { - NFAVertex a = it->first; - u32 a_index = g[a].index; - - NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ - for (auto b_index = a_squash.find_first(); b_index != a_squash.npos; - b_index = a_squash.find_next(b_index)) { - assert(b_index != a_index); - NFAVertex b = vByIndex[b_index]; - - auto b_it = squash.find(b); - if (b_it == squash.end()) { - continue; - } - auto &b_squash = b_it->second; - if (!b_squash.test(a_index)) { - /* b and a squash each other, prevent this */ - DEBUG_PRINTF("removing mutual squash %u %zu\n", - a_index, b_index); - b_squash.set(a_index); - it->second.set(b_index); - } - } - - if (it->second.all()) { - DEBUG_PRINTF("%u is no longer an effective squash state\n", - a_index); - it = squash.erase(it); - } else { - ++it; - } - } -} - -unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, - som_type som) { - unordered_map<NFAVertex, NFAStateSet> squash; - +/* If there are redundant states in the graph, it may be possible for two + * sibling .* states to try to squash each other -- which should be prevented. + * + * Note: this situation should only happen if ng_equivalence has not been run. 
+ */ +static +void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, + unordered_map<NFAVertex, NFAStateSet> &squash) { + for (auto it = squash.begin(); it != squash.end();) { + NFAVertex a = it->first; + u32 a_index = g[a].index; + + NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ + for (auto b_index = a_squash.find_first(); b_index != a_squash.npos; + b_index = a_squash.find_next(b_index)) { + assert(b_index != a_index); + NFAVertex b = vByIndex[b_index]; + + auto b_it = squash.find(b); + if (b_it == squash.end()) { + continue; + } + auto &b_squash = b_it->second; + if (!b_squash.test(a_index)) { + /* b and a squash each other, prevent this */ + DEBUG_PRINTF("removing mutual squash %u %zu\n", + a_index, b_index); + b_squash.set(a_index); + it->second.set(b_index); + } + } + + if (it->second.all()) { + DEBUG_PRINTF("%u is no longer an effective squash state\n", + a_index); + it = squash.erase(it); + } else { + ++it; + } + } +} + +unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, + som_type som) { + unordered_map<NFAVertex, NFAStateSet> squash; + // Number of bits to use for all our masks. If we're a triggered graph, // tops have already been assigned, so we don't have to account for them. const u32 numStates = num_vertices(g); // Build post-dominator tree. - auto pdom_tree = buildPDomTree(g); + auto pdom_tree = buildPDomTree(g); // Build list of vertices by state ID and a set of init states. 
- vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex()); + vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex()); NFAStateSet initStates(numStates); smgb_cache cache(g); @@ -398,7 +398,7 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, for (u32 i = 0; i < numStates; i++) { NFAVertex v = vByIndex[i]; - assert(v != NGHolder::null_vertex()); + assert(v != NGHolder::null_vertex()); const CharReach &cr = g[v].char_reach; /* only non-init cyclics can be squashers */ @@ -502,8 +502,8 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som, som_depths, region_map, cache); - clearMutualSquashers(g, vByIndex, squash); - + clearMutualSquashers(g, vByIndex, squash); + return squash; } @@ -515,11 +515,11 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, * -# squash only a few acyclic states */ void filterSquashers(const NGHolder &g, - unordered_map<NFAVertex, NFAStateSet> &squash) { - assert(hasCorrectlyNumberedVertices(g)); - + unordered_map<NFAVertex, NFAStateSet> &squash) { + assert(hasCorrectlyNumberedVertices(g)); + DEBUG_PRINTF("filtering\n"); - vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */ + vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */ for (auto v : vertices_range(g)) { rev[g[v].index] = v; } @@ -528,7 +528,7 @@ void filterSquashers(const NGHolder &g, if (!contains(squash, v)) { continue; } - DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index); + DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index); if (!hasSelfLoop(v, g)) { DEBUG_PRINTF("acyclic\n"); @@ -538,8 +538,8 @@ void filterSquashers(const NGHolder &g, NFAStateSet squashed = squash[v]; squashed.flip(); /* default sense for mask of survivors */ - for (auto sq = squashed.find_first(); sq != squashed.npos; - sq = squashed.find_next(sq)) { + for (auto sq = squashed.find_first(); sq != 
squashed.npos; + sq = squashed.find_next(sq)) { NFAVertex u = rev[sq]; if (hasSelfLoop(u, g)) { DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); @@ -606,7 +606,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { NFAVertex u = source(e, g); const auto &r = g[u].reports; if (!r.empty() && is_subset_of(r, reports)) { - DEBUG_PRINTF("vertex %zu\n", g[u].index); + DEBUG_PRINTF("vertex %zu\n", g[u].index); dead.insert(e); } } @@ -615,7 +615,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { NFAVertex u = source(e, g); const auto &r = g[u].reports; if (!r.empty() && is_subset_of(r, reports)) { - DEBUG_PRINTF("vertex %zu\n", g[u].index); + DEBUG_PRINTF("vertex %zu\n", g[u].index); dead.insert(e); } } @@ -626,9 +626,9 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { static vector<NFAVertex> findUnreachable(const NGHolder &g) { - const boost::reverse_graph<NGHolder, const NGHolder &> revg(g); + const boost::reverse_graph<NGHolder, const NGHolder &> revg(g); - unordered_map<NFAVertex, boost::default_color_type> colours; + unordered_map<NFAVertex, boost::default_color_type> colours; colours.reserve(num_vertices(g)); depth_first_visit(revg, g.acceptEod, @@ -639,7 +639,7 @@ vector<NFAVertex> findUnreachable(const NGHolder &g) { vector<NFAVertex> unreach; for (auto v : vertices_range(revg)) { if (!contains(colours, v)) { - unreach.push_back(NFAVertex(v)); + unreach.push_back(NFAVertex(v)); } } return unreach; @@ -647,9 +647,9 @@ vector<NFAVertex> findUnreachable(const NGHolder &g) { /** Populates squash masks for states that can be switched off by highlander * (single match) reporters. 
*/ -unordered_map<NFAVertex, NFAStateSet> +unordered_map<NFAVertex, NFAStateSet> findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { - unordered_map<NFAVertex, NFAStateSet> squash; + unordered_map<NFAVertex, NFAStateSet> squash; set<NFAVertex> verts; getHighlanderReporters(g, g.accept, rm, verts); @@ -662,7 +662,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { const u32 numStates = num_vertices(g); for (auto v : verts) { - DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index, + DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index, g[v].reports.size()); // Find the set of vertices that lead to v or any other reporter with a @@ -670,7 +670,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { // cutting the appropriate out-edges to accept and seeing which // vertices become unreachable. - unordered_map<NFAVertex, NFAVertex> orig_to_copy; + unordered_map<NFAVertex, NFAVertex> orig_to_copy; NGHolder h; cloneHolder(h, g, &orig_to_copy); removeEdgesToAccept(h, orig_to_copy[v]); @@ -689,7 +689,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { NFAStateSet &mask = squash[v]; for (auto uv : unreach) { - DEBUG_PRINTF("squashes index %zu\n", h[uv].index); + DEBUG_PRINTF("squashes index %zu\n", h[uv].index); mask.reset(h[uv].index); } } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h index 489f541e84..f2d66744a5 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,7 @@ #include "som/som.h" #include "ue2common.h" -#include <unordered_map> +#include <unordered_map> #include 
<boost/dynamic_bitset.hpp> namespace ue2 { @@ -44,10 +44,10 @@ namespace ue2 { class NGHolder; class ReportManager; -/** - * Dynamically-sized bitset, as an NFA can have an arbitrary number of states. - */ -using NFAStateSet = boost::dynamic_bitset<>; +/** + * Dynamically-sized bitset, as an NFA can have an arbitrary number of states. + */ +using NFAStateSet = boost::dynamic_bitset<>; /** * Populates the squash mask for each vertex (i.e. the set of states to be left @@ -55,16 +55,16 @@ using NFAStateSet = boost::dynamic_bitset<>; * * The NFAStateSet in the output map is indexed by vertex_index. */ -std::unordered_map<NFAVertex, NFAStateSet> -findSquashers(const NGHolder &g, som_type som = SOM_NONE); +std::unordered_map<NFAVertex, NFAStateSet> +findSquashers(const NGHolder &g, som_type som = SOM_NONE); /** Filters out squash states intended only for use in DFA construction. */ void filterSquashers(const NGHolder &g, - std::unordered_map<NFAVertex, NFAStateSet> &squash); + std::unordered_map<NFAVertex, NFAStateSet> &squash); /** Populates squash masks for states that can be switched off by highlander * (single match) reporters. */ -std::unordered_map<NFAVertex, NFAStateSet> +std::unordered_map<NFAVertex, NFAStateSet> findHighlanderSquashers(const NGHolder &g, const ReportManager &rm); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp index 5e627bb593..2b4adf5c64 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp @@ -60,9 +60,9 @@ namespace { /** Depths from start, startDs for this graph. 
*/ struct InitDepths { - explicit InitDepths(const NGHolder &g) - : start(calcDepthsFrom(g, g.start)), - startDs(calcDepthsFrom(g, g.startDs)) {} + explicit InitDepths(const NGHolder &g) + : start(calcDepthsFrom(g, g.start)), + startDs(calcDepthsFrom(g, g.startDs)) {} depth maxDist(const NGHolder &g, NFAVertex v) const { u32 idx = g[v].index; diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp index 4ad5ff7875..1bdc0980b9 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,52 +54,52 @@ #include <set> #include <vector> -#include <boost/range/adaptor/map.hpp> - +#include <boost/range/adaptor/map.hpp> + using namespace std; -using boost::adaptors::map_values; +using boost::adaptors::map_values; namespace ue2 { static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */ /** Sentinel value meaning no component has yet been selected. 
*/ -static const u32 NO_COMPONENT = ~0U; - -static const u32 UNUSED_STATE = ~0U; - -namespace { -struct ranking_info { - explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) { - u32 rank = 0; - - reverse(to_vertex.begin(), to_vertex.end()); - - for (NFAVertex v : to_vertex) { - to_rank[v] = rank++; - } - - for (NFAVertex v : vertices_range(h)) { - if (!contains(to_rank, v)) { - to_rank[v] = UNUSED_STATE; - } +static const u32 NO_COMPONENT = ~0U; + +static const u32 UNUSED_STATE = ~0U; + +namespace { +struct ranking_info { + explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) { + u32 rank = 0; + + reverse(to_vertex.begin(), to_vertex.end()); + + for (NFAVertex v : to_vertex) { + to_rank[v] = rank++; } + + for (NFAVertex v : vertices_range(h)) { + if (!contains(to_rank, v)) { + to_rank[v] = UNUSED_STATE; + } + } } - NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); } - u32 get(NFAVertex v) const { return to_rank.at(v); } - u32 size() const { return (u32)to_vertex.size(); } - u32 add_to_tail(NFAVertex v) { - u32 rank = size(); - to_rank[v] = rank; - to_vertex.push_back(v); - return rank; + NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); } + u32 get(NFAVertex v) const { return to_rank.at(v); } + u32 size() const { return (u32)to_vertex.size(); } + u32 add_to_tail(NFAVertex v) { + u32 rank = size(); + to_rank[v] = rank; + to_vertex.push_back(v); + return rank; } -private: - vector<NFAVertex> to_vertex; - unordered_map<NFAVertex, u32> to_rank; -}; +private: + vector<NFAVertex> to_vertex; + unordered_map<NFAVertex, u32> to_rank; +}; } static never_inline @@ -131,9 +131,9 @@ bool cplVerticesMatch(const NGHolder &ga, NFAVertex va, } static never_inline -u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, - const NGHolder &gb, const ranking_info &b_ranking) { - u32 ml = min(a_ranking.size(), b_ranking.size()); +u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info 
&a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { + u32 ml = min(a_ranking.size(), b_ranking.size()); if (ml > 65535) { ml = 65535; } @@ -142,7 +142,7 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, // "startedness" properties. u32 max = 0; for (; max < ml; max++) { - if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) { + if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) { break; } } @@ -150,30 +150,30 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, return max; } -static -u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, - const NGHolder &gb, const ranking_info &b_ranking) { +static +u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { /* upper bound on the common region based on local properties */ - u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking); + u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking); DEBUG_PRINTF("cpl upper bound %u\n", max); while (max > 0) { /* shrink max region based on in-edges from outside the region */ for (size_t j = max; j > 0; j--) { - NFAVertex a_v = a_ranking.at(j - 1); - NFAVertex b_v = b_ranking.at(j - 1); - for (auto u : inv_adjacent_vertices_range(a_v, ga)) { - u32 state_id = a_ranking.get(u); - if (state_id != UNUSED_STATE && state_id >= max) { + NFAVertex a_v = a_ranking.at(j - 1); + NFAVertex b_v = b_ranking.at(j - 1); + for (auto u : inv_adjacent_vertices_range(a_v, ga)) { + u32 state_id = a_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; } } - for (auto u : inv_adjacent_vertices_range(b_v, gb)) { - u32 state_id = b_ranking.get(u); - if (state_id != UNUSED_STATE && state_id >= max) { + for (auto u : inv_adjacent_vertices_range(b_v, gb)) { + u32 state_id = b_ranking.get(u); + if (state_id 
!= UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; @@ -185,37 +185,37 @@ u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, /* Ensure that every pair of vertices has same out-edges to vertices in the region. */ - for (size_t i = 0; i < max; i++) { + for (size_t i = 0; i < max; i++) { size_t a_count = 0; size_t b_count = 0; - for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) { - u32 sid = a_ranking.get(target(a_edge, ga)); - if (sid == UNUSED_STATE || sid >= max) { + for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) { + u32 sid = a_ranking.get(target(a_edge, ga)); + if (sid == UNUSED_STATE || sid >= max) { continue; } a_count++; - NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); + NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); - if (!b_edge) { + if (!b_edge) { max = i; DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", max, i, sid); - goto try_smaller; + goto try_smaller; } - if (ga[a_edge].tops != gb[b_edge].tops) { + if (ga[a_edge].tops != gb[b_edge].tops) { max = i; - DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); - goto try_smaller; + DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); + goto try_smaller; } } - for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) { - u32 sid = b_ranking.get(b_v); - if (sid == UNUSED_STATE || sid >= max) { + for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) { + u32 sid = b_ranking.get(b_v); + if (sid == UNUSED_STATE || sid >= max) { continue; } @@ -224,54 +224,54 @@ u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, if (a_count != b_count) { max = i; - DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu," - " b_count=%zu)\n", max, a_count, b_count); - goto try_smaller; + DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu," + " b_count=%zu)\n", max, a_count, b_count); + goto try_smaller; } } - 
DEBUG_PRINTF("survived checks, returning cpl %u\n", max); - return max; - try_smaller:; + DEBUG_PRINTF("survived checks, returning cpl %u\n", max); + return max; + try_smaller:; } DEBUG_PRINTF("failed to find any common region\n"); return 0; } -u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) { - return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb)); -} - +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) { + return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb)); +} + static never_inline -void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { - assert(&dest != &vic); - - auto dest_info = ranking_info(dest); - auto vic_info = ranking_info(vic); - +void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { + assert(&dest != &vic); + + auto dest_info = ranking_info(dest); + auto vic_info = ranking_info(vic); + map<NFAVertex, NFAVertex> vmap; // vic -> dest vmap[vic.start] = dest.start; vmap[vic.startDs] = dest.startDs; vmap[vic.accept] = dest.accept; vmap[vic.acceptEod] = dest.acceptEod; - vmap[NGHolder::null_vertex()] = NGHolder::null_vertex(); + vmap[NGHolder::null_vertex()] = NGHolder::null_vertex(); // For vertices in the common len, add to vmap and merge in the reports, if // any. for (u32 i = 0; i < common_len; i++) { - NFAVertex v_old = vic_info.at(i); - NFAVertex v = dest_info.at(i); + NFAVertex v_old = vic_info.at(i); + NFAVertex v = dest_info.at(i); vmap[v_old] = v; const auto &reports = vic[v_old].reports; dest[v].reports.insert(reports.begin(), reports.end()); } - // Add in vertices beyond the common len - for (u32 i = common_len; i < vic_info.size(); i++) { - NFAVertex v_old = vic_info.at(i); + // Add in vertices beyond the common len + for (u32 i = common_len; i < vic_info.size(); i++) { + NFAVertex v_old = vic_info.at(i); if (is_special(v_old, vic)) { // Dest already has start vertices, just merge the reports. 
@@ -283,17 +283,17 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { } NFAVertex v = add_vertex(vic[v_old], dest); - dest_info.add_to_tail(v); + dest_info.add_to_tail(v); vmap[v_old] = v; } /* add edges */ DEBUG_PRINTF("common_len=%zu\n", common_len); for (const auto &e : edges_range(vic)) { - NFAVertex u_old = source(e, vic); - NFAVertex v_old = target(e, vic); - NFAVertex u = vmap[u_old]; - NFAVertex v = vmap[v_old]; + NFAVertex u_old = source(e, vic); + NFAVertex v_old = target(e, vic); + NFAVertex u = vmap[u_old]; + NFAVertex v = vmap[v_old]; bool uspecial = is_special(u, dest); bool vspecial = is_special(v, dest); @@ -304,14 +304,14 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { // We're in the common region if v's state ID is low enough, unless v // is a special (an accept), in which case we use u's state ID. - bool in_common_region = dest_info.get(v) < common_len; - if (vspecial && dest_info.get(u) < common_len) { + bool in_common_region = dest_info.get(v) < common_len; + if (vspecial && dest_info.get(u) < common_len) { in_common_region = true; } - DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n", - dest[u].index, dest_info.get(u), - dest[v].index, dest_info.get(v), + DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n", + dest[u].index, dest_info.get(u), + dest[v].index, dest_info.get(v), in_common_region ? " [common]" : ""); if (in_common_region) { @@ -319,7 +319,7 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { DEBUG_PRINTF("skipping common edge\n"); assert(edge(u, v, dest).second); // Should never merge edges with different top values. 
- assert(vic[e].tops == dest[edge(u, v, dest)].tops); + assert(vic[e].tops == dest[edge(u, v, dest)].tops); continue; } else { assert(is_any_accept(v, dest)); @@ -335,8 +335,8 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { add_edge(u, v, vic[e], dest); } - renumber_edges(dest); - renumber_vertices(dest); + renumber_edges(dest); + renumber_vertices(dest); } namespace { @@ -363,20 +363,20 @@ struct NfaMergeCandidateH { /** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */ static -bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, - const ReportManager *rm, const CompileContext &cc) { - size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl; - - combinedStateCount -= 2 * 2; /* discount accepts from both */ - - if (is_triggered(ha)) { - /* allow for a state for each top, ignore existing starts */ - combinedStateCount -= 2; /* for start, startDs */ - auto tops = getTops(ha); - insert(&tops, getTops(hb)); - combinedStateCount += tops.size(); - } - +bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, + const ReportManager *rm, const CompileContext &cc) { + size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl; + + combinedStateCount -= 2 * 2; /* discount accepts from both */ + + if (is_triggered(ha)) { + /* allow for a state for each top, ignore existing starts */ + combinedStateCount -= 2; /* for start, startDs */ + auto tops = getTops(ha); + insert(&tops, getTops(hb)); + combinedStateCount += tops.size(); + } + if (combinedStateCount > FAST_STATE_LIMIT) { // More complex implementability check. NGHolder h_temp; @@ -418,13 +418,13 @@ void buildNfaMergeQueue(const vector<NGHolder *> &cluster, // First, make sure all holders have numbered states and collect their // counts. 
- vector<ranking_info> states_map; - states_map.reserve(cs); + vector<ranking_info> states_map; + states_map.reserve(cs); for (size_t i = 0; i < cs; i++) { assert(cluster[i]); - assert(states_map.size() == i); - const NGHolder &g = *(cluster[i]); - states_map.emplace_back(g); + assert(states_map.size() == i); + const NGHolder &g = *(cluster[i]); + states_map.emplace_back(g); } vector<u16> seen_cpl(cs * cs, 0); @@ -482,46 +482,46 @@ void buildNfaMergeQueue(const vector<NGHolder *> &cluster, } } -/** - * True if the graphs have mergeable starts. - * - * Nowadays, this means that any vacuous edges must have the same tops. In - * addition, mixed-accept cases need to have matching reports. - */ +/** + * True if the graphs have mergeable starts. + * + * Nowadays, this means that any vacuous edges must have the same tops. In + * addition, mixed-accept cases need to have matching reports. + */ static bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { - if (!isVacuous(h1) || !isVacuous(h2)) { - return true; + if (!isVacuous(h1) || !isVacuous(h2)) { + return true; + } + + // Vacuous edges from startDs should not occur: we have better ways to + // implement true dot-star relationships. Just in case they do, ban them + // from being merged unless they have identical reports. + if (is_match_vertex(h1.startDs, h1) || is_match_vertex(h2.startDs, h2)) { + assert(0); + return false; } - - // Vacuous edges from startDs should not occur: we have better ways to - // implement true dot-star relationships. Just in case they do, ban them - // from being merged unless they have identical reports. - if (is_match_vertex(h1.startDs, h1) || is_match_vertex(h2.startDs, h2)) { - assert(0); - return false; + + /* TODO: relax top checks if reports match */ + + // If both graphs have edge (start, accept), the tops must match. 
+ NFAEdge e1_accept = edge(h1.start, h1.accept, h1); + NFAEdge e2_accept = edge(h2.start, h2.accept, h2); + if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { + return false; } - /* TODO: relax top checks if reports match */ - - // If both graphs have edge (start, accept), the tops must match. - NFAEdge e1_accept = edge(h1.start, h1.accept, h1); - NFAEdge e2_accept = edge(h2.start, h2.accept, h2); - if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { - return false; - } - - // If both graphs have edge (start, acceptEod), the tops must match. - NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); - NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); - if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { - return false; - } - - // If one graph has an edge to accept and the other has an edge to - // acceptEod, the reports must match for the merge to be safe. - if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) { - if (h1[h1.start].reports != h2[h2.start].reports) { + // If both graphs have edge (start, acceptEod), the tops must match. + NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); + NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); + if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { + return false; + } + + // If one graph has an edge to accept and the other has an edge to + // acceptEod, the reports must match for the merge to be safe. + if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) { + if (h1[h1.start].reports != h2[h2.start].reports) { return false; } } @@ -530,19 +530,19 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { } /** Merge graph \p ga into graph \p gb. Returns false on failure. 
*/ -bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc) { assert(ga.kind == gb.kind); - // Vacuous NFAs require special checks on their starts to ensure that tops - // match, and that reports match for mixed-accept cases. + // Vacuous NFAs require special checks on their starts to ensure that tops + // match, and that reports match for mixed-accept cases. if (!mergeableStarts(ga, gb)) { DEBUG_PRINTF("starts aren't mergeable\n"); return false; } - u32 cpl = commonPrefixLength(ga, gb); - if (!shouldMerge(gb, ga, cpl, rm, cc)) { + u32 cpl = commonPrefixLength(ga, gb); + if (!shouldMerge(gb, ga, cpl, rm, cc)) { return false; } @@ -551,13 +551,13 @@ bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, return true; } -map<NGHolder *, NGHolder *> mergeNfaCluster(const vector<NGHolder *> &cluster, - const ReportManager *rm, - const CompileContext &cc) { - map<NGHolder *, NGHolder *> merged; - +map<NGHolder *, NGHolder *> mergeNfaCluster(const vector<NGHolder *> &cluster, + const ReportManager *rm, + const CompileContext &cc) { + map<NGHolder *, NGHolder *> merged; + if (cluster.size() < 2) { - return merged; + return merged; } DEBUG_PRINTF("new cluster, size %zu\n", cluster.size()); @@ -589,8 +589,8 @@ map<NGHolder *, NGHolder *> mergeNfaCluster(const vector<NGHolder *> &cluster, } } } - - return merged; + + return merged; } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h index b0f42670a3..9336a78108 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * 
modification, are permitted provided that the following conditions are met: @@ -49,16 +49,16 @@ class ReportManager; * The CPL is calculated based the topological ordering given by the state * indices for each graph. */ -u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); /** * \brief Merge the group of graphs in \p cluster where possible. * - * The (from, to) mapping of merged graphs is returned. + * The (from, to) mapping of merged graphs is returned. */ -std::map<NGHolder *, NGHolder *> -mergeNfaCluster(const std::vector<NGHolder *> &cluster, const ReportManager *rm, - const CompileContext &cc); +std::map<NGHolder *, NGHolder *> +mergeNfaCluster(const std::vector<NGHolder *> &cluster, const ReportManager *rm, + const CompileContext &cc); /** * \brief Merge graph \p ga into graph \p gb. @@ -66,7 +66,7 @@ mergeNfaCluster(const std::vector<NGHolder *> &cluster, const ReportManager *rm, * Returns false on failure. On success, \p gb is reduced via \ref * reduceImplementableGraph and renumbered. 
*/ -bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp index 89500fe39e..fa062a05da 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #include "ng.h" #include "ng_prune.h" #include "ng_util.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "util/graph_range.h" #include "util/unicode_def.h" @@ -46,14 +46,14 @@ using namespace std; namespace ue2 { static -void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { - if (in_degree(v, g) != 1) { +void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { + if (in_degree(v, g) != 1) { DEBUG_PRINTF("unexpected pred\n"); assert(0); /* should be true due to the early stage of this analysis */ return; } - CharReach &cr = g[v].char_reach; + CharReach &cr = g[v].char_reach; if (pred_char == 0xe0) { assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); if (cr == CharReach(0xa0, 0xbf)) { @@ -80,8 +80,8 @@ void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. 
*/ -void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { - if (!expr.utf8) { +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { + if (!expr.utf8) { return; } @@ -89,12 +89,12 @@ void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { const CharReach f0(0xf0); const CharReach f4(0xf4); - for (auto v : vertices_range(g)) { - const CharReach &cr = g[v].char_reach; + for (auto v : vertices_range(g)) { + const CharReach &cr = g[v].char_reach; if (cr == e0 || cr == f0 || cr == f4) { u8 pred_char = cr.find_first(); - for (auto t : adjacent_vertices_range(v, g)) { - allowIllegal(g, t, pred_char); + for (auto t : adjacent_vertices_range(v, g)) { + allowIllegal(g, t, pred_char); } } } @@ -177,7 +177,7 @@ void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) { continue; } - DEBUG_PRINTF("%zu is a seed\n", h[v].index); + DEBUG_PRINTF("%zu is a seed\n", h[v].index); seeds->push_back(v); already_seeds.insert(v); } @@ -185,12 +185,12 @@ void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) { static bool expandCyclic(NGHolder &h, NFAVertex v) { - DEBUG_PRINTF("inspecting %zu\n", h[v].index); + DEBUG_PRINTF("inspecting %zu\n", h[v].index); bool changes = false; - auto v_preds = preds(v, h); - auto v_succs = succs(v, h); - + auto v_preds = preds(v, h); + auto v_succs = succs(v, h); + set<NFAVertex> start_siblings; set<NFAVertex> end_siblings; @@ -199,10 +199,10 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We need to find start vertices which have all of our preds. * As we have a self loop, it must be one of our succs. 
*/ for (auto a : adjacent_vertices_range(v, h)) { - auto a_preds = preds(a, h); + auto a_preds = preds(a, h); if (a_preds == v_preds && isutf8start(h[a].char_reach)) { - DEBUG_PRINTF("%zu is a start v\n", h[a].index); + DEBUG_PRINTF("%zu is a start v\n", h[a].index); start_siblings.insert(a); } } @@ -210,10 +210,10 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We also need to find full cont vertices which have all our own succs; * As we have a self loop, it must be one of our preds. */ for (auto a : inv_adjacent_vertices_range(v, h)) { - auto a_succs = succs(a, h); + auto a_succs = succs(a, h); if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { - DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index); + DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index); end_siblings.insert(a); } } @@ -227,7 +227,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { if (cr.isSubsetOf(UTF_TWO_START_CR)) { if (end_siblings.find(*adjacent_vertices(s, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%zu is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else if (cr.isSubsetOf(UTF_THREE_START_CR)) { @@ -239,7 +239,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { } if (end_siblings.find(*adjacent_vertices(m, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%zu is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) { @@ -259,11 +259,11 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { if (end_siblings.find(*adjacent_vertices(m2, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%zu is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else { - DEBUG_PRINTF("%zu is bad\n", h[s].index); + DEBUG_PRINTF("%zu is bad\n", h[s].index); continue; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h index 7c4288336f..1a9a8572f9 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h +++ 
b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ namespace ue2 { -class ExpressionInfo; +class ExpressionInfo; class NGHolder; /** \brief Relax forbidden UTF-8 sequences. @@ -44,7 +44,7 @@ class NGHolder; * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. */ -void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr); +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr); /** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex * where possible, based on the assumption that we will always be matching diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp index cb2b710358..c3b9603b16 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,54 +33,54 @@ #include "grey.h" #include "ng_dump.h" -#include "ng_prune.h" +#include "ng_prune.h" #include "ue2common.h" #include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. 
#include "parser/position.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/ue2string.h" #include "util/report_manager.h" -#include <limits> +#include <limits> #include <map> #include <set> -#include <unordered_map> -#include <unordered_set> - +#include <unordered_map> +#include <unordered_set> + #include <boost/graph/filtered_graph.hpp> #include <boost/graph/topological_sort.hpp> #include <boost/range/adaptor/map.hpp> using namespace std; -using boost::make_filtered_graph; +using boost::make_filtered_graph; using boost::make_assoc_property_map; namespace ue2 { NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { - assert(a != NGHolder::null_vertex()); + assert(a != NGHolder::null_vertex()); - NGHolder::out_edge_iterator ii, iie; + NGHolder::out_edge_iterator ii, iie; tie(ii, iie) = out_edges(a, g); if (ii == iie) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } NFAVertex b = target(*ii, g); if (a == b) { ++ii; if (ii == iie) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } b = target(*ii, g); if (++ii != iie) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } assert(a != b); @@ -88,23 +88,23 @@ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { } NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) { - assert(a != NGHolder::null_vertex()); + assert(a != NGHolder::null_vertex()); u32 idegree = in_degree(a, g); if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } - NGHolder::in_edge_iterator ii, iie; + NGHolder::in_edge_iterator ii, iie; tie(ii, iie) = in_edges(a, g); if (ii == iie) { - return NGHolder::null_vertex(); + 
return NGHolder::null_vertex(); } NFAVertex b = source(*ii, g); if (a == b) { ++ii; if (ii == iie) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } b = source(*ii, g); @@ -129,7 +129,7 @@ void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { if (edge(dest, t, g).second) { continue; } - NFAEdge clone = add_edge(dest, t, g); + NFAEdge clone = add_edge(dest, t, g); u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -140,7 +140,7 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { for (const auto &e : in_edges_range(s, g)) { NFAVertex ss = source(e, g); assert(!edge(ss, dest, g).second); - NFAEdge clone = add_edge(ss, dest, g); + NFAEdge clone = add_edge(ss, dest, g); u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -148,21 +148,21 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { } bool onlyOneTop(const NGHolder &g) { - return getTops(g).size() == 1; + return getTops(g).size() == 1; } namespace { struct CycleFound {}; struct DetectCycles : public boost::default_dfs_visitor { explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {} - void back_edge(const NFAEdge &e, const NGHolder &g) const { + void back_edge(const NFAEdge &e, const NGHolder &g) const { NFAVertex u = source(e, g), v = target(e, g); // We ignore the startDs self-loop. if (u == startDs && v == startDs) { return; } // Any other back-edge indicates a cycle. 
- DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index); + DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index); throw CycleFound(); } private: @@ -186,19 +186,19 @@ bool isAnchored(const NGHolder &g) { return true; } -bool isFloating(const NGHolder &g) { - for (auto v : adjacent_vertices_range(g.start, g)) { - if (v != g.startDs && !edge(g.startDs, v, g).second) { - return false; - } - } - return true; -} - +bool isFloating(const NGHolder &g) { + for (auto v : adjacent_vertices_range(g.start, g)) { + if (v != g.startDs && !edge(g.startDs, v, g).second) { + return false; + } + } + return true; +} + bool isAcyclic(const NGHolder &g) { try { - boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g), - g.start); + boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g), + g.start); } catch (const CycleFound &) { return false; } @@ -213,9 +213,9 @@ bool hasReachableCycle(const NGHolder &g, NFAVertex src) { try { // Use depth_first_visit, rather than depth_first_search, so that we // only search from src. 
- boost::depth_first_visit(g, src, DetectCycles(g), - make_small_color_map(g)); - } catch (const CycleFound &) { + boost::depth_first_visit(g, src, DetectCycles(g), + make_small_color_map(g)); + } catch (const CycleFound &) { return true; } @@ -226,8 +226,8 @@ bool hasBigCycles(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); set<NFAEdge> dead; BackEdges<set<NFAEdge>> backEdgeVisitor(dead); - boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g), - g.start); + boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g), + g.start); for (const auto &e : dead) { if (source(e, g) != target(e, g)) { @@ -238,15 +238,15 @@ bool hasBigCycles(const NGHolder &g) { return false; } -bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) { - return any_of_in(vertices_range(g), [&](NFAVertex v) { - return !is_special(v, g) && g[v].char_reach.count() < max_reach_count; - }); +bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) { + return any_of_in(vertices_range(g), [&](NFAVertex v) { + return !is_special(v, g) && g[v].char_reach.count() < max_reach_count; + }); } bool can_never_match(const NGHolder &g) { assert(edge(g.accept, g.acceptEod, g).second); - if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { + if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { DEBUG_PRINTF("no paths into accept\n"); return true; } @@ -255,7 +255,7 @@ bool can_never_match(const NGHolder &g) { } bool can_match_at_eod(const NGHolder &h) { - if (in_degree(h.acceptEod, h) > 1) { + if (in_degree(h.acceptEod, h) > 1) { DEBUG_PRINTF("more than one edge to acceptEod\n"); return true; } @@ -272,90 +272,90 @@ bool can_match_at_eod(const NGHolder &h) { } bool can_only_match_at_eod(const NGHolder &g) { - NGHolder::in_edge_iterator ie, ee; + NGHolder::in_edge_iterator ie, ee; tie(ie, ee) = in_edges(g.accept, g); return ie == ee; } bool matches_everywhere(const NGHolder &h) { - NFAEdge e = edge(h.startDs, 
h.accept, h); + NFAEdge e = edge(h.startDs, h.accept, h); - return e && !h[e].assert_flags; + return e && !h[e].assert_flags; } bool is_virtual_start(NFAVertex v, const NGHolder &g) { return g[v].assert_flags & POS_FLAG_VIRTUAL_START; } -static -void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) { - // Start is last element of reverse topo ordering. - auto it = find(topoOrder.begin(), topoOrder.end(), g.start); - if (it != topoOrder.end() - 1) { - DEBUG_PRINTF("repositioning start\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - topoOrder.insert(topoOrder.end(), g.start); - } - - // StartDs is second-to-last element of reverse topo ordering. - it = find(topoOrder.begin(), topoOrder.end(), g.startDs); - if (it != topoOrder.end() - 2) { - DEBUG_PRINTF("repositioning start ds\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - topoOrder.insert(topoOrder.end() - 1, g.startDs); - } - - // AcceptEOD is first element of reverse topo ordering. - it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod); - if (it != topoOrder.begin()) { - DEBUG_PRINTF("repositioning accept\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - topoOrder.insert(topoOrder.begin(), g.acceptEod); - } - - // Accept is second element of reverse topo ordering, if it's connected. - it = find(topoOrder.begin(), topoOrder.end(), g.accept); - if (it != topoOrder.begin() + 1) { - DEBUG_PRINTF("repositioning accept\n"); - assert(it != topoOrder.end()); - topoOrder.erase(it); - if (in_degree(g.accept, g) != 0) { - topoOrder.insert(topoOrder.begin() + 1, g.accept); - } - } -} - +static +void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) { + // Start is last element of reverse topo ordering. 
+ auto it = find(topoOrder.begin(), topoOrder.end(), g.start); + if (it != topoOrder.end() - 1) { + DEBUG_PRINTF("repositioning start\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end(), g.start); + } + + // StartDs is second-to-last element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.startDs); + if (it != topoOrder.end() - 2) { + DEBUG_PRINTF("repositioning start ds\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end() - 1, g.startDs); + } + + // AcceptEOD is first element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod); + if (it != topoOrder.begin()) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.begin(), g.acceptEod); + } + + // Accept is second element of reverse topo ordering, if it's connected. + it = find(topoOrder.begin(), topoOrder.end(), g.accept); + if (it != topoOrder.begin() + 1) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + if (in_degree(g.accept, g) != 0) { + topoOrder.insert(topoOrder.begin() + 1, g.accept); + } + } +} + vector<NFAVertex> getTopoOrdering(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); // Use the same colour map for both DFS and topological_sort below: avoids // having to reallocate it, etc. 
- auto colors = make_small_color_map(g); + auto colors = make_small_color_map(g); - using EdgeSet = unordered_set<NFAEdge>; + using EdgeSet = unordered_set<NFAEdge>; EdgeSet backEdges; BackEdges<EdgeSet> be(backEdges); - depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors)); + depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors)); - auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges)); + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges)); vector<NFAVertex> ordering; - ordering.reserve(num_vertices(g)); - topological_sort(acyclic_g, back_inserter(ordering), color_map(colors)); - - reorderSpecials(g, ordering); + ordering.reserve(num_vertices(g)); + topological_sort(acyclic_g, back_inserter(ordering), color_map(colors)); + reorderSpecials(g, ordering); + return ordering; } static void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, - decltype(make_small_color_map(NGHolder())) &colors) { + decltype(make_small_color_map(NGHolder())) &colors) { set<NFAVertex> s; insert(&s, adjacent_vertices(u, g)); @@ -370,10 +370,10 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, } } - auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); + auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); - depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()), - colors); + depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()), + colors); } bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, @@ -386,18 +386,18 @@ bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, return cache.cache[key]; } - auto colors = make_small_color_map(g); - mustBeSetBefore_int(u, g, colors); + auto colors = make_small_color_map(g); + mustBeSetBefore_int(u, g, colors); for (auto vi : vertices_range(g)) { - auto key2 = make_pair(g[u].index, g[vi].index); - DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second); + auto key2 = 
make_pair(g[u].index, g[vi].index); + DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second); assert(!contains(cache.cache, key2)); - bool value = get(colors, vi) == small_color::white; + bool value = get(colors, vi) == small_color::white; cache.cache[key2] = value; assert(contains(cache.cache, key2)); } - DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second, + DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second, cache.cache.size()); return cache.cache[key]; } @@ -430,27 +430,27 @@ void appendLiteral(NGHolder &h, const ue2_literal &s) { } } -flat_set<u32> getTops(const NGHolder &h) { - flat_set<u32> tops; - for (const auto &e : out_edges_range(h.start, h)) { - insert(&tops, h[e].tops); - } - return tops; -} - -void setTops(NGHolder &h, u32 top) { +flat_set<u32> getTops(const NGHolder &h) { + flat_set<u32> tops; for (const auto &e : out_edges_range(h.start, h)) { - assert(h[e].tops.empty()); - if (target(e, h) == h.startDs) { + insert(&tops, h[e].tops); + } + return tops; +} + +void setTops(NGHolder &h, u32 top) { + for (const auto &e : out_edges_range(h.start, h)) { + assert(h[e].tops.empty()); + if (target(e, h) == h.startDs) { continue; } - h[e].tops.insert(top); + h[e].tops.insert(top); } } void clearReports(NGHolder &g) { DEBUG_PRINTF("clearing reports without an accept edge\n"); - unordered_set<NFAVertex> allow; + unordered_set<NFAVertex> allow; insert(&allow, inv_adjacent_vertices(g.accept, g)); insert(&allow, inv_adjacent_vertices(g.acceptEod, g)); allow.erase(g.accept); // due to stylised edge. 
@@ -474,7 +474,7 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) { static void fillHolderOutEdges(NGHolder &out, const NGHolder &in, - const unordered_map<NFAVertex, NFAVertex> &v_map, + const unordered_map<NFAVertex, NFAVertex> &v_map, NFAVertex u) { NFAVertex u_new = v_map.at(u); @@ -496,9 +496,9 @@ void fillHolderOutEdges(NGHolder &out, const NGHolder &in, } void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv, - unordered_map<NFAVertex, NFAVertex> *v_map_out) { + unordered_map<NFAVertex, NFAVertex> *v_map_out) { NGHolder &out = *outp; - unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out; + unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out; out.kind = in.kind; @@ -525,13 +525,13 @@ void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv, fillHolderOutEdges(out, in, v_map, u); } - renumber_edges(out); - renumber_vertices(out); + renumber_edges(out); + renumber_vertices(out); } void cloneHolder(NGHolder &out, const NGHolder &in) { assert(hasCorrectlyNumberedVertices(in)); - assert(hasCorrectlyNumberedVertices(out)); + assert(hasCorrectlyNumberedVertices(out)); out.kind = in.kind; // Note: depending on the state of the input graph, some stylized edges @@ -541,7 +541,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { /* remove the existing special edges */ clear_vertex(out.startDs, out); clear_vertex(out.accept, out); - renumber_edges(out); + renumber_edges(out); vector<NFAVertex> out_mapping(num_vertices(in)); out_mapping[NODE_START] = out.start; @@ -569,18 +569,18 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { NFAVertex s = out_mapping[si]; NFAVertex t = out_mapping[ti]; - NFAEdge e2 = add_edge(s, t, out); + NFAEdge e2 = add_edge(s, t, out); out[e2] = in[e]; } // Safety checks. 
- assert(num_vertices(in) == num_vertices(out)); - assert(num_edges(in) == num_edges(out)); + assert(num_vertices(in) == num_vertices(out)); + assert(num_edges(in) == num_edges(out)); assert(hasCorrectlyNumberedVertices(out)); } void cloneHolder(NGHolder &out, const NGHolder &in, - unordered_map<NFAVertex, NFAVertex> *mapping) { + unordered_map<NFAVertex, NFAVertex> *mapping) { cloneHolder(out, in); vector<NFAVertex> out_verts(num_vertices(in)); for (auto v : vertices_range(out)) { @@ -601,191 +601,191 @@ unique_ptr<NGHolder> cloneHolder(const NGHolder &in) { return h; } -void reverseHolder(const NGHolder &g_in, NGHolder &g) { - // Make the BGL do the grunt work. - unordered_map<NFAVertex, NFAVertex> vertexMap; - boost::transpose_graph(g_in, g, - orig_to_copy(boost::make_assoc_property_map(vertexMap))); - - // The transpose_graph operation will have created extra copies of our - // specials. We have to rewire their neighbours to the 'real' specials and - // delete them. - NFAVertex start = vertexMap[g_in.acceptEod]; - NFAVertex startDs = vertexMap[g_in.accept]; - NFAVertex accept = vertexMap[g_in.startDs]; - NFAVertex acceptEod = vertexMap[g_in.start]; - - // Successors of starts. - for (const auto &e : out_edges_range(start, g)) { - NFAVertex v = target(e, g); - add_edge(g.start, v, g[e], g); - } - for (const auto &e : out_edges_range(startDs, g)) { - NFAVertex v = target(e, g); - add_edge(g.startDs, v, g[e], g); - } - - // Predecessors of accepts. - for (const auto &e : in_edges_range(accept, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.accept, g[e], g); - } - for (const auto &e : in_edges_range(acceptEod, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.acceptEod, g[e], g); - } - - // Remove our impostors. 
- clear_vertex(start, g); - remove_vertex(start, g); - clear_vertex(startDs, g); - remove_vertex(startDs, g); - clear_vertex(accept, g); - remove_vertex(accept, g); - clear_vertex(acceptEod, g); - remove_vertex(acceptEod, g); - - // Renumber so that g's properties (number of vertices, edges) are - // accurate. - renumber_vertices(g); - renumber_edges(g); - - assert(num_vertices(g) == num_vertices(g_in)); - assert(num_edges(g) == num_edges(g_in)); -} - -u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 max_delay, bool overhang_ok) { - assert(isCorrectlyTopped(g)); - if (max_delay == numeric_limits<u32>::max()) { - max_delay--; - } - - DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); - set<NFAVertex> curr, next; - curr.insert(g.accept); - - auto it = lit.rbegin(); - for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { - next.clear(); - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - goto bail; /* things got complicated */ - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - DEBUG_PRINTF("skip\n"); - continue; - } - if (isSubsetOf(*it, cr)) { - next.insert(u); - } else { - DEBUG_PRINTF("bail\n"); - goto bail; /* things got complicated */ - } - } - } - - curr.swap(next); - } - bail: - if (curr.empty()) { - /* This can happen when we have an edge representing a cross from two - * sides of an alternation. 
This whole edge needs to be marked as - * dead */ - assert(0); /* should have been picked up by can match */ - return numeric_limits<u32>::max(); - } - - u32 delay = distance(lit.rbegin(), it); - assert(delay <= max_delay); - assert(delay <= lit.length()); - DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); - - set<NFAVertex> pred; - for (auto v : curr) { - insert(&pred, inv_adjacent_vertices_range(v, g)); - } - - clear_in_edges(g.accept, g); - clearReports(g); - - for (auto v : pred) { - NFAEdge e = add_edge(v, g.accept, g); - g[v].reports.insert(0); - if (is_triggered(g) && v == g.start) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - pruneUseless(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); - - DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); - return delay; -} - +void reverseHolder(const NGHolder &g_in, NGHolder &g) { + // Make the BGL do the grunt work. + unordered_map<NFAVertex, NFAVertex> vertexMap; + boost::transpose_graph(g_in, g, + orig_to_copy(boost::make_assoc_property_map(vertexMap))); + + // The transpose_graph operation will have created extra copies of our + // specials. We have to rewire their neighbours to the 'real' specials and + // delete them. + NFAVertex start = vertexMap[g_in.acceptEod]; + NFAVertex startDs = vertexMap[g_in.accept]; + NFAVertex accept = vertexMap[g_in.startDs]; + NFAVertex acceptEod = vertexMap[g_in.start]; + + // Successors of starts. + for (const auto &e : out_edges_range(start, g)) { + NFAVertex v = target(e, g); + add_edge(g.start, v, g[e], g); + } + for (const auto &e : out_edges_range(startDs, g)) { + NFAVertex v = target(e, g); + add_edge(g.startDs, v, g[e], g); + } + + // Predecessors of accepts. 
+ for (const auto &e : in_edges_range(accept, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.accept, g[e], g); + } + for (const auto &e : in_edges_range(acceptEod, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.acceptEod, g[e], g); + } + + // Remove our impostors. + clear_vertex(start, g); + remove_vertex(start, g); + clear_vertex(startDs, g); + remove_vertex(startDs, g); + clear_vertex(accept, g); + remove_vertex(accept, g); + clear_vertex(acceptEod, g); + remove_vertex(acceptEod, g); + + // Renumber so that g's properties (number of vertices, edges) are + // accurate. + renumber_vertices(g); + renumber_edges(g); + + assert(num_vertices(g) == num_vertices(g_in)); + assert(num_edges(g) == num_edges(g_in)); +} + +u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 max_delay, bool overhang_ok) { + assert(isCorrectlyTopped(g)); + if (max_delay == numeric_limits<u32>::max()) { + max_delay--; + } + + DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); + set<NFAVertex> curr, next; + curr.insert(g.accept); + + auto it = lit.rbegin(); + for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { + next.clear(); + for (auto v : curr) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start) { + if (overhang_ok) { + DEBUG_PRINTF("bail\n"); + goto bail; /* things got complicated */ + } else { + continue; /* it is not possible for a lhs literal to + * overhang the start */ + } + } + + const CharReach &cr = g[u].char_reach; + if (!overlaps(*it, cr)) { + DEBUG_PRINTF("skip\n"); + continue; + } + if (isSubsetOf(*it, cr)) { + next.insert(u); + } else { + DEBUG_PRINTF("bail\n"); + goto bail; /* things got complicated */ + } + } + } + + curr.swap(next); + } + bail: + if (curr.empty()) { + /* This can happen when we have an edge representing a cross from two + * sides of an alternation. 
This whole edge needs to be marked as + * dead */ + assert(0); /* should have been picked up by can match */ + return numeric_limits<u32>::max(); + } + + u32 delay = distance(lit.rbegin(), it); + assert(delay <= max_delay); + assert(delay <= lit.length()); + DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); + + set<NFAVertex> pred; + for (auto v : curr) { + insert(&pred, inv_adjacent_vertices_range(v, g)); + } + + clear_in_edges(g.accept, g); + clearReports(g); + + for (auto v : pred) { + NFAEdge e = add_edge(v, g.accept, g); + g[v].reports.insert(0); + if (is_triggered(g) && v == g.start) { + g[e].tops.insert(DEFAULT_TOP); + } + } + + pruneUseless(g); + assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); + + DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); + return delay; +} + #ifndef NDEBUG - + bool allMatchStatesHaveReports(const NGHolder &g) { - unordered_set<NFAVertex> reporters; + unordered_set<NFAVertex> reporters; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); + DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); return false; } - reporters.insert(v); + reporters.insert(v); } - + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { if (v == g.accept) { continue; // stylised edge } if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); + DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index); return false; } - reporters.insert(v); + reporters.insert(v); } for (auto v : vertices_range(g)) { - if (!contains(reporters, v) && !g[v].reports.empty()) { - DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n", - g[v].index); - return false; + if (!contains(reporters, v) && !g[v].reports.empty()) { + DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n", + g[v].index); + return false; } } - - return true; + + return true; } -bool 
isCorrectlyTopped(const NGHolder &g) { - if (is_triggered(g)) { - for (const auto &e : out_edges_range(g.start, g)) { - if (g[e].tops.empty() != (target(e, g) == g.startDs)) { - return false; - } - } - } else { - for (const auto &e : out_edges_range(g.start, g)) { - if (!g[e].tops.empty()) { - return false; - } +bool isCorrectlyTopped(const NGHolder &g) { + if (is_triggered(g)) { + for (const auto &e : out_edges_range(g.start, g)) { + if (g[e].tops.empty() != (target(e, g) == g.startDs)) { + return false; + } } + } else { + for (const auto &e : out_edges_range(g.start, g)) { + if (!g[e].tops.empty()) { + return false; + } + } } - - return true; + + return true; } - + #endif // NDEBUG } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_util.h index a2d0d9b7d6..cbd5760df4 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_util.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,47 +32,47 @@ #ifndef NG_UTIL_H #define NG_UTIL_H -#include "ng_depth.h" +#include "ng_depth.h" #include "ng_holder.h" #include "ue2common.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" -#include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor - -#include <algorithm> -#include <map> -#include <unordered_map> -#include <vector> - +#include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor + +#include <algorithm> +#include <map> +#include <unordered_map> +#include <vector> + namespace ue2 { struct Grey; struct ue2_literal; class ReportManager; -template<class VertexDepth> -depth maxDistFromInit(const VertexDepth &vd) { - if 
(vd.fromStart.max.is_unreachable()) { - return vd.fromStartDotStar.max; - } else if (vd.fromStartDotStar.max.is_unreachable()) { - return vd.fromStart.max; - } else { - return std::max(vd.fromStartDotStar.max, vd.fromStart.max); - } -} - -template<class VertexDepth> -depth maxDistFromStartOfData(const VertexDepth &vd) { - if (vd.fromStartDotStar.max.is_reachable()) { - /* the irrepressible nature of floating literals cannot be contained */ - return depth::infinity(); - } else { - return vd.fromStart.max; - } -} - +template<class VertexDepth> +depth maxDistFromInit(const VertexDepth &vd) { + if (vd.fromStart.max.is_unreachable()) { + return vd.fromStartDotStar.max; + } else if (vd.fromStartDotStar.max.is_unreachable()) { + return vd.fromStart.max; + } else { + return std::max(vd.fromStartDotStar.max, vd.fromStart.max); + } +} + +template<class VertexDepth> +depth maxDistFromStartOfData(const VertexDepth &vd) { + if (vd.fromStartDotStar.max.is_reachable()) { + /* the irrepressible nature of floating literals cannot be contained */ + return depth::infinity(); + } else { + return vd.fromStart.max; + } +} + /** True if the given vertex is a dot (reachable on any character). 
*/ template<class GraphT> static really_inline @@ -84,81 +84,81 @@ bool is_dot(NFAVertex v, const GraphT &g) { template<class U> static really_inline void succ(const NGHolder &g, NFAVertex v, U *s) { - auto rv = adjacent_vertices(v, g); - s->insert(rv.first, rv.second); -} - -template<class ContTemp = flat_set<NFAVertex>> -ContTemp succs(NFAVertex u, const NGHolder &g) { - ContTemp rv; - succ(g, u, &rv); - return rv; + auto rv = adjacent_vertices(v, g); + s->insert(rv.first, rv.second); } +template<class ContTemp = flat_set<NFAVertex>> +ContTemp succs(NFAVertex u, const NGHolder &g) { + ContTemp rv; + succ(g, u, &rv); + return rv; +} + /** adds predecessors of v to s */ template<class U> static really_inline void pred(const NGHolder &g, NFAVertex v, U *p) { - auto rv = inv_adjacent_vertices(v, g); - p->insert(rv.first, rv.second); -} - -template<class ContTemp = flat_set<NFAVertex>> -ContTemp preds(NFAVertex u, const NGHolder &g) { - ContTemp rv; - pred(g, u, &rv); - return rv; + auto rv = inv_adjacent_vertices(v, g); + p->insert(rv.first, rv.second); } +template<class ContTemp = flat_set<NFAVertex>> +ContTemp preds(NFAVertex u, const NGHolder &g) { + ContTemp rv; + pred(g, u, &rv); + return rv; +} + /** returns a vertex with an out edge from v and is not v. * v must have exactly one out-edge excluding self-loops. - * will return NGHolder::null_vertex() if the preconditions don't hold. + * will return NGHolder::null_vertex() if the preconditions don't hold. */ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v); /** Like getSoleDestVertex but for in-edges */ NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v); -/** \brief edge filtered graph. - * - * This will give you a view over the graph that has none of the edges from - * the provided set included. - * - * If this is provided with the back edges of the graph, this will result in an - * acyclic subgraph view. This is useful for topological_sort and other - * algorithms that require a DAG. 
- */ -template<typename EdgeSet> -struct bad_edge_filter { - bad_edge_filter() {} - explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {} - bool operator()(const typename EdgeSet::value_type &e) const { - return !contains(*bad_edges, e); /* keep edges not in the bad set */ - } - const EdgeSet *bad_edges = nullptr; -}; - -template<typename EdgeSet> -bad_edge_filter<EdgeSet> make_bad_edge_filter(const EdgeSet *e) { - return bad_edge_filter<EdgeSet>(e); -} - -/** \brief vertex graph filter. */ -template<typename VertexSet> -struct bad_vertex_filter { - bad_vertex_filter() = default; - explicit bad_vertex_filter(const VertexSet *bad_v) : bad_vertices(bad_v) {} - bool operator()(const typename VertexSet::value_type &v) const { - return !contains(*bad_vertices, v); /* keep vertices not in bad set */ - } - const VertexSet *bad_vertices = nullptr; -}; - -template<typename VertexSet> -bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) { - return bad_vertex_filter<VertexSet>(v); -} - +/** \brief edge filtered graph. + * + * This will give you a view over the graph that has none of the edges from + * the provided set included. + * + * If this is provided with the back edges of the graph, this will result in an + * acyclic subgraph view. This is useful for topological_sort and other + * algorithms that require a DAG. + */ +template<typename EdgeSet> +struct bad_edge_filter { + bad_edge_filter() {} + explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {} + bool operator()(const typename EdgeSet::value_type &e) const { + return !contains(*bad_edges, e); /* keep edges not in the bad set */ + } + const EdgeSet *bad_edges = nullptr; +}; + +template<typename EdgeSet> +bad_edge_filter<EdgeSet> make_bad_edge_filter(const EdgeSet *e) { + return bad_edge_filter<EdgeSet>(e); +} + +/** \brief vertex graph filter. 
*/ +template<typename VertexSet> +struct bad_vertex_filter { + bad_vertex_filter() = default; + explicit bad_vertex_filter(const VertexSet *bad_v) : bad_vertices(bad_v) {} + bool operator()(const typename VertexSet::value_type &v) const { + return !contains(*bad_vertices, v); /* keep vertices not in bad set */ + } + const VertexSet *bad_vertices = nullptr; +}; + +template<typename VertexSet> +bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) { + return bad_vertex_filter<VertexSet>(v); +} + /** Visitor that records back edges */ template <typename BackEdgeSet> class BackEdges : public boost::default_dfs_visitor { @@ -175,7 +175,7 @@ public: * NODE_START_DOTSTAR). */ template <typename GraphT> static really_inline -bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) { +bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) { u32 i = g[v].index; return i == NODE_START || i == NODE_START_DOTSTAR; } @@ -183,34 +183,34 @@ bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) { bool is_virtual_start(NFAVertex v, const NGHolder &g); template <typename GraphT> -bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) { +bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) { u32 i = g[v].index; return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD; } /** returns true iff v has an edge to accept or acceptEod */ template <typename GraphT> -bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) { +bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) { return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second; } /** Generate a reverse topological ordering for a back-edge filtered version of - * our graph (as it must be a DAG and correctly numbered). - * - * Note: we ensure that we produce a topo ordering that begins with acceptEod - * and accept (if present) and ends with startDs followed by start. 
- */ + * our graph (as it must be a DAG and correctly numbered). + * + * Note: we ensure that we produce a topo ordering that begins with acceptEod + * and accept (if present) and ends with startDs followed by start. + */ std::vector<NFAVertex> getTopoOrdering(const NGHolder &g); bool onlyOneTop(const NGHolder &g); -/** Return the set of the tops on the given graph. */ +/** Return the set of the tops on the given graph. */ flat_set<u32> getTops(const NGHolder &h); -/** Initialise the tops on h to the provide top. Assumes that h is triggered and - * no tops have been set on h. */ -void setTops(NGHolder &h, u32 top = DEFAULT_TOP); - +/** Initialise the tops on h to the provide top. Assumes that h is triggered and + * no tops have been set on h. */ +void setTops(NGHolder &h, u32 top = DEFAULT_TOP); + /** adds a vertex to g with all the same vertex properties as \p v (aside from * index) */ NFAVertex clone_vertex(NGHolder &g, NFAVertex v); @@ -238,10 +238,10 @@ bool isVacuous(const NGHolder &h); * proper successors). */ bool isAnchored(const NGHolder &h); -/** \brief True if the graph contains no anchored vertices (start has no - * successors aside from startDs or vertices connected to startDs). */ -bool isFloating(const NGHolder &h); - +/** \brief True if the graph contains no anchored vertices (start has no + * successors aside from startDs or vertices connected to startDs). */ +bool isFloating(const NGHolder &h); + /** True if the graph contains no back-edges at all, other than the * startDs self-loop. */ bool isAcyclic(const NGHolder &g); @@ -252,12 +252,12 @@ bool hasReachableCycle(const NGHolder &g, NFAVertex src); /** True if g has any cycles which are not self-loops. */ bool hasBigCycles(const NGHolder &g); -/** - * \brief True if g has at least one non-special vertex with reach smaller than - * max_reach_count. The default of 200 is pretty conservative. 
- */ -bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200); - +/** + * \brief True if g has at least one non-special vertex with reach smaller than + * max_reach_count. The default of 200 is pretty conservative. + */ +bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200); + /** Returns the set of all vertices that appear in any of the graph's cycles. */ std::set<NFAVertex> findVerticesInCycles(const NGHolder &g); @@ -291,12 +291,12 @@ void appendLiteral(NGHolder &h, const ue2_literal &s); * \a in). A vertex mapping is returned in \a v_map_out. */ void fillHolder(NGHolder *outp, const NGHolder &in, const std::deque<NFAVertex> &vv, - std::unordered_map<NFAVertex, NFAVertex> *v_map_out); + std::unordered_map<NFAVertex, NFAVertex> *v_map_out); /** \brief Clone the graph in \a in into graph \a out, returning a vertex * mapping in \a v_map_out. */ void cloneHolder(NGHolder &out, const NGHolder &in, - std::unordered_map<NFAVertex, NFAVertex> *v_map_out); + std::unordered_map<NFAVertex, NFAVertex> *v_map_out); /** \brief Clone the graph in \a in into graph \a out. */ void cloneHolder(NGHolder &out, const NGHolder &in); @@ -312,33 +312,33 @@ void clearReports(NGHolder &g); * r_old. */ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g, NGHolder &out); - -/** \brief Returns the delay or ~0U if the graph cannot match with - * the trailing literal. */ -u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 max_delay, bool overhang_ok = true); - +/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to + * accepts. */ +void reverseHolder(const NGHolder &g, NGHolder &out); + +/** \brief Returns the delay or ~0U if the graph cannot match with + * the trailing literal. 
*/ +u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 max_delay, bool overhang_ok = true); + #ifndef NDEBUG -// Assertions: only available in internal builds. - -/** - * Used in sanity-checking assertions: returns true if all vertices - * with edges to accept or acceptEod have at least one report ID. Additionally, - * checks that ONLY vertices with edges to accept or acceptEod has reports. - */ +// Assertions: only available in internal builds. + +/** + * Used in sanity-checking assertions: returns true if all vertices + * with edges to accept or acceptEod have at least one report ID. Additionally, + * checks that ONLY vertices with edges to accept or acceptEod has reports. + */ bool allMatchStatesHaveReports(const NGHolder &g); -/** - * Assertion: returns true if the graph is triggered and all edges out of start - * have tops OR if the graph is not-triggered and all edges out of start have no - * tops. - */ -bool isCorrectlyTopped(const NGHolder &g); -#endif // NDEBUG +/** + * Assertion: returns true if the graph is triggered and all edges out of start + * have tops OR if the graph is not-triggered and all edges out of start have no + * tops. 
+ */ +bool isCorrectlyTopped(const NGHolder &g); +#endif // NDEBUG } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp index d1123dff49..05525ec06d 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,31 +34,31 @@ #include "grey.h" #include "ng.h" #include "ng_util.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" using namespace std; namespace ue2 { static -ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) { - Report ir = rm.getBasicInternalReport(expr); +ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) { + Report ir = rm.getBasicInternalReport(expr); // Apply any extended params. - if (expr.min_offset || expr.max_offset != MAX_OFFSET) { - ir.minOffset = expr.min_offset; - ir.maxOffset = expr.max_offset; + if (expr.min_offset || expr.max_offset != MAX_OFFSET) { + ir.minOffset = expr.min_offset; + ir.maxOffset = expr.max_offset; } - assert(!expr.min_length); // should be handled elsewhere. + assert(!expr.min_length); // should be handled elsewhere. 
return rm.getInternalId(ir); } static -void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr) { - const ReportID r = getInternalId(rm, expr); +void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + const ReportID r = getInternalId(rm, expr); boundary.report_at_0_eod.insert(r); boundary.report_at_0.insert(r); @@ -83,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, static void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGHolder &g, const ExpressionInfo &expr) { - boundary.report_at_0.insert(getInternalId(rm, expr)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0.insert(getInternalId(rm, expr)); remove_edge(g.start, g.accept, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -92,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGHolder &g, const ExpressionInfo &expr) { - boundary.report_at_eod.insert(getInternalId(rm, expr)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_eod.insert(getInternalId(rm, expr)); remove_edge(g.startDs, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -102,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGHolder &g, const ExpressionInfo &expr) { - boundary.report_at_0_eod.insert(getInternalId(rm, expr)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0_eod.insert(getInternalId(rm, expr)); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); } bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGHolder &g, const ExpressionInfo &expr) { + NGHolder &g, const ExpressionInfo &expr) { if 
(edge(g.startDs, g.accept, g).second) { // e.g. '.*'; match "between" every byte DEBUG_PRINTF("graph is firehose\n"); - makeFirehose(boundary, rm, g, expr); + makeFirehose(boundary, rm, g, expr); return true; } @@ -121,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, if (edge(g.start, g.accept, g).second) { DEBUG_PRINTF("creating anchored acceptor\n"); - makeAnchoredAcceptor(boundary, rm, g, expr); + makeAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.startDs, g.acceptEod, g).second) { DEBUG_PRINTF("creating end-anchored acceptor\n"); - makeEndAnchoredAcceptor(boundary, rm, g, expr); + makeEndAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.start, g.acceptEod, g).second) { DEBUG_PRINTF("creating nothing acceptor\n"); - makeNothingAcceptor(boundary, rm, g, expr); + makeNothingAcceptor(boundary, rm, g, expr); work_done = true; } diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h index c33cb312de..50590c0ded 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,13 +36,13 @@ namespace ue2 { struct BoundaryReports; -class ExpressionInfo; -class NGHolder; +class ExpressionInfo; +class NGHolder; class ReportManager; // Returns true if a "vacuous" reporter was created. 
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGHolder &g, const ExpressionInfo &expr); + NGHolder &g, const ExpressionInfo &expr); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp index 685d452150..4eb4196da1 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp @@ -1,3068 +1,3068 @@ -/* +/* * Copyright (c) 2016-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "ng_violet.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_dominators.h" -#include "ng_dump.h" -#include "ng_equivalence.h" -#include "ng_holder.h" -#include "ng_is_equal.h" -#include "ng_literal_analysis.h" -#include "ng_limex.h" -#include "ng_mcclellan.h" -#include "ng_netflow.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_reports.h" -#include "ng_split.h" -#include "ng_util.h" -#include "ng_width.h" -#include "nfa/rdfa.h" -#include "rose/rose_build.h" -#include "rose/rose_build_util.h" -#include "rose/rose_in_dump.h" -#include "rose/rose_in_graph.h" -#include "rose/rose_in_util.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/flat_containers.h" -#include "util/graph.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include "ng_violet.h" + +#include "grey.h" +#include "ng_depth.h" +#include "ng_dominators.h" +#include "ng_dump.h" +#include "ng_equivalence.h" +#include "ng_holder.h" +#include "ng_is_equal.h" +#include "ng_literal_analysis.h" +#include "ng_limex.h" +#include "ng_mcclellan.h" +#include "ng_netflow.h" +#include "ng_prune.h" +#include "ng_redundancy.h" +#include "ng_region.h" +#include "ng_reports.h" +#include "ng_split.h" +#include "ng_util.h" +#include "ng_width.h" +#include "nfa/rdfa.h" +#include "rose/rose_build.h" +#include "rose/rose_build_util.h" +#include "rose/rose_in_dump.h" +#include "rose/rose_in_graph.h" +#include "rose/rose_in_util.h" +#include "util/compare.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/flat_containers.h" +#include "util/graph.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" -#include "util/insertion_ordered.h" -#include "util/make_unique.h" -#include "util/order_check.h" -#include "util/target_info.h" -#include "util/ue2string.h" - -#include <set> -#include <utility> -#include <vector> -#include <boost/dynamic_bitset.hpp> -#include <boost/range/adaptor/map.hpp> - -#define STAGE_DEBUG_PRINTF DEBUG_PRINTF - -using namespace std; -using boost::adaptors::map_values; - -namespace ue2 { - -/* createsAnchoredLHS() is conservative as the depths take into account - * back edges that come from beyond the split point and would be missing after - * the graph is split. 
*/ -static -bool createsAnchoredLHS(const NGHolder &g, const vector<NFAVertex> &vv, - const vector<NFAVertexDepth> &depths, - const Grey &grey, depth max_depth = depth::infinity()) { - max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -/* createsTransientLHS() is conservative as the depths take into account - * back edges that come from beyond the split point and would be missing after - * the graph is split. */ -static -bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv, - const vector<NFAVertexDepth> &depths, - const Grey &grey) { - const depth max_depth(grey.maxHistoryAvailable); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromInit(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - +#include "util/insertion_ordered.h" +#include "util/make_unique.h" +#include "util/order_check.h" +#include "util/target_info.h" +#include "util/ue2string.h" + +#include <set> +#include <utility> +#include <vector> +#include <boost/dynamic_bitset.hpp> +#include <boost/range/adaptor/map.hpp> + +#define STAGE_DEBUG_PRINTF DEBUG_PRINTF + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +/* createsAnchoredLHS() is conservative as the depths take into account + * back edges that come from beyond the split point and would be missing after + * the graph is split. 
*/ +static +bool createsAnchoredLHS(const NGHolder &g, const vector<NFAVertex> &vv, + const vector<NFAVertexDepth> &depths, + const Grey &grey, depth max_depth = depth::infinity()) { + max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); + + for (auto v : vv) { + /* avoid issues of self loops blowing out depths: + * look at preds, add 1 */ + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + + u32 idx = g[u].index; + assert(idx < depths.size()); + if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { + return false; + } + } + } + return true; +} + +/* createsTransientLHS() is conservative as the depths take into account + * back edges that come from beyond the split point and would be missing after + * the graph is split. */ +static +bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv, + const vector<NFAVertexDepth> &depths, + const Grey &grey) { + const depth max_depth(grey.maxHistoryAvailable); + + for (auto v : vv) { + /* avoid issues of self loops blowing out depths: + * look at preds, add 1 */ + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + + u32 idx = g[u].index; + assert(idx < depths.size()); + if (maxDistFromInit(depths.at(idx)) >= max_depth) { + return false; + } + } + } + return true; +} + /** * Counts the number of vertices that are reachable from the set of sources * given. 
*/ -static +static size_t count_reachable(const NGHolder &g, const vector<NFAVertex> &sources, small_color_map<decltype(get(vertex_index, g))> &color_map) { auto null_visitor = boost::make_dfs_visitor(boost::null_visitor()); color_map.fill(small_color::white); - + for (auto v : sources) { boost::depth_first_visit(g, v, null_visitor, color_map); } return color_map.count(small_color::black); -} - -static -size_t shorter_than(const set<ue2_literal> &s, size_t limit) { - return count_if(s.begin(), s.end(), - [&](const ue2_literal &a) { return a.length() < limit; }); -} - -static -u32 min_len(const set<ue2_literal> &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 min_period(const set<ue2_literal> &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)minStringPeriod(lit)); - } - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - -namespace { -/** - * Information on a cut: vertices and literals. - */ -struct VertLitInfo { - VertLitInfo() {} - VertLitInfo(NFAVertex v, const set<ue2_literal> &litlit, bool c_anch, - bool c_tran = false) - : vv(vector<NFAVertex>(1, v)), lit(litlit), creates_anchored(c_anch), - creates_transient(c_tran) {} - VertLitInfo(const vector<NFAVertex> &vv_in, const set<ue2_literal> &lit_in, - bool c_anch) - : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {} - vector<NFAVertex> vv; - set<ue2_literal> lit; - - bool creates_anchored = false; - bool creates_transient = false; - double split_ratio = 0; -}; - -#define LAST_CHANCE_STRONG_LEN 1 - -/** - * \brief Comparator class for comparing different literal cuts. 
- */ -class LitComparator { -public: - LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc) - : g(g_in), seeking_anchored(sa), seeking_transient(st), - last_chance(lc) {} - bool operator()(const unique_ptr<VertLitInfo> &a, - const unique_ptr<VertLitInfo> &b) const { - assert(a && b); - - if (seeking_anchored) { - if (a->creates_anchored != b->creates_anchored) { - return a->creates_anchored < b->creates_anchored; - } - } - - if (seeking_transient) { - if (a->creates_transient != b->creates_transient) { - return a->creates_transient < b->creates_transient; - } - } - - if (last_chance - && min_len(a->lit) > LAST_CHANCE_STRONG_LEN - && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) { - DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio, - b->split_ratio); - return a->split_ratio < b->split_ratio; - } - - u64a score_a = scoreSet(a->lit); - u64a score_b = scoreSet(b->lit); - - if (score_a != score_b) { - return score_a > score_b; - } - - /* vertices should only be in one candidate cut */ - assert(a->vv == b->vv || a->vv.front() != b->vv.front()); - return g[a->vv.front()].index > g[b->vv.front()].index; - } - -private: - const NGHolder &g; /**< graph on which cuts are found */ - - bool seeking_anchored; - bool seeking_transient; - bool last_chance; -}; -} - -#define MIN_ANCHORED_LEN 2 -#define MIN_ANCHORED_DESPERATE_LEN 1 - -/* anchored here means that the cut creates a 'usefully' anchored LHS */ -static -bool validateRoseLiteralSetQuality(const set<ue2_literal> &s, u64a score, - bool anchored, u32 min_allowed_floating_len, - bool desperation, bool last_chance) { - u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN - : min_allowed_floating_len; - if (anchored && last_chance) { - min_allowed_len = MIN_ANCHORED_DESPERATE_LEN; - } - if (last_chance) { - desperation = true; - } - - DEBUG_PRINTF("validating%s set, min allowed len %u\n", - anchored ? 
" anchored" : "", min_allowed_len); - - assert(none_of(begin(s), end(s), bad_mixed_sensitivity)); - - if (score >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); - return false; - } - - assert(!s.empty()); - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad/something went wrong\n"); - return false; - } - - u32 s_min_len = min_len(s); - u32 s_min_period = min_period(s); - size_t short_count = shorter_than(s, 5); - - DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" - " short_count=%zu desp=%d\n", - dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, - s_min_period, short_count, (int)desperation); - - bool ok = true; - - if (s.size() > 10 /* magic number is magic */ - || s_min_len < min_allowed_len - || (s_min_period <= 1 && min_allowed_len != 1)) { - DEBUG_PRINTF("candidate may be bad\n"); - ok = false; - } - - if (!ok && desperation - && s.size() <= 20 /* more magic numbers are magical */ - && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok && desperation - && s.size() <= 50 /* more magic numbers are magical */ - && s_min_len > 10 - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok) { - DEBUG_PRINTF("candidate is too shitty\n"); - return false; - } - - return true; -} - -static UNUSED -void dumpRoseLiteralSet(const set<ue2_literal> &s) { - for (UNUSED const auto &lit : s) { - DEBUG_PRINTF(" lit: %s\n", dumpString(lit).c_str()); - } -} - -static -void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, - const vector<NFAVertexDepth> *depths, - const set<NFAVertex> &a_dom, - vector<unique_ptr<VertLitInfo>> *lits, - u32 min_allowed_len, bool desperation, - bool last_chance, const CompileContext &cc) { - assert(depths || !seeking_anchored); - - map<NFAVertex, u64a> scores; - map<NFAVertex, unique_ptr<VertLitInfo>> lit_info; - 
set<ue2_literal> s; - - for (auto v : a_dom) { - s = getLiteralSet(g, v, true); /* RHS will take responsibility for any - revisits to the target vertex */ - - if (s.empty()) { - DEBUG_PRINTF("candidate is too shitty\n"); - continue; - } - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = sanitizeAndCompressAndScore(s); - - bool anchored = false; - if (seeking_anchored) { - anchored = createsAnchoredLHS(g, {v}, *depths, cc.grey); - } - - if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, - desperation, last_chance)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - scores[v] = score; - lit_info[v] = std::make_unique<VertLitInfo>(v, s, anchored); - } - - /* try to filter out cases where appending some characters produces worse - * literals. Only bother to look back one byte, TODO make better */ - for (auto u : a_dom) { - if (out_degree(u, g) != 1 || !scores[u]) { - continue; - } - NFAVertex v = *adjacent_vertices(u, g).first; - if (contains(scores, v) && scores[v] >= scores[u]) { - DEBUG_PRINTF("killing off v as score %llu >= %llu\n", - scores[v], scores[u]); - lit_info.erase(v); - } - } - - lits->reserve(lit_info.size()); - for (auto &m : lit_info) { - lits->push_back(move(m.second)); - } - DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); -} - -static -void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, - const vector<NFAVertexDepth> *depths, - const set<NFAVertex> &bad, - const set<NFAVertex> *allowed, - vector<unique_ptr<VertLitInfo>> *lits, - u32 min_allowed_len, bool desperation, - bool last_chance, const CompileContext &cc) { - /* This allows us to get more places to split the graph as we are not - limited to points where there is a single vertex to split at. 
*/ - - assert(depths || !seeking_anchored); - - /* TODO: operate over 'proto-regions' which ignore back edges */ - auto regions = assignRegions(g); - - set<u32> mand, optional; - map<u32, vector<NFAVertex> > exits; - - for (auto v : vertices_range(g)) { - u32 region = regions[v]; - if (is_any_start(v, g) || region == 0) { - continue; - } - - if (is_any_accept(v, g)) { - continue; - } - - if (!generates_callbacks(g) && is_match_vertex(v, g)) { - /* we cannot leave a completely vacuous infix */ - continue; - } - - if (isRegionExit(g, v, regions)) { - exits[region].push_back(v); - } - - if (isRegionEntry(g, v, regions)) { - // Determine whether this region is mandatory or optional. We only - // need to do this check for the first entry vertex we encounter - // for this region. - if (!contains(mand, region) && !contains(optional, region)) { - if (isOptionalRegion(g, v, regions)) { - optional.insert(region); - } else { - mand.insert(region); - } - } - } - } - - for (const auto &m : exits) { - if (false) { - next_cand: - continue; - } - - const u32 region = m.first; - const vector<NFAVertex> &vv = m.second; - assert(!vv.empty()); - - if (!contains(mand, region)) { - continue; - } - - for (auto v : vv) { - /* if an exit is in bad, the region is already handled well - * by getSimpleRoseLiterals or is otherwise bad */ - if (contains(bad, v)) { - goto next_cand; - } - /* if we are only allowed to consider some vertices, v must be in - the list; */ - if (allowed && !contains(*allowed, v)) { - goto next_cand; - } - } - - /* the final region may not have a neat exit. 
validate that all exits - * have an edge to each accept or none do */ - bool edge_to_a = edge(vv[0], g.accept, g).second; - bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; - const auto &reports = g[vv[0]].reports; - for (auto v : vv) { - if (edge_to_a != edge(v, g.accept, g).second) { - goto next_cand; - } - - if (edge_to_aeod != edge(v, g.acceptEod, g).second) { - goto next_cand; - } - - if (g[v].reports != reports) { - goto next_cand; - } - } - - DEBUG_PRINTF("inspecting region %u\n", region); - set<ue2_literal> s; - for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); - /* Note: RHS can not be depended on to take all subsequent revisits - * to this vertex */ - set<ue2_literal> ss = getLiteralSet(g, v, false); - if (ss.empty()) { - DEBUG_PRINTF("candidate is too shitty\n"); - goto next_cand; - } - insert(&s, ss); - } - - assert(!s.empty()); - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = sanitizeAndCompressAndScore(s); - - DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - - bool anchored = false; - if (seeking_anchored) { - anchored = createsAnchoredLHS(g, vv, *depths, cc.grey); - } - - if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, - desperation, last_chance)) { - goto next_cand; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - lits->push_back(std::make_unique<VertLitInfo>(vv, s, anchored)); - } -} - -static -void filterCandPivots(const NGHolder &g, const set<NFAVertex> &cand_raw, - set<NFAVertex> *out) { - for (auto u : cand_raw) { - const CharReach &u_cr = g[u].char_reach; - if (u_cr.count() > 40) { - continue; /* too wide to be plausible */ - } - - if (u_cr.count() > 2) { - /* include u as a candidate as successor may have backed away from - * expanding through it */ - out->insert(u); - continue; - } - - NFAVertex v = getSoleDestVertex(g, u); - if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { - const 
CharReach &v_cr = g[v].char_reach; - if (v_cr.count() == 1 || v_cr.isCaselessChar()) { - continue; /* v will always generate better literals */ - } - } - - out->insert(u); - } -} - -/* cand_raw is the candidate set before filtering points which are clearly - * a bad idea. */ -static -void getCandidatePivots(const NGHolder &g, set<NFAVertex> *cand, - set<NFAVertex> *cand_raw) { - auto dominators = findDominators(g); - - set<NFAVertex> accepts; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - - assert(!accepts.empty()); - - vector<NFAVertex> dom_trace; - auto ait = accepts.begin(); - assert(ait != accepts.end()); - NFAVertex curr = *ait; - while (curr && !is_special(curr, g)) { - dom_trace.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace.begin(), dom_trace.end()); - for (++ait; ait != accepts.end(); ++ait) { - curr = *ait; - vector<NFAVertex> dom_trace2; - while (curr && !is_special(curr, g)) { - dom_trace2.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace2.begin(), dom_trace2.end()); - auto dti = dom_trace.begin(), dtie = dom_trace.end(); - auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); - while (dti != dtie && dtj != dtje && *dti == *dtj) { - ++dti; - ++dtj; - } - dom_trace.erase(dti, dtie); - } - - cand_raw->insert(dom_trace.begin(), dom_trace.end()); - - filterCandPivots(g, *cand_raw, cand); -} - -static -unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g, - const vector<NFAVertexDepth> *depths, - bool for_prefix, u32 min_len, - const set<NFAVertex> *allowed_cand, - const set<NFAVertex> *disallowed_cand, - bool last_chance, - const CompileContext &cc) { - assert(!for_prefix || depths); - - /* look for a single simple split point */ - set<NFAVertex> cand; - set<NFAVertex> cand_raw; - - getCandidatePivots(g, 
&cand, &cand_raw); - - if (allowed_cand) { - set<NFAVertex> cand2; - set<NFAVertex> cand2_raw; - set_intersection(allowed_cand->begin(), allowed_cand->end(), - cand.begin(), cand.end(), - inserter(cand2, cand2.begin())); - - set_intersection(allowed_cand->begin(), allowed_cand->end(), - cand_raw.begin(), cand_raw.end(), - inserter(cand2_raw, cand2_raw.begin())); - - cand = std::move(cand2); - cand_raw = std::move(cand2_raw); - } - if (disallowed_cand) { - DEBUG_PRINTF("%zu disallowed candidates\n", disallowed_cand->size()); - DEBUG_PRINTF("|old cand| = %zu\n", cand.size()); - erase_all(&cand, *disallowed_cand); - insert(&cand_raw, *disallowed_cand); - } - - if (!generates_callbacks(g)) { - /* not output exposed so must leave some RHS */ - for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) { - cand.erase(v); - cand_raw.erase(v); - } - - for (NFAVertex v : inv_adjacent_vertices_range(g.acceptEod, g)) { - cand.erase(v); - cand_raw.erase(v); - } - } - - DEBUG_PRINTF("|cand| = %zu\n", cand.size()); - - bool seeking_anchored = for_prefix; - bool seeking_transient = for_prefix; - - bool desperation = for_prefix && cc.streaming; - - vector<unique_ptr<VertLitInfo>> lits; /**< sorted list of potential cuts */ - - getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len, - desperation, last_chance, cc); - getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand, - &lits, min_len, desperation, last_chance, cc); - - if (lits.empty()) { - DEBUG_PRINTF("no literals found\n"); - return nullptr; - } - - if (seeking_transient) { - for (auto &a : lits) { - a->creates_transient - = createsTransientLHS(g, a->vv, *depths, cc.grey); - } - } - - if (last_chance) { +} + +static +size_t shorter_than(const set<ue2_literal> &s, size_t limit) { + return count_if(s.begin(), s.end(), + [&](const ue2_literal &a) { return a.length() < limit; }); +} + +static +u32 min_len(const set<ue2_literal> &s) { + u32 rv = ~0U; + + for (const auto &lit : s) { + rv = 
min(rv, (u32)lit.length()); + } + + return rv; +} + +static +u32 min_period(const set<ue2_literal> &s) { + u32 rv = ~0U; + + for (const auto &lit : s) { + rv = min(rv, (u32)minStringPeriod(lit)); + } + DEBUG_PRINTF("min period %u\n", rv); + return rv; +} + +namespace { +/** + * Information on a cut: vertices and literals. + */ +struct VertLitInfo { + VertLitInfo() {} + VertLitInfo(NFAVertex v, const set<ue2_literal> &litlit, bool c_anch, + bool c_tran = false) + : vv(vector<NFAVertex>(1, v)), lit(litlit), creates_anchored(c_anch), + creates_transient(c_tran) {} + VertLitInfo(const vector<NFAVertex> &vv_in, const set<ue2_literal> &lit_in, + bool c_anch) + : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {} + vector<NFAVertex> vv; + set<ue2_literal> lit; + + bool creates_anchored = false; + bool creates_transient = false; + double split_ratio = 0; +}; + +#define LAST_CHANCE_STRONG_LEN 1 + +/** + * \brief Comparator class for comparing different literal cuts. + */ +class LitComparator { +public: + LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc) + : g(g_in), seeking_anchored(sa), seeking_transient(st), + last_chance(lc) {} + bool operator()(const unique_ptr<VertLitInfo> &a, + const unique_ptr<VertLitInfo> &b) const { + assert(a && b); + + if (seeking_anchored) { + if (a->creates_anchored != b->creates_anchored) { + return a->creates_anchored < b->creates_anchored; + } + } + + if (seeking_transient) { + if (a->creates_transient != b->creates_transient) { + return a->creates_transient < b->creates_transient; + } + } + + if (last_chance + && min_len(a->lit) > LAST_CHANCE_STRONG_LEN + && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) { + DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio, + b->split_ratio); + return a->split_ratio < b->split_ratio; + } + + u64a score_a = scoreSet(a->lit); + u64a score_b = scoreSet(b->lit); + + if (score_a != score_b) { + return score_a > score_b; + } + + /* vertices should only be in one candidate cut */ + assert(a->vv 
== b->vv || a->vv.front() != b->vv.front()); + return g[a->vv.front()].index > g[b->vv.front()].index; + } + +private: + const NGHolder &g; /**< graph on which cuts are found */ + + bool seeking_anchored; + bool seeking_transient; + bool last_chance; +}; +} + +#define MIN_ANCHORED_LEN 2 +#define MIN_ANCHORED_DESPERATE_LEN 1 + +/* anchored here means that the cut creates a 'usefully' anchored LHS */ +static +bool validateRoseLiteralSetQuality(const set<ue2_literal> &s, u64a score, + bool anchored, u32 min_allowed_floating_len, + bool desperation, bool last_chance) { + u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN + : min_allowed_floating_len; + if (anchored && last_chance) { + min_allowed_len = MIN_ANCHORED_DESPERATE_LEN; + } + if (last_chance) { + desperation = true; + } + + DEBUG_PRINTF("validating%s set, min allowed len %u\n", + anchored ? " anchored" : "", min_allowed_len); + + assert(none_of(begin(s), end(s), bad_mixed_sensitivity)); + + if (score >= NO_LITERAL_AT_EDGE_SCORE) { + DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); + return false; + } + + assert(!s.empty()); + if (s.empty()) { + DEBUG_PRINTF("candidate is too bad/something went wrong\n"); + return false; + } + + u32 s_min_len = min_len(s); + u32 s_min_period = min_period(s); + size_t short_count = shorter_than(s, 5); + + DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" + " short_count=%zu desp=%d\n", + dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, + s_min_period, short_count, (int)desperation); + + bool ok = true; + + if (s.size() > 10 /* magic number is magic */ + || s_min_len < min_allowed_len + || (s_min_period <= 1 && min_allowed_len != 1)) { + DEBUG_PRINTF("candidate may be bad\n"); + ok = false; + } + + if (!ok && desperation + && s.size() <= 20 /* more magic numbers are magical */ + && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) + && s_min_period > 1) { + DEBUG_PRINTF("candidate is ok\n"); + ok = true; + } + + if (!ok && 
desperation + && s.size() <= 50 /* more magic numbers are magical */ + && s_min_len > 10 + && s_min_period > 1) { + DEBUG_PRINTF("candidate is ok\n"); + ok = true; + } + + if (!ok) { + DEBUG_PRINTF("candidate is too shitty\n"); + return false; + } + + return true; +} + +static UNUSED +void dumpRoseLiteralSet(const set<ue2_literal> &s) { + for (UNUSED const auto &lit : s) { + DEBUG_PRINTF(" lit: %s\n", dumpString(lit).c_str()); + } +} + +static +void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, + const vector<NFAVertexDepth> *depths, + const set<NFAVertex> &a_dom, + vector<unique_ptr<VertLitInfo>> *lits, + u32 min_allowed_len, bool desperation, + bool last_chance, const CompileContext &cc) { + assert(depths || !seeking_anchored); + + map<NFAVertex, u64a> scores; + map<NFAVertex, unique_ptr<VertLitInfo>> lit_info; + set<ue2_literal> s; + + for (auto v : a_dom) { + s = getLiteralSet(g, v, true); /* RHS will take responsibility for any + revisits to the target vertex */ + + if (s.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + continue; + } + + DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); + dumpRoseLiteralSet(s); + u64a score = sanitizeAndCompressAndScore(s); + + bool anchored = false; + if (seeking_anchored) { + anchored = createsAnchoredLHS(g, {v}, *depths, cc.grey); + } + + if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, + desperation, last_chance)) { + continue; + } + + DEBUG_PRINTF("candidate is a candidate\n"); + scores[v] = score; + lit_info[v] = std::make_unique<VertLitInfo>(v, s, anchored); + } + + /* try to filter out cases where appending some characters produces worse + * literals. 
Only bother to look back one byte, TODO make better */ + for (auto u : a_dom) { + if (out_degree(u, g) != 1 || !scores[u]) { + continue; + } + NFAVertex v = *adjacent_vertices(u, g).first; + if (contains(scores, v) && scores[v] >= scores[u]) { + DEBUG_PRINTF("killing off v as score %llu >= %llu\n", + scores[v], scores[u]); + lit_info.erase(v); + } + } + + lits->reserve(lit_info.size()); + for (auto &m : lit_info) { + lits->push_back(move(m.second)); + } + DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); +} + +static +void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, + const vector<NFAVertexDepth> *depths, + const set<NFAVertex> &bad, + const set<NFAVertex> *allowed, + vector<unique_ptr<VertLitInfo>> *lits, + u32 min_allowed_len, bool desperation, + bool last_chance, const CompileContext &cc) { + /* This allows us to get more places to split the graph as we are not + limited to points where there is a single vertex to split at. */ + + assert(depths || !seeking_anchored); + + /* TODO: operate over 'proto-regions' which ignore back edges */ + auto regions = assignRegions(g); + + set<u32> mand, optional; + map<u32, vector<NFAVertex> > exits; + + for (auto v : vertices_range(g)) { + u32 region = regions[v]; + if (is_any_start(v, g) || region == 0) { + continue; + } + + if (is_any_accept(v, g)) { + continue; + } + + if (!generates_callbacks(g) && is_match_vertex(v, g)) { + /* we cannot leave a completely vacuous infix */ + continue; + } + + if (isRegionExit(g, v, regions)) { + exits[region].push_back(v); + } + + if (isRegionEntry(g, v, regions)) { + // Determine whether this region is mandatory or optional. We only + // need to do this check for the first entry vertex we encounter + // for this region. 
+ if (!contains(mand, region) && !contains(optional, region)) { + if (isOptionalRegion(g, v, regions)) { + optional.insert(region); + } else { + mand.insert(region); + } + } + } + } + + for (const auto &m : exits) { + if (false) { + next_cand: + continue; + } + + const u32 region = m.first; + const vector<NFAVertex> &vv = m.second; + assert(!vv.empty()); + + if (!contains(mand, region)) { + continue; + } + + for (auto v : vv) { + /* if an exit is in bad, the region is already handled well + * by getSimpleRoseLiterals or is otherwise bad */ + if (contains(bad, v)) { + goto next_cand; + } + /* if we are only allowed to consider some vertices, v must be in + the list; */ + if (allowed && !contains(*allowed, v)) { + goto next_cand; + } + } + + /* the final region may not have a neat exit. validate that all exits + * have an edge to each accept or none do */ + bool edge_to_a = edge(vv[0], g.accept, g).second; + bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; + const auto &reports = g[vv[0]].reports; + for (auto v : vv) { + if (edge_to_a != edge(v, g.accept, g).second) { + goto next_cand; + } + + if (edge_to_aeod != edge(v, g.acceptEod, g).second) { + goto next_cand; + } + + if (g[v].reports != reports) { + goto next_cand; + } + } + + DEBUG_PRINTF("inspecting region %u\n", region); + set<ue2_literal> s; + for (auto v : vv) { + DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); + /* Note: RHS can not be depended on to take all subsequent revisits + * to this vertex */ + set<ue2_literal> ss = getLiteralSet(g, v, false); + if (ss.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + goto next_cand; + } + insert(&s, ss); + } + + assert(!s.empty()); + + DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); + dumpRoseLiteralSet(s); + u64a score = sanitizeAndCompressAndScore(s); + + DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); + dumpRoseLiteralSet(s); + + bool anchored = false; + if (seeking_anchored) { + anchored = createsAnchoredLHS(g, vv, 
*depths, cc.grey); + } + + if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, + desperation, last_chance)) { + goto next_cand; + } + + DEBUG_PRINTF("candidate is a candidate\n"); + lits->push_back(std::make_unique<VertLitInfo>(vv, s, anchored)); + } +} + +static +void filterCandPivots(const NGHolder &g, const set<NFAVertex> &cand_raw, + set<NFAVertex> *out) { + for (auto u : cand_raw) { + const CharReach &u_cr = g[u].char_reach; + if (u_cr.count() > 40) { + continue; /* too wide to be plausible */ + } + + if (u_cr.count() > 2) { + /* include u as a candidate as successor may have backed away from + * expanding through it */ + out->insert(u); + continue; + } + + NFAVertex v = getSoleDestVertex(g, u); + if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { + const CharReach &v_cr = g[v].char_reach; + if (v_cr.count() == 1 || v_cr.isCaselessChar()) { + continue; /* v will always generate better literals */ + } + } + + out->insert(u); + } +} + +/* cand_raw is the candidate set before filtering points which are clearly + * a bad idea. 
*/ +static +void getCandidatePivots(const NGHolder &g, set<NFAVertex> *cand, + set<NFAVertex> *cand_raw) { + auto dominators = findDominators(g); + + set<NFAVertex> accepts; + + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (is_special(v, g)) { + continue; + } + accepts.insert(v); + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (is_special(v, g)) { + continue; + } + accepts.insert(v); + } + + assert(!accepts.empty()); + + vector<NFAVertex> dom_trace; + auto ait = accepts.begin(); + assert(ait != accepts.end()); + NFAVertex curr = *ait; + while (curr && !is_special(curr, g)) { + dom_trace.push_back(curr); + curr = dominators[curr]; + } + reverse(dom_trace.begin(), dom_trace.end()); + for (++ait; ait != accepts.end(); ++ait) { + curr = *ait; + vector<NFAVertex> dom_trace2; + while (curr && !is_special(curr, g)) { + dom_trace2.push_back(curr); + curr = dominators[curr]; + } + reverse(dom_trace2.begin(), dom_trace2.end()); + auto dti = dom_trace.begin(), dtie = dom_trace.end(); + auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); + while (dti != dtie && dtj != dtje && *dti == *dtj) { + ++dti; + ++dtj; + } + dom_trace.erase(dti, dtie); + } + + cand_raw->insert(dom_trace.begin(), dom_trace.end()); + + filterCandPivots(g, *cand_raw, cand); +} + +static +unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g, + const vector<NFAVertexDepth> *depths, + bool for_prefix, u32 min_len, + const set<NFAVertex> *allowed_cand, + const set<NFAVertex> *disallowed_cand, + bool last_chance, + const CompileContext &cc) { + assert(!for_prefix || depths); + + /* look for a single simple split point */ + set<NFAVertex> cand; + set<NFAVertex> cand_raw; + + getCandidatePivots(g, &cand, &cand_raw); + + if (allowed_cand) { + set<NFAVertex> cand2; + set<NFAVertex> cand2_raw; + set_intersection(allowed_cand->begin(), allowed_cand->end(), + cand.begin(), cand.end(), + inserter(cand2, cand2.begin())); + + set_intersection(allowed_cand->begin(), 
allowed_cand->end(), + cand_raw.begin(), cand_raw.end(), + inserter(cand2_raw, cand2_raw.begin())); + + cand = std::move(cand2); + cand_raw = std::move(cand2_raw); + } + if (disallowed_cand) { + DEBUG_PRINTF("%zu disallowed candidates\n", disallowed_cand->size()); + DEBUG_PRINTF("|old cand| = %zu\n", cand.size()); + erase_all(&cand, *disallowed_cand); + insert(&cand_raw, *disallowed_cand); + } + + if (!generates_callbacks(g)) { + /* not output exposed so must leave some RHS */ + for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) { + cand.erase(v); + cand_raw.erase(v); + } + + for (NFAVertex v : inv_adjacent_vertices_range(g.acceptEod, g)) { + cand.erase(v); + cand_raw.erase(v); + } + } + + DEBUG_PRINTF("|cand| = %zu\n", cand.size()); + + bool seeking_anchored = for_prefix; + bool seeking_transient = for_prefix; + + bool desperation = for_prefix && cc.streaming; + + vector<unique_ptr<VertLitInfo>> lits; /**< sorted list of potential cuts */ + + getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len, + desperation, last_chance, cc); + getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand, + &lits, min_len, desperation, last_chance, cc); + + if (lits.empty()) { + DEBUG_PRINTF("no literals found\n"); + return nullptr; + } + + if (seeking_transient) { + for (auto &a : lits) { + a->creates_transient + = createsTransientLHS(g, a->vv, *depths, cc.grey); + } + } + + if (last_chance) { const size_t num_verts = num_vertices(g); auto color_map = make_small_color_map(g); - for (auto &a : lits) { + for (auto &a : lits) { size_t num_reachable = count_reachable(g, a->vv, color_map); double ratio = (double)num_reachable / (double)num_verts; a->split_ratio = ratio > 0.5 ? 
1 - ratio : ratio; - } - } - - auto cmp = LitComparator(g, seeking_anchored, seeking_transient, - last_chance); - - unique_ptr<VertLitInfo> best = move(lits.back()); - lits.pop_back(); - while (!lits.empty()) { - if (cmp(best, lits.back())) { - best = move(lits.back()); - } - lits.pop_back(); - } - - DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", - dumpString(*best->lit.begin()).c_str(), - g[best->vv.front()].index, - depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0, - depths ? (int)createsTransientLHS(g, best->vv, *depths, cc.grey) : 0); - - return best; -} - -static -void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, - bool overhang_ok, flat_set<NFAEdge> &bad) { - DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n", - num_vertices(h), succ.length()); - - using EdgeSet = boost::dynamic_bitset<>; - - const size_t edge_count = num_edges(h); - EdgeSet bad_edges(edge_count); - - unordered_map<NFAVertex, EdgeSet> curr; - for (const auto &e : in_edges_range(h.accept, h)) { - auto &path_set = curr[source(e, h)]; - if (path_set.empty()) { - path_set.resize(edge_count); - } - path_set.set(h[e].index); - } - - unordered_map<NFAVertex, EdgeSet> next; - for (auto it = succ.rbegin(); it != succ.rend(); ++it) { - for (const auto &path : curr) { - NFAVertex u = path.first; - const auto &path_set = path.second; - if (u == h.start && overhang_ok) { - DEBUG_PRINTF("poisoning early %zu [overhang]\n", - path_set.count()); - bad_edges |= path_set; - continue; - } - if (overlaps(h[u].char_reach, *it)) { - for (const auto &e : in_edges_range(u, h)) { - auto &new_path_set = next[source(e, h)]; - if (new_path_set.empty()) { - new_path_set.resize(edge_count); - } - new_path_set |= path_set; - new_path_set.set(h[e].index); - } - } - } - DEBUG_PRINTF("succ char matches at %zu paths\n", next.size()); - assert(overhang_ok || !curr.empty()); - swap(curr, next); - next.clear(); - } - - assert(overhang_ok || !curr.empty()); - for (const auto &path : curr) 
{ - bad_edges |= path.second; - DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count()); - } - - for (const auto &e : edges_range(h)) { - if (bad_edges.test(h[e].index)) { - bad.insert(e); - } - } -} - -static -void poisonForGoodPrefix(const NGHolder &h, - const vector<NFAVertexDepth> &depths, - flat_set<NFAEdge> &bad, const Grey &grey) { - for (const auto &v : vertices_range(h)) { - if (!createsAnchoredLHS(h, {v}, depths, grey) - && !createsTransientLHS(h, {v}, depths, grey)) { - insert(&bad, in_edges_range(v, h)); - } - } -} - -static UNUSED -bool is_any_accept_type(RoseInVertexType t) { - return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD; -} - -static -flat_set<NFAEdge> poisonEdges(const NGHolder &h, - const vector<NFAVertexDepth> *depths, - const RoseInGraph &vg, const vector<RoseInEdge> &ee, - bool for_prefix, const Grey &grey) { - DEBUG_PRINTF("poisoning edges %zu successor edges\n", ee.size()); - - /* poison edges covered by successor literal */ - - set<pair<ue2_literal, bool> > succs; - for (const RoseInEdge &ve : ee) { - if (vg[target(ve, vg)].type != RIV_LITERAL) { - /* nothing to poison in suffixes/outfixes */ - assert(generates_callbacks(h)); - assert(is_any_accept_type(vg[target(ve, vg)].type)); - continue; - } - succs.insert({vg[target(ve, vg)].s, - vg[source(ve, vg)].type == RIV_LITERAL}); - - } - - DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size()); - - flat_set<NFAEdge> bad; - for (const auto &p : succs) { - poisonFromSuccessor(h, p.first, p.second, bad); - } - - /* poison edges which don't significantly improve a prefix */ - - if (for_prefix) { - poisonForGoodPrefix(h, *depths, bad, grey); - } - - return bad; -} - -static -set<NFAVertex> poisonVertices(const NGHolder &h, const RoseInGraph &vg, - const vector<RoseInEdge> &ee, const Grey &grey) { - flat_set<NFAEdge> bad_edges = poisonEdges(h, nullptr, vg, ee, false, grey); - set<NFAVertex> bad_vertices; - for (const NFAEdge &e : bad_edges) { - bad_vertices.insert(target(e, h)); 
- DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index, - h[target(e, h)].index); - } - - return bad_vertices; -} - -static -unique_ptr<VertLitInfo> findBestNormalSplit(const NGHolder &g, - const RoseInGraph &vg, - const vector<RoseInEdge> &ee, - const CompileContext &cc) { - assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); - set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); - - return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, false, cc); -} - -static -unique_ptr<VertLitInfo> findBestLastChanceSplit(const NGHolder &g, - const RoseInGraph &vg, - const vector<RoseInEdge> &ee, - const CompileContext &cc) { - assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); - set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); - - return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, true, cc); -} - -static -unique_ptr<VertLitInfo> findSimplePrefixSplit(const NGHolder &g, - const CompileContext &cc) { - DEBUG_PRINTF("looking for simple prefix split\n"); - bool anchored = !proper_out_degree(g.startDs, g); - NFAVertex u = anchored ? 
g.start : g.startDs; - - if (out_degree(u, g) != 2) { /* startDs + succ */ - return nullptr; - } - - NFAVertex v = NGHolder::null_vertex(); - for (NFAVertex t : adjacent_vertices_range(u, g)) { - if (t != g.startDs) { - assert(!v); - v = t; - } - } - assert(v); - - if (!anchored) { - if (out_degree(g.start, g) > 2) { - return nullptr; - } - if (out_degree(g.start, g) == 2 && !edge(g.start, v, g).second) { - return nullptr; - } - } - - NFAVertex best_v = NGHolder::null_vertex(); - ue2_literal best_lit; - - u32 limit = cc.grey.maxHistoryAvailable; - if (anchored) { - LIMIT_TO_AT_MOST(&limit, cc.grey.maxAnchoredRegion); - } - - ue2_literal curr_lit; - for (u32 i = 0; i < limit; i++) { - const auto &v_cr = g[v].char_reach; - if (v_cr.count() == 1 || v_cr.isCaselessChar()) { - curr_lit.push_back(v_cr.find_first(), v_cr.isCaselessChar()); - } else { - curr_lit.clear(); - } - - if (curr_lit.length() > best_lit.length()) { - best_lit = curr_lit; - best_v = v; - } - - if (out_degree(v, g) != 1) { - break; - } - v = *adjacent_vertices(v, g).first; - } - - if (best_lit.length() < cc.grey.minRoseLiteralLength) { - return nullptr; - } - - set<ue2_literal> best_lit_set({best_lit}); - if (bad_mixed_sensitivity(best_lit)) { - sanitizeAndCompressAndScore(best_lit_set); - } - - return ue2::make_unique<VertLitInfo>(best_v, best_lit_set, anchored, true); -} - -static -unique_ptr<VertLitInfo> findBestPrefixSplit(const NGHolder &g, - const vector<NFAVertexDepth> &depths, - const RoseInGraph &vg, - const vector<RoseInEdge> &ee, - bool last_chance, - const CompileContext &cc) { - assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX); - set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); - auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, last_chance, cc); - - /* large back edges may prevent us identifying anchored or transient cases - * properly - use a simple walk instead */ - if (!rv || !(rv->creates_transient || 
rv->creates_anchored)) { - auto rv2 = findSimplePrefixSplit(g, cc); - if (rv2) { - return rv2; - } - } - - return rv; -} - -static -unique_ptr<VertLitInfo> findBestCleanSplit(const NGHolder &g, - const CompileContext &cc) { - assert(g.kind != NFA_PREFIX); - set<NFAVertex> cleanSplits; - for (NFAVertex v : vertices_range(g)) { - if (!g[v].char_reach.all() || !edge(v, v, g).second) { - continue; - } - insert(&cleanSplits, inv_adjacent_vertices(v, g)); - cleanSplits.erase(v); - } - cleanSplits.erase(g.start); - if (cleanSplits.empty()) { - return nullptr; - } - return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen, - &cleanSplits, nullptr, false, cc); -} - -static -bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { - set<NFAVertex> curr, next; - curr.insert(g.accept); - - for (auto it = lit.rbegin(); it != lit.rend(); ++it) { - next.clear(); - - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - return true; - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - continue; - } - - next.insert(u); - } - } - - curr.swap(next); - } - - return !curr.empty(); -} - -static -bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, - const vector<RoseInEdge> &ee, const VertLitInfo &split) { - const vector<NFAVertex> &splitters = split.vv; - assert(!splitters.empty()); - - shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); - shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); - - unordered_map<NFAVertex, NFAVertex> lhs_map; - unordered_map<NFAVertex, NFAVertex> rhs_map; - - splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map); - DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n", - to_string(base_graph.kind).c_str(), num_vertices(base_graph), - to_string(lhs->kind).c_str(), 
num_vertices(*lhs), - to_string(rhs->kind).c_str(), num_vertices(*rhs)); - - bool suffix = generates_callbacks(base_graph); - - if (is_triggered(base_graph)) { - /* if we are already guarded, check if the split reduces the size of - * the problem before continuing with the split */ - if (num_vertices(*lhs) >= num_vertices(base_graph) - && !(suffix && isVacuous(*rhs))) { - DEBUG_PRINTF("split's lhs is no smaller\n"); - return false; - } - - if (num_vertices(*rhs) >= num_vertices(base_graph)) { - DEBUG_PRINTF("split's rhs is no smaller\n"); - return false; - } - } - - bool do_accept = false; - bool do_accept_eod = false; - assert(rhs); - if (isVacuous(*rhs) && suffix) { - if (edge(rhs->start, rhs->accept, *rhs).second) { - DEBUG_PRINTF("rhs has a cliche\n"); - do_accept = true; - remove_edge(rhs->start, rhs->accept, *rhs); - } - - if (edge(rhs->start, rhs->acceptEod, *rhs).second) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - do_accept_eod = true; - remove_edge(rhs->start, rhs->acceptEod, *rhs); - } - - renumber_edges(*rhs); - } - - /* check if we still have a useful graph left over */ - bool do_norm = out_degree(rhs->start, *rhs) != 1; - - set<ReportID> splitter_reports; - for (auto v : splitters) { - insert(&splitter_reports, base_graph[v].reports); - } - - /* find the targets of each source vertex; insertion_ordered_map used to - * preserve deterministic ordering */ - insertion_ordered_map<RoseInVertex, vector<RoseInVertex>> images; - for (const RoseInEdge &e : ee) { - RoseInVertex src = source(e, vg); - RoseInVertex dest = target(e, vg); - images[src].push_back(dest); - remove_edge(e, vg); - } - - map<vector<RoseInVertex>, vector<RoseInVertex>> verts_by_image; - - for (const auto &m : images) { - const auto &u = m.first; - const auto &image = m.second; - - if (contains(verts_by_image, image)) { - for (RoseInVertex v : verts_by_image[image]) { - add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); - } - continue; - } - - for (const auto &lit : split.lit) { - 
assert(!bad_mixed_sensitivity(lit)); - - /* don't allow overhang in can_match() as literals should - * correspond to the edge graph being split; overhanging the graph - * would indicate a false path.*/ - if (!can_match(*lhs, lit, false)) { - DEBUG_PRINTF("'%s' did not match lhs\n", - escapeString(lit).c_str()); - continue; - } - - DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); - auto v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); - add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); - - /* work out delay later */ - if (do_accept) { - DEBUG_PRINTF("rhs has a cliche\n"); - auto tt = add_vertex(RoseInVertexProps::makeAccept( - splitter_reports), vg); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); - } - - if (do_accept_eod) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - auto tt = add_vertex(RoseInVertexProps::makeAcceptEod( - splitter_reports), vg); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); - } - - if (do_norm) { - assert(out_degree(rhs->start, *rhs) > 1); - for (RoseInVertex dest : image) { - add_edge(v, dest, RoseInEdgeProps(rhs, 0U), vg); - } - } - verts_by_image[image].push_back(v); - } - } - - assert(hasCorrectlyNumberedVertices(*rhs)); - assert(hasCorrectlyNumberedEdges(*rhs)); - assert(isCorrectlyTopped(*rhs)); - assert(hasCorrectlyNumberedVertices(*lhs)); - assert(hasCorrectlyNumberedEdges(*lhs)); - assert(isCorrectlyTopped(*lhs)); - - return true; -} - -#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */ -#define MAX_LEN_2_LITERALS_PER_CUT 3 - -static -bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores, - const map<NFAEdge, set<ue2_literal>> &cut_lits, - u32 min_allowed_length) { - DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(), - min_allowed_length); - if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { - return false; - } - - u32 len_2_count = 0; - - for (const auto &cut : cut_lits) { - if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("cut uses a forbidden edge\n"); - 
return false; - } - - if (min_len(cut.second) < min_allowed_length) { - DEBUG_PRINTF("cut uses a bad literal\n"); - return false; - } - - for (const auto &lit : cut.second) { - if (lit.length() == 2) { - len_2_count++; - } - } - } - - if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { - return false; - } - - return true; -} - -static -void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, - const vector<RoseInEdge> &to_cut, - const vector<NFAEdge> &cut, - const map<NFAEdge, set<ue2_literal>> &cut_lits) { - DEBUG_PRINTF("splitting %s (%zu vertices)\n", to_string(h.kind).c_str(), - num_vertices(h)); - - /* create literal vertices and connect preds */ - unordered_set<RoseInVertex> done_sources; - map<RoseInVertex, vector<pair<RoseInVertex, NFAVertex>>> verts_by_source; - for (const RoseInEdge &ve : to_cut) { - assert(&h == &*vg[ve].graph); - RoseInVertex src = source(ve, vg); - if (!done_sources.insert(src).second) { - continue; /* already processed */ - } - - /* iterate over cut for determinism */ - for (const auto &e : cut) { - NFAVertex prev_v = source(e, h); - NFAVertex pivot = target(e, h); - - DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); - unordered_map<NFAVertex, NFAVertex> temp_map; - shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>(); - splitLHS(h, pivot, new_lhs.get(), &temp_map); - - /* want to cut off paths to pivot from things other than the pivot - - * makes a more svelte graphy */ - clear_in_edges(temp_map[pivot], *new_lhs); - NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], - *new_lhs); - if (is_triggered(h) && prev_v == h.start) { - (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); - } - - pruneUseless(*new_lhs, false); - renumber_vertices(*new_lhs); - renumber_edges(*new_lhs); - - DEBUG_PRINTF(" into lhs %s (%zu vertices)\n", - to_string(new_lhs->kind).c_str(), - num_vertices(*new_lhs)); - - assert(hasCorrectlyNumberedVertices(*new_lhs)); - assert(hasCorrectlyNumberedEdges(*new_lhs)); - assert(isCorrectlyTopped(*new_lhs)); - 
- const set<ue2_literal> &lits = cut_lits.at(e); - for (const auto &lit : lits) { - if (!can_match(*new_lhs, lit, is_triggered(h))) { - continue; - } - - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); - - /* if this is a prefix/infix an edge directly to accept should - * represent a false path as we have poisoned vertices covered - * by the literals. */ - if (generates_callbacks(h)) { - if (edge(pivot, h.accept, h).second) { - DEBUG_PRINTF("adding acceptEod\n"); - /* literal has a direct connection to accept */ - const flat_set<ReportID> &reports = h[pivot].reports; - auto tt = add_vertex( - RoseInVertexProps::makeAccept(reports), vg); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); - } - - if (edge(pivot, h.acceptEod, h).second) { - assert(generates_callbacks(h)); - DEBUG_PRINTF("adding acceptEod\n"); - /* literal has a direct connection to accept */ - const flat_set<ReportID> &reports = h[pivot].reports; - auto tt = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), vg); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); - } - } - - add_edge(src, v, RoseInEdgeProps(new_lhs, 0), vg); - verts_by_source[src].push_back({v, pivot}); - } - } - } - - /* wire the literal vertices up to successors */ - map<vector<NFAVertex>, shared_ptr<NGHolder> > done_rhs; - for (const RoseInEdge &ve : to_cut) { - RoseInVertex src = source(ve, vg); - RoseInVertex dest = target(ve, vg); - - /* iterate over cut for determinism */ - for (const auto &elem : verts_by_source[src]) { - NFAVertex pivot = elem.second; - RoseInVertex v = elem.first; - - vector<NFAVertex> adj; - insert(&adj, adj.end(), adjacent_vertices(pivot, h)); - /* we can ignore presence of accept, accepteod in adj as it is best - effort */ - - if (!contains(done_rhs, adj)) { - unordered_map<NFAVertex, NFAVertex> temp_map; - shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>(); - splitRHS(h, adj, new_rhs.get(), &temp_map); - remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); - 
remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); - renumber_edges(*new_rhs); - DEBUG_PRINTF(" into rhs %s (%zu vertices)\n", - to_string(new_rhs->kind).c_str(), - num_vertices(*new_rhs)); - done_rhs.emplace(adj, new_rhs); - assert(isCorrectlyTopped(*new_rhs)); - } - - assert(done_rhs[adj].get()); - shared_ptr<NGHolder> new_rhs = done_rhs[adj]; - - assert(hasCorrectlyNumberedVertices(*new_rhs)); - assert(hasCorrectlyNumberedEdges(*new_rhs)); - assert(isCorrectlyTopped(*new_rhs)); - - if (vg[dest].type == RIV_LITERAL - && !can_match(*new_rhs, vg[dest].s, true)) { - continue; - } - - if (out_degree(new_rhs->start, *new_rhs) != 1) { - add_edge(v, dest, RoseInEdgeProps(new_rhs, 0), vg); - } - } - - remove_edge(ve, vg); - } -} - -static -bool doNetflowCut(NGHolder &h, - const vector<NFAVertexDepth> *depths, - RoseInGraph &vg, - const vector<RoseInEdge> &ee, bool for_prefix, - const Grey &grey, u32 min_allowed_length = 0U) { - ENSURE_AT_LEAST(&min_allowed_length, grey.minRoseNetflowLiteralLength); - - DEBUG_PRINTF("doing netflow cut\n"); - /* TODO: we should really get literals/scores from the full graph as this - * allows us to overlap with previous cuts. */ - assert(!ee.empty()); - assert(&h == &*vg[ee.front()].graph); - assert(!for_prefix || depths); - - if (num_edges(h) > grey.maxRoseNetflowEdges) { - /* We have a limit on this because scoring edges and running netflow - * gets very slow for big graphs. 
*/ - DEBUG_PRINTF("too many edges, skipping netflow cut\n"); - return false; - } - - assert(hasCorrectlyNumberedVertices(h)); - assert(hasCorrectlyNumberedEdges(h)); - - auto known_bad = poisonEdges(h, depths, vg, ee, for_prefix, grey); - - /* Step 1: Get scores for all edges */ - vector<u64a> scores = scoreEdges(h, known_bad); /* scores by edge_index */ - - /* Step 2: Find cutset based on scores */ - vector<NFAEdge> cut = findMinCut(h, scores); - - /* Step 3: Get literals corresponding to cut edges */ - map<NFAEdge, set<ue2_literal>> cut_lits; - for (const auto &e : cut) { - set<ue2_literal> lits = getLiteralSet(h, e); - sanitizeAndCompressAndScore(lits); - - cut_lits[e] = lits; - } - - /* if literals are underlength bail or if it involves a forbidden edge*/ - if (!checkValidNetflowLits(h, scores, cut_lits, min_allowed_length)) { - return false; - } - DEBUG_PRINTF("splitting\n"); - - /* Step 4: Split graph based on cuts */ - splitEdgesByCut(h, vg, ee, cut, cut_lits); - - return true; -} - -static -bool deanchorIfNeeded(NGHolder &g) { - DEBUG_PRINTF("hi\n"); - if (proper_out_degree(g.startDs, g)) { - return false; - } - - /* look for a non-special dot with a loop following start */ - set<NFAVertex> succ_g; - insert(&succ_g, adjacent_vertices(g.start, g)); - succ_g.erase(g.startDs); - - for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index, - g[v].char_reach.count()); - - if (v == g.startDs || !g[v].char_reach.all()) { - continue; - } - - set<NFAVertex> succ_v; - insert(&succ_v, adjacent_vertices(v, g)); - - if (succ_v == succ_g) { - DEBUG_PRINTF("found ^.*\n"); - for (auto succ : adjacent_vertices_range(g.start, g)) { - if (succ == g.startDs) { - continue; - } - add_edge(g.startDs, succ, g); - } - clear_vertex(v, g); - remove_vertex(v, g); - renumber_vertices(g); - return true; - } - - if (succ_g.size() == 1 && hasSelfLoop(v, g)) { - DEBUG_PRINTF("found ^.+\n"); - add_edge(g.startDs, v, g); - remove_edge(v, 
v, g); - return true; - } - } - - return false; -} - -static -RoseInGraph populateTrivialGraph(const NGHolder &h) { - RoseInGraph g; - shared_ptr<NGHolder> root_g = cloneHolder(h); - bool orig_anch = isAnchored(*root_g); - orig_anch |= deanchorIfNeeded(*root_g); - - DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); - - auto start = add_vertex(RoseInVertexProps::makeStart(orig_anch), g); - auto accept = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g); - - add_edge(start, accept, RoseInEdgeProps(root_g, 0), g); - - return g; -} - -static -void avoidOutfixes(RoseInGraph &vg, bool last_chance, - const CompileContext &cc) { - STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n"); - assert(num_vertices(vg) == 2); - assert(num_edges(vg) == 1); - - RoseInEdge e = *edges(vg).first; - - NGHolder &h = *vg[e].graph; - assert(isCorrectlyTopped(h)); - - renumber_vertices(h); - renumber_edges(h); - - unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, {e}, cc); - - if (split && splitRoseEdge(h, vg, {e}, *split)) { - DEBUG_PRINTF("split on simple literal\n"); - return; - } - - if (last_chance) { - /* look for a prefix split as it allows us to accept very weak anchored - * literals. 
*/ - auto depths = calcDepths(h); - - split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc); - - if (split && splitRoseEdge(h, vg, {e}, *split)) { - DEBUG_PRINTF("split on simple literal\n"); - return; - } - } - - doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); -} - -static -void removeRedundantPrefixes(RoseInGraph &g) { - STAGE_DEBUG_PRINTF("REMOVING REDUNDANT PREFIXES\n"); - - for (const RoseInEdge &e : edges_range(g)) { - RoseInVertex s = source(e, g); - RoseInVertex t = target(e, g); - - if (g[s].type != RIV_START || g[t].type != RIV_LITERAL) { - continue; - } - - if (!g[e].graph) { - continue; - } - - assert(!g[t].delay); - const ue2_literal &lit = g[t].s; - - if (!literalIsWholeGraph(*g[e].graph, lit)) { - DEBUG_PRINTF("not whole graph\n"); - continue; - } - - if (!isFloating(*g[e].graph)) { - DEBUG_PRINTF("not floating\n"); - continue; - } - g[e].graph.reset(); - } -} - -static -u32 maxDelay(const CompileContext &cc) { - if (!cc.streaming) { - return MO_INVALID_IDX; - } - return cc.grey.maxHistoryAvailable; -} - -static -void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, - const CompileContext &cc) { - STAGE_DEBUG_PRINTF("REMOVING LITERALS FROM PREFIXES\n"); - - vector<RoseInEdge> to_anchor; - for (const RoseInEdge &e : edges_range(g)) { - RoseInVertex s = source(e, g); - RoseInVertex t = target(e, g); - - if (g[s].type != RIV_START && g[s].type != RIV_ANCHORED_START) { - continue; - } - - if (g[t].type != RIV_LITERAL) { - continue; - } - - if (!g[e].graph) { - continue; - } - - if (g[e].graph_lag) { - /* already removed redundant parts of literals */ - continue; - } - - if (g[e].dfa) { - /* if we removed any more states, we would need to rebuild the - * the dfa which can be time consuming. 
*/ - continue; - } - - assert(!g[t].delay); - const ue2_literal &lit = g[t].s; - - DEBUG_PRINTF("removing states for literal: %s\n", - dumpString(lit).c_str()); - - unique_ptr<NGHolder> h = cloneHolder(*g[e].graph); - const u32 max_delay = maxDelay(cc); - - u32 delay = removeTrailingLiteralStates(*h, lit, max_delay, - false /* can't overhang start */); - - DEBUG_PRINTF("got delay %u (max allowed %u)\n", delay, max_delay); - - if (edge(h->startDs, h->accept, *h).second) { - /* we should have delay == lit.length(), but in really complex - * cases we may fail to identify that we can remove the whole - * graph. Regardless, the fact that sds is wired to accept means the - * graph serves no purpose. */ - DEBUG_PRINTF("whole graph\n"); - g[e].graph.reset(); - continue; - } - - if (delay == lit.length() && edge(h->start, h->accept, *h).second - && num_vertices(*h) == N_SPECIALS) { - to_anchor.push_back(e); - continue; - } - - /* if we got here we should still have an interesting graph */ - assert(delay == max_delay || num_vertices(*h) > N_SPECIALS); - - if (delay && delay != MO_INVALID_IDX) { - DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); - - g[e].graph = move(h); - g[e].graph_lag = delay; - } - } - - if (!to_anchor.empty()) { - RoseInVertex anch = add_vertex(RoseInVertexProps::makeStart(true), g); - - for (RoseInEdge e : to_anchor) { - DEBUG_PRINTF("rehoming to anchor\n"); - RoseInVertex v = target(e, g); - add_edge(anch, v, g); - remove_edge(e, g); - } - } -} - -static -bool isStarCliche(const NGHolder &g) { - DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); - - bool nonspecials_seen = false; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - if (nonspecials_seen) { - return false; - } - nonspecials_seen = true; - - if (!g[v].char_reach.all()) { - return false; - } - - if (!hasSelfLoop(v, g)) { - return false; - } - if (!edge(v, g.accept, g).second) { - return false; - } - } - - if (!nonspecials_seen) 
{ - return false; - } - - if (!edge(g.start, g.accept, g).second) { - return false; - } - - return true; -} - -static -void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, - const vector<RoseInEdge> &ee, - const CompileContext &cc) { - /* TODO: This could be better by not creating a separate graph for each - * successor literal. This would require using distinct report ids and also - * taking into account overlap of successor literals. */ - - set<ue2_literal> preds; - set<ue2_literal> succs; - for (const RoseInEdge &e : ee) { - RoseInVertex u = source(e, ig); - assert(ig[u].type == RIV_LITERAL); - assert(!ig[u].delay); - preds.insert(ig[u].s); - - RoseInVertex v = target(e, ig); - assert(ig[v].type == RIV_LITERAL); - assert(!ig[v].delay); - succs.insert(ig[v].s); - - if (ig[e].graph_lag) { - /* already removed redundant parts of literals */ - return; - } - - assert(!ig[e].dfa); - } - - map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */ - - for (const ue2_literal &right : succs) { - size_t max_overlap = 0; - for (const ue2_literal &left : preds) { - size_t overlap = maxOverlap(left, right, 0); - ENSURE_AT_LEAST(&max_overlap, overlap); - } - - u32 max_allowed_delay = right.length() - max_overlap; - - if (cc.streaming) { - LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); - } - - if (!max_allowed_delay) { - continue; - } - - shared_ptr<NGHolder> h_new = cloneHolder(h); - - u32 delay = removeTrailingLiteralStates(*h_new, right, - max_allowed_delay); - - if (delay == MO_INVALID_IDX) { - /* successor literal could not match infix -> ignore false path */ - assert(0); - continue; - } - - if (!delay) { - /* unable to trim graph --> no point swapping to new holder */ - continue; - } - - assert(isCorrectlyTopped(*h_new)); - graphs[right] = make_pair(h_new, delay); - } - - for (const RoseInEdge &e : ee) { - RoseInVertex v = target(e, ig); - const ue2_literal &succ = ig[v].s; - if (!contains(graphs, succ)) { - 
continue; - } - - ig[e].graph = graphs[succ].first; - ig[e].graph_lag = graphs[succ].second; - - if (isStarCliche(*ig[e].graph)) { - DEBUG_PRINTF("is a X star!\n"); - ig[e].graph.reset(); - ig[e].graph_lag = 0; - } - } -} - -static -void removeRedundantLiteralsFromInfixes(RoseInGraph &g, - const CompileContext &cc) { - insertion_ordered_map<NGHolder *, vector<RoseInEdge>> infixes; - - for (const RoseInEdge &e : edges_range(g)) { - RoseInVertex s = source(e, g); - RoseInVertex t = target(e, g); - - if (g[s].type != RIV_LITERAL || g[t].type != RIV_LITERAL) { - continue; - } - - if (!g[e].graph) { - continue; - } - - assert(!g[t].delay); - if (g[e].dfa) { - /* if we removed any more states, we would need to rebuild the - * the dfa which can be time consuming. */ - continue; - } - - NGHolder *h = g[e].graph.get(); - infixes[h].push_back(e); - } - - for (const auto &m : infixes) { - NGHolder *h = m.first; - const auto &edges = m.second; - removeRedundantLiteralsFromInfix(*h, g, edges, cc); - } -} - -static -void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) { - removeRedundantLiteralsFromPrefixes(g, cc); - removeRedundantLiteralsFromInfixes(g, cc); -} - -static -RoseInVertex getStart(RoseInGraph &vg) { - for (RoseInVertex v : vertices_range(vg)) { - if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) { - return v; - } - } - assert(0); - return RoseInGraph::null_vertex(); -} - -/** - * Finds the initial accept vertex created to which suffix/outfixes are - * attached. 
- */ -static -RoseInVertex getPrimaryAccept(RoseInGraph &vg) { - for (RoseInVertex v : vertices_range(vg)) { - if (vg[v].type == RIV_ACCEPT && vg[v].reports.empty()) { - return v; - } - } - assert(0); - return RoseInGraph::null_vertex(); -} - -static -bool willBeTransient(const depth &max_depth, const CompileContext &cc) { - if (!cc.streaming) { - return max_depth <= depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH); - } else { - return max_depth <= depth(cc.grey.maxHistoryAvailable + 1); - } -} - -static -bool willBeAnchoredTable(const depth &max_depth, const Grey &grey) { - return max_depth <= depth(grey.maxAnchoredRegion); -} - -static -unique_ptr<NGHolder> make_chain(u32 count) { - assert(count); - - auto rv = std::make_unique<NGHolder>(NFA_INFIX); - - NGHolder &h = *rv; - - NFAVertex u = h.start; - for (u32 i = 0; i < count; i++) { - NFAVertex v = add_vertex(h); - h[v].char_reach = CharReach::dot(); - add_edge(u, v, h); - u = v; - } - h[u].reports.insert(0); - add_edge(u, h.accept, h); - - setTops(h); - - return rv; -} - -#define SHORT_TRIGGER_LEN 16 - -static -bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, - const vector<RoseInEdge> &ee, - const CompileContext &cc) { - /* check max width and literal lengths to see if possible */ - size_t min_lit = (size_t)~0ULL; - for (const RoseInEdge &e : ee) { - RoseInVertex v = target(e, vg); - LIMIT_TO_AT_MOST(&min_lit, vg[v].s.length()); - } - - if (min_lit <= SHORT_TRIGGER_LEN || min_lit >= UINT_MAX) { - return false; - } - - depth max_width = findMaxWidth(h); - - u32 delta = min_lit - SHORT_TRIGGER_LEN; - - if (!willBeTransient(max_width - depth(delta), cc) - && !willBeAnchoredTable(max_width - depth(delta), cc.grey)) { - return false; - } - - DEBUG_PRINTF("candidate for splitting long literal (len %zu)\n", min_lit); - DEBUG_PRINTF("delta = %u\n", delta); - - /* try split */ - map<RoseInVertex, shared_ptr<NGHolder> > graphs; - for (const RoseInEdge &e : ee) { - RoseInVertex v = target(e, vg); - - 
shared_ptr<NGHolder> h_new = cloneHolder(h); - - u32 delay = removeTrailingLiteralStates(*h_new, vg[v].s, delta); - - DEBUG_PRINTF("delay %u\n", delay); - - if (delay != delta) { - DEBUG_PRINTF("unable to trim literal\n"); - return false; - } - - if (in_degree(v, vg) != 1) { - DEBUG_PRINTF("complicated\n"); - return false; - } - - DEBUG_PRINTF("new mw = %u\n", (u32)findMaxWidth(*h_new)); - assert(willBeTransient(findMaxWidth(*h_new), cc) - || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey)); - - assert(isCorrectlyTopped(*h_new)); - graphs[v] = h_new; - } - - /* add .{repeats} from prefixes to long literals */ - for (const RoseInEdge &e : ee) { - RoseInVertex s = source(e, vg); - RoseInVertex t = target(e, vg); - - remove_edge(e, vg); - const ue2_literal &orig_lit = vg[t].s; - - ue2_literal lit(orig_lit.begin(), orig_lit.end() - delta); - - ue2_literal lit2(orig_lit.end() - delta, orig_lit.end()); - - assert(lit.length() + delta == orig_lit.length()); - - vg[t].s = lit2; - - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); - add_edge(s, v, RoseInEdgeProps(graphs[t], 0), vg); - add_edge(v, t, RoseInEdgeProps(make_chain(delta), 0), vg); - } - - DEBUG_PRINTF("success\n"); - /* TODO: alter split point to avoid pathological splits */ - return true; -} - -static -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, const vector<NFAVertex> &preds) { - assert(delay <= lit.length()); - assert(isCorrectlyTopped(g)); - DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); - - NFAVertex prev = g.accept; - auto it = lit.rbegin(); - while (delay--) { - NFAVertex curr = add_vertex(g); - assert(it != lit.rend()); - g[curr].char_reach = *it; - add_edge(curr, prev, g); - ++it; - prev = curr; - } - - for (auto v : preds) { - NFAEdge e = add_edge_if_not_present(v, prev, g); - if (v == g.start && is_triggered(g)) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - // Every predecessor of accept must have a report. 
- set_report(g, 0); - - renumber_vertices(g); - renumber_edges(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); -} - -static -void restoreTrailingLiteralStates(NGHolder &g, - const vector<pair<ue2_literal, u32>> &lits) { - vector<NFAVertex> preds; - insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); - clear_in_edges(g.accept, g); - - for (auto v : preds) { - g[v].reports.clear(); /* clear report from old accepts */ - } - - for (const auto &p : lits) { - const ue2_literal &lit = p.first; - u32 delay = p.second; - - restoreTrailingLiteralStates(g, lit, delay, preds); - } -} - -static -bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee, - const CompileContext &cc) { - DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h, - num_vertices(h)); - assert(isCorrectlyTopped(h)); - - renumber_vertices(h); - renumber_edges(h); - - auto depths = calcDepths(h); - - /* If the reason the prefix is not transient is due to a very long literal - * following, we can make it transient by restricting ourselves to using - * just the head of the literal. 
*/ - if (makeTransientFromLongLiteral(h, vg, ee, cc)) { - return true; - } - - auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc); - - if (split && (split->creates_transient || split->creates_anchored) - && splitRoseEdge(h, vg, ee, *split)) { - DEBUG_PRINTF("split on simple literal\n"); - return true; - } - - /* large back edges may prevent us identifing anchored or transient cases - * properly - use a simple walk instead */ - - if (doNetflowCut(h, &depths, vg, ee, true, cc.grey)) { - return true; - } - - if (split && splitRoseEdge(h, vg, ee, *split)) { - /* use the simple split even though it doesn't create a transient - * prefix */ - DEBUG_PRINTF("split on simple literal\n"); - return true; - } - - /* look for netflow cuts which don't produce good prefixes */ - if (doNetflowCut(h, &depths, vg, ee, false, cc.grey)) { - return true; - } - - if (ee.size() > 1) { - DEBUG_PRINTF("split the prefix apart based on succ literals\n"); - unordered_map<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> >, + } + } + + auto cmp = LitComparator(g, seeking_anchored, seeking_transient, + last_chance); + + unique_ptr<VertLitInfo> best = move(lits.back()); + lits.pop_back(); + while (!lits.empty()) { + if (cmp(best, lits.back())) { + best = move(lits.back()); + } + lits.pop_back(); + } + + DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", + dumpString(*best->lit.begin()).c_str(), + g[best->vv.front()].index, + depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0, + depths ? 
(int)createsTransientLHS(g, best->vv, *depths, cc.grey) : 0); + + return best; +} + +static +void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, + bool overhang_ok, flat_set<NFAEdge> &bad) { + DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n", + num_vertices(h), succ.length()); + + using EdgeSet = boost::dynamic_bitset<>; + + const size_t edge_count = num_edges(h); + EdgeSet bad_edges(edge_count); + + unordered_map<NFAVertex, EdgeSet> curr; + for (const auto &e : in_edges_range(h.accept, h)) { + auto &path_set = curr[source(e, h)]; + if (path_set.empty()) { + path_set.resize(edge_count); + } + path_set.set(h[e].index); + } + + unordered_map<NFAVertex, EdgeSet> next; + for (auto it = succ.rbegin(); it != succ.rend(); ++it) { + for (const auto &path : curr) { + NFAVertex u = path.first; + const auto &path_set = path.second; + if (u == h.start && overhang_ok) { + DEBUG_PRINTF("poisoning early %zu [overhang]\n", + path_set.count()); + bad_edges |= path_set; + continue; + } + if (overlaps(h[u].char_reach, *it)) { + for (const auto &e : in_edges_range(u, h)) { + auto &new_path_set = next[source(e, h)]; + if (new_path_set.empty()) { + new_path_set.resize(edge_count); + } + new_path_set |= path_set; + new_path_set.set(h[e].index); + } + } + } + DEBUG_PRINTF("succ char matches at %zu paths\n", next.size()); + assert(overhang_ok || !curr.empty()); + swap(curr, next); + next.clear(); + } + + assert(overhang_ok || !curr.empty()); + for (const auto &path : curr) { + bad_edges |= path.second; + DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count()); + } + + for (const auto &e : edges_range(h)) { + if (bad_edges.test(h[e].index)) { + bad.insert(e); + } + } +} + +static +void poisonForGoodPrefix(const NGHolder &h, + const vector<NFAVertexDepth> &depths, + flat_set<NFAEdge> &bad, const Grey &grey) { + for (const auto &v : vertices_range(h)) { + if (!createsAnchoredLHS(h, {v}, depths, grey) + && !createsTransientLHS(h, {v}, depths, grey)) { + 
insert(&bad, in_edges_range(v, h)); + } + } +} + +static UNUSED +bool is_any_accept_type(RoseInVertexType t) { + return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD; +} + +static +flat_set<NFAEdge> poisonEdges(const NGHolder &h, + const vector<NFAVertexDepth> *depths, + const RoseInGraph &vg, const vector<RoseInEdge> &ee, + bool for_prefix, const Grey &grey) { + DEBUG_PRINTF("poisoning edges %zu successor edges\n", ee.size()); + + /* poison edges covered by successor literal */ + + set<pair<ue2_literal, bool> > succs; + for (const RoseInEdge &ve : ee) { + if (vg[target(ve, vg)].type != RIV_LITERAL) { + /* nothing to poison in suffixes/outfixes */ + assert(generates_callbacks(h)); + assert(is_any_accept_type(vg[target(ve, vg)].type)); + continue; + } + succs.insert({vg[target(ve, vg)].s, + vg[source(ve, vg)].type == RIV_LITERAL}); + + } + + DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size()); + + flat_set<NFAEdge> bad; + for (const auto &p : succs) { + poisonFromSuccessor(h, p.first, p.second, bad); + } + + /* poison edges which don't significantly improve a prefix */ + + if (for_prefix) { + poisonForGoodPrefix(h, *depths, bad, grey); + } + + return bad; +} + +static +set<NFAVertex> poisonVertices(const NGHolder &h, const RoseInGraph &vg, + const vector<RoseInEdge> &ee, const Grey &grey) { + flat_set<NFAEdge> bad_edges = poisonEdges(h, nullptr, vg, ee, false, grey); + set<NFAVertex> bad_vertices; + for (const NFAEdge &e : bad_edges) { + bad_vertices.insert(target(e, h)); + DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index, + h[target(e, h)].index); + } + + return bad_vertices; +} + +static +unique_ptr<VertLitInfo> findBestNormalSplit(const NGHolder &g, + const RoseInGraph &vg, + const vector<RoseInEdge> &ee, + const CompileContext &cc) { + assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); + set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); + + return findBestSplit(g, nullptr, false, 
cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, false, cc); +} + +static +unique_ptr<VertLitInfo> findBestLastChanceSplit(const NGHolder &g, + const RoseInGraph &vg, + const vector<RoseInEdge> &ee, + const CompileContext &cc) { + assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); + set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); + + return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, true, cc); +} + +static +unique_ptr<VertLitInfo> findSimplePrefixSplit(const NGHolder &g, + const CompileContext &cc) { + DEBUG_PRINTF("looking for simple prefix split\n"); + bool anchored = !proper_out_degree(g.startDs, g); + NFAVertex u = anchored ? g.start : g.startDs; + + if (out_degree(u, g) != 2) { /* startDs + succ */ + return nullptr; + } + + NFAVertex v = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(u, g)) { + if (t != g.startDs) { + assert(!v); + v = t; + } + } + assert(v); + + if (!anchored) { + if (out_degree(g.start, g) > 2) { + return nullptr; + } + if (out_degree(g.start, g) == 2 && !edge(g.start, v, g).second) { + return nullptr; + } + } + + NFAVertex best_v = NGHolder::null_vertex(); + ue2_literal best_lit; + + u32 limit = cc.grey.maxHistoryAvailable; + if (anchored) { + LIMIT_TO_AT_MOST(&limit, cc.grey.maxAnchoredRegion); + } + + ue2_literal curr_lit; + for (u32 i = 0; i < limit; i++) { + const auto &v_cr = g[v].char_reach; + if (v_cr.count() == 1 || v_cr.isCaselessChar()) { + curr_lit.push_back(v_cr.find_first(), v_cr.isCaselessChar()); + } else { + curr_lit.clear(); + } + + if (curr_lit.length() > best_lit.length()) { + best_lit = curr_lit; + best_v = v; + } + + if (out_degree(v, g) != 1) { + break; + } + v = *adjacent_vertices(v, g).first; + } + + if (best_lit.length() < cc.grey.minRoseLiteralLength) { + return nullptr; + } + + set<ue2_literal> best_lit_set({best_lit}); + if (bad_mixed_sensitivity(best_lit)) { + 
sanitizeAndCompressAndScore(best_lit_set); + } + + return ue2::make_unique<VertLitInfo>(best_v, best_lit_set, anchored, true); +} + +static +unique_ptr<VertLitInfo> findBestPrefixSplit(const NGHolder &g, + const vector<NFAVertexDepth> &depths, + const RoseInGraph &vg, + const vector<RoseInEdge> &ee, + bool last_chance, + const CompileContext &cc) { + assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX); + set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); + auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, last_chance, cc); + + /* large back edges may prevent us identifying anchored or transient cases + * properly - use a simple walk instead */ + if (!rv || !(rv->creates_transient || rv->creates_anchored)) { + auto rv2 = findSimplePrefixSplit(g, cc); + if (rv2) { + return rv2; + } + } + + return rv; +} + +static +unique_ptr<VertLitInfo> findBestCleanSplit(const NGHolder &g, + const CompileContext &cc) { + assert(g.kind != NFA_PREFIX); + set<NFAVertex> cleanSplits; + for (NFAVertex v : vertices_range(g)) { + if (!g[v].char_reach.all() || !edge(v, v, g).second) { + continue; + } + insert(&cleanSplits, inv_adjacent_vertices(v, g)); + cleanSplits.erase(v); + } + cleanSplits.erase(g.start); + if (cleanSplits.empty()) { + return nullptr; + } + return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen, + &cleanSplits, nullptr, false, cc); +} + +static +bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { + set<NFAVertex> curr, next; + curr.insert(g.accept); + + for (auto it = lit.rbegin(); it != lit.rend(); ++it) { + next.clear(); + + for (auto v : curr) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start) { + if (overhang_ok) { + DEBUG_PRINTF("bail\n"); + return true; + } else { + continue; /* it is not possible for a lhs literal to + * overhang the start */ + } + } + + const CharReach &cr = g[u].char_reach; + if (!overlaps(*it, cr)) { + 
continue; + } + + next.insert(u); + } + } + + curr.swap(next); + } + + return !curr.empty(); +} + +static +bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, + const vector<RoseInEdge> &ee, const VertLitInfo &split) { + const vector<NFAVertex> &splitters = split.vv; + assert(!splitters.empty()); + + shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); + shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); + + unordered_map<NFAVertex, NFAVertex> lhs_map; + unordered_map<NFAVertex, NFAVertex> rhs_map; + + splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map); + DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n", + to_string(base_graph.kind).c_str(), num_vertices(base_graph), + to_string(lhs->kind).c_str(), num_vertices(*lhs), + to_string(rhs->kind).c_str(), num_vertices(*rhs)); + + bool suffix = generates_callbacks(base_graph); + + if (is_triggered(base_graph)) { + /* if we are already guarded, check if the split reduces the size of + * the problem before continuing with the split */ + if (num_vertices(*lhs) >= num_vertices(base_graph) + && !(suffix && isVacuous(*rhs))) { + DEBUG_PRINTF("split's lhs is no smaller\n"); + return false; + } + + if (num_vertices(*rhs) >= num_vertices(base_graph)) { + DEBUG_PRINTF("split's rhs is no smaller\n"); + return false; + } + } + + bool do_accept = false; + bool do_accept_eod = false; + assert(rhs); + if (isVacuous(*rhs) && suffix) { + if (edge(rhs->start, rhs->accept, *rhs).second) { + DEBUG_PRINTF("rhs has a cliche\n"); + do_accept = true; + remove_edge(rhs->start, rhs->accept, *rhs); + } + + if (edge(rhs->start, rhs->acceptEod, *rhs).second) { + DEBUG_PRINTF("rhs has an eod cliche\n"); + do_accept_eod = true; + remove_edge(rhs->start, rhs->acceptEod, *rhs); + } + + renumber_edges(*rhs); + } + + /* check if we still have a useful graph left over */ + bool do_norm = out_degree(rhs->start, *rhs) != 1; + + set<ReportID> splitter_reports; + for (auto v : splitters) { + insert(&splitter_reports, 
base_graph[v].reports); + } + + /* find the targets of each source vertex; insertion_ordered_map used to + * preserve deterministic ordering */ + insertion_ordered_map<RoseInVertex, vector<RoseInVertex>> images; + for (const RoseInEdge &e : ee) { + RoseInVertex src = source(e, vg); + RoseInVertex dest = target(e, vg); + images[src].push_back(dest); + remove_edge(e, vg); + } + + map<vector<RoseInVertex>, vector<RoseInVertex>> verts_by_image; + + for (const auto &m : images) { + const auto &u = m.first; + const auto &image = m.second; + + if (contains(verts_by_image, image)) { + for (RoseInVertex v : verts_by_image[image]) { + add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); + } + continue; + } + + for (const auto &lit : split.lit) { + assert(!bad_mixed_sensitivity(lit)); + + /* don't allow overhang in can_match() as literals should + * correspond to the edge graph being split; overhanging the graph + * would indicate a false path.*/ + if (!can_match(*lhs, lit, false)) { + DEBUG_PRINTF("'%s' did not match lhs\n", + escapeString(lit).c_str()); + continue; + } + + DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); + auto v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); + + /* work out delay later */ + if (do_accept) { + DEBUG_PRINTF("rhs has a cliche\n"); + auto tt = add_vertex(RoseInVertexProps::makeAccept( + splitter_reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + + if (do_accept_eod) { + DEBUG_PRINTF("rhs has an eod cliche\n"); + auto tt = add_vertex(RoseInVertexProps::makeAcceptEod( + splitter_reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + + if (do_norm) { + assert(out_degree(rhs->start, *rhs) > 1); + for (RoseInVertex dest : image) { + add_edge(v, dest, RoseInEdgeProps(rhs, 0U), vg); + } + } + verts_by_image[image].push_back(v); + } + } + + assert(hasCorrectlyNumberedVertices(*rhs)); + assert(hasCorrectlyNumberedEdges(*rhs)); + assert(isCorrectlyTopped(*rhs)); + 
assert(hasCorrectlyNumberedVertices(*lhs)); + assert(hasCorrectlyNumberedEdges(*lhs)); + assert(isCorrectlyTopped(*lhs)); + + return true; +} + +#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */ +#define MAX_LEN_2_LITERALS_PER_CUT 3 + +static +bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores, + const map<NFAEdge, set<ue2_literal>> &cut_lits, + u32 min_allowed_length) { + DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(), + min_allowed_length); + if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { + return false; + } + + u32 len_2_count = 0; + + for (const auto &cut : cut_lits) { + if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { + DEBUG_PRINTF("cut uses a forbidden edge\n"); + return false; + } + + if (min_len(cut.second) < min_allowed_length) { + DEBUG_PRINTF("cut uses a bad literal\n"); + return false; + } + + for (const auto &lit : cut.second) { + if (lit.length() == 2) { + len_2_count++; + } + } + } + + if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { + return false; + } + + return true; +} + +static +void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, + const vector<RoseInEdge> &to_cut, + const vector<NFAEdge> &cut, + const map<NFAEdge, set<ue2_literal>> &cut_lits) { + DEBUG_PRINTF("splitting %s (%zu vertices)\n", to_string(h.kind).c_str(), + num_vertices(h)); + + /* create literal vertices and connect preds */ + unordered_set<RoseInVertex> done_sources; + map<RoseInVertex, vector<pair<RoseInVertex, NFAVertex>>> verts_by_source; + for (const RoseInEdge &ve : to_cut) { + assert(&h == &*vg[ve].graph); + RoseInVertex src = source(ve, vg); + if (!done_sources.insert(src).second) { + continue; /* already processed */ + } + + /* iterate over cut for determinism */ + for (const auto &e : cut) { + NFAVertex prev_v = source(e, h); + NFAVertex pivot = target(e, h); + + DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); + unordered_map<NFAVertex, NFAVertex> temp_map; + shared_ptr<NGHolder> new_lhs = 
make_shared<NGHolder>(); + splitLHS(h, pivot, new_lhs.get(), &temp_map); + + /* want to cut off paths to pivot from things other than the pivot - + * makes a more svelte graphy */ + clear_in_edges(temp_map[pivot], *new_lhs); + NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], + *new_lhs); + if (is_triggered(h) && prev_v == h.start) { + (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); + } + + pruneUseless(*new_lhs, false); + renumber_vertices(*new_lhs); + renumber_edges(*new_lhs); + + DEBUG_PRINTF(" into lhs %s (%zu vertices)\n", + to_string(new_lhs->kind).c_str(), + num_vertices(*new_lhs)); + + assert(hasCorrectlyNumberedVertices(*new_lhs)); + assert(hasCorrectlyNumberedEdges(*new_lhs)); + assert(isCorrectlyTopped(*new_lhs)); + + const set<ue2_literal> &lits = cut_lits.at(e); + for (const auto &lit : lits) { + if (!can_match(*new_lhs, lit, is_triggered(h))) { + continue; + } + + RoseInVertex v + = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + + /* if this is a prefix/infix an edge directly to accept should + * represent a false path as we have poisoned vertices covered + * by the literals. 
*/ + if (generates_callbacks(h)) { + if (edge(pivot, h.accept, h).second) { + DEBUG_PRINTF("adding acceptEod\n"); + /* literal has a direct connection to accept */ + const flat_set<ReportID> &reports = h[pivot].reports; + auto tt = add_vertex( + RoseInVertexProps::makeAccept(reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + + if (edge(pivot, h.acceptEod, h).second) { + assert(generates_callbacks(h)); + DEBUG_PRINTF("adding acceptEod\n"); + /* literal has a direct connection to accept */ + const flat_set<ReportID> &reports = h[pivot].reports; + auto tt = add_vertex( + RoseInVertexProps::makeAcceptEod(reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + } + + add_edge(src, v, RoseInEdgeProps(new_lhs, 0), vg); + verts_by_source[src].push_back({v, pivot}); + } + } + } + + /* wire the literal vertices up to successors */ + map<vector<NFAVertex>, shared_ptr<NGHolder> > done_rhs; + for (const RoseInEdge &ve : to_cut) { + RoseInVertex src = source(ve, vg); + RoseInVertex dest = target(ve, vg); + + /* iterate over cut for determinism */ + for (const auto &elem : verts_by_source[src]) { + NFAVertex pivot = elem.second; + RoseInVertex v = elem.first; + + vector<NFAVertex> adj; + insert(&adj, adj.end(), adjacent_vertices(pivot, h)); + /* we can ignore presence of accept, accepteod in adj as it is best + effort */ + + if (!contains(done_rhs, adj)) { + unordered_map<NFAVertex, NFAVertex> temp_map; + shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>(); + splitRHS(h, adj, new_rhs.get(), &temp_map); + remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); + remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); + renumber_edges(*new_rhs); + DEBUG_PRINTF(" into rhs %s (%zu vertices)\n", + to_string(new_rhs->kind).c_str(), + num_vertices(*new_rhs)); + done_rhs.emplace(adj, new_rhs); + assert(isCorrectlyTopped(*new_rhs)); + } + + assert(done_rhs[adj].get()); + shared_ptr<NGHolder> new_rhs = done_rhs[adj]; + + 
assert(hasCorrectlyNumberedVertices(*new_rhs)); + assert(hasCorrectlyNumberedEdges(*new_rhs)); + assert(isCorrectlyTopped(*new_rhs)); + + if (vg[dest].type == RIV_LITERAL + && !can_match(*new_rhs, vg[dest].s, true)) { + continue; + } + + if (out_degree(new_rhs->start, *new_rhs) != 1) { + add_edge(v, dest, RoseInEdgeProps(new_rhs, 0), vg); + } + } + + remove_edge(ve, vg); + } +} + +static +bool doNetflowCut(NGHolder &h, + const vector<NFAVertexDepth> *depths, + RoseInGraph &vg, + const vector<RoseInEdge> &ee, bool for_prefix, + const Grey &grey, u32 min_allowed_length = 0U) { + ENSURE_AT_LEAST(&min_allowed_length, grey.minRoseNetflowLiteralLength); + + DEBUG_PRINTF("doing netflow cut\n"); + /* TODO: we should really get literals/scores from the full graph as this + * allows us to overlap with previous cuts. */ + assert(!ee.empty()); + assert(&h == &*vg[ee.front()].graph); + assert(!for_prefix || depths); + + if (num_edges(h) > grey.maxRoseNetflowEdges) { + /* We have a limit on this because scoring edges and running netflow + * gets very slow for big graphs. 
*/ + DEBUG_PRINTF("too many edges, skipping netflow cut\n"); + return false; + } + + assert(hasCorrectlyNumberedVertices(h)); + assert(hasCorrectlyNumberedEdges(h)); + + auto known_bad = poisonEdges(h, depths, vg, ee, for_prefix, grey); + + /* Step 1: Get scores for all edges */ + vector<u64a> scores = scoreEdges(h, known_bad); /* scores by edge_index */ + + /* Step 2: Find cutset based on scores */ + vector<NFAEdge> cut = findMinCut(h, scores); + + /* Step 3: Get literals corresponding to cut edges */ + map<NFAEdge, set<ue2_literal>> cut_lits; + for (const auto &e : cut) { + set<ue2_literal> lits = getLiteralSet(h, e); + sanitizeAndCompressAndScore(lits); + + cut_lits[e] = lits; + } + + /* if literals are underlength bail or if it involves a forbidden edge*/ + if (!checkValidNetflowLits(h, scores, cut_lits, min_allowed_length)) { + return false; + } + DEBUG_PRINTF("splitting\n"); + + /* Step 4: Split graph based on cuts */ + splitEdgesByCut(h, vg, ee, cut, cut_lits); + + return true; +} + +static +bool deanchorIfNeeded(NGHolder &g) { + DEBUG_PRINTF("hi\n"); + if (proper_out_degree(g.startDs, g)) { + return false; + } + + /* look for a non-special dot with a loop following start */ + set<NFAVertex> succ_g; + insert(&succ_g, adjacent_vertices(g.start, g)); + succ_g.erase(g.startDs); + + for (auto v : adjacent_vertices_range(g.start, g)) { + DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index, + g[v].char_reach.count()); + + if (v == g.startDs || !g[v].char_reach.all()) { + continue; + } + + set<NFAVertex> succ_v; + insert(&succ_v, adjacent_vertices(v, g)); + + if (succ_v == succ_g) { + DEBUG_PRINTF("found ^.*\n"); + for (auto succ : adjacent_vertices_range(g.start, g)) { + if (succ == g.startDs) { + continue; + } + add_edge(g.startDs, succ, g); + } + clear_vertex(v, g); + remove_vertex(v, g); + renumber_vertices(g); + return true; + } + + if (succ_g.size() == 1 && hasSelfLoop(v, g)) { + DEBUG_PRINTF("found ^.+\n"); + add_edge(g.startDs, v, g); + remove_edge(v, 
v, g); + return true; + } + } + + return false; +} + +static +RoseInGraph populateTrivialGraph(const NGHolder &h) { + RoseInGraph g; + shared_ptr<NGHolder> root_g = cloneHolder(h); + bool orig_anch = isAnchored(*root_g); + orig_anch |= deanchorIfNeeded(*root_g); + + DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); + + auto start = add_vertex(RoseInVertexProps::makeStart(orig_anch), g); + auto accept = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g); + + add_edge(start, accept, RoseInEdgeProps(root_g, 0), g); + + return g; +} + +static +void avoidOutfixes(RoseInGraph &vg, bool last_chance, + const CompileContext &cc) { + STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n"); + assert(num_vertices(vg) == 2); + assert(num_edges(vg) == 1); + + RoseInEdge e = *edges(vg).first; + + NGHolder &h = *vg[e].graph; + assert(isCorrectlyTopped(h)); + + renumber_vertices(h); + renumber_edges(h); + + unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, {e}, cc); + + if (split && splitRoseEdge(h, vg, {e}, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return; + } + + if (last_chance) { + /* look for a prefix split as it allows us to accept very weak anchored + * literals. 
*/ + auto depths = calcDepths(h); + + split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc); + + if (split && splitRoseEdge(h, vg, {e}, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return; + } + } + + doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); +} + +static +void removeRedundantPrefixes(RoseInGraph &g) { + STAGE_DEBUG_PRINTF("REMOVING REDUNDANT PREFIXES\n"); + + for (const RoseInEdge &e : edges_range(g)) { + RoseInVertex s = source(e, g); + RoseInVertex t = target(e, g); + + if (g[s].type != RIV_START || g[t].type != RIV_LITERAL) { + continue; + } + + if (!g[e].graph) { + continue; + } + + assert(!g[t].delay); + const ue2_literal &lit = g[t].s; + + if (!literalIsWholeGraph(*g[e].graph, lit)) { + DEBUG_PRINTF("not whole graph\n"); + continue; + } + + if (!isFloating(*g[e].graph)) { + DEBUG_PRINTF("not floating\n"); + continue; + } + g[e].graph.reset(); + } +} + +static +u32 maxDelay(const CompileContext &cc) { + if (!cc.streaming) { + return MO_INVALID_IDX; + } + return cc.grey.maxHistoryAvailable; +} + +static +void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, + const CompileContext &cc) { + STAGE_DEBUG_PRINTF("REMOVING LITERALS FROM PREFIXES\n"); + + vector<RoseInEdge> to_anchor; + for (const RoseInEdge &e : edges_range(g)) { + RoseInVertex s = source(e, g); + RoseInVertex t = target(e, g); + + if (g[s].type != RIV_START && g[s].type != RIV_ANCHORED_START) { + continue; + } + + if (g[t].type != RIV_LITERAL) { + continue; + } + + if (!g[e].graph) { + continue; + } + + if (g[e].graph_lag) { + /* already removed redundant parts of literals */ + continue; + } + + if (g[e].dfa) { + /* if we removed any more states, we would need to rebuild the + * the dfa which can be time consuming. 
*/ + continue; + } + + assert(!g[t].delay); + const ue2_literal &lit = g[t].s; + + DEBUG_PRINTF("removing states for literal: %s\n", + dumpString(lit).c_str()); + + unique_ptr<NGHolder> h = cloneHolder(*g[e].graph); + const u32 max_delay = maxDelay(cc); + + u32 delay = removeTrailingLiteralStates(*h, lit, max_delay, + false /* can't overhang start */); + + DEBUG_PRINTF("got delay %u (max allowed %u)\n", delay, max_delay); + + if (edge(h->startDs, h->accept, *h).second) { + /* we should have delay == lit.length(), but in really complex + * cases we may fail to identify that we can remove the whole + * graph. Regardless, the fact that sds is wired to accept means the + * graph serves no purpose. */ + DEBUG_PRINTF("whole graph\n"); + g[e].graph.reset(); + continue; + } + + if (delay == lit.length() && edge(h->start, h->accept, *h).second + && num_vertices(*h) == N_SPECIALS) { + to_anchor.push_back(e); + continue; + } + + /* if we got here we should still have an interesting graph */ + assert(delay == max_delay || num_vertices(*h) > N_SPECIALS); + + if (delay && delay != MO_INVALID_IDX) { + DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); + + g[e].graph = move(h); + g[e].graph_lag = delay; + } + } + + if (!to_anchor.empty()) { + RoseInVertex anch = add_vertex(RoseInVertexProps::makeStart(true), g); + + for (RoseInEdge e : to_anchor) { + DEBUG_PRINTF("rehoming to anchor\n"); + RoseInVertex v = target(e, g); + add_edge(anch, v, g); + remove_edge(e, g); + } + } +} + +static +bool isStarCliche(const NGHolder &g) { + DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); + + bool nonspecials_seen = false; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + if (nonspecials_seen) { + return false; + } + nonspecials_seen = true; + + if (!g[v].char_reach.all()) { + return false; + } + + if (!hasSelfLoop(v, g)) { + return false; + } + if (!edge(v, g.accept, g).second) { + return false; + } + } + + if (!nonspecials_seen) 
{ + return false; + } + + if (!edge(g.start, g.accept, g).second) { + return false; + } + + return true; +} + +static +void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, + const vector<RoseInEdge> &ee, + const CompileContext &cc) { + /* TODO: This could be better by not creating a separate graph for each + * successor literal. This would require using distinct report ids and also + * taking into account overlap of successor literals. */ + + set<ue2_literal> preds; + set<ue2_literal> succs; + for (const RoseInEdge &e : ee) { + RoseInVertex u = source(e, ig); + assert(ig[u].type == RIV_LITERAL); + assert(!ig[u].delay); + preds.insert(ig[u].s); + + RoseInVertex v = target(e, ig); + assert(ig[v].type == RIV_LITERAL); + assert(!ig[v].delay); + succs.insert(ig[v].s); + + if (ig[e].graph_lag) { + /* already removed redundant parts of literals */ + return; + } + + assert(!ig[e].dfa); + } + + map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */ + + for (const ue2_literal &right : succs) { + size_t max_overlap = 0; + for (const ue2_literal &left : preds) { + size_t overlap = maxOverlap(left, right, 0); + ENSURE_AT_LEAST(&max_overlap, overlap); + } + + u32 max_allowed_delay = right.length() - max_overlap; + + if (cc.streaming) { + LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); + } + + if (!max_allowed_delay) { + continue; + } + + shared_ptr<NGHolder> h_new = cloneHolder(h); + + u32 delay = removeTrailingLiteralStates(*h_new, right, + max_allowed_delay); + + if (delay == MO_INVALID_IDX) { + /* successor literal could not match infix -> ignore false path */ + assert(0); + continue; + } + + if (!delay) { + /* unable to trim graph --> no point swapping to new holder */ + continue; + } + + assert(isCorrectlyTopped(*h_new)); + graphs[right] = make_pair(h_new, delay); + } + + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, ig); + const ue2_literal &succ = ig[v].s; + if (!contains(graphs, succ)) { + 
continue; + } + + ig[e].graph = graphs[succ].first; + ig[e].graph_lag = graphs[succ].second; + + if (isStarCliche(*ig[e].graph)) { + DEBUG_PRINTF("is a X star!\n"); + ig[e].graph.reset(); + ig[e].graph_lag = 0; + } + } +} + +static +void removeRedundantLiteralsFromInfixes(RoseInGraph &g, + const CompileContext &cc) { + insertion_ordered_map<NGHolder *, vector<RoseInEdge>> infixes; + + for (const RoseInEdge &e : edges_range(g)) { + RoseInVertex s = source(e, g); + RoseInVertex t = target(e, g); + + if (g[s].type != RIV_LITERAL || g[t].type != RIV_LITERAL) { + continue; + } + + if (!g[e].graph) { + continue; + } + + assert(!g[t].delay); + if (g[e].dfa) { + /* if we removed any more states, we would need to rebuild the + * the dfa which can be time consuming. */ + continue; + } + + NGHolder *h = g[e].graph.get(); + infixes[h].push_back(e); + } + + for (const auto &m : infixes) { + NGHolder *h = m.first; + const auto &edges = m.second; + removeRedundantLiteralsFromInfix(*h, g, edges, cc); + } +} + +static +void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) { + removeRedundantLiteralsFromPrefixes(g, cc); + removeRedundantLiteralsFromInfixes(g, cc); +} + +static +RoseInVertex getStart(RoseInGraph &vg) { + for (RoseInVertex v : vertices_range(vg)) { + if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) { + return v; + } + } + assert(0); + return RoseInGraph::null_vertex(); +} + +/** + * Finds the initial accept vertex created to which suffix/outfixes are + * attached. 
+ */ +static +RoseInVertex getPrimaryAccept(RoseInGraph &vg) { + for (RoseInVertex v : vertices_range(vg)) { + if (vg[v].type == RIV_ACCEPT && vg[v].reports.empty()) { + return v; + } + } + assert(0); + return RoseInGraph::null_vertex(); +} + +static +bool willBeTransient(const depth &max_depth, const CompileContext &cc) { + if (!cc.streaming) { + return max_depth <= depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH); + } else { + return max_depth <= depth(cc.grey.maxHistoryAvailable + 1); + } +} + +static +bool willBeAnchoredTable(const depth &max_depth, const Grey &grey) { + return max_depth <= depth(grey.maxAnchoredRegion); +} + +static +unique_ptr<NGHolder> make_chain(u32 count) { + assert(count); + + auto rv = std::make_unique<NGHolder>(NFA_INFIX); + + NGHolder &h = *rv; + + NFAVertex u = h.start; + for (u32 i = 0; i < count; i++) { + NFAVertex v = add_vertex(h); + h[v].char_reach = CharReach::dot(); + add_edge(u, v, h); + u = v; + } + h[u].reports.insert(0); + add_edge(u, h.accept, h); + + setTops(h); + + return rv; +} + +#define SHORT_TRIGGER_LEN 16 + +static +bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, + const vector<RoseInEdge> &ee, + const CompileContext &cc) { + /* check max width and literal lengths to see if possible */ + size_t min_lit = (size_t)~0ULL; + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, vg); + LIMIT_TO_AT_MOST(&min_lit, vg[v].s.length()); + } + + if (min_lit <= SHORT_TRIGGER_LEN || min_lit >= UINT_MAX) { + return false; + } + + depth max_width = findMaxWidth(h); + + u32 delta = min_lit - SHORT_TRIGGER_LEN; + + if (!willBeTransient(max_width - depth(delta), cc) + && !willBeAnchoredTable(max_width - depth(delta), cc.grey)) { + return false; + } + + DEBUG_PRINTF("candidate for splitting long literal (len %zu)\n", min_lit); + DEBUG_PRINTF("delta = %u\n", delta); + + /* try split */ + map<RoseInVertex, shared_ptr<NGHolder> > graphs; + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, vg); + + 
shared_ptr<NGHolder> h_new = cloneHolder(h); + + u32 delay = removeTrailingLiteralStates(*h_new, vg[v].s, delta); + + DEBUG_PRINTF("delay %u\n", delay); + + if (delay != delta) { + DEBUG_PRINTF("unable to trim literal\n"); + return false; + } + + if (in_degree(v, vg) != 1) { + DEBUG_PRINTF("complicated\n"); + return false; + } + + DEBUG_PRINTF("new mw = %u\n", (u32)findMaxWidth(*h_new)); + assert(willBeTransient(findMaxWidth(*h_new), cc) + || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey)); + + assert(isCorrectlyTopped(*h_new)); + graphs[v] = h_new; + } + + /* add .{repeats} from prefixes to long literals */ + for (const RoseInEdge &e : ee) { + RoseInVertex s = source(e, vg); + RoseInVertex t = target(e, vg); + + remove_edge(e, vg); + const ue2_literal &orig_lit = vg[t].s; + + ue2_literal lit(orig_lit.begin(), orig_lit.end() - delta); + + ue2_literal lit2(orig_lit.end() - delta, orig_lit.end()); + + assert(lit.length() + delta == orig_lit.length()); + + vg[t].s = lit2; + + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + add_edge(s, v, RoseInEdgeProps(graphs[t], 0), vg); + add_edge(v, t, RoseInEdgeProps(make_chain(delta), 0), vg); + } + + DEBUG_PRINTF("success\n"); + /* TODO: alter split point to avoid pathological splits */ + return true; +} + +static +void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 delay, const vector<NFAVertex> &preds) { + assert(delay <= lit.length()); + assert(isCorrectlyTopped(g)); + DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); + + NFAVertex prev = g.accept; + auto it = lit.rbegin(); + while (delay--) { + NFAVertex curr = add_vertex(g); + assert(it != lit.rend()); + g[curr].char_reach = *it; + add_edge(curr, prev, g); + ++it; + prev = curr; + } + + for (auto v : preds) { + NFAEdge e = add_edge_if_not_present(v, prev, g); + if (v == g.start && is_triggered(g)) { + g[e].tops.insert(DEFAULT_TOP); + } + } + + // Every predecessor of accept must have a report. 
+ set_report(g, 0); + + renumber_vertices(g); + renumber_edges(g); + assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); +} + +static +void restoreTrailingLiteralStates(NGHolder &g, + const vector<pair<ue2_literal, u32>> &lits) { + vector<NFAVertex> preds; + insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); + clear_in_edges(g.accept, g); + + for (auto v : preds) { + g[v].reports.clear(); /* clear report from old accepts */ + } + + for (const auto &p : lits) { + const ue2_literal &lit = p.first; + u32 delay = p.second; + + restoreTrailingLiteralStates(g, lit, delay, preds); + } +} + +static +bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee, + const CompileContext &cc) { + DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h, + num_vertices(h)); + assert(isCorrectlyTopped(h)); + + renumber_vertices(h); + renumber_edges(h); + + auto depths = calcDepths(h); + + /* If the reason the prefix is not transient is due to a very long literal + * following, we can make it transient by restricting ourselves to using + * just the head of the literal. 
*/ + if (makeTransientFromLongLiteral(h, vg, ee, cc)) { + return true; + } + + auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc); + + if (split && (split->creates_transient || split->creates_anchored) + && splitRoseEdge(h, vg, ee, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + /* large back edges may prevent us identifing anchored or transient cases + * properly - use a simple walk instead */ + + if (doNetflowCut(h, &depths, vg, ee, true, cc.grey)) { + return true; + } + + if (split && splitRoseEdge(h, vg, ee, *split)) { + /* use the simple split even though it doesn't create a transient + * prefix */ + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + /* look for netflow cuts which don't produce good prefixes */ + if (doNetflowCut(h, &depths, vg, ee, false, cc.grey)) { + return true; + } + + if (ee.size() > 1) { + DEBUG_PRINTF("split the prefix apart based on succ literals\n"); + unordered_map<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> >, NGHolderHasher, NGHolderEqual> trimmed; - - for (const auto &e : ee) { - shared_ptr<NGHolder> hh = cloneHolder(h); - auto succ_lit = vg[target(e, vg)].s; - assert(isCorrectlyTopped(*hh)); - u32 delay = removeTrailingLiteralStates(*hh, succ_lit, - succ_lit.length(), - false /* can't overhang start */); - if (!delay) { - DEBUG_PRINTF("could not remove any literal, skip over\n"); - continue; - } - - assert(isCorrectlyTopped(*hh)); - trimmed[hh].emplace_back(e, delay); - } - - if (trimmed.size() == 1) { - return false; - } - - /* shift the contents to a vector so we can modify the graphs without - * violating the map's invariants. 
*/ - vector<pair<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> > > > - trimmed_vec(trimmed.begin(), trimmed.end()); - trimmed.clear(); - for (auto &elem : trimmed_vec) { - shared_ptr<NGHolder> &hp = elem.first; - vector<pair<ue2_literal, u32>> succ_lits; - - for (const auto &edge_delay : elem.second) { - const RoseInEdge &e = edge_delay.first; - u32 delay = edge_delay.second; - auto lit = vg[target(e, vg)].s; - - vg[e].graph = hp; - assert(delay <= lit.length()); - succ_lits.emplace_back(lit, delay); - } - restoreTrailingLiteralStates(*hp, succ_lits); - } - return true; - } - - return false; -} - -#define MAX_FIND_BETTER_PREFIX_GEN 4 -#define MAX_FIND_BETTER_PREFIX_COUNT 100 - -static -void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { - STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n"); - RoseInVertex start = getStart(vg); - - insertion_ordered_map<NGHolder *, vector<RoseInEdge>> prefixes; - bool changed; - u32 gen = 0; - do { - DEBUG_PRINTF("gen %u\n", gen); - changed = false; - prefixes.clear(); - - /* find prefixes */ - for (const RoseInEdge &e : out_edges_range(start, vg)) { - /* outfixes shouldn't have made it this far */ - assert(vg[target(e, vg)].type == RIV_LITERAL); - if (vg[e].graph) { - NGHolder *h = vg[e].graph.get(); - prefixes[h].push_back(e); - } - } - - if (prefixes.size() > MAX_FIND_BETTER_PREFIX_COUNT) { - break; - } - - /* look for bad prefixes and try to split */ - for (const auto &m : prefixes) { - NGHolder *h = m.first; - const auto &edges = m.second; - depth max_width = findMaxWidth(*h); - if (willBeTransient(max_width, cc) - || willBeAnchoredTable(max_width, cc.grey)) { - continue; - } - - changed = improvePrefix(*h, vg, edges, cc); - } - } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); -} - -#define STRONG_LITERAL_LENGTH 20 -#define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10 - -static -bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg, - const vector<RoseInEdge> &ee, - const CompileContext &cc) { - 
DEBUG_PRINTF("looking for string literal\n"); - unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc); - - if (split && min_len(split->lit) >= STRONG_LITERAL_LENGTH) { - DEBUG_PRINTF("splitting simple literal\n"); - return splitRoseEdge(h, vg, ee, *split); - } - - return false; -} - -static -void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { - if (!cc.grey.violetExtractStrongLiterals) { - return; - } - - STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n"); - - unordered_set<NGHolder *> stuck; - insertion_ordered_map<NGHolder *, vector<RoseInEdge>> edges_by_graph; - bool changed; - - do { - changed = false; - - edges_by_graph.clear(); - for (const RoseInEdge &ve : edges_range(vg)) { - if (vg[source(ve, vg)].type != RIV_LITERAL) { - continue; - } - - if (vg[ve].graph) { - NGHolder *h = vg[ve].graph.get(); - edges_by_graph[h].push_back(ve); - } - } - - if (edges_by_graph.size() > MAX_EXTRACT_STRONG_LITERAL_GRAPHS) { - DEBUG_PRINTF("too many graphs, stopping\n"); - return; - } - - for (const auto &m : edges_by_graph) { - NGHolder *g = m.first; - const auto &edges = m.second; - if (contains(stuck, g)) { - DEBUG_PRINTF("already known to be bad\n"); - continue; - } - bool rv = extractStrongLiteral(*g, vg, edges, cc); - if (rv) { - changed = true; - } else { - stuck.insert(g); - } - } - } while (changed); -} - -#define INFIX_STRONG_GUARD_LEN 8 -#define INFIX_MIN_SPLIT_LITERAL_LEN 12 - -static -bool improveInfix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee, - const CompileContext &cc) { - unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc); - - if (split && min_len(split->lit) >= INFIX_MIN_SPLIT_LITERAL_LEN - && splitRoseEdge(h, vg, ee, *split)) { - DEBUG_PRINTF("splitting simple literal\n"); - return true; - } - - DEBUG_PRINTF("trying for a netflow cut\n"); - /* look for netflow cuts which don't produce good prefixes */ - bool rv = doNetflowCut(h, nullptr, vg, ee, false, cc.grey, 8); - - DEBUG_PRINTF("did netfow 
cut? = %d\n", (int)rv); - - return rv; -} - -/** - * Infixes which are weakly guarded can, in effect, act like prefixes as they - * will often be live. We should try to split these infixes further if they - * contain strong literals so that we are at least running smaller weak infixes - * which can hopeful be accelerated/miracled. - */ -static -void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { - if (!cc.grey.violetAvoidWeakInfixes) { - return; - } - STAGE_DEBUG_PRINTF("IMPROVE WEAK INFIXES\n"); - - RoseInVertex start = getStart(vg); - - unordered_set<NGHolder *> weak; - - for (RoseInVertex vv : adjacent_vertices_range(start, vg)) { - /* outfixes shouldn't have made it this far */ - assert(vg[vv].type == RIV_LITERAL); - if (vg[vv].s.length() >= INFIX_STRONG_GUARD_LEN) { - continue; - } - - for (const RoseInEdge &e : out_edges_range(vv, vg)) { - if (vg[target(e, vg)].type != RIV_LITERAL || !vg[e].graph) { - continue; - } - - NGHolder *h = vg[e].graph.get(); - DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h); - weak.insert(h); - } - } - - insertion_ordered_map<NGHolder *, vector<RoseInEdge>> weak_edges; - for (const RoseInEdge &ve : edges_range(vg)) { - NGHolder *h = vg[ve].graph.get(); - if (contains(weak, h)) { - weak_edges[h].push_back(ve); - } - } - - for (const auto &m : weak_edges) { - NGHolder *h = m.first; - const auto &edges = m.second; - improveInfix(*h, vg, edges, cc); - } -} - -static -void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, - const vector<RoseInEdge> &ee, const VertLitInfo &split, - bool eod, const flat_set<ReportID> &reports) { - const vector<NFAVertex> &splitters = split.vv; - assert(!splitters.empty()); - - shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); - unordered_map<NFAVertex, NFAVertex> v_map; - cloneHolder(*lhs, base_graph, &v_map); - lhs->kind = NFA_INFIX; - clear_in_edges(lhs->accept, *lhs); - clear_in_edges(lhs->acceptEod, *lhs); - add_edge(lhs->accept, lhs->acceptEod, 
*lhs); - clearReports(*lhs); - for (NFAVertex v : splitters) { - NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); - if (v == base_graph.start) { - (*lhs)[e].tops.insert(DEFAULT_TOP); - } - (*lhs)[v_map[v]].reports.insert(0); - - } - pruneUseless(*lhs); - assert(isCorrectlyTopped(*lhs)); - - /* create literal vertices and connect preds */ - for (const auto &lit : split.lit) { - if (!can_match(*lhs, lit, is_triggered(*lhs))) { - continue; - } - - DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); - - RoseInVertex tt; - if (eod) { - DEBUG_PRINTF("doing eod\n"); - tt = add_vertex(RoseInVertexProps::makeAcceptEod(reports), vg); - } else { - DEBUG_PRINTF("doing non-eod\n"); - tt = add_vertex(RoseInVertexProps::makeAccept(reports), vg); - } - add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); - - for (const RoseInEdge &e : ee) { - RoseInVertex u = source(e, vg); - assert(!edge(u, v, vg).second); - add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); - } - } -} - -#define MIN_SUFFIX_LEN 6 - -static -bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, - const vector<RoseInEdge> &suffix_edges, - const CompileContext &cc) { - DEBUG_PRINTF("inspecting suffix : %p on %zu edges\n", &h, - suffix_edges.size()); - /* - * We would, in general, rather not have output exposed engines because - * once they are triggered, they must be run while infixes only have to run - * if the successor literal is seen. Matches from output exposed engines - * also have to be placed in a priority queue and interleaved with matches - * from other sources. - * - * Note: - * - if the LHS is extremely unlikely we may be better off leaving - * a suffix unguarded. 
- * - * - limited width suffixes may be less bad as they won't be continuously - * active, we may want to have (a) stronger controls on if we want to pick - * a trailing literal in these cases and/or (b) look also for literals - * near accept as well as right on accept - * - * TODO: improve heuristics, splitting logic. - */ - - /* we may do multiple splits corresponding to different report behaviour */ - set<NFAVertex> seen; - map<pair<bool, flat_set<ReportID> >, VertLitInfo> by_reports; /* eod, rep */ - - for (NFAVertex v : inv_adjacent_vertices_range(h.accept, h)) { - set<ue2_literal> ss = getLiteralSet(h, v, false); - if (ss.empty()) { - DEBUG_PRINTF("candidate is too shitty\n"); - return false; - } - - VertLitInfo &vli = by_reports[make_pair(false, h[v].reports)]; - insert(&vli.lit, ss); - vli.vv.push_back(v); - seen.insert(v); - } - - seen.insert(h.accept); - for (NFAVertex v : inv_adjacent_vertices_range(h.acceptEod, h)) { - if (contains(seen, v)) { - continue; - } - - set<ue2_literal> ss = getLiteralSet(h, v, false); - if (ss.empty()) { - DEBUG_PRINTF("candidate is too shitty\n"); - return false; - } - - VertLitInfo &vli = by_reports[make_pair(true, h[v].reports)]; - insert(&vli.lit, ss); - vli.vv.push_back(v); - } - - assert(!by_reports.empty()); - - /* TODO: how strong a min len do we want here ? 
*/ - u32 min_len = cc.grey.minRoseLiteralLength; - ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN); - - for (auto &vli : by_reports | map_values) { - u64a score = sanitizeAndCompressAndScore(vli.lit); - - if (vli.lit.empty() - || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, - false, false)) { - return false; - } - } - - for (const auto &info : by_reports) { - DEBUG_PRINTF("splitting on simple literals\n"); - splitEdgesForSuffix(h, vg, suffix_edges, info.second, - info.first.first /* eod */, - info.first.second /* reports */); - } - - for (const RoseInEdge &e : suffix_edges) { - remove_edge(e, vg); - } - return true; -} - -static -void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { - if (!cc.grey.violetAvoidSuffixes) { - return; - } - - STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n"); - - RoseInVertex accept = getPrimaryAccept(vg); - - insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> suffixes; - - /* find suffixes */ - for (const RoseInEdge &e : in_edges_range(accept, vg)) { - /* outfixes shouldn't have made it this far */ - assert(vg[source(e, vg)].type == RIV_LITERAL); - assert(vg[e].graph); /* non suffix paths should be wired to other - accepts */ - const NGHolder *h = vg[e].graph.get(); - suffixes[h].push_back(e); - } - - /* look at suffixes and try to split */ - for (const auto &m : suffixes) { - const NGHolder *h = m.first; - const auto &edges = m.second; - replaceSuffixWithInfix(*h, vg, edges, cc); - } -} - -static -bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) { - if (out_degree(h.start, h) != 3) { - return false; - } - - NFAVertex v = NGHolder::null_vertex(); - NFAVertex ds = NGHolder::null_vertex(); - - for (NFAVertex a : adjacent_vertices_range(h.start, h)) { - if (a == h.startDs) { - continue; - } - if (h[a].char_reach.all()) { - ds = a; - if (out_degree(ds, h) != 2 || !edge(ds, ds, h).second) { - return false; - } - } else { - v = a; - } - } - - if (!v || !ds || !edge(ds, v, h).second) { - return false; - } - 
- if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { - return false; - } - - ue2_literal lit; - lit.push_back(h[v].char_reach.find_first(), - h[v].char_reach.isCaselessChar()); - while (out_degree(v, h) == 1) { - NFAVertex vv = *adjacent_vertices(v, h).first; - if (h[vv].char_reach.count() != 1 - && !h[vv].char_reach.isCaselessChar()) { - break; - } - - v = vv; - - lit.push_back(h[v].char_reach.find_first(), - h[v].char_reach.isCaselessChar()); - } - - if (is_match_vertex(v, h) && h.kind != NFA_SUFFIX) { - /* we have rediscovered the post-infix literal */ - return false; - } - - if (bad_mixed_sensitivity(lit)) { - make_nocase(&lit); - } - - DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); - out->vv = {v}; - out->lit = {lit}; - return true; -} - -static -bool lookForDoubleCut(const NGHolder &h, const vector<RoseInEdge> &ee, - RoseInGraph &vg, const Grey &grey) { - VertLitInfo info; - if (!leadingDotStartLiteral(h, &info) - || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { - return false; - } - DEBUG_PRINTF("performing split\n"); - return splitRoseEdge(h, vg, ee, {info}); -} - -static -void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) { - if (!cc.grey.violetDoubleCut) { - return; - } - - insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges; - for (const RoseInEdge &ve : edges_range(vg)) { - if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { - const NGHolder *h = vg[ve].graph.get(); - right_edges[h].push_back(ve); - } - } - - for (const auto &m : right_edges) { - const NGHolder *h = m.first; - const auto &edges = m.second; - lookForDoubleCut(*h, edges, vg, cc.grey); - } -} - -static -pair<NFAVertex, ue2_literal> findLiteralBefore(const NGHolder &h, NFAVertex v) { - ue2_literal lit; - if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { - return {v, std::move(lit) }; - } - lit.push_back(h[v].char_reach.find_first(), - h[v].char_reach.isCaselessChar()); - - 
while (in_degree(v, h) == 1) { - NFAVertex vv = *inv_adjacent_vertices(v, h).first; - if (h[vv].char_reach.count() != 1 - && !h[vv].char_reach.isCaselessChar()) { - break; - } - - lit.push_back(h[vv].char_reach.find_first(), - h[vv].char_reach.isCaselessChar()); - v = vv; - } - - return {v, std::move(lit) }; -} - -static -bool lookForDotStarPred(NFAVertex v, const NGHolder &h, - NFAVertex *u, NFAVertex *ds) { - *u = NGHolder::null_vertex(); - *ds = NGHolder::null_vertex(); - for (NFAVertex a : inv_adjacent_vertices_range(v, h)) { - if (h[a].char_reach.all()) { - if (!edge(a, a, h).second) { - return false; - } - - if (*ds) { - return false; - } - - *ds = a; - } else { - if (*u) { - return false; - } - *u = a; - } - } - - if (!*u || !*ds) { - return false; - } - - return true; -} - -static -bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) { - /* Note: there is no delay yet - so the final literal is the already - * discovered successor literal - we are in fact interested in the literal - * before it. 
*/ - - if (in_degree(h.accept, h) != 1) { - return false; - } - - if (in_degree(h.acceptEod, h) != 1) { - assert(0); - return false; - } - - NFAVertex v - = findLiteralBefore(h, *inv_adjacent_vertices(h.accept, h).first).first; - - NFAVertex u; - NFAVertex ds; - - if (!lookForDotStarPred(v, h, &u, &ds)) { - return false; - } - - v = u; - auto rv = findLiteralBefore(h, v); - - if (!lookForDotStarPred(v, h, &u, &ds)) { - return false; - } - - ue2_literal lit = reverse_literal(rv.second); - DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); - - if (bad_mixed_sensitivity(lit)) { - make_nocase(&lit); - } - - out->vv = {v}; - out->lit = {lit}; - return true; -} - -static -bool lookForTrailingLiteralDotStar(const NGHolder &h, - const vector<RoseInEdge> &ee, - RoseInGraph &vg, const Grey &grey) { - VertLitInfo info; - if (!trailingDotStarLiteral(h, &info) - || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { - return false; - } - DEBUG_PRINTF("performing split\n"); - return splitRoseEdge(h, vg, ee, info); -} - -/* In streaming mode, active engines have to be caught up at stream boundaries - * and have to be stored in stream state, so we prefer to decompose patterns - * in to literals with no state between them if possible. 
*/ -static -void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) { - if (!cc.grey.violetLiteralChains) { - return; - } - - insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges; - bool changed; - do { - changed = false; - - right_edges.clear(); - for (const RoseInEdge &ve : edges_range(vg)) { - if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { - const NGHolder *h = vg[ve].graph.get(); - right_edges[h].push_back(ve); - } - } - - for (const auto &m : right_edges) { - const NGHolder *h = m.first; - const vector<RoseInEdge> &ee = m.second; - bool rv = lookForDoubleCut(*h, ee, vg, cc.grey); - if (!rv && h->kind != NFA_SUFFIX) { - rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey); - } - changed |= rv; - } - } while (changed); -} - -static -bool lookForCleanSplit(const NGHolder &h, const vector<RoseInEdge> &ee, - RoseInGraph &vg, const CompileContext &cc) { - unique_ptr<VertLitInfo> split = findBestCleanSplit(h, cc); - - if (split) { - return splitRoseEdge(h, vg, {ee}, *split); - } - - return false; -} - -#define MAX_DESIRED_CLEAN_SPLIT_DEPTH 4 - -static -void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) { - u32 gen = 0; - - insertion_ordered_set<RoseInVertex> prev({getStart(vg)}); - insertion_ordered_set<RoseInVertex> curr; - - while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) { - curr.clear(); - for (RoseInVertex u : prev) { - for (auto v : adjacent_vertices_range(u, vg)) { - curr.insert(v); - } - } - - insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> rightfixes; - for (RoseInVertex v : curr) { - for (const RoseInEdge &e : out_edges_range(v, vg)) { - if (vg[e].graph) { - NGHolder *h = vg[e].graph.get(); - rightfixes[h].push_back(e); - } - } - } - - for (const auto &m : rightfixes) { - const NGHolder *h = m.first; - const auto &edges = m.second; - lookForCleanSplit(*h, edges, vg, cc); - } - - prev = std::move(curr); - gen++; - } -} - -static -void rehomeEodSuffixes(RoseInGraph &vg) { - // 
Find edges to accept with EOD-anchored graphs that we can move over to - // acceptEod. - vector<RoseInEdge> acc_edges; - for (const auto &e : edges_range(vg)) { - if (vg[target(e, vg)].type != RIV_ACCEPT) { - continue; - } - if (vg[e].haig || !vg[e].graph) { - continue; - } - - const NGHolder &h = *vg[e].graph; - - if (in_degree(h.accept, h)) { - DEBUG_PRINTF("graph isn't eod anchored\n"); - continue; - } - - acc_edges.push_back(e); - } - - for (const RoseInEdge &e : acc_edges) { - // Move this edge from accept to acceptEod - RoseInVertex w = add_vertex(RoseInVertexProps::makeAcceptEod(), vg); - add_edge(source(e, vg), w, vg[e], vg); - remove_edge(e, vg); - } - - /* old accept vertices will be tidied up by final pruneUseless() call */ -} - -static -bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) { - switch (h.kind) { - case NFA_OUTFIX: /* 'prefix' of eod */ - case NFA_PREFIX: - return cc.grey.earlyMcClellanPrefix; - case NFA_INFIX: - return cc.grey.earlyMcClellanInfix; - case NFA_SUFFIX: - return cc.grey.earlyMcClellanSuffix; - default: - DEBUG_PRINTF("kind %u\n", (u32)h.kind); - assert(0); - return false; - } -} - -static -vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg, - const vector<RoseInEdge> &edges, - bool *single_trigger) { - vector<vector<CharReach>> triggers; - u32 min_offset = ~0U; - u32 max_offset = 0; - for (const auto &e : edges) { - RoseInVertex s = source(e, vg); - if (vg[s].type == RIV_LITERAL) { - triggers.push_back(as_cr_seq(vg[s].s)); - } - ENSURE_AT_LEAST(&max_offset, vg[s].max_offset); - LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset); - } - - *single_trigger = min_offset == max_offset; - DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset); - - return triggers; -} - -static -bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, - const vector<RoseInEdge> &edges, bool final_chance, - const ReportManager &rm, const CompileContext &cc) { - DEBUG_PRINTF("trying for dfa\n"); - - bool single_trigger; 
- for (const auto &e : edges) { - if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) { - /* TODO: support eod prefixes */ - return false; - } - } - - auto triggers = getDfaTriggers(vg, edges, &single_trigger); - - /* TODO: literal delay things */ - if (!generates_callbacks(h)) { - set_report(h, rose.getNewNfaReport()); - } - - shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers, - cc.grey, final_chance); - - if (!dfa) { - return false; - } - - DEBUG_PRINTF("dfa ok\n"); - for (const auto &e : edges) { - vg[e].dfa = dfa; - } - - return true; -} - -#define MAX_EDGES_FOR_IMPLEMENTABILITY 50 - -static -bool splitForImplementability(RoseInGraph &vg, NGHolder &h, - const vector<RoseInEdge> &edges, - const CompileContext &cc) { - vector<pair<ue2_literal, u32>> succ_lits; - DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", - to_string(h.kind).c_str(), num_vertices(h), edges.size()); - - if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) { - return false; - } - - if (!generates_callbacks(h)) { - for (const auto &e : edges) { - const auto &lit = vg[target(e, vg)].s; - u32 delay = vg[e].graph_lag; - vg[e].graph_lag = 0; - - assert(delay <= lit.length()); - succ_lits.emplace_back(lit, delay); - } - restoreTrailingLiteralStates(h, succ_lits); - } - - unique_ptr<VertLitInfo> split; - bool last_chance = true; - if (h.kind == NFA_PREFIX) { - auto depths = calcDepths(h); - - split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); - } else { - split = findBestLastChanceSplit(h, vg, edges, cc); - } - - if (split && splitRoseEdge(h, vg, edges, *split)) { - DEBUG_PRINTF("split on simple literal\n"); - return true; - } - - DEBUG_PRINTF("trying to netflow\n"); - bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); - DEBUG_PRINTF("done\n"); - - return rv; -} - -#define MAX_IMPLEMENTABLE_SPLITS 50 - -bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, - bool final_chance, const ReportManager &rm, - const 
CompileContext &cc) { - DEBUG_PRINTF("checking for impl %d\n", final_chance); - bool changed = false; - bool need_to_recalc = false; - u32 added_count = 0; - unordered_set<shared_ptr<NGHolder>> good; /* known to be implementable */ - do { - changed = false; - DEBUG_PRINTF("added %u\n", added_count); - insertion_ordered_map<shared_ptr<NGHolder>, - vector<RoseInEdge>> edges_by_graph; - for (const RoseInEdge &ve : edges_range(vg)) { - if (vg[ve].graph && !vg[ve].dfa) { - auto &h = vg[ve].graph; - edges_by_graph[h].push_back(ve); - } - } - for (auto &m : edges_by_graph) { - auto &h = m.first; - if (contains(good, h)) { - continue; - } - reduceGraphEquivalences(*h, cc); - if (isImplementableNFA(*h, &rm, cc)) { - good.insert(h); - continue; - } - - const auto &edges = m.second; - - if (tryForEarlyDfa(*h, cc) && - doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) { - continue; - } - - DEBUG_PRINTF("eek\n"); - if (!allow_changes) { - return false; - } - - if (splitForImplementability(vg, *h, edges, cc)) { - added_count++; - if (added_count > MAX_IMPLEMENTABLE_SPLITS) { - DEBUG_PRINTF("added_count hit limit\n"); - return false; - } - changed = true; - continue; - } - - return false; - } - - assert(added_count <= MAX_IMPLEMENTABLE_SPLITS); - - if (changed) { - removeRedundantLiterals(vg, cc); - pruneUseless(vg); - need_to_recalc = true; - } - } while (changed); - - if (need_to_recalc) { - renumber_vertices(vg); - calcVertexOffsets(vg); - } - - DEBUG_PRINTF("ok!\n"); - return true; -} - -static -RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, - const CompileContext &cc) { - assert(!can_never_match(h)); - - RoseInGraph vg = populateTrivialGraph(h); - - if (!cc.grey.allowViolet) { - return vg; - } - - /* Avoid running the Violet analysis at all on graphs with no vertices with - * small reach, since we will not be able to extract any literals. 
*/ - if (!hasNarrowReachVertex(h)) { - DEBUG_PRINTF("fail, no vertices with small reach\n"); - return vg; - } - - DEBUG_PRINTF("hello world\n"); - - /* Step 1: avoid outfixes as we always have to run them. */ - avoidOutfixes(vg, last_chance, cc); - - if (num_vertices(vg) <= 2) { - return vg; /* unable to transform pattern */ - } - - removeRedundantPrefixes(vg); - dumpPreRoseGraph(vg, cc.grey, "pre_prefix_rose.dot"); - - /* Step 2: avoid non-transient prefixes (esp in streaming mode) */ - findBetterPrefixes(vg, cc); - - dumpPreRoseGraph(vg, cc.grey, "post_prefix_rose.dot"); - - extractStrongLiterals(vg, cc); - dumpPreRoseGraph(vg, cc.grey, "post_extract_rose.dot"); - improveWeakInfixes(vg, cc); - dumpPreRoseGraph(vg, cc.grey, "post_infix_rose.dot"); - - /* Step 3: avoid output exposed engines if there is a strong trailing - literal) */ - avoidSuffixes(vg, cc); - - /* Step 4: look for infixes/suffixes with leading .*literals - * This can reduce the amount of work a heavily picked literal has to do and - * reduce the amount of state used as .* is handled internally to rose. */ - lookForDoubleCut(vg, cc); - - if (cc.streaming) { - lookForCleanEarlySplits(vg, cc); - decomposeLiteralChains(vg, cc); - } - - rehomeEodSuffixes(vg); - removeRedundantLiterals(vg, cc); - - pruneUseless(vg); - dumpPreRoseGraph(vg, cc.grey); - renumber_vertices(vg); - calcVertexOffsets(vg); - - return vg; -} - -bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, - bool last_chance, const ReportManager &rm, - const CompileContext &cc) { - auto vg = doInitialVioletTransform(h, last_chance, cc); - if (num_vertices(vg) <= 2) { - return false; - } - - /* Step 5: avoid unimplementable, or overly large engines if possible */ - if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { - return false; - } - dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot"); - - /* Step 6: send to rose */ - bool rv = rose.addRose(vg, prefilter); - DEBUG_PRINTF("violet: %s\n", rv ? 
"success" : "fail"); - return rv; -} - -bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - auto vg = doInitialVioletTransform(h, true, cc); - if (num_vertices(vg) <= 2) { - return false; - } - - bool rv = roseCheckRose(vg, prefilter, rm, cc); - DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); - return rv; -} - -} + + for (const auto &e : ee) { + shared_ptr<NGHolder> hh = cloneHolder(h); + auto succ_lit = vg[target(e, vg)].s; + assert(isCorrectlyTopped(*hh)); + u32 delay = removeTrailingLiteralStates(*hh, succ_lit, + succ_lit.length(), + false /* can't overhang start */); + if (!delay) { + DEBUG_PRINTF("could not remove any literal, skip over\n"); + continue; + } + + assert(isCorrectlyTopped(*hh)); + trimmed[hh].emplace_back(e, delay); + } + + if (trimmed.size() == 1) { + return false; + } + + /* shift the contents to a vector so we can modify the graphs without + * violating the map's invariants. */ + vector<pair<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> > > > + trimmed_vec(trimmed.begin(), trimmed.end()); + trimmed.clear(); + for (auto &elem : trimmed_vec) { + shared_ptr<NGHolder> &hp = elem.first; + vector<pair<ue2_literal, u32>> succ_lits; + + for (const auto &edge_delay : elem.second) { + const RoseInEdge &e = edge_delay.first; + u32 delay = edge_delay.second; + auto lit = vg[target(e, vg)].s; + + vg[e].graph = hp; + assert(delay <= lit.length()); + succ_lits.emplace_back(lit, delay); + } + restoreTrailingLiteralStates(*hp, succ_lits); + } + return true; + } + + return false; +} + +#define MAX_FIND_BETTER_PREFIX_GEN 4 +#define MAX_FIND_BETTER_PREFIX_COUNT 100 + +static +void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { + STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n"); + RoseInVertex start = getStart(vg); + + insertion_ordered_map<NGHolder *, vector<RoseInEdge>> prefixes; + bool changed; + u32 gen = 0; + do { + DEBUG_PRINTF("gen %u\n", gen); + changed = false; + 
prefixes.clear(); + + /* find prefixes */ + for (const RoseInEdge &e : out_edges_range(start, vg)) { + /* outfixes shouldn't have made it this far */ + assert(vg[target(e, vg)].type == RIV_LITERAL); + if (vg[e].graph) { + NGHolder *h = vg[e].graph.get(); + prefixes[h].push_back(e); + } + } + + if (prefixes.size() > MAX_FIND_BETTER_PREFIX_COUNT) { + break; + } + + /* look for bad prefixes and try to split */ + for (const auto &m : prefixes) { + NGHolder *h = m.first; + const auto &edges = m.second; + depth max_width = findMaxWidth(*h); + if (willBeTransient(max_width, cc) + || willBeAnchoredTable(max_width, cc.grey)) { + continue; + } + + changed = improvePrefix(*h, vg, edges, cc); + } + } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); +} + +#define STRONG_LITERAL_LENGTH 20 +#define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10 + +static +bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg, + const vector<RoseInEdge> &ee, + const CompileContext &cc) { + DEBUG_PRINTF("looking for string literal\n"); + unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc); + + if (split && min_len(split->lit) >= STRONG_LITERAL_LENGTH) { + DEBUG_PRINTF("splitting simple literal\n"); + return splitRoseEdge(h, vg, ee, *split); + } + + return false; +} + +static +void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetExtractStrongLiterals) { + return; + } + + STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n"); + + unordered_set<NGHolder *> stuck; + insertion_ordered_map<NGHolder *, vector<RoseInEdge>> edges_by_graph; + bool changed; + + do { + changed = false; + + edges_by_graph.clear(); + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[source(ve, vg)].type != RIV_LITERAL) { + continue; + } + + if (vg[ve].graph) { + NGHolder *h = vg[ve].graph.get(); + edges_by_graph[h].push_back(ve); + } + } + + if (edges_by_graph.size() > MAX_EXTRACT_STRONG_LITERAL_GRAPHS) { + DEBUG_PRINTF("too many graphs, stopping\n"); + return; + } + + for 
(const auto &m : edges_by_graph) { + NGHolder *g = m.first; + const auto &edges = m.second; + if (contains(stuck, g)) { + DEBUG_PRINTF("already known to be bad\n"); + continue; + } + bool rv = extractStrongLiteral(*g, vg, edges, cc); + if (rv) { + changed = true; + } else { + stuck.insert(g); + } + } + } while (changed); +} + +#define INFIX_STRONG_GUARD_LEN 8 +#define INFIX_MIN_SPLIT_LITERAL_LEN 12 + +static +bool improveInfix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee, + const CompileContext &cc) { + unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc); + + if (split && min_len(split->lit) >= INFIX_MIN_SPLIT_LITERAL_LEN + && splitRoseEdge(h, vg, ee, *split)) { + DEBUG_PRINTF("splitting simple literal\n"); + return true; + } + + DEBUG_PRINTF("trying for a netflow cut\n"); + /* look for netflow cuts which don't produce good prefixes */ + bool rv = doNetflowCut(h, nullptr, vg, ee, false, cc.grey, 8); + + DEBUG_PRINTF("did netfow cut? = %d\n", (int)rv); + + return rv; +} + +/** + * Infixes which are weakly guarded can, in effect, act like prefixes as they + * will often be live. We should try to split these infixes further if they + * contain strong literals so that we are at least running smaller weak infixes + * which can hopeful be accelerated/miracled. 
+ */ +static +void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetAvoidWeakInfixes) { + return; + } + STAGE_DEBUG_PRINTF("IMPROVE WEAK INFIXES\n"); + + RoseInVertex start = getStart(vg); + + unordered_set<NGHolder *> weak; + + for (RoseInVertex vv : adjacent_vertices_range(start, vg)) { + /* outfixes shouldn't have made it this far */ + assert(vg[vv].type == RIV_LITERAL); + if (vg[vv].s.length() >= INFIX_STRONG_GUARD_LEN) { + continue; + } + + for (const RoseInEdge &e : out_edges_range(vv, vg)) { + if (vg[target(e, vg)].type != RIV_LITERAL || !vg[e].graph) { + continue; + } + + NGHolder *h = vg[e].graph.get(); + DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h); + weak.insert(h); + } + } + + insertion_ordered_map<NGHolder *, vector<RoseInEdge>> weak_edges; + for (const RoseInEdge &ve : edges_range(vg)) { + NGHolder *h = vg[ve].graph.get(); + if (contains(weak, h)) { + weak_edges[h].push_back(ve); + } + } + + for (const auto &m : weak_edges) { + NGHolder *h = m.first; + const auto &edges = m.second; + improveInfix(*h, vg, edges, cc); + } +} + +static +void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, + const vector<RoseInEdge> &ee, const VertLitInfo &split, + bool eod, const flat_set<ReportID> &reports) { + const vector<NFAVertex> &splitters = split.vv; + assert(!splitters.empty()); + + shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); + unordered_map<NFAVertex, NFAVertex> v_map; + cloneHolder(*lhs, base_graph, &v_map); + lhs->kind = NFA_INFIX; + clear_in_edges(lhs->accept, *lhs); + clear_in_edges(lhs->acceptEod, *lhs); + add_edge(lhs->accept, lhs->acceptEod, *lhs); + clearReports(*lhs); + for (NFAVertex v : splitters) { + NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); + if (v == base_graph.start) { + (*lhs)[e].tops.insert(DEFAULT_TOP); + } + (*lhs)[v_map[v]].reports.insert(0); + + } + pruneUseless(*lhs); + assert(isCorrectlyTopped(*lhs)); + + /* create literal vertices and connect preds 
*/ + for (const auto &lit : split.lit) { + if (!can_match(*lhs, lit, is_triggered(*lhs))) { + continue; + } + + DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + + RoseInVertex tt; + if (eod) { + DEBUG_PRINTF("doing eod\n"); + tt = add_vertex(RoseInVertexProps::makeAcceptEod(reports), vg); + } else { + DEBUG_PRINTF("doing non-eod\n"); + tt = add_vertex(RoseInVertexProps::makeAccept(reports), vg); + } + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + + for (const RoseInEdge &e : ee) { + RoseInVertex u = source(e, vg); + assert(!edge(u, v, vg).second); + add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); + } + } +} + +#define MIN_SUFFIX_LEN 6 + +static +bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, + const vector<RoseInEdge> &suffix_edges, + const CompileContext &cc) { + DEBUG_PRINTF("inspecting suffix : %p on %zu edges\n", &h, + suffix_edges.size()); + /* + * We would, in general, rather not have output exposed engines because + * once they are triggered, they must be run while infixes only have to run + * if the successor literal is seen. Matches from output exposed engines + * also have to be placed in a priority queue and interleaved with matches + * from other sources. + * + * Note: + * - if the LHS is extremely unlikely we may be better off leaving + * a suffix unguarded. + * + * - limited width suffixes may be less bad as they won't be continuously + * active, we may want to have (a) stronger controls on if we want to pick + * a trailing literal in these cases and/or (b) look also for literals + * near accept as well as right on accept + * + * TODO: improve heuristics, splitting logic. 
+ */ + + /* we may do multiple splits corresponding to different report behaviour */ + set<NFAVertex> seen; + map<pair<bool, flat_set<ReportID> >, VertLitInfo> by_reports; /* eod, rep */ + + for (NFAVertex v : inv_adjacent_vertices_range(h.accept, h)) { + set<ue2_literal> ss = getLiteralSet(h, v, false); + if (ss.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + return false; + } + + VertLitInfo &vli = by_reports[make_pair(false, h[v].reports)]; + insert(&vli.lit, ss); + vli.vv.push_back(v); + seen.insert(v); + } + + seen.insert(h.accept); + for (NFAVertex v : inv_adjacent_vertices_range(h.acceptEod, h)) { + if (contains(seen, v)) { + continue; + } + + set<ue2_literal> ss = getLiteralSet(h, v, false); + if (ss.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + return false; + } + + VertLitInfo &vli = by_reports[make_pair(true, h[v].reports)]; + insert(&vli.lit, ss); + vli.vv.push_back(v); + } + + assert(!by_reports.empty()); + + /* TODO: how strong a min len do we want here ? 
*/ + u32 min_len = cc.grey.minRoseLiteralLength; + ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN); + + for (auto &vli : by_reports | map_values) { + u64a score = sanitizeAndCompressAndScore(vli.lit); + + if (vli.lit.empty() + || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, + false, false)) { + return false; + } + } + + for (const auto &info : by_reports) { + DEBUG_PRINTF("splitting on simple literals\n"); + splitEdgesForSuffix(h, vg, suffix_edges, info.second, + info.first.first /* eod */, + info.first.second /* reports */); + } + + for (const RoseInEdge &e : suffix_edges) { + remove_edge(e, vg); + } + return true; +} + +static +void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetAvoidSuffixes) { + return; + } + + STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n"); + + RoseInVertex accept = getPrimaryAccept(vg); + + insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> suffixes; + + /* find suffixes */ + for (const RoseInEdge &e : in_edges_range(accept, vg)) { + /* outfixes shouldn't have made it this far */ + assert(vg[source(e, vg)].type == RIV_LITERAL); + assert(vg[e].graph); /* non suffix paths should be wired to other + accepts */ + const NGHolder *h = vg[e].graph.get(); + suffixes[h].push_back(e); + } + + /* look at suffixes and try to split */ + for (const auto &m : suffixes) { + const NGHolder *h = m.first; + const auto &edges = m.second; + replaceSuffixWithInfix(*h, vg, edges, cc); + } +} + +static +bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) { + if (out_degree(h.start, h) != 3) { + return false; + } + + NFAVertex v = NGHolder::null_vertex(); + NFAVertex ds = NGHolder::null_vertex(); + + for (NFAVertex a : adjacent_vertices_range(h.start, h)) { + if (a == h.startDs) { + continue; + } + if (h[a].char_reach.all()) { + ds = a; + if (out_degree(ds, h) != 2 || !edge(ds, ds, h).second) { + return false; + } + } else { + v = a; + } + } + + if (!v || !ds || !edge(ds, v, h).second) { + return false; + } + 
+ if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { + return false; + } + + ue2_literal lit; + lit.push_back(h[v].char_reach.find_first(), + h[v].char_reach.isCaselessChar()); + while (out_degree(v, h) == 1) { + NFAVertex vv = *adjacent_vertices(v, h).first; + if (h[vv].char_reach.count() != 1 + && !h[vv].char_reach.isCaselessChar()) { + break; + } + + v = vv; + + lit.push_back(h[v].char_reach.find_first(), + h[v].char_reach.isCaselessChar()); + } + + if (is_match_vertex(v, h) && h.kind != NFA_SUFFIX) { + /* we have rediscovered the post-infix literal */ + return false; + } + + if (bad_mixed_sensitivity(lit)) { + make_nocase(&lit); + } + + DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); + out->vv = {v}; + out->lit = {lit}; + return true; +} + +static +bool lookForDoubleCut(const NGHolder &h, const vector<RoseInEdge> &ee, + RoseInGraph &vg, const Grey &grey) { + VertLitInfo info; + if (!leadingDotStartLiteral(h, &info) + || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { + return false; + } + DEBUG_PRINTF("performing split\n"); + return splitRoseEdge(h, vg, ee, {info}); +} + +static +void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetDoubleCut) { + return; + } + + insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { + const NGHolder *h = vg[ve].graph.get(); + right_edges[h].push_back(ve); + } + } + + for (const auto &m : right_edges) { + const NGHolder *h = m.first; + const auto &edges = m.second; + lookForDoubleCut(*h, edges, vg, cc.grey); + } +} + +static +pair<NFAVertex, ue2_literal> findLiteralBefore(const NGHolder &h, NFAVertex v) { + ue2_literal lit; + if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { + return {v, std::move(lit) }; + } + lit.push_back(h[v].char_reach.find_first(), + h[v].char_reach.isCaselessChar()); + + 
while (in_degree(v, h) == 1) { + NFAVertex vv = *inv_adjacent_vertices(v, h).first; + if (h[vv].char_reach.count() != 1 + && !h[vv].char_reach.isCaselessChar()) { + break; + } + + lit.push_back(h[vv].char_reach.find_first(), + h[vv].char_reach.isCaselessChar()); + v = vv; + } + + return {v, std::move(lit) }; +} + +static +bool lookForDotStarPred(NFAVertex v, const NGHolder &h, + NFAVertex *u, NFAVertex *ds) { + *u = NGHolder::null_vertex(); + *ds = NGHolder::null_vertex(); + for (NFAVertex a : inv_adjacent_vertices_range(v, h)) { + if (h[a].char_reach.all()) { + if (!edge(a, a, h).second) { + return false; + } + + if (*ds) { + return false; + } + + *ds = a; + } else { + if (*u) { + return false; + } + *u = a; + } + } + + if (!*u || !*ds) { + return false; + } + + return true; +} + +static +bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) { + /* Note: there is no delay yet - so the final literal is the already + * discovered successor literal - we are in fact interested in the literal + * before it. 
*/ + + if (in_degree(h.accept, h) != 1) { + return false; + } + + if (in_degree(h.acceptEod, h) != 1) { + assert(0); + return false; + } + + NFAVertex v + = findLiteralBefore(h, *inv_adjacent_vertices(h.accept, h).first).first; + + NFAVertex u; + NFAVertex ds; + + if (!lookForDotStarPred(v, h, &u, &ds)) { + return false; + } + + v = u; + auto rv = findLiteralBefore(h, v); + + if (!lookForDotStarPred(v, h, &u, &ds)) { + return false; + } + + ue2_literal lit = reverse_literal(rv.second); + DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); + + if (bad_mixed_sensitivity(lit)) { + make_nocase(&lit); + } + + out->vv = {v}; + out->lit = {lit}; + return true; +} + +static +bool lookForTrailingLiteralDotStar(const NGHolder &h, + const vector<RoseInEdge> &ee, + RoseInGraph &vg, const Grey &grey) { + VertLitInfo info; + if (!trailingDotStarLiteral(h, &info) + || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { + return false; + } + DEBUG_PRINTF("performing split\n"); + return splitRoseEdge(h, vg, ee, info); +} + +/* In streaming mode, active engines have to be caught up at stream boundaries + * and have to be stored in stream state, so we prefer to decompose patterns + * in to literals with no state between them if possible. 
*/ +static +void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetLiteralChains) { + return; + } + + insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges; + bool changed; + do { + changed = false; + + right_edges.clear(); + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { + const NGHolder *h = vg[ve].graph.get(); + right_edges[h].push_back(ve); + } + } + + for (const auto &m : right_edges) { + const NGHolder *h = m.first; + const vector<RoseInEdge> &ee = m.second; + bool rv = lookForDoubleCut(*h, ee, vg, cc.grey); + if (!rv && h->kind != NFA_SUFFIX) { + rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey); + } + changed |= rv; + } + } while (changed); +} + +static +bool lookForCleanSplit(const NGHolder &h, const vector<RoseInEdge> &ee, + RoseInGraph &vg, const CompileContext &cc) { + unique_ptr<VertLitInfo> split = findBestCleanSplit(h, cc); + + if (split) { + return splitRoseEdge(h, vg, {ee}, *split); + } + + return false; +} + +#define MAX_DESIRED_CLEAN_SPLIT_DEPTH 4 + +static +void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) { + u32 gen = 0; + + insertion_ordered_set<RoseInVertex> prev({getStart(vg)}); + insertion_ordered_set<RoseInVertex> curr; + + while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) { + curr.clear(); + for (RoseInVertex u : prev) { + for (auto v : adjacent_vertices_range(u, vg)) { + curr.insert(v); + } + } + + insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> rightfixes; + for (RoseInVertex v : curr) { + for (const RoseInEdge &e : out_edges_range(v, vg)) { + if (vg[e].graph) { + NGHolder *h = vg[e].graph.get(); + rightfixes[h].push_back(e); + } + } + } + + for (const auto &m : rightfixes) { + const NGHolder *h = m.first; + const auto &edges = m.second; + lookForCleanSplit(*h, edges, vg, cc); + } + + prev = std::move(curr); + gen++; + } +} + +static +void rehomeEodSuffixes(RoseInGraph &vg) { + // 
Find edges to accept with EOD-anchored graphs that we can move over to + // acceptEod. + vector<RoseInEdge> acc_edges; + for (const auto &e : edges_range(vg)) { + if (vg[target(e, vg)].type != RIV_ACCEPT) { + continue; + } + if (vg[e].haig || !vg[e].graph) { + continue; + } + + const NGHolder &h = *vg[e].graph; + + if (in_degree(h.accept, h)) { + DEBUG_PRINTF("graph isn't eod anchored\n"); + continue; + } + + acc_edges.push_back(e); + } + + for (const RoseInEdge &e : acc_edges) { + // Move this edge from accept to acceptEod + RoseInVertex w = add_vertex(RoseInVertexProps::makeAcceptEod(), vg); + add_edge(source(e, vg), w, vg[e], vg); + remove_edge(e, vg); + } + + /* old accept vertices will be tidied up by final pruneUseless() call */ +} + +static +bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) { + switch (h.kind) { + case NFA_OUTFIX: /* 'prefix' of eod */ + case NFA_PREFIX: + return cc.grey.earlyMcClellanPrefix; + case NFA_INFIX: + return cc.grey.earlyMcClellanInfix; + case NFA_SUFFIX: + return cc.grey.earlyMcClellanSuffix; + default: + DEBUG_PRINTF("kind %u\n", (u32)h.kind); + assert(0); + return false; + } +} + +static +vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg, + const vector<RoseInEdge> &edges, + bool *single_trigger) { + vector<vector<CharReach>> triggers; + u32 min_offset = ~0U; + u32 max_offset = 0; + for (const auto &e : edges) { + RoseInVertex s = source(e, vg); + if (vg[s].type == RIV_LITERAL) { + triggers.push_back(as_cr_seq(vg[s].s)); + } + ENSURE_AT_LEAST(&max_offset, vg[s].max_offset); + LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset); + } + + *single_trigger = min_offset == max_offset; + DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset); + + return triggers; +} + +static +bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, + const vector<RoseInEdge> &edges, bool final_chance, + const ReportManager &rm, const CompileContext &cc) { + DEBUG_PRINTF("trying for dfa\n"); + + bool single_trigger; 
+ for (const auto &e : edges) { + if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) { + /* TODO: support eod prefixes */ + return false; + } + } + + auto triggers = getDfaTriggers(vg, edges, &single_trigger); + + /* TODO: literal delay things */ + if (!generates_callbacks(h)) { + set_report(h, rose.getNewNfaReport()); + } + + shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers, + cc.grey, final_chance); + + if (!dfa) { + return false; + } + + DEBUG_PRINTF("dfa ok\n"); + for (const auto &e : edges) { + vg[e].dfa = dfa; + } + + return true; +} + +#define MAX_EDGES_FOR_IMPLEMENTABILITY 50 + +static +bool splitForImplementability(RoseInGraph &vg, NGHolder &h, + const vector<RoseInEdge> &edges, + const CompileContext &cc) { + vector<pair<ue2_literal, u32>> succ_lits; + DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", + to_string(h.kind).c_str(), num_vertices(h), edges.size()); + + if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) { + return false; + } + + if (!generates_callbacks(h)) { + for (const auto &e : edges) { + const auto &lit = vg[target(e, vg)].s; + u32 delay = vg[e].graph_lag; + vg[e].graph_lag = 0; + + assert(delay <= lit.length()); + succ_lits.emplace_back(lit, delay); + } + restoreTrailingLiteralStates(h, succ_lits); + } + + unique_ptr<VertLitInfo> split; + bool last_chance = true; + if (h.kind == NFA_PREFIX) { + auto depths = calcDepths(h); + + split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); + } else { + split = findBestLastChanceSplit(h, vg, edges, cc); + } + + if (split && splitRoseEdge(h, vg, edges, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + DEBUG_PRINTF("trying to netflow\n"); + bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); + DEBUG_PRINTF("done\n"); + + return rv; +} + +#define MAX_IMPLEMENTABLE_SPLITS 50 + +bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, + bool final_chance, const ReportManager &rm, + const 
CompileContext &cc) { + DEBUG_PRINTF("checking for impl %d\n", final_chance); + bool changed = false; + bool need_to_recalc = false; + u32 added_count = 0; + unordered_set<shared_ptr<NGHolder>> good; /* known to be implementable */ + do { + changed = false; + DEBUG_PRINTF("added %u\n", added_count); + insertion_ordered_map<shared_ptr<NGHolder>, + vector<RoseInEdge>> edges_by_graph; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph && !vg[ve].dfa) { + auto &h = vg[ve].graph; + edges_by_graph[h].push_back(ve); + } + } + for (auto &m : edges_by_graph) { + auto &h = m.first; + if (contains(good, h)) { + continue; + } + reduceGraphEquivalences(*h, cc); + if (isImplementableNFA(*h, &rm, cc)) { + good.insert(h); + continue; + } + + const auto &edges = m.second; + + if (tryForEarlyDfa(*h, cc) && + doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) { + continue; + } + + DEBUG_PRINTF("eek\n"); + if (!allow_changes) { + return false; + } + + if (splitForImplementability(vg, *h, edges, cc)) { + added_count++; + if (added_count > MAX_IMPLEMENTABLE_SPLITS) { + DEBUG_PRINTF("added_count hit limit\n"); + return false; + } + changed = true; + continue; + } + + return false; + } + + assert(added_count <= MAX_IMPLEMENTABLE_SPLITS); + + if (changed) { + removeRedundantLiterals(vg, cc); + pruneUseless(vg); + need_to_recalc = true; + } + } while (changed); + + if (need_to_recalc) { + renumber_vertices(vg); + calcVertexOffsets(vg); + } + + DEBUG_PRINTF("ok!\n"); + return true; +} + +static +RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, + const CompileContext &cc) { + assert(!can_never_match(h)); + + RoseInGraph vg = populateTrivialGraph(h); + + if (!cc.grey.allowViolet) { + return vg; + } + + /* Avoid running the Violet analysis at all on graphs with no vertices with + * small reach, since we will not be able to extract any literals. 
*/ + if (!hasNarrowReachVertex(h)) { + DEBUG_PRINTF("fail, no vertices with small reach\n"); + return vg; + } + + DEBUG_PRINTF("hello world\n"); + + /* Step 1: avoid outfixes as we always have to run them. */ + avoidOutfixes(vg, last_chance, cc); + + if (num_vertices(vg) <= 2) { + return vg; /* unable to transform pattern */ + } + + removeRedundantPrefixes(vg); + dumpPreRoseGraph(vg, cc.grey, "pre_prefix_rose.dot"); + + /* Step 2: avoid non-transient prefixes (esp in streaming mode) */ + findBetterPrefixes(vg, cc); + + dumpPreRoseGraph(vg, cc.grey, "post_prefix_rose.dot"); + + extractStrongLiterals(vg, cc); + dumpPreRoseGraph(vg, cc.grey, "post_extract_rose.dot"); + improveWeakInfixes(vg, cc); + dumpPreRoseGraph(vg, cc.grey, "post_infix_rose.dot"); + + /* Step 3: avoid output exposed engines if there is a strong trailing + literal) */ + avoidSuffixes(vg, cc); + + /* Step 4: look for infixes/suffixes with leading .*literals + * This can reduce the amount of work a heavily picked literal has to do and + * reduce the amount of state used as .* is handled internally to rose. */ + lookForDoubleCut(vg, cc); + + if (cc.streaming) { + lookForCleanEarlySplits(vg, cc); + decomposeLiteralChains(vg, cc); + } + + rehomeEodSuffixes(vg); + removeRedundantLiterals(vg, cc); + + pruneUseless(vg); + dumpPreRoseGraph(vg, cc.grey); + renumber_vertices(vg); + calcVertexOffsets(vg); + + return vg; +} + +bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, + const CompileContext &cc) { + auto vg = doInitialVioletTransform(h, last_chance, cc); + if (num_vertices(vg) <= 2) { + return false; + } + + /* Step 5: avoid unimplementable, or overly large engines if possible */ + if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { + return false; + } + dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot"); + + /* Step 6: send to rose */ + bool rv = rose.addRose(vg, prefilter); + DEBUG_PRINTF("violet: %s\n", rv ? 
"success" : "fail"); + return rv; +} + +bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, + const CompileContext &cc) { + auto vg = doInitialVioletTransform(h, true, cc); + if (num_vertices(vg) <= 2) { + return false; + } + + bool rv = roseCheckRose(vg, prefilter, rm, cc); + DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); + return rv; +} + +} diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_violet.h b/contrib/libs/hyperscan/src/nfagraph/ng_violet.h index 3fe57dbfaa..5158c43a08 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_violet.h +++ b/contrib/libs/hyperscan/src/nfagraph/ng_violet.h @@ -1,65 +1,65 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Violet method of rose construction from NGHolder. - */ - -#ifndef NG_VIOLET_H -#define NG_VIOLET_H - -#include "ue2common.h" - -namespace ue2 { - -class NGHolder; -class RoseBuild; - -struct CompileContext; -class ReportManager; -struct RoseInGraph; - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * Returns true if successful. */ -bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, - bool last_chance, const ReportManager &rm, - const CompileContext &cc); - -bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, - bool final_chance, const ReportManager &rm, - const CompileContext &cc); - -/** \brief True if the pattern in \a h is consumable by Rose/Violet. This - * function may be conservative (return false even if supported) for - * efficiency. */ -bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Violet method of rose construction from NGHolder. + */ + +#ifndef NG_VIOLET_H +#define NG_VIOLET_H + +#include "ue2common.h" + +namespace ue2 { + +class NGHolder; +class RoseBuild; + +struct CompileContext; +class ReportManager; +struct RoseInGraph; + +/** \brief Attempt to consume the entire pattern in graph \a h with Rose. + * Returns true if successful. */ +bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, + const CompileContext &cc); + +bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, + bool final_chance, const ReportManager &rm, + const CompileContext &cc); + +/** \brief True if the pattern in \a h is consumable by Rose/Violet. 
This + * function may be conservative (return false even if supported) for + * efficiency. */ +bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, + const CompileContext &cc); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp index 219241ca55..f33d5d5689 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "ue2common.h" #include "util/depth.h" #include "util/graph.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include <deque> #include <vector> @@ -59,18 +59,18 @@ namespace { struct SpecialEdgeFilter { SpecialEdgeFilter() {} explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} - SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) + SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) : h(&h_in), single_top(true), top(top_in) {} bool operator()(const NFAEdge &e) const { - NFAVertex u = source(e, *h); - NFAVertex v = target(e, *h); - if ((is_any_start(u, *h) && is_any_start(v, *h)) || - (is_any_accept(u, *h) && is_any_accept(v, *h))) { + NFAVertex u = source(e, *h); + NFAVertex v = target(e, *h); + if ((is_any_start(u, *h) && is_any_start(v, *h)) || + (is_any_accept(u, *h) && is_any_accept(v, *h))) { return false; } if (single_top) { - if (u == h->start && !contains((*h)[e].tops, top)) { + if (u == h->start && !contains((*h)[e].tops, top)) { return false; } if (u == h->startDs) { @@ -95,7 +95,7 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, return depth::unreachable(); } - boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, 
filter); + boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -107,10 +107,10 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, // Since we are interested in the single-source shortest paths on a graph // with the same weight on every edge, using BFS will be faster than // Dijkstra here. - breadth_first_search(g, src, + breadth_first_search(g, src, visitor(make_bfs_visitor(record_distances( make_iterator_property_map(distance.begin(), index_map), - boost::on_tree_edge())))); + boost::on_tree_edge())))); DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n", distance.at(NODE_ACCEPT).str().c_str(), @@ -130,7 +130,7 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, static depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, NFAVertex src) { - if (isLeafNode(src, h)) { + if (isLeafNode(src, h)) { return depth::unreachable(); } @@ -139,31 +139,31 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, return depth::infinity(); } - boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter); + boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); vector<int> distance(num); - auto colors = make_small_color_map(h); + auto colors = make_small_color_map(h); auto index_map = get(&NFAGraphVertexProps::index, g); // DAG shortest paths with negative edge weights. 
- dag_shortest_paths(g, src, + dag_shortest_paths(g, src, distance_map(make_iterator_property_map(distance.begin(), index_map)) .weight_map(boost::make_constant_property<NFAEdge>(-1)) - .color_map(colors)); + .color_map(colors)); depth acceptDepth, acceptEodDepth; - if (get(colors, h.accept) == small_color::white) { + if (get(colors, h.accept) == small_color::white) { acceptDepth = depth::unreachable(); } else { - acceptDepth = depth(-1 * distance.at(NODE_ACCEPT)); + acceptDepth = depth(-1 * distance.at(NODE_ACCEPT)); } - if (get(colors, h.acceptEod) == small_color::white) { + if (get(colors, h.acceptEod) == small_color::white) { acceptEodDepth = depth::unreachable(); } else { - acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD)); + acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD)); } depth d; |