diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfagraph/ng.cpp | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfagraph/ng.cpp | 322 |
1 files changed, 161 insertions, 161 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.cpp b/contrib/libs/hyperscan/src/nfagraph/ng.cpp index 8dccf9863d..2d987102af 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng.cpp @@ -27,10 +27,10 @@ */ /** \file - * \brief NG and graph handling. + * \brief NG and graph handling. */ -#include "ng.h" - +#include "ng.h" + #include "grey.h" #include "ng_anchored_acyclic.h" #include "ng_anchored_dots.h" @@ -42,7 +42,7 @@ #include "ng_equivalence.h" #include "ng_extparam.h" #include "ng_fixed_width.h" -#include "ng_fuzzy.h" +#include "ng_fuzzy.h" #include "ng_haig.h" #include "ng_literal_component.h" #include "ng_literal_decorated.h" @@ -58,14 +58,14 @@ #include "ng_small_literal_set.h" #include "ng_som.h" #include "ng_vacuous.h" -#include "ng_violet.h" +#include "ng_violet.h" #include "ng_utf8.h" #include "ng_util.h" #include "ng_width.h" #include "ue2common.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" -#include "rose/rose_build.h" +#include "rose/rose_build.h" #include "smallwrite/smallwrite_build.h" #include "util/compile_error.h" #include "util/container.h" @@ -78,15 +78,15 @@ using namespace std; namespace ue2 { -NG::NG(const CompileContext &in_cc, size_t num_patterns, - unsigned in_somPrecision) +NG::NG(const CompileContext &in_cc, size_t num_patterns, + unsigned in_somPrecision) : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength), minWidth(depth::infinity()), rm(in_cc.grey), ssm(in_somPrecision), cc(in_cc), - smwr(makeSmallWriteBuilder(num_patterns, rm, cc)), - rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) { + smwr(makeSmallWriteBuilder(num_patterns, rm, cc)), + rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) { } NG::~NG() { @@ -102,16 +102,16 @@ NG::~NG() { * \throw CompileError if SOM cannot be supported for the component. */ static -bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, +bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, const som_type som, const u32 comp_id) { DEBUG_PRINTF("doing som\n"); - dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey); + dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey); assert(hasCorrectlyNumberedVertices(g)); - assert(allMatchStatesHaveReports(g)); + assert(allMatchStatesHaveReports(g)); // First, we try the "SOM chain" support in ng_som.cpp. - sombe_rv rv = doSom(ng, g, expr, comp_id, som); + sombe_rv rv = doSom(ng, g, expr, comp_id, som); if (rv == SOMBE_HANDLED_INTERNAL) { return false; } else if (rv == SOMBE_HANDLED_ALL) { @@ -120,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, assert(rv == SOMBE_FAIL); /* Next, Sombe style approaches */ - rv = doSomWithHaig(ng, g, expr, comp_id, som); + rv = doSomWithHaig(ng, g, expr, comp_id, som); if (rv == SOMBE_HANDLED_INTERNAL) { return false; } else if (rv == SOMBE_HANDLED_ALL) { @@ -134,8 +134,8 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, vector<vector<CharReach> > triggers; /* empty for outfix */ assert(g.kind == NFA_OUTFIX); - dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey); - makeReportsSomPass(ng.rm, g); + dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey); + makeReportsSomPass(ng.rm, g); auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, ng.cc.grey); if (haig) { @@ -147,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, /* Our various strategies for supporting SOM for this pattern have failed. * Provide a generic pattern not supported/too large return value as it is * unclear what the meaning of a specific SOM error would be */ - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); assert(0); // unreachable return false; @@ -173,7 +173,7 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, changed |= removeEdgeRedundancy(g, som, cc); changed |= reduceGraphEquivalences(g, cc); changed |= removeRedundancy(g, som); - changed |= removeCyclicPathRedundancy(g); + changed |= removeCyclicPathRedundancy(g); if (!changed) { DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass); break; @@ -202,35 +202,35 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, } static -bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, - const som_type som, const u32 comp_id) { +bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, + const som_type som, const u32 comp_id) { const CompileContext &cc = ng.cc; - assert(hasCorrectlyNumberedVertices(g)); + assert(hasCorrectlyNumberedVertices(g)); DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", - expr.index, comp_id, num_vertices(g), num_edges(g)); + expr.index, comp_id, num_vertices(g), num_edges(g)); - dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - - assert(allMatchStatesHaveReports(g)); + dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - reduceExtendedParams(g, ng.rm, som); - reduceGraph(g, som, expr.utf8, cc); + assert(allMatchStatesHaveReports(g)); - dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); + reduceExtendedParams(g, ng.rm, som); + reduceGraph(g, som, expr.utf8, cc); + dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); + // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { removeRegionRedundancy(g, som); } - // We might be done at this point: if we've run out of vertices, we can - // stop processing. - if (num_vertices(g) == N_SPECIALS) { - DEBUG_PRINTF("all vertices claimed\n"); - return true; - } - + // We might be done at this point: if we've run out of vertices, we can + // stop processing. + if (num_vertices(g) == N_SPECIALS) { + DEBUG_PRINTF("all vertices claimed\n"); + return true; + } + // "Short Exhaustible Passthrough" patterns always become outfixes. if (!som && isSEP(g, ng.rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); @@ -241,13 +241,13 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, // Start Of Match handling. if (som) { - if (addComponentSom(ng, g, expr, som, comp_id)) { + if (addComponentSom(ng, g, expr, som, comp_id)) { return true; } } - assert(allMatchStatesHaveReports(g)); - + assert(allMatchStatesHaveReports(g)); + if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { return true; } @@ -261,11 +261,11 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, return true; } - if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { return true; } - if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { + if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { return true; } @@ -278,7 +278,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, return true; } - if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { return true; } @@ -293,7 +293,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, // Returns true if all components have been added. static -bool processComponents(NG &ng, ExpressionInfo &expr, +bool processComponents(NG &ng, ExpressionInfo &expr, deque<unique_ptr<NGHolder>> &g_comp, const som_type som) { const u32 num_components = g_comp.size(); @@ -303,7 +303,7 @@ bool processComponents(NG &ng, ExpressionInfo &expr, if (!g_comp[i]) { continue; } - if (addComponent(ng, *g_comp[i], expr, som, i)) { + if (addComponent(ng, *g_comp[i], expr, som, i)) { g_comp[i].reset(); continue; } @@ -323,70 +323,70 @@ bool processComponents(NG &ng, ExpressionInfo &expr, return false; } -bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { - assert(g_ptr); - NGHolder &g = *g_ptr; - +bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { + assert(g_ptr); + NGHolder &g = *g_ptr; + // remove reports that aren't on vertices connected to accept. - clearReports(g); + clearReports(g); - som_type som = expr.som; - if (som && isVacuous(g)) { - throw CompileError(expr.index, "Start of match is not " + som_type som = expr.som; + if (som && isVacuous(g)) { + throw CompileError(expr.index, "Start of match is not " "currently supported for patterns which match an " "empty buffer."); } - dumpDotWrapper(g, expr, "01_initial", cc.grey); - assert(allMatchStatesHaveReports(g)); + dumpDotWrapper(g, expr, "01_initial", cc.grey); + assert(allMatchStatesHaveReports(g)); /* ensure utf8 starts at cp boundary */ - ensureCodePointStart(rm, g, expr); - - if (can_never_match(g)) { - throw CompileError(expr.index, "Pattern can never match."); - } - - bool hamming = expr.hamm_distance > 0; - u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance; - - DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false"); - - // validate graph's suitability for fuzzing before resolving asserts - validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); - - resolveAsserts(rm, g, expr); - dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey); - assert(allMatchStatesHaveReports(g)); - - make_fuzzy(g, e_dist, hamming, cc.grey); - dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey); - - pruneUseless(g); - pruneEmptyVertices(g); - - if (can_never_match(g)) { - throw CompileError(expr.index, "Pattern can never match."); - } - - optimiseVirtualStarts(g); /* good for som */ - - propagateExtendedParams(g, expr, rm); - reduceExtendedParams(g, rm, som); - - // We may have removed all the edges to accept, in which case this - // expression cannot match. - if (can_never_match(g)) { - throw CompileError(expr.index, "Extended parameter constraints can not " - "be satisfied for any match from this " - "expression."); - } - - if (any_of_in(all_reports(g), [&](ReportID id) { - return rm.getReport(id).minLength; - })) { - // We have at least one report with a minimum length constraint, which - // we currently use SOM to satisfy. + ensureCodePointStart(rm, g, expr); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + bool hamming = expr.hamm_distance > 0; + u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance; + + DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false"); + + // validate graph's suitability for fuzzing before resolving asserts + validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); + + resolveAsserts(rm, g, expr); + dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey); + assert(allMatchStatesHaveReports(g)); + + make_fuzzy(g, e_dist, hamming, cc.grey); + dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey); + + pruneUseless(g); + pruneEmptyVertices(g); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + optimiseVirtualStarts(g); /* good for som */ + + propagateExtendedParams(g, expr, rm); + reduceExtendedParams(g, rm, som); + + // We may have removed all the edges to accept, in which case this + // expression cannot match. + if (can_never_match(g)) { + throw CompileError(expr.index, "Extended parameter constraints can not " + "be satisfied for any match from this " + "expression."); + } + + if (any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength; + })) { + // We have at least one report with a minimum length constraint, which + // we currently use SOM to satisfy. som = SOM_LEFT; ssm.somPrecision(8); } @@ -398,104 +398,104 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { // first, we can perform graph work that can be done on an individual // expression basis. - if (expr.utf8) { - relaxForbiddenUtf8(g, expr); + if (expr.utf8) { + relaxForbiddenUtf8(g, expr); } - if (all_of_in(all_reports(g), [&](ReportID id) { - const auto &report = rm.getReport(id); - return report.ekey != INVALID_EKEY && !report.minLength && - !report.minOffset; - })) { + if (all_of_in(all_reports(g), [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.ekey != INVALID_EKEY && !report.minLength && + !report.minOffset; + })) { // In highlander mode: if we don't have constraints on our reports that // may prevent us accepting our first match (i.e. extended params) we // can prune the other out-edges of all vertices connected to accept. - // TODO: shift the report checking down into pruneHighlanderAccepts() - // to allow us to handle the parts we can in mixed cases. - pruneHighlanderAccepts(g, rm); + // TODO: shift the report checking down into pruneHighlanderAccepts() + // to allow us to handle the parts we can in mixed cases. + pruneHighlanderAccepts(g, rm); } - dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey); + dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey); // If we're a vacuous pattern, we can handle this early. - if (splitOffVacuous(boundary, rm, g, expr)) { + if (splitOffVacuous(boundary, rm, g, expr)) { DEBUG_PRINTF("split off vacuous\n"); } // We might be done at this point: if we've run out of vertices, we can // stop processing. - if (num_vertices(g) == N_SPECIALS) { + if (num_vertices(g) == N_SPECIALS) { DEBUG_PRINTF("all vertices claimed by vacuous handling\n"); return true; } // Now that vacuous edges have been removed, update the min width exclusive // of boundary reports. - minWidth = min(minWidth, findMinWidth(g)); + minWidth = min(minWidth, findMinWidth(g)); // Add the pattern to the small write builder. - smwr->add(g, expr); + smwr->add(g, expr); if (!som) { - removeSiblingsOfStartDotStar(g); + removeSiblingsOfStartDotStar(g); } - dumpDotWrapper(g, expr, "03_early", cc.grey); - - // Perform a reduction pass to merge sibling character classes together. - if (cc.grey.performGraphSimplification) { - removeRedundancy(g, som); - prunePathsRedundantWithSuccessorOfCyclics(g, som); - } - - dumpDotWrapper(g, expr, "04_reduced", cc.grey); + dumpDotWrapper(g, expr, "03_early", cc.grey); + // Perform a reduction pass to merge sibling character classes together. + if (cc.grey.performGraphSimplification) { + removeRedundancy(g, som); + prunePathsRedundantWithSuccessorOfCyclics(g, som); + } + + dumpDotWrapper(g, expr, "04_reduced", cc.grey); + // If we've got some literals that span the graph from start to accept, we // can split them off into Rose from here. if (!som) { - if (splitOffLiterals(*this, g)) { + if (splitOffLiterals(*this, g)) { DEBUG_PRINTF("some vertices claimed by literals\n"); } } // We might be done at this point: if we've run out of vertices, we can // stop processing. - if (num_vertices(g) == N_SPECIALS) { + if (num_vertices(g) == N_SPECIALS) { DEBUG_PRINTF("all vertices claimed before calc components\n"); return true; } - // Split the graph into a set of connected components and process those. - // Note: this invalidates g_ptr. + // Split the graph into a set of connected components and process those. + // Note: this invalidates g_ptr. - auto g_comp = calcComponents(std::move(g_ptr), cc.grey); + auto g_comp = calcComponents(std::move(g_ptr), cc.grey); assert(!g_comp.empty()); if (!som) { - for (auto &gc : g_comp) { - assert(gc); - reformLeadingDots(*gc); + for (auto &gc : g_comp) { + assert(gc); + reformLeadingDots(*gc); } - recalcComponents(g_comp, cc.grey); + recalcComponents(g_comp, cc.grey); } - if (processComponents(*this, expr, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } // If we're in prefiltering mode, we can run the prefilter reductions and // have another shot at accepting the graph. - if (cc.grey.prefilterReductions && expr.prefilter) { - for (auto &gc : g_comp) { - if (!gc) { + if (cc.grey.prefilterReductions && expr.prefilter) { + for (auto &gc : g_comp) { + if (!gc) { continue; } - prefilterReductions(*gc, cc); + prefilterReductions(*gc, cc); } - if (processComponents(*this, expr, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } } @@ -505,7 +505,7 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { if (g_comp[i]) { DEBUG_PRINTF("could not compile component %u with %zu vertices\n", i, num_vertices(*g_comp[i])); - throw CompileError(expr.index, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -514,60 +514,60 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) { } /** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */ -bool NG::addHolder(NGHolder &g) { - DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); - assert(allMatchStatesHaveReports(g)); - assert(hasCorrectlyNumberedVertices(g)); +bool NG::addHolder(NGHolder &g) { + DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); + assert(allMatchStatesHaveReports(g)); + assert(hasCorrectlyNumberedVertices(g)); /* We don't update the global minWidth here as we care about the min width * of the whole pattern - not a just a prefix of it. */ bool prefilter = false; - //dumpDotComp(comp, g, *this, 20, "prefix_init"); + //dumpDotComp(comp, g, *this, 20, "prefix_init"); som_type som = SOM_NONE; /* the prefixes created by the SOM code do not themselves track som */ bool utf8 = false; // handling done earlier - reduceGraph(g, som, utf8, cc); + reduceGraph(g, som, utf8, cc); // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { - removeRegionRedundancy(g, som); + removeRegionRedundancy(g, som); } // "Short Exhaustible Passthrough" patterns always become outfixes. - if (isSEP(g, rm, cc.grey)) { + if (isSEP(g, rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); - if (rose->addOutfix(g)) { + if (rose->addOutfix(g)) { return true; } } - if (splitOffAnchoredAcyclic(*rose, g, cc)) { + if (splitOffAnchoredAcyclic(*rose, g, cc)) { return true; } - if (handleSmallLiteralSets(*rose, g, cc) - || handleFixedWidth(*rose, g, cc.grey)) { + if (handleSmallLiteralSets(*rose, g, cc) + || handleFixedWidth(*rose, g, cc.grey)) { return true; } - if (handleDecoratedLiterals(*rose, g, cc)) { + if (handleDecoratedLiterals(*rose, g, cc)) { return true; } - if (doViolet(*rose, g, prefilter, false, rm, cc)) { + if (doViolet(*rose, g, prefilter, false, rm, cc)) { return true; } - if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { + if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { return true; } - if (doViolet(*rose, g, prefilter, true, rm, cc)) { + if (doViolet(*rose, g, prefilter, true, rm, cc)) { return true; } DEBUG_PRINTF("trying for outfix\n"); - if (rose->addOutfix(g)) { + if (rose->addOutfix(g)) { DEBUG_PRINTF("ok\n"); return true; } @@ -617,8 +617,8 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, minWidth = min(minWidth, depth(literal.length())); - /* inform small write handler about this literal */ - smwr->add(literal, id); + /* inform small write handler about this literal */ + smwr->add(literal, id); return true; } |