aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfagraph/ng.cpp
diff options
context:
space:
mode:
authorIvan Blinkov <ivan@blinkov.ru>2022-02-10 16:47:10 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:10 +0300
commit1aeb9a455974457866f78722ad98114bafc84e8a (patch)
treee4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfagraph/ng.cpp
parentbd5ef432f5cfb1e18851381329d94665a4c22470 (diff)
downloadydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng.cpp')
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng.cpp322
1 files changed, 161 insertions, 161 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.cpp b/contrib/libs/hyperscan/src/nfagraph/ng.cpp
index 8dccf9863d..2d987102af 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng.cpp
@@ -27,10 +27,10 @@
*/
/** \file
- * \brief NG and graph handling.
+ * \brief NG and graph handling.
*/
-#include "ng.h"
-
+#include "ng.h"
+
#include "grey.h"
#include "ng_anchored_acyclic.h"
#include "ng_anchored_dots.h"
@@ -42,7 +42,7 @@
#include "ng_equivalence.h"
#include "ng_extparam.h"
#include "ng_fixed_width.h"
-#include "ng_fuzzy.h"
+#include "ng_fuzzy.h"
#include "ng_haig.h"
#include "ng_literal_component.h"
#include "ng_literal_decorated.h"
@@ -58,14 +58,14 @@
#include "ng_small_literal_set.h"
#include "ng_som.h"
#include "ng_vacuous.h"
-#include "ng_violet.h"
+#include "ng_violet.h"
#include "ng_utf8.h"
#include "ng_util.h"
#include "ng_width.h"
#include "ue2common.h"
-#include "compiler/compiler.h"
+#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
-#include "rose/rose_build.h"
+#include "rose/rose_build.h"
#include "smallwrite/smallwrite_build.h"
#include "util/compile_error.h"
#include "util/container.h"
@@ -78,15 +78,15 @@ using namespace std;
namespace ue2 {
-NG::NG(const CompileContext &in_cc, size_t num_patterns,
- unsigned in_somPrecision)
+NG::NG(const CompileContext &in_cc, size_t num_patterns,
+ unsigned in_somPrecision)
: maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
minWidth(depth::infinity()),
rm(in_cc.grey),
ssm(in_somPrecision),
cc(in_cc),
- smwr(makeSmallWriteBuilder(num_patterns, rm, cc)),
- rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) {
+ smwr(makeSmallWriteBuilder(num_patterns, rm, cc)),
+ rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) {
}
NG::~NG() {
@@ -102,16 +102,16 @@ NG::~NG() {
* \throw CompileError if SOM cannot be supported for the component.
*/
static
-bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
+bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) {
DEBUG_PRINTF("doing som\n");
- dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
+ dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
assert(hasCorrectlyNumberedVertices(g));
- assert(allMatchStatesHaveReports(g));
+ assert(allMatchStatesHaveReports(g));
// First, we try the "SOM chain" support in ng_som.cpp.
- sombe_rv rv = doSom(ng, g, expr, comp_id, som);
+ sombe_rv rv = doSom(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) {
return false;
} else if (rv == SOMBE_HANDLED_ALL) {
@@ -120,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
assert(rv == SOMBE_FAIL);
/* Next, Sombe style approaches */
- rv = doSomWithHaig(ng, g, expr, comp_id, som);
+ rv = doSomWithHaig(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) {
return false;
} else if (rv == SOMBE_HANDLED_ALL) {
@@ -134,8 +134,8 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
vector<vector<CharReach> > triggers; /* empty for outfix */
assert(g.kind == NFA_OUTFIX);
- dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
- makeReportsSomPass(ng.rm, g);
+ dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
+ makeReportsSomPass(ng.rm, g);
auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
ng.cc.grey);
if (haig) {
@@ -147,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
/* Our various strategies for supporting SOM for this pattern have failed.
* Provide a generic pattern not supported/too large return value as it is
* unclear what the meaning of a specific SOM error would be */
- throw CompileError(expr.index, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
assert(0); // unreachable
return false;
@@ -173,7 +173,7 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
changed |= removeEdgeRedundancy(g, som, cc);
changed |= reduceGraphEquivalences(g, cc);
changed |= removeRedundancy(g, som);
- changed |= removeCyclicPathRedundancy(g);
+ changed |= removeCyclicPathRedundancy(g);
if (!changed) {
DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
break;
@@ -202,35 +202,35 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
}
static
-bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
- const som_type som, const u32 comp_id) {
+bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
+ const som_type som, const u32 comp_id) {
const CompileContext &cc = ng.cc;
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
- expr.index, comp_id, num_vertices(g), num_edges(g));
+ expr.index, comp_id, num_vertices(g), num_edges(g));
- dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
-
- assert(allMatchStatesHaveReports(g));
+ dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
- reduceExtendedParams(g, ng.rm, som);
- reduceGraph(g, som, expr.utf8, cc);
+ assert(allMatchStatesHaveReports(g));
- dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
+ reduceExtendedParams(g, ng.rm, som);
+ reduceGraph(g, som, expr.utf8, cc);
+ dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
+
// There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) {
removeRegionRedundancy(g, som);
}
- // We might be done at this point: if we've run out of vertices, we can
- // stop processing.
- if (num_vertices(g) == N_SPECIALS) {
- DEBUG_PRINTF("all vertices claimed\n");
- return true;
- }
-
+ // We might be done at this point: if we've run out of vertices, we can
+ // stop processing.
+ if (num_vertices(g) == N_SPECIALS) {
+ DEBUG_PRINTF("all vertices claimed\n");
+ return true;
+ }
+
// "Short Exhaustible Passthrough" patterns always become outfixes.
if (!som && isSEP(g, ng.rm, cc.grey)) {
DEBUG_PRINTF("graph is SEP\n");
@@ -241,13 +241,13 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
// Start Of Match handling.
if (som) {
- if (addComponentSom(ng, g, expr, som, comp_id)) {
+ if (addComponentSom(ng, g, expr, som, comp_id)) {
return true;
}
}
- assert(allMatchStatesHaveReports(g));
-
+ assert(allMatchStatesHaveReports(g));
+
if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
return true;
}
@@ -261,11 +261,11 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
return true;
}
- if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
+ if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
return true;
}
- if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
+ if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
return true;
}
@@ -278,7 +278,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
return true;
}
- if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
+ if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
return true;
}
@@ -293,7 +293,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
// Returns true if all components have been added.
static
-bool processComponents(NG &ng, ExpressionInfo &expr,
+bool processComponents(NG &ng, ExpressionInfo &expr,
deque<unique_ptr<NGHolder>> &g_comp,
const som_type som) {
const u32 num_components = g_comp.size();
@@ -303,7 +303,7 @@ bool processComponents(NG &ng, ExpressionInfo &expr,
if (!g_comp[i]) {
continue;
}
- if (addComponent(ng, *g_comp[i], expr, som, i)) {
+ if (addComponent(ng, *g_comp[i], expr, som, i)) {
g_comp[i].reset();
continue;
}
@@ -323,70 +323,70 @@ bool processComponents(NG &ng, ExpressionInfo &expr,
return false;
}
-bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
- assert(g_ptr);
- NGHolder &g = *g_ptr;
-
+bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
+ assert(g_ptr);
+ NGHolder &g = *g_ptr;
+
// remove reports that aren't on vertices connected to accept.
- clearReports(g);
+ clearReports(g);
- som_type som = expr.som;
- if (som && isVacuous(g)) {
- throw CompileError(expr.index, "Start of match is not "
+ som_type som = expr.som;
+ if (som && isVacuous(g)) {
+ throw CompileError(expr.index, "Start of match is not "
"currently supported for patterns which match an "
"empty buffer.");
}
- dumpDotWrapper(g, expr, "01_initial", cc.grey);
- assert(allMatchStatesHaveReports(g));
+ dumpDotWrapper(g, expr, "01_initial", cc.grey);
+ assert(allMatchStatesHaveReports(g));
/* ensure utf8 starts at cp boundary */
- ensureCodePointStart(rm, g, expr);
-
- if (can_never_match(g)) {
- throw CompileError(expr.index, "Pattern can never match.");
- }
-
- bool hamming = expr.hamm_distance > 0;
- u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
-
- DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false");
-
- // validate graph's suitability for fuzzing before resolving asserts
- validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey);
-
- resolveAsserts(rm, g, expr);
- dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
- assert(allMatchStatesHaveReports(g));
-
- make_fuzzy(g, e_dist, hamming, cc.grey);
- dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
-
- pruneUseless(g);
- pruneEmptyVertices(g);
-
- if (can_never_match(g)) {
- throw CompileError(expr.index, "Pattern can never match.");
- }
-
- optimiseVirtualStarts(g); /* good for som */
-
- propagateExtendedParams(g, expr, rm);
- reduceExtendedParams(g, rm, som);
-
- // We may have removed all the edges to accept, in which case this
- // expression cannot match.
- if (can_never_match(g)) {
- throw CompileError(expr.index, "Extended parameter constraints can not "
- "be satisfied for any match from this "
- "expression.");
- }
-
- if (any_of_in(all_reports(g), [&](ReportID id) {
- return rm.getReport(id).minLength;
- })) {
- // We have at least one report with a minimum length constraint, which
- // we currently use SOM to satisfy.
+ ensureCodePointStart(rm, g, expr);
+
+ if (can_never_match(g)) {
+ throw CompileError(expr.index, "Pattern can never match.");
+ }
+
+ bool hamming = expr.hamm_distance > 0;
+ u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
+
+ DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false");
+
+ // validate graph's suitability for fuzzing before resolving asserts
+ validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey);
+
+ resolveAsserts(rm, g, expr);
+ dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
+ assert(allMatchStatesHaveReports(g));
+
+ make_fuzzy(g, e_dist, hamming, cc.grey);
+ dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
+
+ pruneUseless(g);
+ pruneEmptyVertices(g);
+
+ if (can_never_match(g)) {
+ throw CompileError(expr.index, "Pattern can never match.");
+ }
+
+ optimiseVirtualStarts(g); /* good for som */
+
+ propagateExtendedParams(g, expr, rm);
+ reduceExtendedParams(g, rm, som);
+
+ // We may have removed all the edges to accept, in which case this
+ // expression cannot match.
+ if (can_never_match(g)) {
+ throw CompileError(expr.index, "Extended parameter constraints can not "
+ "be satisfied for any match from this "
+ "expression.");
+ }
+
+ if (any_of_in(all_reports(g), [&](ReportID id) {
+ return rm.getReport(id).minLength;
+ })) {
+ // We have at least one report with a minimum length constraint, which
+ // we currently use SOM to satisfy.
som = SOM_LEFT;
ssm.somPrecision(8);
}
@@ -398,104 +398,104 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
// first, we can perform graph work that can be done on an individual
// expression basis.
- if (expr.utf8) {
- relaxForbiddenUtf8(g, expr);
+ if (expr.utf8) {
+ relaxForbiddenUtf8(g, expr);
}
- if (all_of_in(all_reports(g), [&](ReportID id) {
- const auto &report = rm.getReport(id);
- return report.ekey != INVALID_EKEY && !report.minLength &&
- !report.minOffset;
- })) {
+ if (all_of_in(all_reports(g), [&](ReportID id) {
+ const auto &report = rm.getReport(id);
+ return report.ekey != INVALID_EKEY && !report.minLength &&
+ !report.minOffset;
+ })) {
// In highlander mode: if we don't have constraints on our reports that
// may prevent us accepting our first match (i.e. extended params) we
// can prune the other out-edges of all vertices connected to accept.
- // TODO: shift the report checking down into pruneHighlanderAccepts()
- // to allow us to handle the parts we can in mixed cases.
- pruneHighlanderAccepts(g, rm);
+ // TODO: shift the report checking down into pruneHighlanderAccepts()
+ // to allow us to handle the parts we can in mixed cases.
+ pruneHighlanderAccepts(g, rm);
}
- dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
+ dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
// If we're a vacuous pattern, we can handle this early.
- if (splitOffVacuous(boundary, rm, g, expr)) {
+ if (splitOffVacuous(boundary, rm, g, expr)) {
DEBUG_PRINTF("split off vacuous\n");
}
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
- if (num_vertices(g) == N_SPECIALS) {
+ if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
return true;
}
// Now that vacuous edges have been removed, update the min width exclusive
// of boundary reports.
- minWidth = min(minWidth, findMinWidth(g));
+ minWidth = min(minWidth, findMinWidth(g));
// Add the pattern to the small write builder.
- smwr->add(g, expr);
+ smwr->add(g, expr);
if (!som) {
- removeSiblingsOfStartDotStar(g);
+ removeSiblingsOfStartDotStar(g);
}
- dumpDotWrapper(g, expr, "03_early", cc.grey);
-
- // Perform a reduction pass to merge sibling character classes together.
- if (cc.grey.performGraphSimplification) {
- removeRedundancy(g, som);
- prunePathsRedundantWithSuccessorOfCyclics(g, som);
- }
-
- dumpDotWrapper(g, expr, "04_reduced", cc.grey);
+ dumpDotWrapper(g, expr, "03_early", cc.grey);
+ // Perform a reduction pass to merge sibling character classes together.
+ if (cc.grey.performGraphSimplification) {
+ removeRedundancy(g, som);
+ prunePathsRedundantWithSuccessorOfCyclics(g, som);
+ }
+
+ dumpDotWrapper(g, expr, "04_reduced", cc.grey);
+
// If we've got some literals that span the graph from start to accept, we
// can split them off into Rose from here.
if (!som) {
- if (splitOffLiterals(*this, g)) {
+ if (splitOffLiterals(*this, g)) {
DEBUG_PRINTF("some vertices claimed by literals\n");
}
}
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
- if (num_vertices(g) == N_SPECIALS) {
+ if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed before calc components\n");
return true;
}
- // Split the graph into a set of connected components and process those.
- // Note: this invalidates g_ptr.
+ // Split the graph into a set of connected components and process those.
+ // Note: this invalidates g_ptr.
- auto g_comp = calcComponents(std::move(g_ptr), cc.grey);
+ auto g_comp = calcComponents(std::move(g_ptr), cc.grey);
assert(!g_comp.empty());
if (!som) {
- for (auto &gc : g_comp) {
- assert(gc);
- reformLeadingDots(*gc);
+ for (auto &gc : g_comp) {
+ assert(gc);
+ reformLeadingDots(*gc);
}
- recalcComponents(g_comp, cc.grey);
+ recalcComponents(g_comp, cc.grey);
}
- if (processComponents(*this, expr, g_comp, som)) {
+ if (processComponents(*this, expr, g_comp, som)) {
return true;
}
// If we're in prefiltering mode, we can run the prefilter reductions and
// have another shot at accepting the graph.
- if (cc.grey.prefilterReductions && expr.prefilter) {
- for (auto &gc : g_comp) {
- if (!gc) {
+ if (cc.grey.prefilterReductions && expr.prefilter) {
+ for (auto &gc : g_comp) {
+ if (!gc) {
continue;
}
- prefilterReductions(*gc, cc);
+ prefilterReductions(*gc, cc);
}
- if (processComponents(*this, expr, g_comp, som)) {
+ if (processComponents(*this, expr, g_comp, som)) {
return true;
}
}
@@ -505,7 +505,7 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
if (g_comp[i]) {
DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
i, num_vertices(*g_comp[i]));
- throw CompileError(expr.index, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
}
@@ -514,60 +514,60 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
}
/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
-bool NG::addHolder(NGHolder &g) {
- DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
- assert(allMatchStatesHaveReports(g));
- assert(hasCorrectlyNumberedVertices(g));
+bool NG::addHolder(NGHolder &g) {
+ DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
+ assert(allMatchStatesHaveReports(g));
+ assert(hasCorrectlyNumberedVertices(g));
/* We don't update the global minWidth here as we care about the min width
* of the whole pattern - not a just a prefix of it. */
bool prefilter = false;
- //dumpDotComp(comp, g, *this, 20, "prefix_init");
+ //dumpDotComp(comp, g, *this, 20, "prefix_init");
som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
themselves track som */
bool utf8 = false; // handling done earlier
- reduceGraph(g, som, utf8, cc);
+ reduceGraph(g, som, utf8, cc);
// There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) {
- removeRegionRedundancy(g, som);
+ removeRegionRedundancy(g, som);
}
// "Short Exhaustible Passthrough" patterns always become outfixes.
- if (isSEP(g, rm, cc.grey)) {
+ if (isSEP(g, rm, cc.grey)) {
DEBUG_PRINTF("graph is SEP\n");
- if (rose->addOutfix(g)) {
+ if (rose->addOutfix(g)) {
return true;
}
}
- if (splitOffAnchoredAcyclic(*rose, g, cc)) {
+ if (splitOffAnchoredAcyclic(*rose, g, cc)) {
return true;
}
- if (handleSmallLiteralSets(*rose, g, cc)
- || handleFixedWidth(*rose, g, cc.grey)) {
+ if (handleSmallLiteralSets(*rose, g, cc)
+ || handleFixedWidth(*rose, g, cc.grey)) {
return true;
}
- if (handleDecoratedLiterals(*rose, g, cc)) {
+ if (handleDecoratedLiterals(*rose, g, cc)) {
return true;
}
- if (doViolet(*rose, g, prefilter, false, rm, cc)) {
+ if (doViolet(*rose, g, prefilter, false, rm, cc)) {
return true;
}
- if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
+ if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
return true;
}
- if (doViolet(*rose, g, prefilter, true, rm, cc)) {
+ if (doViolet(*rose, g, prefilter, true, rm, cc)) {
return true;
}
DEBUG_PRINTF("trying for outfix\n");
- if (rose->addOutfix(g)) {
+ if (rose->addOutfix(g)) {
DEBUG_PRINTF("ok\n");
return true;
}
@@ -617,8 +617,8 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
minWidth = min(minWidth, depth(literal.length()));
- /* inform small write handler about this literal */
- smwr->add(literal, id);
+ /* inform small write handler about this literal */
+ smwr->add(literal, id);
return true;
}