diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp | 814 |
1 files changed, 407 insertions, 407 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp index 6eb23113f3..4be5b73f77 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,22 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Propagate extended parameters to vertex reports and reduce graph if * possible. * * This code handles the propagation of the extension parameters specified by - * the user with the \ref hs_expr_ext structure into the reports on the graph's + * the user with the \ref hs_expr_ext structure into the reports on the graph's * vertices. * * There are also some analyses that prune edges that cannot contribute to a * match given these constraints, or transform the graph in order to make a * constraint implicit. */ - -#include "ng_extparam.h" - + +#include "ng_extparam.h" + #include "ng.h" #include "ng_depth.h" #include "ng_dump.h" @@ -51,7 +51,7 @@ #include "ng_width.h" #include "ng_util.h" #include "ue2common.h" -#include "compiler/compiler.h" +#include "compiler/compiler.h" #include "parser/position.h" #include "util/compile_context.h" #include "util/compile_error.h" @@ -69,28 +69,28 @@ namespace ue2 { static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000; static const u32 MAX_MINLENGTH_TO_CONVERT = 2000; -/** True if all the given reports have the same extparam bounds. */ -template<typename Container> -bool hasSameBounds(const Container &reports, const ReportManager &rm) { - assert(!reports.empty()); - - const auto &first = rm.getReport(*reports.begin()); - for (auto id : reports) { - const auto &report = rm.getReport(id); - if (report.minOffset != first.minOffset || - report.maxOffset != first.maxOffset || - report.minLength != first.minLength) { - return false; - } - } - - return true; -} - -/** - * \brief Find the (min, max) offset adjustment for the reports on a given - * vertex. - */ +/** True if all the given reports have the same extparam bounds. */ +template<typename Container> +bool hasSameBounds(const Container &reports, const ReportManager &rm) { + assert(!reports.empty()); + + const auto &first = rm.getReport(*reports.begin()); + for (auto id : reports) { + const auto &report = rm.getReport(id); + if (report.minOffset != first.minOffset || + report.maxOffset != first.maxOffset || + report.minLength != first.minLength) { + return false; + } + } + + return true; +} + +/** + * \brief Find the (min, max) offset adjustment for the reports on a given + * vertex. + */ static pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm, const NGHolder &g, NFAVertex v) { @@ -151,76 +151,76 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { return match_depths; } -template<typename Function> -void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen, - Function func) { +template<typename Function> +void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen, + Function func) { for (auto v : inv_adjacent_vertices_range(accept, g)) { if (v == g.accept) { - // Don't operate on accept: the accept->acceptEod edge is stylised. + // Don't operate on accept: the accept->acceptEod edge is stylised. assert(accept == g.acceptEod); - assert(g[v].reports.empty()); + assert(g[v].reports.empty()); continue; } - if (!seen.insert(v).second) { - continue; // We have already processed v. + if (!seen.insert(v).second) { + continue; // We have already processed v. } auto &reports = g[v].reports; - if (reports.empty()) { - continue; - } - decltype(g[v].reports) new_reports; - for (auto id : g[v].reports) { - new_reports.insert(func(v, id)); - } - reports = std::move(new_reports); - } -} - -/** - * Generic function for replacing all the reports in the graph. - * - * Pass this a function that takes a vertex and a ReportID returns another - * ReportID (or the same one) to replace it with. - */ -template<typename Function> -void replaceReports(NGHolder &g, Function func) { - flat_set<NFAVertex> seen; - replaceReports(g, g.accept, seen, func); - replaceReports(g, g.acceptEod, seen, func); -} - -/** \brief Replace the graph's reports with new reports that specify bounds. */ -static -void updateReportBounds(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr) { - DEBUG_PRINTF("updating report bounds\n"); - replaceReports(g, [&](NFAVertex, ReportID id) { - Report report = rm.getReport(id); // make a copy - assert(!report.hasBounds()); - - // Note that we need to cope with offset adjustment here. - - report.minOffset = expr.min_offset - report.offsetAdjust; - if (expr.max_offset == MAX_OFFSET) { - report.maxOffset = MAX_OFFSET; - } else { - report.maxOffset = expr.max_offset - report.offsetAdjust; - } - assert(report.maxOffset >= report.minOffset); - - report.minLength = expr.min_length; - if (expr.min_length && !expr.som) { - report.quashSom = true; + if (reports.empty()) { + continue; + } + decltype(g[v].reports) new_reports; + for (auto id : g[v].reports) { + new_reports.insert(func(v, id)); + } + reports = std::move(new_reports); + } +} + +/** + * Generic function for replacing all the reports in the graph. + * + * Pass this a function that takes a vertex and a ReportID returns another + * ReportID (or the same one) to replace it with. + */ +template<typename Function> +void replaceReports(NGHolder &g, Function func) { + flat_set<NFAVertex> seen; + replaceReports(g, g.accept, seen, func); + replaceReports(g, g.acceptEod, seen, func); +} + +/** \brief Replace the graph's reports with new reports that specify bounds. */ +static +void updateReportBounds(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + DEBUG_PRINTF("updating report bounds\n"); + replaceReports(g, [&](NFAVertex, ReportID id) { + Report report = rm.getReport(id); // make a copy + assert(!report.hasBounds()); + + // Note that we need to cope with offset adjustment here. + + report.minOffset = expr.min_offset - report.offsetAdjust; + if (expr.max_offset == MAX_OFFSET) { + report.maxOffset = MAX_OFFSET; + } else { + report.maxOffset = expr.max_offset - report.offsetAdjust; + } + assert(report.maxOffset >= report.minOffset); + + report.minLength = expr.min_length; + if (expr.min_length && !expr.som) { + report.quashSom = true; } - DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " - "min_length=%llu\n", id, report.minOffset, - report.maxOffset, report.minLength); - - return rm.getInternalId(report); - }); + DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " + "min_length=%llu\n", id, report.minOffset, + report.maxOffset, report.minLength); + + return rm.getInternalId(report); + }); } static @@ -233,93 +233,93 @@ bool hasVirtualStarts(const NGHolder &g) { return false; } -/** Set the min_length param for all reports to zero. */ -static -void clearMinLengthParam(NGHolder &g, ReportManager &rm) { - DEBUG_PRINTF("clearing min length\n"); - replaceReports(g, [&rm](NFAVertex, ReportID id) { - const auto &report = rm.getReport(id); - if (report.minLength) { - Report new_report = report; - new_report.minLength = 0; - return rm.getInternalId(new_report); - } - return id; - }); -} - -/** - * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for - * all reports. - */ -static -void clearOffsetParams(NGHolder &g, ReportManager &rm) { - DEBUG_PRINTF("clearing min and max offset\n"); - replaceReports(g, [&rm](NFAVertex, ReportID id) { - const auto &report = rm.getReport(id); - if (report.minLength) { - Report new_report = report; - new_report.minOffset = 0; - new_report.maxOffset = MAX_OFFSET; - return rm.getInternalId(new_report); - } - return id; - }); -} - -/** - * If the pattern is unanchored, has a max_offset and has not asked for SOM, we - * can use that knowledge to anchor it which will limit its lifespan. Note that - * we can't use this transformation if there's a min_length, as it's currently - * handled using "sly SOM". +/** Set the min_length param for all reports to zero. */ +static +void clearMinLengthParam(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min length\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for + * all reports. + */ +static +void clearOffsetParams(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min and max offset\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minOffset = 0; + new_report.maxOffset = MAX_OFFSET; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * If the pattern is unanchored, has a max_offset and has not asked for SOM, we + * can use that knowledge to anchor it which will limit its lifespan. Note that + * we can't use this transformation if there's a min_length, as it's currently + * handled using "sly SOM". * * Note that it is possible to handle graphs that have a combination of * anchored and unanchored paths, but it's too tricky for the moment. */ static -bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { - if (!isFloating(g)) { - return false; - } - - const auto &reports = all_reports(g); - if (reports.empty()) { - return false; - } - - if (any_of_in(reports, [&](ReportID id) { - const auto &report = rm.getReport(id); - return report.maxOffset == MAX_OFFSET || report.minLength || - report.offsetAdjust; - })) { - return false; - } - - if (!hasSameBounds(reports, rm)) { - DEBUG_PRINTF("mixed report bounds\n"); - return false; - } - - const depth minWidth = findMinWidth(g); - const depth maxWidth = findMaxWidth(g); - +bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { + if (!isFloating(g)) { + return false; + } + + const auto &reports = all_reports(g); + if (reports.empty()) { + return false; + } + + if (any_of_in(reports, [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.maxOffset == MAX_OFFSET || report.minLength || + report.offsetAdjust; + })) { + return false; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const depth minWidth = findMinWidth(g); + const depth maxWidth = findMaxWidth(g); + assert(minWidth <= maxWidth); assert(maxWidth.is_reachable()); - const auto &first_report = rm.getReport(*reports.begin()); - const auto min_offset = first_report.minOffset; - const auto max_offset = first_report.maxOffset; - assert(max_offset < MAX_OFFSET); - + const auto &first_report = rm.getReport(*reports.begin()); + const auto min_offset = first_report.minOffset; + const auto max_offset = first_report.maxOffset; + assert(max_offset < MAX_OFFSET); + DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", - minWidth.str().c_str(), maxWidth.str().c_str(), - min_offset, max_offset); + minWidth.str().c_str(), maxWidth.str().c_str(), + min_offset, max_offset); - if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { + if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { return false; } - if (max_offset < minWidth) { + if (max_offset < minWidth) { assert(0); return false; } @@ -340,10 +340,10 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; - max_bound = max_offset - minWidth; + max_bound = max_offset - minWidth; } else { - min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; - max_bound = max_offset - minWidth; + min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; + max_bound = max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); @@ -393,44 +393,44 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { add_edge(u, v, g); } - renumber_vertices(g); - renumber_edges(g); - - if (minWidth == maxWidth) { - // For a fixed width pattern, we can retire the offsets as - // they are implicit in the graph now. - clearOffsetParams(g, rm); - } + renumber_vertices(g); + renumber_edges(g); - clearReports(g); + if (minWidth == maxWidth) { + // For a fixed width pattern, we can retire the offsets as + // they are implicit in the graph now. + clearOffsetParams(g, rm); + } + + clearReports(g); return true; } static NFAVertex findSingleCyclic(const NGHolder &g) { - NFAVertex v = NGHolder::null_vertex(); + NFAVertex v = NGHolder::null_vertex(); for (const auto &e : edges_range(g)) { if (source(e, g) == target(e, g)) { if (source(e, g) == g.startDs) { continue; } - if (v != NGHolder::null_vertex()) { + if (v != NGHolder::null_vertex()) { // More than one cyclic vertex. - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } v = source(e, g); } } - if (v != NGHolder::null_vertex()) { - DEBUG_PRINTF("cyclic is %zu\n", g[v].index); + if (v != NGHolder::null_vertex()) { + DEBUG_PRINTF("cyclic is %zu\n", g[v].index); assert(!is_special(v, g)); } return v; } static -bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, +bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, int *adjust) { const auto &reports = all_reports(g); if (reports.empty()) { @@ -451,30 +451,30 @@ bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, return true; } -/** - * If the pattern has a min_length and is of "ratchet" form with one unbounded +/** + * If the pattern has a min_length and is of "ratchet" form with one unbounded * repeat, that repeat can become a bounded repeat. * * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ */ static -bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { - const auto &reports = all_reports(g); - - if (reports.empty()) { - return false; - } +bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { + const auto &reports = all_reports(g); - if (!hasSameBounds(reports, rm)) { - DEBUG_PRINTF("mixed report bounds\n"); - return false; - } - - const auto &min_length = rm.getReport(*reports.begin()).minLength; - if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) { + if (reports.empty()) { return false; } + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const auto &min_length = rm.getReport(*reports.begin()).minLength; + if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) { + return false; + } + // If the pattern has virtual starts, we probably don't want to touch it. if (hasVirtualStarts(g)) { DEBUG_PRINTF("virtual starts, bailing\n"); @@ -484,11 +484,11 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // The graph must contain a single cyclic vertex (other than startDs), and // that vertex can have one pred and one successor. NFAVertex cyclic = findSingleCyclic(g); - if (cyclic == NGHolder::null_vertex()) { + if (cyclic == NGHolder::null_vertex()) { return false; } - NGHolder::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; tie(ai, ae) = adjacent_vertices(g.start, g); if (*ai == g.startDs) { ++ai; @@ -504,9 +504,9 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // Walk from the start vertex to the cyclic state and ensure we have a // chain of vertices. while (v != cyclic) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - auto succ = succs(v, g); + auto succ = succs(v, g); if (contains(succ, cyclic)) { if (succ.size() == 1) { v = cyclic; @@ -534,7 +534,7 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // Check the cyclic state is A-OK. v = getSoleDestVertex(g, cyclic); - if (v == NGHolder::null_vertex()) { + if (v == NGHolder::null_vertex()) { DEBUG_PRINTF("cyclic has more than one successor\n"); return false; } @@ -542,9 +542,9 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { // Walk from the cyclic state to an accept and ensure we have a chain of // vertices. while (!is_any_accept(v, g)) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - auto succ = succs(v, g); + auto succ = succs(v, g); if (succ.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; @@ -559,20 +559,20 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust); width += offsetAdjust; - DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, + DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); - if (width >= min_length) { + if (width >= min_length) { DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", - min_length, width); - clearMinLengthParam(g, rm); + min_length, width); + clearMinLengthParam(g, rm); return true; } vector<NFAVertex> preds; vector<NFAEdge> dead; for (auto u : inv_adjacent_vertices_range(cyclic, g)) { - DEBUG_PRINTF("pred %zu\n", g[u].index); + DEBUG_PRINTF("pred %zu\n", g[u].index); if (u == cyclic) { continue; } @@ -593,7 +593,7 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { const CharReach &cr = g[cyclic].char_reach; - for (u32 i = 0; i < min_length - width - 1; ++i) { + for (u32 i = 0; i < min_length - width - 1; ++i) { v = add_vertex(g); g[v].char_reach = cr; @@ -608,22 +608,22 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { add_edge(u, cyclic, g); } - renumber_vertices(g); - renumber_edges(g); - clearMinLengthParam(g, rm); + renumber_vertices(g); + renumber_edges(g); + clearMinLengthParam(g, rm); clearReports(g); return true; } static -bool hasExtParams(const ExpressionInfo &expr) { - if (expr.min_length != 0) { +bool hasExtParams(const ExpressionInfo &expr) { + if (expr.min_length != 0) { return true; } - if (expr.min_offset != 0) { + if (expr.min_offset != 0) { return true; } - if (expr.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { return true; } return false; @@ -650,13 +650,13 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) { } static -bool isEdgePrunable(const NGHolder &g, const Report &report, +bool isEdgePrunable(const NGHolder &g, const Report &report, const vector<NFAVertexBidiDepth> &depths, const NFAEdge &e) { const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); + DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); // Leave our special-to-special edges alone. if (is_special(u, g) && is_special(v, g)) { @@ -679,29 +679,29 @@ bool isEdgePrunable(const NGHolder &g, const Report &report, const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx); - if (report.minOffset) { - depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv); - if (max_offset.is_finite() && max_offset < report.minOffset) { + if (report.minOffset) { + depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv); + if (max_offset.is_finite() && max_offset < report.minOffset) { DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); return true; } } - if (report.maxOffset != MAX_OFFSET) { + if (report.maxOffset != MAX_OFFSET) { depth min_offset = minDistFromStart(du) + minDistToAccept(dv); assert(min_offset.is_finite()); - if (min_offset > report.maxOffset) { + if (min_offset > report.maxOffset) { DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); return true; } } - if (report.minLength && is_any_accept(v, g)) { + if (report.minLength && is_any_accept(v, g)) { // Simple take on min_length. If we're an edge to accept and our max // dist from start is too small, we can be pruned. - const depth &width = maxDistFromInit(du); - if (width.is_finite() && width < report.minLength) { + const depth &width = maxDistFromInit(du); + if (width.is_finite() && width < report.minLength) { DEBUG_PRINTF("max width %s from start too small for min_length\n", width.str().c_str()); return true; @@ -712,25 +712,25 @@ bool isEdgePrunable(const NGHolder &g, const Report &report, } static -void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { - const auto &reports = all_reports(g); - if (reports.empty()) { - return; - } - - if (!hasSameBounds(reports, rm)) { - DEBUG_PRINTF("report bounds vary\n"); - return; - } - - const auto &report = rm.getReport(*reports.begin()); - - auto depths = calcBidiDepths(g); - +void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { + const auto &reports = all_reports(g); + if (reports.empty()) { + return; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("report bounds vary\n"); + return; + } + + const auto &report = rm.getReport(*reports.begin()); + + auto depths = calcBidiDepths(g); + vector<NFAEdge> dead; for (const auto &e : edges_range(g)) { - if (isEdgePrunable(g, report, depths, e)) { + if (isEdgePrunable(g, report, depths, e)) { DEBUG_PRINTF("pruning\n"); dead.push_back(e); } @@ -742,45 +742,45 @@ void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { remove_edges(dead, g); pruneUseless(g); - clearReports(g); + clearReports(g); } -/** - * Remove vacuous edges in graphs where the min_offset or min_length - * constraints dictate that they can never produce a match. - */ +/** + * Remove vacuous edges in graphs where the min_offset or min_length + * constraints dictate that they can never produce a match. + */ static -void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { +void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { vector<NFAEdge> dead; - auto has_min_offset = [&](NFAVertex v) { - assert(!g[v].reports.empty()); // must be reporter - return all_of_in(g[v].reports, [&](ReportID id) { - return rm.getReport(id).minOffset > 0; - }); - }; - - auto has_min_length = [&](NFAVertex v) { - assert(!g[v].reports.empty()); // must be reporter - return all_of_in(g[v].reports, [&](ReportID id) { - return rm.getReport(id).minLength > 0; - }); - }; - + auto has_min_offset = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minOffset > 0; + }); + }; + + auto has_min_length = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minLength > 0; + }); + }; + for (const auto &e : edges_range(g)) { const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - // Special case: Crudely remove vacuous edges from start in graphs with - // a min_offset. - if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { + // Special case: Crudely remove vacuous edges from start in graphs with + // a min_offset. + if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); dead.push_back(e); continue; } // If a min_length is set, vacuous edges can be removed. - if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { + if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); dead.push_back(e); continue; @@ -791,14 +791,14 @@ void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { return; } - DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); + DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); remove_edges(dead, g); pruneUseless(g); - clearReports(g); + clearReports(g); } static -void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, +void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, const ReportManager &rm, NFAVertex accept) { vector<NFAEdge> dead; @@ -809,11 +809,11 @@ void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, continue; } - if (!hasSameBounds(g[v].reports, rm)) { - continue; - } - const auto &report = rm.getReport(*g[v].reports.begin()); - + if (!hasSameBounds(g[v].reports, rm)) { + continue; + } + const auto &report = rm.getReport(*g[v].reports.begin()); + u32 idx = g[v].index; DepthMinMax d = depths[idx]; // copy pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v); @@ -822,16 +822,16 @@ void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, d.min += adj.first; d.max += adj.second; - if (d.max.is_finite() && d.max < report.minLength) { + if (d.max.is_finite() && d.max < report.minLength) { DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", - d.max.str().c_str(), report.minLength); + d.max.str().c_str(), report.minLength); dead.push_back(e); continue; } - if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { + if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", - d.min.str().c_str(), report.maxOffset); + d.min.str().c_str(), report.maxOffset); dead.push_back(e); continue; } @@ -840,15 +840,15 @@ void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths, remove_edges(dead, g); } -/** - * Remove edges to accepts that can never produce a match long enough to - * satisfy our min_length and max_offset constraints. - */ +/** + * Remove edges to accepts that can never produce a match long enough to + * satisfy our min_length and max_offset constraints. + */ static -void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { - if (!any_of_in(all_reports(g), [&](ReportID id) { - return rm.getReport(id).minLength > 0; - })) { +void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { + if (!any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength > 0; + })) { return; } @@ -858,19 +858,19 @@ void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { pruneUnmatchable(g, depths, rm, g.acceptEod); pruneUseless(g); - clearReports(g); + clearReports(g); } static bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { - return any_of_in(all_reports(g), [&rm](ReportID id) { - return rm.getReport(id).offsetAdjust != 0; - }); + return any_of_in(all_reports(g), [&rm](ReportID id) { + return rm.getReport(id).offsetAdjust != 0; + }); } -void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, - ReportManager &rm) { - if (!hasExtParams(expr)) { +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm) { + if (!hasExtParams(expr)) { return; } @@ -882,154 +882,154 @@ void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); - if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { + if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { ostringstream oss; oss << "Expression is anchored and cannot satisfy min_offset=" - << expr.min_offset << " as it can only produce matches of length " + << expr.min_offset << " as it can only produce matches of length " << maxWidth << " bytes at most."; - throw CompileError(expr.index, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (minWidth > expr.max_offset) { + if (minWidth > expr.max_offset) { ostringstream oss; - oss << "Expression has max_offset=" << expr.max_offset - << " but requires " << minWidth << " bytes to match."; - throw CompileError(expr.index, oss.str()); + oss << "Expression has max_offset=" << expr.max_offset + << " but requires " << minWidth << " bytes to match."; + throw CompileError(expr.index, oss.str()); } - if (maxWidth.is_finite() && match_depths.max < expr.min_length) { + if (maxWidth.is_finite() && match_depths.max < expr.min_length) { ostringstream oss; - oss << "Expression has min_length=" << expr.min_length << " but can " + oss << "Expression has min_length=" << expr.min_length << " but can " "only produce matches of length " << match_depths.max << " bytes at most."; - throw CompileError(expr.index, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (expr.min_length && expr.min_length <= match_depths.min) { + if (expr.min_length && expr.min_length <= match_depths.min) { DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", - expr.min_length); - expr.min_length = 0; - } - - if (!hasExtParams(expr)) { - return; + expr.min_length); + expr.min_length = 0; } - updateReportBounds(rm, g, expr); -} - -/** - * If the pattern is completely anchored and has a min_length set, this can - * be converted to a min_offset. - */ -static -void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { - if (has_proper_successor(g.startDs, g)) { - return; // not wholly anchored - } - - replaceReports(g, [&rm](NFAVertex, ReportID id) { - const auto &report = rm.getReport(id); - if (report.minLength) { - Report new_report = report; - u64a min_len_offset = report.minLength - report.offsetAdjust; - new_report.minOffset = max(report.minOffset, min_len_offset); - new_report.minLength = 0; - return rm.getInternalId(new_report); - } - return id; - }); -} - -/** - * Clear offset bounds on reports that are not needed because they're satisfied - * by vertex depth. - */ -static -void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { - auto depths = calcDepths(g); - - replaceReports(g, [&](NFAVertex v, ReportID id) { - const auto &d = depths.at(g[v].index); - const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min); - const depth &max_depth = maxDistFromStartOfData(d); - - DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index, - min_depth.str().c_str(), max_depth.str().c_str()); - - Report report = rm.getReport(id); // copy - bool modified = false; - if (report.minOffset && !report.offsetAdjust && - report.minOffset <= min_depth) { - report.minOffset = 0; - modified = true; - } - if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() && - report.maxOffset >= max_depth) { - report.maxOffset = MAX_OFFSET; - modified = true; - } - if (modified) { - DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n", - g[v].index, report.minOffset, report.maxOffset); - return rm.getInternalId(report); - } - - return id; - }); -} - -void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { - if (!any_of_in(all_reports(g), - [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { - DEBUG_PRINTF("no extparam bounds\n"); - return; - } - - DEBUG_PRINTF("graph has extparam bounds\n"); - - pruneVacuousEdges(g, rm); - if (can_never_match(g)) { + if (!hasExtParams(expr)) { return; } - pruneUnmatchable(g, rm); - if (can_never_match(g)) { - return; - } - - if (!hasOffsetAdjustments(rm, g)) { - pruneExtUnreachable(g, rm); - if (can_never_match(g)) { - return; - } - } - - replaceMinLengthWithOffset(g, rm); - if (can_never_match(g)) { + updateReportBounds(rm, g, expr); +} + +/** + * If the pattern is completely anchored and has a min_length set, this can + * be converted to a min_offset. + */ +static +void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { + if (has_proper_successor(g.startDs, g)) { + return; // not wholly anchored + } + + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + u64a min_len_offset = report.minLength - report.offsetAdjust; + new_report.minOffset = max(report.minOffset, min_len_offset); + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Clear offset bounds on reports that are not needed because they're satisfied + * by vertex depth. + */ +static +void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { + auto depths = calcDepths(g); + + replaceReports(g, [&](NFAVertex v, ReportID id) { + const auto &d = depths.at(g[v].index); + const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min); + const depth &max_depth = maxDistFromStartOfData(d); + + DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index, + min_depth.str().c_str(), max_depth.str().c_str()); + + Report report = rm.getReport(id); // copy + bool modified = false; + if (report.minOffset && !report.offsetAdjust && + report.minOffset <= min_depth) { + report.minOffset = 0; + modified = true; + } + if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() && + report.maxOffset >= max_depth) { + report.maxOffset = MAX_OFFSET; + modified = true; + } + if (modified) { + DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n", + g[v].index, report.minOffset, report.maxOffset); + return rm.getInternalId(report); + } + + return id; + }); +} + +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { + if (!any_of_in(all_reports(g), + [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { + DEBUG_PRINTF("no extparam bounds\n"); + return; + } + + DEBUG_PRINTF("graph has extparam bounds\n"); + + pruneVacuousEdges(g, rm); + if (can_never_match(g)) { + return; + } + + pruneUnmatchable(g, rm); + if (can_never_match(g)) { + return; + } + + if (!hasOffsetAdjustments(rm, g)) { + pruneExtUnreachable(g, rm); + if (can_never_match(g)) { + return; + } + } + + replaceMinLengthWithOffset(g, rm); + if (can_never_match(g)) { return; } // If the pattern has a min_length and is of "ratchet" form with one // unbounded repeat, that repeat can become a bounded repeat. // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ - transformMinLengthToRepeat(g, rm); - if (can_never_match(g)) { - return; + transformMinLengthToRepeat(g, rm); + if (can_never_match(g)) { + return; } // If the pattern is unanchored, has a max_offset and has not asked for // SOM, we can use that knowledge to anchor it which will limit its // lifespan. Note that we can't use this transformation if there's a // min_length, as it's currently handled using "sly SOM". - if (som == SOM_NONE) { - anchorPatternWithBoundedRepeat(g, rm); - if (can_never_match(g)) { - return; + if (som == SOM_NONE) { + anchorPatternWithBoundedRepeat(g, rm); + if (can_never_match(g)) { + return; } } - removeUnneededOffsetBounds(g, rm); + removeUnneededOffsetBounds(g, rm); } } // namespace ue2 |