aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp')
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp434
1 files changed, 434 insertions, 0 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
new file mode 100644
index 0000000000..adda70312f
--- /dev/null
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for pruning unreachable vertices or reports from the graph.
+ */
+#include "ng_prune.h"
+
+#include "ng_dominators.h"
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/graph_small_color_map.h"
+#include "util/report_manager.h"
+
+#include <deque>
+#include <map>
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::default_color_type;
+using boost::reverse_graph;
+
+namespace ue2 {
+
+/** Remove any vertices that can't be reached by traversing the graph in
+ * reverse from acceptEod. */
+void pruneUnreachable(NGHolder &g) {
+ deque<NFAVertex> dead;
+
+ if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g)
+ && edge(g.accept, g.acceptEod, g).second) {
+ // Trivial case: there are no in-edges to our accepts (other than
+ // accept->acceptEod), so all non-specials are unreachable.
+ for (auto v : vertices_range(g)) {
+ if (!is_special(v, g)) {
+ dead.push_back(v);
+ }
+ }
+ } else {
+ // Walk a reverse graph from acceptEod with Boost's depth_first_visit
+ // call.
+ typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph;
+ RevNFAGraph revg(g);
+
+ map<RevNFAGraph::vertex_descriptor, default_color_type> colours;
+
+ depth_first_visit(revg, g.acceptEod,
+ make_dfs_visitor(boost::null_visitor()),
+ make_assoc_property_map(colours));
+
+ DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
+
+ // All non-special vertices that aren't in the colour map (because they
+ // weren't reached) can be removed.
+ for (auto v : vertices_range(revg)) {
+ if (is_special(v, revg)) {
+ continue;
+ }
+ if (!contains(colours, v)) {
+ dead.push_back(v);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ DEBUG_PRINTF("no unreachable vertices\n");
+ return;
+ }
+
+ remove_vertices(dead, g, false);
+ DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
+}
+
+template<class nfag_t>
+static
+bool pruneForwardUseless(NGHolder &h, const nfag_t &g,
+ typename nfag_t::vertex_descriptor s,
+ decltype(make_small_color_map(NGHolder())) &colors) {
+ // Begin with all vertices set to white, as DFV only marks visited
+ // vertices.
+ colors.fill(small_color::white);
+
+ depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors);
+
+ vector<NFAVertex> dead;
+
+ // All non-special vertices that are still white can be removed.
+ for (auto v : vertices_range(g)) {
+ if (!is_special(v, g) && get(colors, v) == small_color::white) {
+ DEBUG_PRINTF("vertex %zu is unreachable from %zu\n",
+ g[v].index, g[s].index);
+ dead.push_back(NFAVertex(v));
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu vertices\n", dead.size());
+ remove_vertices(dead, h, false);
+ return true;
+}
+
+/** Remove any vertices which can't be reached by traversing the graph forward
+ * from start or in reverse from acceptEod. If \p renumber is false, no
+ * vertex/edge renumbering is done. */
+void pruneUseless(NGHolder &g, bool renumber) {
+ DEBUG_PRINTF("pruning useless vertices\n");
+ assert(hasCorrectlyNumberedVertices(g));
+ auto colors = make_small_color_map(g);
+
+ bool work_done = pruneForwardUseless(g, g, g.start, colors);
+ work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g),
+ g.acceptEod, colors);
+
+ if (!work_done) {
+ return;
+ }
+
+ if (renumber) {
+ renumber_edges(g);
+ renumber_vertices(g);
+ }
+}
+
+/** This code removes any vertices which do not accept any symbols. Any
+ * vertices which no longer lie on a path from a start to an accept are also
+ * pruned. */
+void pruneEmptyVertices(NGHolder &g) {
+ DEBUG_PRINTF("pruning empty vertices\n");
+ vector<NFAVertex> dead;
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+ if (cr.none()) {
+ DEBUG_PRINTF("empty: %zu\n", g[v].index);
+ dead.push_back(v);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_vertices(dead, g);
+ pruneUseless(g);
+}
+
+/** Remove any edges from vertices that generate accepts (for Highlander
+ * graphs). */
+void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
+ // Safety check: all reports must be simple exhaustible reports, or this is
+ // not safe. This optimisation should be called early enough that no
+ // internal reports have been added.
+ for (auto report_id : all_reports(g)) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
+ !isExternalReport(ir)) {
+ DEBUG_PRINTF("report %u is not external highlander with "
+ "no bounds\n", report_id);
+ return;
+ }
+ }
+
+ vector<NFAEdge> dead;
+ for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
+ // We can prune any out-edges that aren't accepts
+ for (const auto &e : out_edges_range(u, g)) {
+ if (!is_any_accept(target(e, g), g)) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+}
+
+static
+bool isDominatedByReporter(const NGHolder &g,
+ const unordered_map<NFAVertex, NFAVertex> &dom,
+ NFAVertex v, ReportID report_id) {
+ for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
+ NFAVertex u = it->second;
+ // Note: reporters with edges only to acceptEod are not considered to
+ // dominate.
+ if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
+ DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n",
+ g[v].index, g[u].index, report_id);
+ return true;
+ }
+ v = u;
+ }
+ return false;
+}
+
+/**
+ * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
+ * itself and (c) only simple exhaustible reports.
+ */
+static
+bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
+ const ReportManager &rm,
+ NFAVertex v) {
+ if (!edge(v, v, g).second) {
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v && w != g.accept) {
+ return false;
+ }
+ }
+
+ for (const auto &report_id : g[v].reports) {
+ if (!isSimpleExhaustible(rm.getReport(report_id))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
+ vector<NFAVertex> reporters;
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ for (const auto &report_id : g[v].reports) {
+ const Report &r = rm.getReport(report_id);
+ if (isSimpleExhaustible(r)) {
+ reporters.push_back(v);
+ break;
+ }
+ }
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ for (const auto &report_id : g[v].reports) {
+ const Report &r = rm.getReport(report_id);
+ if (isSimpleExhaustible(r)) {
+ reporters.push_back(v);
+ break;
+ }
+ }
+ }
+
+ if (reporters.empty()) {
+ return;
+ }
+
+
+ sort(begin(reporters), end(reporters));
+ reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
+
+ DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
+ reporters.size());
+
+ const auto &dom = findDominators(g);
+ bool modified = false;
+
+ // If a reporter vertex is dominated by another with the same report, we
+ // can remove that report; if all reports are removed, we can remove the
+ // vertex entirely.
+ for (const auto v : reporters) {
+ const auto reports = g[v].reports; // copy, as we're going to mutate
+ for (const auto &report_id : reports) {
+ if (!isSimpleExhaustible(rm.getReport(report_id))) {
+ continue;
+ }
+ if (isDominatedByReporter(g, dom, v, report_id)) {
+ DEBUG_PRINTF("removed dominated report %u from vertex %zu\n",
+ report_id, g[v].index);
+ g[v].reports.erase(report_id);
+ }
+ }
+
+ if (g[v].reports.empty()) {
+ DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n",
+ g[v].index);
+ remove_edge(v, g.accept, g);
+ remove_edge(v, g.acceptEod, g);
+ modified = true;
+ }
+ }
+
+ // If a reporter vertex has a self-loop, but otherwise only leads to accept
+ // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
+ // the self-loop.
+ for (const auto v : reporters) {
+ if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
+ remove_edge(v, v, g);
+ modified = true;
+ DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index);
+ }
+ }
+
+ if (!modified) {
+ return;
+ }
+
+ pruneUseless(g);
+
+ // We may have only removed self-loops, in which case pruneUseless wouldn't
+ // renumber, so we do edge renumbering explicitly here.
+ renumber_edges(g);
+}
+
+/** Removes the given Report ID from vertices connected to accept, and then
+ * prunes useless vertices that have had their report sets reduced to empty. */
+void pruneReport(NGHolder &g, ReportID report) {
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.erase(report);
+ if (reports.empty()) {
+ dead.insert(e);
+ }
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ if (u == g.accept) {
+ continue;
+ }
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.erase(report);
+ if (reports.empty()) {
+ dead.insert(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUnreachable(g);
+ renumber_vertices(g);
+ renumber_edges(g);
+}
+
+/** Removes all Report IDs bar the given one from vertices connected to accept,
+ * and then prunes useless vertices that have had their report sets reduced to
+ * empty. */
+void pruneAllOtherReports(NGHolder &g, ReportID report) {
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.clear();
+ reports.insert(report);
+ } else {
+ reports.clear();
+ dead.insert(e);
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ if (u == g.accept) {
+ continue;
+ }
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.clear();
+ reports.insert(report);
+ } else {
+ reports.clear();
+ dead.insert(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUnreachable(g);
+ renumber_vertices(g);
+ renumber_edges(g);
+}
+
+} // namespace ue2