path: root/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
author    Devtools Arcadia <arcadia-devtools@yandex-team.ru>    2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>    2022-02-07 18:08:42 +0300
commit    1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
download  ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_merge.cpp')
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | 2818
1 file changed, 2818 insertions(+), 0 deletions(-)
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
new file mode 100644
index 0000000000..5066dbd578
--- /dev/null
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
@@ -0,0 +1,2818 @@
+/*
+ * Copyright (c) 2015-2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build: functions for reducing the size of the Rose graph
+ * through merging.
+ */
+#include "rose_build_merge.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_haig.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_puff.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/hash.h"
+#include "util/insertion_ordered.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/unordered.h"
+
+#include <algorithm>
+#include <functional>
+#include <list>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+static const size_t NARROW_START_MAX = 10;
+static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
+static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
+static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
+static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
+static const size_t MERGE_GROUP_SIZE_MAX = 200;
+static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
+
+/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
+static const size_t DFA_CHUNK_SIZE_MAX = 200;
+
+/** \brief Max DFA states in a merged DFA. */
+static const size_t DFA_MERGE_MAX_STATES = 8000;
+
+/** \brief In block mode, merge two prefixes even if they don't have identical
+ * literal sets if they have fewer than this many states and the merged graph
+ * is also small. */
+static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32;
+
+static
+size_t small_merge_max_vertices(const CompileContext &cc) {
+ return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
+ : SMALL_MERGE_MAX_VERTICES_BLOCK;
+}
+
+static
+size_t small_rose_threshold(const CompileContext &cc) {
+ return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
+ : SMALL_ROSE_THRESHOLD_BLOCK;
+}
+
+/**
+ * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
+ * reports should not contribute to the hash.
+ */
+static
+size_t hashLeftfix(const left_id &left) {
+ size_t val = 0;
+
+ if (left.castle()) {
+ hash_combine(val, left.castle()->reach());
+ for (const auto &pr : left.castle()->repeats) {
+ hash_combine(val, pr.first); // top
+ hash_combine(val, pr.second.bounds);
+ }
+ } else if (left.graph()) {
+ hash_combine(val, hash_holder(*left.graph()));
+ }
+
+ return val;
+}
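+
+/* Illustrative sketch (editorial, not Hyperscan code; standard C++ only,
+ * hypothetical names): the dedupe machinery relies on a structural hash that
+ * deliberately ignores report IDs, so two engines differing only in their
+ * reports land in the same bucket and can then be compared with is_equal().
+ *
+ *     struct ToyEngine {
+ *         unsigned reach;   // structural: hashed
+ *         unsigned repeats; // structural: hashed
+ *         unsigned report;  // intentionally NOT hashed
+ *     };
+ *
+ *     static void combine(std::size_t &seed, std::size_t v) {
+ *         seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+ *     }
+ *
+ *     static std::size_t toy_hash(const ToyEngine &e) {
+ *         std::size_t val = 0;
+ *         combine(val, e.reach);
+ *         combine(val, e.repeats); // e.report omitted, as in hashLeftfix
+ *         return val;
+ *     }
+ */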
+
+namespace {
+
+/** Key used to group sets of leftfixes for the dedupeLeftfixes path. */
+struct RoseGroup {
+ RoseGroup(const RoseBuildImpl &build, RoseVertex v)
+ : left_hash(hashLeftfix(build.g[v].left)),
+ lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
+ const RoseGraph &g = build.g;
+ assert(in_degree(v, g) == 1);
+ RoseVertex u = *inv_adjacent_vertices(v, g).first;
+ parent = g[u].index;
+ }
+
+ bool operator<(const RoseGroup &b) const {
+ const RoseGroup &a = *this;
+ ORDER_CHECK(parent);
+ ORDER_CHECK(left_hash);
+ ORDER_CHECK(lag);
+ ORDER_CHECK(eod_table);
+ return false;
+ }
+
+private:
+ /** Parent vertex index. We must use the index, rather than the descriptor,
+ * for compile determinism. */
+ size_t parent;
+
+ /** Quick hash of the leftfix itself. Must be identical for a given pair of
+ * graphs if is_equal would return true. */
+ size_t left_hash;
+
+ /** Leftfix lag value. */
+ u32 lag;
+
+ /** True if associated vertex (successor) is in the EOD table. We don't
+ * allow sharing of leftfix engines between "normal" and EOD operation. */
+ bool eod_table;
+};
+
+/**
+ * Intended to find graphs that are identical except for their report
+ * IDs. Relies on vertex and edge indices to pick up graphs that have been
+ * messily put together in different orderings. Only implemented for castles and
+ * holders.
+ */
+static
+bool is_equal(const left_id &u_left, ReportID u_report,
+ const left_id &v_left, ReportID v_report) {
+ if (u_left.castle() && v_left.castle()) {
+ return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report);
+ }
+
+ if (!u_left.graph() || !v_left.graph()) {
+ return false;
+ }
+
+ return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
+}
+
+} // namespace
+
+/**
+ * This pass performs work similar to \ref dedupeSuffixes - it removes
+ * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and
+ * share the same trigger vertex and lag. Leftfixes are first grouped by
+ * parent role and lag to reduce the number of candidates to be inspected
+ * for each leftfix. The graphs in each cluster are then compared with each
+ * other and the graph is updated to only refer to a canonical version of each
+ * graph.
+ *
+ * Note: only roles with a single predecessor vertex are considered for this
+ * transform - it should probably be generalised to work for roles which share
+ * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
+ * it should be retired entirely.
+ */
+bool dedupeLeftfixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("deduping leftfixes\n");
+ map<RoseGroup, deque<RoseVertex>> roses;
+ bool work_done = false;
+
+ /* Note: a leftfix's transientness will not be altered by deduping */
+
+ // Collect leftfixes into groups.
+ RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+ const left_id left(g[v].left);
+
+ if (left.haig()) {
+ /* TODO: allow merging of identical haigs */
+ continue;
+ }
+
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ roses[RoseGroup(tbi, v)].push_back(v);
+ }
+
+ DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
+
+ // Walk groups and dedupe the roses therein.
+ for (deque<RoseVertex> &verts : roses | map_values) {
+ DEBUG_PRINTF("group has %zu vertices\n", verts.size());
+
+ unordered_set<left_id> seen;
+
+ for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
+ RoseVertex v = *jt;
+ left_id left(g[v].left);
+
+ // Skip cases we've already handled, and mark as seen otherwise.
+ if (!seen.insert(left).second) {
+ continue;
+ }
+
+ // Scan the rest of the list for dupes.
+ for (auto kt = std::next(jt); kt != jte; ++kt) {
+ if (g[v].left == g[*kt].left
+ || !is_equal(g[v].left, g[v].left.leftfix_report,
+ g[*kt].left, g[*kt].left.leftfix_report)) {
+ continue;
+ }
+
+ // Dupe found.
+ DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
+ g[*kt].index, g[v].index);
+ assert(g[v].left.lag == g[*kt].left.lag);
+ g[*kt].left = g[v].left;
+ work_done = true;
+ }
+ }
+ }
+
+ return work_done;
+}
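+
+/* Illustrative sketch (editorial, not Hyperscan code; hypothetical names):
+ * the shape of the pass above is "bucket by a cheap key, then pairwise-compare
+ * within each bucket and redirect duplicates at a canonical element":
+ *
+ *     std::map<Key, std::vector<Item *>> buckets; // Key ~ (parent, hash, lag)
+ *     for (Item *i : items) {
+ *         buckets[cheap_key(*i)].push_back(i);
+ *     }
+ *     for (auto &kv : buckets) {
+ *         auto &v = kv.second;
+ *         for (size_t j = 0; j < v.size(); j++) {
+ *             for (size_t k = j + 1; k < v.size(); k++) {
+ *                 if (deep_equal(*v[j], *v[k])) {
+ *                     redirect(v[k], v[j]); // keep v[j] as canonical
+ *                 }
+ *             }
+ *         }
+ *     }
+ *
+ * The cheap key confines the quadratic deep_equal() comparison to small
+ * buckets.
+ */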
+
+/**
+ * \brief Returns a numeric key that can be used to group this suffix with
+ * others that may be its duplicate.
+ */
+static
+size_t suffix_size_key(const suffix_id &s) {
+ if (s.graph()) {
+ return num_vertices(*s.graph());
+ }
+ if (s.castle()) {
+ return s.castle()->repeats.size();
+ }
+ return 0;
+}
+
+static
+bool is_equal(const suffix_id &s1, const suffix_id &s2) {
+ if (s1.graph() && s2.graph()) {
+ return is_equal(*s1.graph(), *s2.graph());
+ } else if (s1.castle() && s2.castle()) {
+ return is_equal(*s1.castle(), *s2.castle());
+ }
+ return false;
+}
+
+/**
+ * This function simply looks for suffix NGHolder graphs which are identical
+ * and updates the roles in the RoseGraph to refer to only a single copy. This
+ * obviously has benefits in terms of both performance (as we don't run
+ * multiple engines doing the same work) and stream state. This function first
+ * groups all suffixes by number of vertices and report set to restrict the set
+ * of possible candidates. Each group is then walked to find duplicates using
+ * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it
+ * goes.
+ *
+ * Note: does not dedupe suffixes of vertices in the EOD table.
+ */
+void dedupeSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("deduping suffixes\n");
+
+ unordered_map<suffix_id, set<RoseVertex>> suffix_map;
+ map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
+
+ // Collect suffixes into groups.
+ RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix || tbi.isInETable(v)) {
+ continue;
+ }
+
+ const suffix_id s(g[v].suffix);
+
+ if (!(s.graph() || s.castle())) {
+ continue; // e.g. Haig
+ }
+
+ set<RoseVertex> &verts = suffix_map[s];
+ if (verts.empty()) {
+ part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
+ }
+ verts.insert(v);
+ }
+
+ DEBUG_PRINTF("collected %zu groups\n", part.size());
+
+ for (const auto &cand : part | map_values) {
+ if (cand.size() <= 1) {
+ continue;
+ }
+ DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
+
+ for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
+ if (suffix_map[*jt].empty()) {
+ continue;
+ }
+ for (auto kt = next(jt); kt != cand.end(); ++kt) {
+ if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
+ continue;
+ }
+ DEBUG_PRINTF("found dupe\n");
+ for (auto v : suffix_map[*kt]) {
+ RoseVertex dupe = *suffix_map[*jt].begin();
+ assert(dupe != v);
+ g[v].suffix.graph = g[dupe].suffix.graph;
+ g[v].suffix.castle = g[dupe].suffix.castle;
+ assert(suffix_id(g[v].suffix) ==
+ suffix_id(g[dupe].suffix));
+ suffix_map[*jt].insert(v);
+ }
+ suffix_map[*kt].clear();
+ }
+ }
+ }
+}
+
+namespace {
+
+/**
+ * This class stores a mapping from an engine reference (left_id, suffix_id,
+ * etc) to a list of vertices, and also allows us to iterate over the set of
+ * engine references in insertion order -- we add to the mapping in vertex
+ * iteration order, so this allows us to provide a consistent ordering.
+ */
+template<class EngineRef>
+class Bouquet {
+private:
+ list<EngineRef> ordering; // Unique list in insert order.
+ using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>;
+ BouquetMap bouquet;
+public:
+ void insert(const EngineRef &h, RoseVertex v) {
+ typename BouquetMap::iterator f = bouquet.find(h);
+ if (f == bouquet.end()) {
+ ordering.push_back(h);
+ bouquet[h].push_back(v);
+ } else {
+ f->second.push_back(v);
+ }
+ }
+
+ void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
+ typename BouquetMap::iterator f = bouquet.find(h);
+ if (f == bouquet.end()) {
+ ordering.push_back(h);
+ bouquet.insert(make_pair(h, verts));
+ } else {
+ f->second.insert(f->second.end(), verts.begin(), verts.end());
+ }
+ }
+
+ const deque<RoseVertex> &vertices(const EngineRef &h) const {
+ typename BouquetMap::const_iterator it = bouquet.find(h);
+ assert(it != bouquet.end()); // must be present
+ return it->second;
+ }
+
+ void erase(const EngineRef &h) {
+ assert(bouquet.find(h) != bouquet.end());
+ bouquet.erase(h);
+ ordering.remove(h);
+ }
+
+ /** Remove all the elements in the given iterator range. */
+ template <class Iter>
+ void erase_all(Iter erase_begin, Iter erase_end) {
+ for (Iter it = erase_begin; it != erase_end; ++it) {
+ bouquet.erase(*it);
+ }
+
+ // Use a quick-lookup container so that we only have to traverse the
+ // 'ordering' list once.
+ const set<EngineRef> dead(erase_begin, erase_end);
+ for (iterator it = begin(); it != end(); /* incremented inside */) {
+ if (contains(dead, *it)) {
+ ordering.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+ }
+
+ void clear() {
+ ordering.clear();
+ bouquet.clear();
+ }
+
+ size_t size() const { return bouquet.size(); }
+
+ // iterate over holders in insert order
+ typedef typename list<EngineRef>::iterator iterator;
+ iterator begin() { return ordering.begin(); }
+ iterator end() { return ordering.end(); }
+
+ // const iterate over holders in insert order
+ typedef typename list<EngineRef>::const_iterator const_iterator;
+ const_iterator begin() const { return ordering.begin(); }
+ const_iterator end() const { return ordering.end(); }
+};
+
+typedef Bouquet<left_id> LeftfixBouquet;
+typedef Bouquet<suffix_id> SuffixBouquet;
+
+} // namespace
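+
+/* Illustrative sketch (editorial, not Hyperscan code; standard C++ only): a
+ * Bouquet is essentially an insertion-ordered multimap -- a hash map paired
+ * with a list recording first-insertion order, so iteration is deterministic
+ * from one compile to the next:
+ *
+ *     template<class K, class V>
+ *     struct OrderedMultimap {
+ *         std::list<K> order;                     // keys in first-seen order
+ *         std::unordered_map<K, std::deque<V>> map;
+ *
+ *         void insert(const K &k, const V &v) {
+ *             auto it = map.find(k);
+ *             if (it == map.end()) {
+ *                 order.push_back(k);             // new key: record position
+ *                 map[k].push_back(v);
+ *             } else {
+ *                 it->second.push_back(v);
+ *             }
+ *         }
+ *     };
+ *
+ * Deterministic iteration order is what makes the merge passes reproducible,
+ * which is why a bare unordered_map is not used on its own.
+ */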
+
+/**
+ * Split a \ref Bouquet of some type into several smaller ones.
+ */
+template <class EngineRef>
+static void chunkBouquets(const Bouquet<EngineRef> &in,
+ deque<Bouquet<EngineRef>> &out,
+ const size_t chunk_size) {
+ if (in.size() <= chunk_size) {
+ out.push_back(in);
+ return;
+ }
+
+ out.push_back(Bouquet<EngineRef>());
+ for (const auto &engine : in) {
+ if (out.back().size() >= chunk_size) {
+ out.push_back(Bouquet<EngineRef>());
+ }
+ out.back().insert(engine, in.vertices(engine));
+ }
+}
+
+static
+bool stringsCanFinishAtSameSpot(const ue2_literal &u,
+ ue2_literal::const_iterator v_b,
+ ue2_literal::const_iterator v_e) {
+ ue2_literal::const_iterator u_e = u.end();
+ ue2_literal::const_iterator u_b = u.begin();
+
+ while (u_e != u_b && v_e != v_b) {
+ --u_e;
+ --v_e;
+
+ if (!overlaps(*u_e, *v_e)) {
+ return false;
+ }
+ }
+
+ return true;
+}
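+
+/* Illustrative sketch (editorial, not Hyperscan code): for literals whose
+ * characters are plain (so overlaps() degenerates to equality), the check
+ * above reduces to "over the compared span, is one string a suffix of the
+ * other":
+ *
+ *     static bool canFinishAtSameSpot(const std::string &u,
+ *                                     const std::string &v) {
+ *         auto u_it = u.rbegin();
+ *         auto v_it = v.rbegin();
+ *         // Walk both strings backwards from their final characters.
+ *         for (; u_it != u.rend() && v_it != v.rend(); ++u_it, ++v_it) {
+ *             if (*u_it != *v_it) {
+ *                 return false;
+ *             }
+ *         }
+ *         return true; // one side ran out; no conflicting positions remain
+ *     }
+ *
+ * e.g. canFinishAtSameSpot("abc", "bc") is true, ("abc", "bd") is false.
+ */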
+
+/**
+ * Check that, once u has been seen, it is impossible for the arrival of v to
+ * require the inspection of an engine earlier than u did.
+ *
+ * Let delta be the earliest that v can be seen after u (may be zero)
+ *
+ * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
+ * ==> - ulag <= delta - vlag
+ * ==> vlag - ulag <= delta
+ */
+static
+bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
+ const rose_literal_id &vl, const u32 vlag) {
+ DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
+ escapeString(vl.s).c_str(), vlag);
+
+ if (vl.delay || ul.delay) {
+ /* engine related literals should not be delayed anyway */
+ return false;
+ }
+
+ if (ulag >= vlag) {
+ assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
+ return true;
+ }
+
+ size_t min_allowed_delta = vlag - ulag;
+    DEBUG_PRINTF("min allowed distance %zu\n", min_allowed_delta);
+
+ for (size_t i = 0; i < min_allowed_delta; i++) {
+ if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+ DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("OK\n");
+ return true;
+}
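+
+/* Worked example (editorial): take ul.s = "abc" with ulag = 0 and vl.s = "bc"
+ * with vlag = 1. Then min_allowed_delta = vlag - ulag = 1, and the loop asks
+ * for i = 0: can "abc" and "bc" finish at the same spot? Yes, since "bc" is a
+ * suffix of "abc". So v could arrive with delta = 0 < 1 and would need the
+ * engine checked one byte earlier than u did; checkPrefix() returns false and
+ * the merge is rejected. With vlag = 0 instead, ulag >= vlag holds and the
+ * merge is trivially safe.
+ */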
+
+static
+bool hasSameEngineType(const RoseVertexProps &u_prop,
+ const RoseVertexProps &v_prop) {
+ const left_id u_left = u_prop.left;
+ const left_id v_left = v_prop.left;
+
+ return !u_left.haig() == !v_left.haig()
+ && !u_left.dfa() == !v_left.dfa()
+ && !u_left.castle() == !v_left.castle()
+ && !u_left.graph() == !v_left.graph();
+}
+
+/**
+ * Verifies that merging the leftfix of vertices does not cause conflicts due
+ * to the literals on the right.
+ *
+ * The main concern is that the lags of the literals and overlap between them
+ * allow the engine check offset to potentially regress.
+ *
+ * Parameters are vectors of literals + lag pairs.
+ *
+ * Note: if more constraints of when the leftfixes were going to be checked
+ * (mandatory lookarounds passing, offset checks), more merges may be allowed.
+ */
+static
+bool compatibleLiteralsForMerge(
+ const vector<pair<const rose_literal_id *, u32>> &ulits,
+ const vector<pair<const rose_literal_id *, u32>> &vlits) {
+ assert(!ulits.empty());
+ assert(!vlits.empty());
+
+    // We cannot merge engines whose trigger literals are in different tables.
+ if (ulits[0].first->table != vlits[0].first->table) {
+ DEBUG_PRINTF("literals in different tables\n");
+ return false;
+ }
+
+ // We don't handle delayed cases yet.
+ for (const auto &ue : ulits) {
+ const rose_literal_id &ul = *ue.first;
+ if (ul.delay) {
+ return false;
+ }
+ }
+
+ for (const auto &ve : vlits) {
+ const rose_literal_id &vl = *ve.first;
+ if (vl.delay) {
+ return false;
+ }
+ }
+
+ /* An engine requires that all accesses to it are ordered by offsets. (ie,
+ we can not check an engine's state at offset Y, if we have already
+ checked its status at offset X and X > Y). If we can not establish that
+ the literals used for triggering will satisfy this property, then it is
+ not safe to merge the engine. */
+ for (const auto &ue : ulits) {
+ const rose_literal_id &ul = *ue.first;
+ u32 ulag = ue.second;
+
+ for (const auto &ve : vlits) {
+ const rose_literal_id &vl = *ve.first;
+ u32 vlag = ve.second;
+
+ if (!checkPrefix(ul, ulag, vl, vlag)
+ || !checkPrefix(vl, vlag, ul, ulag)) {
+ DEBUG_PRINTF("prefix check failed\n");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/**
+ * True if this graph has few enough accel states to be implemented as an NFA
+ * with all of those states actually becoming accel schemes.
+ */
+static
+bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) {
+ u32 num = countAccelStates(g, &build.rm, build.cc);
+ DEBUG_PRINTF("graph with %zu vertices has %u accel states\n",
+ num_vertices(g), num);
+ return num <= NFA_MAX_ACCEL_STATES;
+}
+
+/**
+ * In block mode, we want to be a little more selective -- We will only merge
+ * prefix engines when the literal sets are the same or if the merged graph
+ * has only grown by a small amount.
+ */
+static
+bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u,
+ RoseVertex v) {
+ assert(!build.cc.streaming);
+ assert(build.isRootSuccessor(u) == build.isRootSuccessor(v));
+
+ // Always merge infixes if we can (subject to the other criteria in
+ // mergeableRoseVertices).
+ if (!build.isRootSuccessor(u)) {
+ return true;
+ }
+
+ const RoseGraph &g = build.g;
+
+ // Merge prefixes with identical literal sets (as we'd have to run them
+ // both when we see those literals anyway).
+ if (g[u].literals == g[v].literals) {
+ return true;
+ }
+
+ // The rest of this function only deals with the case when both vertices
+ // have graph leftfixes.
+ if (!g[u].left.graph || !g[v].left.graph) {
+ return false;
+ }
+
+ const size_t u_count = num_vertices(*g[u].left.graph);
+ const size_t v_count = num_vertices(*g[v].left.graph);
+ DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
+ u_count, v_count);
+ if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
+ v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+ DEBUG_PRINTF("prefixes too big already\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("trying merge\n");
+ NGHolder h;
+ cloneHolder(h, *g[v].left.graph);
+ if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) {
+ DEBUG_PRINTF("couldn't merge\n");
+ return false;
+ }
+
+ const size_t merged_count = num_vertices(h);
+ DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
+ if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+ DEBUG_PRINTF("exceeded limit\n");
+ return false;
+ }
+
+ // We want to only perform merges that take advantage of some
+ // commonality in the two input graphs, so we check that the number of
+ // vertices has only grown a small amount: somewhere between the sum
+ // (no commonality) and the max (no growth at all) of the vertex counts
+ // of the input graphs.
+ const size_t max_size = u_count + v_count;
+ const size_t min_size = max(u_count, v_count);
+ const size_t max_growth = ((max_size - min_size) * 25) / 100;
+ if (merged_count > min_size + max_growth) {
+ DEBUG_PRINTF("grew too much\n");
+ return false;
+ }
+
+ // We don't want to squander any chances at accelerating.
+ if (!isAccelerableLeftfix(build, h) &&
+ (isAccelerableLeftfix(build, *g[u].left.graph) ||
+ isAccelerableLeftfix(build, *g[v].left.graph))) {
+ DEBUG_PRINTF("would lose accel property\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("safe to merge\n");
+ return true;
+}
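+
+/* Illustrative sketch (editorial; hypothetical name): the growth budget used
+ * above, isolated into a single function:
+ *
+ *     static bool withinGrowthBudget(size_t u_count, size_t v_count,
+ *                                    size_t merged_count) {
+ *         const size_t max_size = u_count + v_count;          // no sharing
+ *         const size_t min_size = std::max(u_count, v_count); // full sharing
+ *         const size_t max_growth = ((max_size - min_size) * 25) / 100;
+ *         return merged_count <= min_size + max_growth;
+ *     }
+ *
+ * e.g. with u_count = 20 and v_count = 10: max_size = 30, min_size = 20,
+ * max_growth = 2, so a merged graph with more than 22 vertices is rejected
+ * for showing too little commonality between the inputs.
+ */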
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+ RoseVertex v) {
+ assert(u != v);
+
+ if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
+ return false;
+ }
+
+ if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) {
+ return false;
+ }
+
+ /* We cannot merge prefixes/vertices if they are successors of different
+ * root vertices */
+ if (tbi.isRootSuccessor(u)) {
+ assert(tbi.isRootSuccessor(v));
+ set<RoseVertex> u_preds;
+ set<RoseVertex> v_preds;
+ insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
+ insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
+
+ if (u_preds != v_preds) {
+ return false;
+ }
+ }
+
+ u32 ulag = tbi.g[u].left.lag;
+ vector<pair<const rose_literal_id *, u32>> ulits;
+ ulits.reserve(tbi.g[u].literals.size());
+ for (u32 id : tbi.g[u].literals) {
+ ulits.emplace_back(&tbi.literals.at(id), ulag);
+ }
+
+ u32 vlag = tbi.g[v].left.lag;
+ vector<pair<const rose_literal_id *, u32>> vlits;
+ vlits.reserve(tbi.g[v].literals.size());
+ for (u32 id : tbi.g[v].literals) {
+ vlits.emplace_back(&tbi.literals.at(id), vlag);
+ }
+
+ if (!compatibleLiteralsForMerge(ulits, vlits)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
+ tbi.g[v].index);
+ return true;
+}
+
+/* We cannot merge an engine if a trigger literal and a post literal overlap
+ * in such a way that engine status needs to be checked at a point before the
+ * engine's current location.
+ *
+ * i.e., for a trigger literal u and a post literal v,
+ * where delta is the earliest v can appear after u,
+ * we require that v_loc - v_lag >= u_loc
+ * ==> u_loc + delta - v_lag >= u_loc
+ * ==> delta >= v_lag
+ *
+ */
+static
+bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
+ u32 vlag) {
+ DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
+ escapeString(vl.s).c_str(), vlag);
+
+ for (size_t i = 0; i < vlag; i++) {
+ if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+ DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("OK\n");
+ return true;
+}
+
+template<typename VertexCont>
+static never_inline
+bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
+ const VertexCont &v2) {
+ flat_set<RoseVertex> preds;
+ for (auto v : v1) {
+ insert(&preds, inv_adjacent_vertices(v, build.g));
+ }
+
+ flat_set<u32> pred_lits;
+
+ /* No need to examine delays of a common pred - as it must already have
+ * survived the delay checks.
+ *
+ * This is important when the pred is in the anchored table as
+ * the literal is no longer available. */
+ flat_set<RoseVertex> known_good_preds;
+ for (auto v : v2) {
+ insert(&known_good_preds, inv_adjacent_vertices(v, build.g));
+ }
+
+ for (auto u : preds) {
+ if (!contains(known_good_preds, u)) {
+ insert(&pred_lits, build.g[u].literals);
+ }
+ }
+
+ vector<const rose_literal_id *> pred_rose_lits;
+ pred_rose_lits.reserve(pred_lits.size());
+ for (const auto &p : pred_lits) {
+ pred_rose_lits.push_back(&build.literals.at(p));
+ }
+
+ for (auto v : v2) {
+ u32 vlag = build.g[v].left.lag;
+ if (!vlag) {
+ continue;
+ }
+
+ for (const u32 vlit : build.g[v].literals) {
+ const rose_literal_id &vl = build.literals.at(vlit);
+ assert(!vl.delay); // this should never have got this far?
+ for (const auto &ul : pred_rose_lits) {
+ assert(!ul->delay); // this should never have got this far?
+
+ if (!checkPredDelay(*ul, vl, vlag)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+static
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+ const deque<RoseVertex> &verts1,
+ const deque<RoseVertex> &verts2) {
+ assert(!verts1.empty());
+ assert(!verts2.empty());
+
+ RoseVertex u_front = verts1.front();
+ RoseVertex v_front = verts2.front();
+
+ /* all vertices must have the same engine type: assume all verts in each
+ * group are already of the same type */
+ if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
+ return false;
+ }
+
+ bool is_prefix = tbi.isRootSuccessor(u_front);
+
+ /* We cannot merge prefixes/vertices if they are successors of different
+ * root vertices: similarly, assume the grouped vertices are compatible */
+ if (is_prefix) {
+ assert(tbi.isRootSuccessor(v_front));
+ set<RoseVertex> u_preds;
+ set<RoseVertex> v_preds;
+ insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
+ insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
+
+ if (u_preds != v_preds) {
+ return false;
+ }
+ }
+
+ vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+ for (auto a : verts1) {
+ if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) {
+ return false;
+ }
+
+ u32 ulag = tbi.g[a].left.lag;
+ for (u32 id : tbi.g[a].literals) {
+ ulits.emplace_back(&tbi.literals.at(id), ulag);
+ }
+ }
+
+ vector<pair<const rose_literal_id *, u32>> vlits;
+ for (auto a : verts2) {
+ if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) {
+ return false;
+ }
+
+ u32 vlag = tbi.g[a].left.lag;
+ for (u32 id : tbi.g[a].literals) {
+ vlits.emplace_back(&tbi.literals.at(id), vlag);
+ }
+ }
+
+ if (!compatibleLiteralsForMerge(ulits, vlits)) {
+ return false;
+ }
+
+ // Check preds are compatible as well.
+ if (!checkPredDelays(tbi, verts1, verts2)
+ || !checkPredDelays(tbi, verts2, verts1)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("vertex sets are mergeable\n");
+ return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
+ const set<RoseVertex> &v2) {
+ const deque<RoseVertex> vv1(v1.begin(), v1.end());
+ const deque<RoseVertex> vv2(v2.begin(), v2.end());
+ return mergeableRoseVertices(tbi, vv1, vv2);
+}
+
+namespace {
+
+/** \brief Priority queue element for Rose merges. */
+struct RoseMergeCandidate {
+ RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
+ u32 tb)
+ : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
+ tie_breaker(tb) {
+ if (r1.graph() && r2.graph()) {
+ const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
+ /* som_none as haigs don't merge and just a guiding heuristic */
+ CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
+ CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
+ stopxor = (stop1 ^ stop2).count();
+
+ // We use the number of vertices as an approximation of the state
+ // count here, as this is just feeding a comparison.
+ u32 vertex_count = num_vertices(h1) + num_vertices(h2);
+ states = vertex_count - min(vertex_count, cpl);
+ } else if (r1.castle() && r2.castle()) {
+ // FIXME
+ }
+ }
+
+ bool operator<(const RoseMergeCandidate &a) const {
+ if (stopxor != a.stopxor) {
+ return stopxor > a.stopxor;
+ }
+ if (cpl != a.cpl) {
+ return cpl < a.cpl;
+ }
+ if (states != a.states) {
+ return states > a.states;
+ }
+ return tie_breaker < a.tie_breaker;
+ }
+
+ left_id r1;
+ left_id r2;
+ u32 stopxor;
+ u32 cpl; //!< common prefix length
+ u32 states;
+ u32 tie_breaker; //!< determinism
+};
+}
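+
+/* Editorial note: std::priority_queue pops the *greatest* element under
+ * operator<, so the comparator above is deliberately inverted where needed.
+ * The resulting pop order is: smallest stopxor first (merges that disturb the
+ * stop alphabets least), then longest common prefix, then smallest estimated
+ * state count, with tie_breaker keeping the order deterministic. A worked
+ * example with hypothetical values:
+ *
+ *     a: stopxor=0, cpl=8,  states=4, tie_breaker=0
+ *     b: stopxor=0, cpl=12, states=4, tie_breaker=1
+ *
+ * Here a < b (cpl 8 < 12), so candidate b is popped and attempted first.
+ */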
+
+static
+bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
+ const vector<RoseVertex> &verts1,
+ const vector<RoseVertex> &verts2) {
+ assert(!verts1.empty() && !verts2.empty());
+
+ DEBUG_PRINTF("merging pair of leftfixes:\n");
+ DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(),
+ as_string_list(all_tops(r1)).c_str());
+ DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(),
+ as_string_list(all_tops(r2)).c_str());
+
+ RoseGraph &g = build.g;
+
+ if (r1.graph()) {
+ assert(r2.graph());
+ assert(r1.graph()->kind == r2.graph()->kind);
+ if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) {
+ DEBUG_PRINTF("nfa merge failed\n");
+ return false;
+ }
+
+ /* The graph in r1 has been merged into the graph in r2. Update r1's
+ * vertices with the new graph ptr. mergeNfaPair() does not alter the
+ * tops from the input graph so no need to update top values.
+ *
+ * It is the responsibility of the caller to ensure that the tops are
+ * distinct when they have different trigger conditions.
+ * [Note: mergeLeftfixesVariableLag() should have a common parent set]
+ */
+ shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
+ for (RoseVertex v : verts1) {
+ g[v].left.graph = h;
+ }
+
+ return true;
+ } else if (r1.castle()) {
+ assert(r2.castle());
+ assert(build.cc.grey.allowCastle);
+
+ map<u32, u32> top_map;
+ if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
+ DEBUG_PRINTF("castle merge failed\n");
+ return false;
+ }
+
+ // The castle in r1 has been merged into the castle in r2, with tops
+ // remapped as per top_map.
+ const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
+ for (RoseVertex v : verts1) {
+ g[v].left.castle = c;
+ for (const auto &e : in_edges_range(v, g)) {
+ g[e].rose_top = top_map.at(g[e].rose_top);
+ }
+ }
+ return true;
+ }
+
+ assert(0);
+ return false;
+}
+
+/**
+ * Checks that there is no problem due to the involved vertices if we merge two
+ * leftfix engines.
+ *
+ * This function takes the vertices on the right of the two engines.
+ *
+ * Unlike mergeableRoseVertices(), this does not:
+ * - check that engines themselves can be merged
+ * - use heuristics to find out if merging the engines is wise.
+ */
+static
+bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
+ const vector<RoseVertex> &targets_1,
+ const vector<RoseVertex> &targets_2) {
+ assert(!targets_1.empty());
+ assert(!targets_2.empty());
+
+ vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+ for (auto a : targets_1) {
+ u32 ulag = build.g[a].left.lag;
+ for (u32 id : build.g[a].literals) {
+ ulits.emplace_back(&build.literals.at(id), ulag);
+ }
+ }
+
+ vector<pair<const rose_literal_id *, u32>> vlits;
+ for (auto a : targets_2) {
+ u32 vlag = build.g[a].left.lag;
+ for (u32 id : build.g[a].literals) {
+ vlits.emplace_back(&build.literals.at(id), vlag);
+ }
+ }
+
+ if (!compatibleLiteralsForMerge(ulits, vlits)) {
+ return false;
+ }
+
+ // Check preds are compatible as well.
+ if (!checkPredDelays(build, targets_1, targets_2)
+ || !checkPredDelays(build, targets_2, targets_1)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("vertex sets are mergeable\n");
+ return true;
+}
+
+/**
+ * In block mode, we want to be a little more selective -- we will only merge
+ * prefix engines when the literal sets are the same or if the merged graph
+ * has only grown by a small amount.
+ */
+static
+bool goodBlockModeMerge(const RoseBuildImpl &build,
+ const vector<RoseVertex> &u_verts, const left_id &u_eng,
+ const vector<RoseVertex> &v_verts,
+ const left_id &v_eng) {
+ assert(!build.cc.streaming);
+
+ // Always merge infixes if we can (subject to the other criteria in
+ // mergeableRoseVertices).
+ if (!build.isRootSuccessor(u_verts.front())) {
+ return true;
+ }
+
+ const RoseGraph &g = build.g;
+
+ flat_set<u32> u_lits;
+ for (RoseVertex u : u_verts) {
+ insert(&u_lits, g[u].literals);
+ }
+
+ flat_set<u32> v_lits;
+ for (RoseVertex v : v_verts) {
+ insert(&v_lits, g[v].literals);
+ }
+
+ // Merge prefixes with identical literal sets (as we'd have to run them
+ // both when we see those literals anyway).
+ if (u_lits == v_lits) {
+ return true;
+ }
+
+    // The rest of this function only deals with the case when both engines
+    // have graph leftfixes.
+ if (!u_eng.graph()) {
+ return false;
+ }
+ assert(v_eng.graph());
+ const NGHolder &ug = *u_eng.graph();
+ const NGHolder &vg = *v_eng.graph();
+
+ size_t u_count = num_vertices(ug);
+ size_t v_count = num_vertices(vg);
+ DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
+ u_count, v_count);
+ if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
+ v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+ DEBUG_PRINTF("prefixes too big already\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("trying merge\n");
+ NGHolder h;
+ cloneHolder(h, vg);
+ if (!mergeNfaPair(ug, h, nullptr, build.cc)) {
+ DEBUG_PRINTF("couldn't merge\n");
+ return false;
+ }
+
+ const size_t merged_count = num_vertices(h);
+ DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
+ if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+ DEBUG_PRINTF("exceeded limit\n");
+ return false;
+ }
+
+ // We want to only perform merges that take advantage of some
+ // commonality in the two input graphs, so we check that the number of
+ // vertices has only grown a small amount: somewhere between the sum
+ // (no commonality) and the max (no growth at all) of the vertex counts
+ // of the input graphs.
+ size_t max_size = u_count + v_count;
+ size_t min_size = max(u_count, v_count);
+ size_t max_growth = ((max_size - min_size) * 25) / 100;
+ if (merged_count > min_size + max_growth) {
+ DEBUG_PRINTF("grew too much\n");
+ return false;
+ }
+
+ // We don't want to squander any chances at accelerating.
+ if (!isAccelerableLeftfix(build, h)
+ && (isAccelerableLeftfix(build, ug)
+ || isAccelerableLeftfix(build, vg))) {
+ DEBUG_PRINTF("would lose accel property\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("safe to merge\n");
+ return true;
+}
+
+/**
+ * Merge r1 into r2 if safe and appropriate. Returns true on success.
+ */
+static
+bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
+ const vector<RoseVertex> &targets_1,
+ left_id &r2,
+ const vector<RoseVertex> &targets_2) {
+ if (targets_1.empty() || targets_2.empty()) {
+ /* one of the engines has already been merged away */
+ return false;
+ }
+
+ assert(!r1.graph() == !r2.graph());
+ if (r1.graph()) {
+ NGHolder *h1 = r1.graph();
+ NGHolder *h2 = r2.graph();
+ CharReach stop1 = findStopAlphabet(*h1, SOM_NONE);
+ CharReach stop2 = findStopAlphabet(*h2, SOM_NONE);
+ CharReach stopboth = stop1 & stop2;
+ DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(),
+ stop2.count(), stopboth.count());
+ if (stopboth.count() < 10
+ && (stop1.count() > 10 || stop2.count() > 10)) {
+ DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
+ return false;
+ }
+ size_t maxstop = max(stop1.count(), stop2.count());
+ if (maxstop > 200 && stopboth.count() < 200) {
+ DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
+ return false;
+ }
+ }
+
+ /* Rechecking that the targets are compatible, as we may have already
+ * merged new states into r1 or r2 and we need to verify that this
+ * candidate is still ok. */
+ if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) {
+ return false;
+ }
+
+ if (!build.cc.streaming
+ && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) {
+ return false;
+ }
+
+ return mergeLeftfixPair(build, r1, r2, targets_1, targets_2);
+}
+
+static
+bool nfaHasNarrowStart(const NGHolder &g) {
+ if (out_degree(g.startDs, g) > 1) {
+ return false; // unanchored
+ }
+
+ CharReach cr;
+
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ cr |= g[v].char_reach;
+ }
+ return cr.count() <= NARROW_START_MAX;
+}
+
+static
+bool nfaHasFiniteMaxWidth(const NGHolder &g) {
+ return findMaxWidth(g).is_finite();
+}
+
+static
+bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
+ if (!proper_out_degree(h.startDs, h)) {
+ return false;
+ }
+
+ assert(!is_triggered(h));
+
+ NGHolder h_temp;
+ cloneHolder(h_temp, h);
+
+ vector<BoundedRepeatData> repeats;
+ bool suitable_for_sds_reforming = false;
+ const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
+ const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
+ const bool simple_model_selection = true; // FIRST is considered simple
+ analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
+ simple_model_selection, grey, &suitable_for_sds_reforming);
+
+ return suitable_for_sds_reforming;
+}
+
+static
+u32 commonPrefixLength(left_id &r1, left_id &r2) {
+ if (r1.graph() && r2.graph()) {
+ return commonPrefixLength(*r1.graph(), *r2.graph());
+ } else if (r1.castle() && r2.castle()) {
+ return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
+ }
+ return 0;
+}
+
+namespace {
+struct MergeKey {
+ MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) :
+ parents(std::move(parents_in)) {
+
+ // We want to distinguish prefixes (but not infixes) on whether they
+ // have a narrow start or max width.
+ if (left.graph() && !is_triggered(*left.graph())) {
+ const NGHolder &h = *left.graph();
+ narrowStart = nfaHasNarrowStart(h);
+ hasMaxWidth = nfaHasFiniteMaxWidth(h);
+ } else {
+ narrowStart = false;
+ hasMaxWidth = false;
+ }
+
+ if (left.castle()) {
+ /* castles should have a non-empty reach */
+ assert(left.castle()->reach().any());
+ castle_cr = left.castle()->reach();
+ } else {
+ assert(left.graph());
+ }
+ }
+
+ bool operator<(const MergeKey &b) const {
+ const MergeKey &a = *this;
+ ORDER_CHECK(narrowStart);
+ ORDER_CHECK(hasMaxWidth);
+ ORDER_CHECK(castle_cr);
+ ORDER_CHECK(parents);
+ return false;
+ }
+
+ // NOTE: these two bool discriminators are only used for prefixes, not
+ // infixes.
+ bool narrowStart;
+ bool hasMaxWidth;
+ CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */
+
+ flat_set<RoseVertex> parents;
+};
+}
+
+template <typename T>
+static
+void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) {
+ if (in.size() <= chunk_size) {
+ out->push_back(std::move(in));
+ return;
+ }
+
+ out->push_back(vector<T>());
+ out->back().reserve(chunk_size);
+    for (auto &t : in) { // non-const ref so std::move below actually moves
+ if (out->back().size() >= chunk_size) {
+ out->push_back(vector<T>());
+ out->back().reserve(chunk_size);
+ }
+ out->back().push_back(std::move(t));
+ }
+}
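+
+/* Usage example (editorial): chunking {1, 2, 3, 4, 5} with chunk_size = 2
+ * yields three chunks:
+ *
+ *     std::vector<int> in{1, 2, 3, 4, 5};
+ *     std::vector<std::vector<int>> out;
+ *     chunk(std::move(in), &out, 2);
+ *     // out == {{1, 2}, {3, 4}, {5}}
+ */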
+
+static
+insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
+ insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
+ for (auto v : vertices_range(g)) {
+ const auto &left = g[v].left;
+ if (!left) {
+ continue;
+ }
+ assert(contains(all_reports(left), left.leftfix_report));
+ eng_verts[left].push_back(v);
+ }
+
+ return eng_verts;
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines which share a common set of
+ * parent vertices.
+ *
+ * Engines are greedily merged pairwise by this process based on a priority
+ * queue keyed off the common prefix length.
+ *
+ * Engines are not merged if the lags are not compatible or if it would damage
+ * the stop alphabet.
+ *
+ * Infixes:
+ * - It is expected that when this is run all infixes are still at the single
+ * top stage as we have not yet merged unrelated infixes together. After
+ * execution, castles may have multiple (but equivalent) tops.
+ *
+ * Prefixes:
+ * - transient prefixes are not considered.
+ * - prefixes with a max width or a narrow start are kept segregated by
+ *   this phase and can only be merged with similar prefixes.
+ * - in block mode, merges are only performed if literal sets are the same.
+ * - merges are not considered in cases where dot star start state will be
+ * reformed to optimise a leading repeat.
+ */
+void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
+ if (!build.cc.grey.mergeRose) {
+ return;
+ }
+ assert(!hasOrphanedTops(build));
+
+ RoseGraph &g = build.g;
+
+ DEBUG_PRINTF("-----\n");
+ DEBUG_PRINTF("entry\n");
+ DEBUG_PRINTF("-----\n");
+
+ auto eng_verts = get_eng_verts(g);
+
+ map<MergeKey, vector<left_id>> engine_groups;
+ for (const auto &e : eng_verts) {
+ const left_id &left = e.first;
+ const auto &verts = e.second;
+ // Only non-transient for the moment.
+ if (contains(build.transient, left)) {
+ continue;
+ }
+
+ // No forced McClellan or Haig infix merges.
+ if (left.dfa() || left.haig()) {
+ continue;
+ }
+ assert(left.graph() || left.castle());
+
+ if (left.graph()) {
+ const NGHolder &h = *left.graph();
+ /* we should not have merged yet */
+ assert(!is_triggered(h) || onlyOneTop(h));
+
+ if (hasReformedStartDotStar(h, build.cc.grey)) {
+ continue; // preserve the optimisation of the leading repeat
+ }
+ } else {
+ assert(left.castle());
+
+ if (!build.cc.grey.allowCastle) {
+ DEBUG_PRINTF("castle merging disallowed by greybox\n");
+ continue;
+ }
+ }
+
+ // We collapse the anchored root into the root vertex when calculating
+ // parents, so that we can merge differently-anchored prefix roses
+ // together. (Prompted by UE-2100)
+
+ flat_set<RoseVertex> parents;
+ for (RoseVertex v : verts) {
+ insert(&parents, inv_adjacent_vertices_range(v, g));
+ }
+
+ if (contains(parents, build.anchored_root)) {
+ parents.erase(build.anchored_root);
+ parents.insert(build.root);
+ }
+
+ assert(!parents.empty());
+
+#ifndef _WIN32
+ engine_groups[MergeKey(left, parents)].push_back(left);
+#else
+    // On Windows, the MergeKey object passed into the 'engine_groups' map is
+    // not copied; it is freed along with engine_groups.clear(). If the
+    // MergeKey were constructed on the stack, it would be destructed once at
+    // the end of its own lifetime and again by engine_groups.clear(), which
+    // triggers an is_block_type_valid() assertion failure in the MergeKey
+    // destructor. Construct it on the heap instead.
+ MergeKey *mk = new MergeKey(left, parents);
+ engine_groups[*mk].push_back(left);
+#endif
+ }
+
+ vector<vector<left_id>> chunks;
+ for (auto &raw_group : engine_groups | map_values) {
+ chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX);
+ }
+ engine_groups.clear();
+
+ DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size());
+
+ for (auto &roses : chunks) {
+ if (roses.size() < 2) {
+ continue;
+ }
+ // All pairs on the prio queue.
+ u32 tie_breaker = 0;
+ priority_queue<RoseMergeCandidate> pq;
+ for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) {
+ left_id r1 = *it;
+ const vector<RoseVertex> &targets_1 = eng_verts[r1];
+
+ for (auto jt = next(it); jt != ite; ++jt) {
+ left_id r2 = *jt;
+
+ /* we should have already split on engine types and reach */
+ assert(!r1.castle() == !r2.castle());
+ assert(!r1.graph() == !r2.graph());
+ assert(!r1.castle()
+ || r1.castle()->reach() == r2.castle()->reach());
+
+ const vector<RoseVertex> &targets_2 = eng_verts[r2];
+ if (!checkVerticesOkForLeftfixMerge(build, targets_1,
+ targets_2)) {
+ continue; // No point queueing unmergeable cases.
+ }
+
+ u32 cpl = commonPrefixLength(r1, r2);
+ pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++));
+ }
+ }
+
+ DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
+
+ while (!pq.empty()) {
+ left_id r1 = pq.top().r1;
+ left_id r2 = pq.top().r2;
+ DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n",
+ r1.graph(), r2.graph(), pq.top().cpl, pq.top().states);
+ pq.pop();
+ vector<RoseVertex> &targets_1 = eng_verts[r1];
+ vector<RoseVertex> &targets_2 = eng_verts[r2];
+ if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2,
+ targets_2)) {
+ insert(&targets_2, targets_2.end(), targets_1);
+ targets_1.clear();
+ }
+ }
+ }
+
+ DEBUG_PRINTF("-----\n");
+ DEBUG_PRINTF("exit\n");
+ DEBUG_PRINTF("-----\n");
+ assert(!hasOrphanedTops(build));
+}
+
+namespace {
+
+/**
+ * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
+ */
+struct DedupeLeftKey {
+ DedupeLeftKey(const RoseBuildImpl &build,
+ flat_set<pair<size_t, u32>> preds_in, const left_id &left)
+ : left_hash(hashLeftfix(left)), preds(move(preds_in)),
+ transient(contains(build.transient, left)) {
+ }
+
+ bool operator<(const DedupeLeftKey &b) const {
+ return tie(left_hash, preds, transient)
+ < tie(b.left_hash, b.preds, b.transient);
+ }
+
+private:
+ /** Quick hash of the leftfix itself. Must be identical for a given pair of
+ * graphs if is_equal would return true. */
+ size_t left_hash;
+
+ /** For each in-edge, the pair of (parent index, edge top). */
+ flat_set<pair<size_t, u32>> preds;
+
+ /** We don't want to combine transient with non-transient. */
+ bool transient;
+};
+
+} // namespace
+
+static
+flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
+ flat_set<pair<size_t, u32>> preds;
+ for (const auto &e : in_edges_range(v, g)) {
+ preds.emplace(g[source(e, g)].index, g[e].rose_top);
+ }
+ return preds;
+}
+
+/**
+ * This is a generalisation of \ref dedupeLeftfixes which relaxes two
+ * restrictions: multiple predecessor roles are allowed and the delay used by
+ * each vertex need not be the same. Like \ref dedupeLeftfixes,
+ * the leftfixes' successor vertices are first grouped to reduce the number of
+ * potential candidates - the grouping in this case is by the set of
+ * predecessor roles with their associated top events. For the dedupe to be
+ * possible, it is required that:
+ *
+ * 1. the nfa graphs with respect to the relevant reports are identical
+ * 2. the nfa graphs are triggered by the same roles with same events (ensured
+ * by the initial grouping pass)
+ * 3. all the successor roles of either graph can inspect the combined leftfix
+ * without advancing the state of the leftfix past the point that another
+ * successor may want to inspect it; the overlap relationships between the
+ * involved literals are examined to ensure that this property holds.
+ *
+ * Note: this is unable to dedupe when delayed literals are involved unlike
+ * dedupeLeftfixes.
+ */
+void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
+ DEBUG_PRINTF("entry\n");
+
+ RoseGraph &g = build.g;
+ auto eng_verts = get_eng_verts(g);
+
+ map<DedupeLeftKey, vector<left_id>> engine_groups;
+ for (const auto &e : eng_verts) {
+ const left_id &left = e.first;
+ const auto &verts = e.second;
+
+ /* There should only be one report on an engine as no merges have
+ * happened yet. (aside from eod prefixes) */
+ if (all_reports(left).size() != 1) {
+ assert(any_of_in(adjacent_vertices_range(verts.front(), g),
+ [&](RoseVertex w) { return g[w].eod_accept; }));
+ continue;
+ }
+
+ if (left.haig()) {
+ /* TODO: allow deduping of identical haigs */
+ continue;
+ }
+
+ if (left.graph()) {
+ /* we should not have merged yet */
+ assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
+ }
+
+        auto preds = get_pred_tops(verts.front(), g);
+        bool same_preds = true;
+        for (RoseVertex v : verts) {
+            if (preds != get_pred_tops(v, g)) {
+                DEBUG_PRINTF("distinct pred sets\n");
+                same_preds = false;
+                break;
+            }
+        }
+        if (!same_preds) {
+            continue; // all verts must share the same pred tops to dedupe
+        }
+ engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
+ }
+
+ /* We don't bother chunking as we expect deduping to be successful if the
+ * hashes match */
+
+ for (auto &group : engine_groups | map_values) {
+ DEBUG_PRINTF("group of %zu roses\n", group.size());
+
+ if (group.size() < 2) {
+ continue;
+ }
+
+ for (auto it = group.begin(); it != group.end(); ++it) {
+ left_id r1 = *it;
+ vector<RoseVertex> &verts1 = eng_verts[r1];
+ assert(!verts1.empty()); /* cleared engines should be behind us */
+
+ assert(all_reports(r1).size() == 1);
+ ReportID r1_report = *all_reports(r1).begin();
+
+ for (auto jt = next(it); jt != group.end(); ++jt) {
+ left_id r2 = *jt;
+ vector<RoseVertex> &verts2 = eng_verts[r2];
+ assert(!verts2.empty());
+ assert(all_reports(r2).size() == 1);
+ ReportID r2_report = *all_reports(r2).begin();
+
+ if (!is_equal(r1, r1_report, r2, r2_report)) {
+ continue;
+ }
+
+ if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
+
+ // Replace r1 with r2.
+
+ for (auto v : verts1) {
+ DEBUG_PRINTF("replacing report %u with %u on %zu\n",
+ r2_report, r1_report, g[v].index);
+ u32 orig_lag = g[v].left.lag;
+ g[v].left = g[verts2.front()].left;
+ g[v].left.lag = orig_lag;
+ }
+
+ insert(&verts2, verts2.end(), verts1);
+ verts1.clear();
+
+ /* remove stale entry from transient set, if present */
+ build.transient.erase(r1);
+
+ break;
+ }
+ }
+ }
+}
+
+static
+u32 findUnusedTop(const flat_set<u32> &tops) {
+ u32 i = 0;
+ while (contains(tops, i)) {
+ i++;
+ }
+ return i;
+}
+
+// Replace top 't' on edges with new top 'u'.
+static
+void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
+ for (const auto &e : out_edges_range(h.start, h)) {
+ NFAVertex v = target(e, h);
+ if (v == h.startDs) {
+ continue;
+ }
+ flat_set<u32> new_tops;
+ for (u32 t : h[e].tops) {
+ DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t);
+ new_tops.insert(top_mapping.at(t));
+ }
+ h[e].tops = std::move(new_tops);
+ }
+}
+
+static
+bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
+ map<u32, u32> &top_mapping) {
+ flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2);
+
+ DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
+ tops2.size());
+
+ // If our tops don't intersect, we're OK to merge with no changes.
+ if (!has_intersection(tops1, tops2)) {
+ DEBUG_PRINTF("tops don't intersect\n");
+ return true;
+ }
+
+ // Otherwise, we have to renumber the tops in h1 so that they don't overlap
+ // with the tops in h2.
+ top_mapping.clear();
+ for (u32 t : tops1) {
+ u32 u = findUnusedTop(tops2);
+ DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
+ top_mapping.insert(make_pair(t, u));
+ assert(!contains(tops2, u));
+ tops2.insert(u);
+ }
+
+ replaceTops(h1, top_mapping);
+ return true;
+}
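+
+/* Worked example (editorial): suppose tops1 = {0, 1} and tops2 = {0, 2}. The
+ * sets intersect, so h1's tops are renumbered. For t = 0, findUnusedTop()
+ * yields u = 1 (the smallest value not in {0, 2}) and tops2 becomes
+ * {0, 1, 2}; for t = 1 it yields u = 3. The final top_mapping is
+ * {0 -> 1, 1 -> 3}, leaving h1 and h2 triggered on the disjoint top sets
+ * {1, 3} and {0, 2}.
+ */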
+
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const deque<RoseVertex> &verts1) {
+ map<u32, u32> top_mapping;
+ if (!setDistinctTops(h1, h2, top_mapping)) {
+ return false;
+ }
+
+ if (top_mapping.empty()) {
+ return true; // No remapping necessary.
+ }
+
+ for (auto v : verts1) {
+ DEBUG_PRINTF("vertex %zu\n", g[v].index);
+ assert(!g[v].left.haig);
+ assert(!g[v].left.dfa);
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 t = g[e].rose_top;
+ DEBUG_PRINTF("t=%u\n", t);
+ assert(contains(top_mapping, t));
+ g[e].rose_top = top_mapping[t];
+ DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
+ g[source(e, g)].index, g[target(e, g)].index, t,
+ top_mapping[t]);
+ }
+ }
+
+ return true;
+}
+
+static
+bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const deque<RoseVertex> &verts1) {
+ map<u32, u32> top_mapping;
+ if (!setDistinctTops(h1, h2, top_mapping)) {
+ return false;
+ }
+
+ if (top_mapping.empty()) {
+ return true; // No remapping necessary.
+ }
+
+ for (auto v : verts1) {
+ DEBUG_PRINTF("vertex %zu\n", g[v].index);
+ u32 t = g[v].suffix.top;
+ assert(contains(top_mapping, t));
+ g[v].suffix.top = top_mapping[t];
+ }
+
+ return true;
+}
+
+/** \brief Estimate the number of accel states in the given graph when built as
+ * an NFA.
+ *
+ * (The easiest way to estimate something like this is to actually build it:
+ * the criteria for NFA acceleration are quite complicated and buried in
+ * limex_compile.)
+ */
+static
+u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
+ return countAccelStates(h, &tbi.rm, tbi.cc);
+}
+
+static
+void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
+ RoseGraph &g = tbi.g;
+ DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
+
+ // We track the number of accelerable states for each graph in a map and
+ // only recompute them when the graph is modified.
+ unordered_map<left_id, u32> accel_count;
+ for (const auto &rose : roses) {
+ assert(rose.graph()->kind == NFA_INFIX);
+ accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
+ }
+
+ for (auto it = roses.begin(); it != roses.end(); ++it) {
+ left_id r1 = *it;
+ const deque<RoseVertex> &verts1 = roses.vertices(r1);
+
+ deque<left_id> merged;
+ for (auto jt = next(it); jt != roses.end(); ++jt) {
+ left_id r2 = *jt;
+ const deque<RoseVertex> &verts2 = roses.vertices(r2);
+
+ DEBUG_PRINTF("consider merging rose %p (%zu verts) "
+ "with %p (%zu verts)\n",
+ r1.graph(), verts1.size(), r2.graph(), verts2.size());
+
+ u32 accel1 = accel_count[r1];
+ if (accel1 >= NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("h1 has hit max accel\n");
+ break; // next h1
+ }
+
+ u32 accel2 = accel_count[r2];
+ if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+ "accel2=%u)\n",
+ accel1, accel2);
+ continue; // next h2
+ }
+
+ if (!mergeableRoseVertices(tbi, verts1, verts2)) {
+ DEBUG_PRINTF("not mergeable\n");
+ continue; // next h2
+ }
+
+ // Attempt to merge h2 into h1.
+
+ NGHolder victim;
+ cloneHolder(victim, *r2.graph());
+
+ // Store a copy of the in-edge properties in case we have to roll
+ // back.
+ map<RoseEdge, RoseEdgeProps> edge_props;
+ for (auto v : verts2) {
+ for (const auto &e : in_edges_range(v, g)) {
+ edge_props[e] = g[e];
+ }
+ }
+
+ if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
+ DEBUG_PRINTF("can't set distinct tops\n");
+ continue; // next h2
+ }
+
+ assert(victim.kind == r1.graph()->kind);
+ assert(!generates_callbacks(*r1.graph()));
+ if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ // Roll back in-edge properties.
+ for (const auto &m : edge_props) {
+ g[m.first] = m.second;
+ }
+ continue; // next h2
+ }
+
+ // Update h2's roses to point to h1 now
+ shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
+ for (auto v : verts2) {
+ g[v].left.graph = winner;
+ }
+ roses.insert(r1, verts2);
+
+ merged.push_back(r2);
+
+ if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
+ DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+ num_vertices(*winner));
+ break; // next h1
+ }
+
+ // Update h1's accel count estimate.
+ accel_count[r1] = estimatedAccelStates(tbi, *winner);
+ }
+
+ DEBUG_PRINTF("%zu roses merged\n", merged.size());
+ roses.erase_all(merged.begin(), merged.end());
+ }
+}
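+
+/* Illustrative sketch (editorial, not Hyperscan code; hypothetical names):
+ * the save/attempt/rollback idiom used above for the in-edge properties, in
+ * generic form:
+ *
+ *     std::map<Edge, Props> saved;
+ *     for (Edge e : affected_edges) {
+ *         saved[e] = props[e];            // snapshot before mutating
+ *     }
+ *     if (!tryMerge(...)) {               // merge may clobber props
+ *         for (const auto &m : saved) {
+ *             props[m.first] = m.second;  // failed: restore the snapshot
+ *         }
+ *     }
+ *
+ * This prevents a failed merge attempt from leaving partially-remapped tops
+ * on the in-edges of the victim's vertices.
+ */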
+
+/**
+ * This pass attempts to merge prefix/infix engines with a small number of
+ * vertices together into larger engines. The engines must not have a
+ * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines
+ * that have compatible lag are greedily grouped such that they remain
+ * accelerable and only have a small number of states. Note: if a role has an
+ * infix with multiple trigger vertices, the role will be left unchanged by this
+ * pass and will remain using an unmerged graph.
+ */
+void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+
+ LeftfixBouquet nfa_leftfixes;
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
+ // Handle single-parent infixes only.
+ if (tbi.isRootSuccessor(v)) {
+ continue;
+ }
+
+ left_id left(g[v].left);
+
+ // Only non-transient for the moment.
+ if (contains(tbi.transient, left)) {
+ continue;
+ }
+
+ // No DFAs or Haigs right now.
+ if (left.dfa() || left.haig()) {
+ continue;
+ }
+
+ // Castles are handled by a different pass.
+ if (left.castle()) {
+ continue;
+ }
+
+ assert(left.graph());
+ NGHolder &h = *left.graph();
+
+ /* Ensure that kind on the graph is correct */
+ assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
+
+ if (hasReformedStartDotStar(h, tbi.cc.grey)) {
+ /* We would lose optimisations of the leading repeat by merging. */
+ continue;
+ }
+
+ // Small roses only.
+ if (num_vertices(h) > small_rose_threshold(tbi.cc)) {
+ continue;
+ }
+
+ nfa_leftfixes.insert(left, v);
+ }
+
+ deque<LeftfixBouquet> leftfix_groups;
+ chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX);
+ nfa_leftfixes.clear();
+ DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n",
+ leftfix_groups.size());
+
+ for (auto &group : leftfix_groups) {
+ mergeNfaLeftfixes(tbi, group);
+ }
+}
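+
+/*
+ * For example (illustrative numbers only): assuming a group limit
+ * MERGE_GROUP_SIZE_MAX of 200, 450 qualifying leftfixes would be chunked
+ * into bouquets of 200, 200 and 50, and mergeNfaLeftfixes() then only
+ * attempts pairwise merges within each bouquet, bounding the quadratic
+ * scan.
+ */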
+
+static
+void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
+ insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) {
+ /* caller must have already ensured that candidates have the same reach */
+ RoseGraph &g = build.g;
+ DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size());
+
+ for (auto it = cands.begin(); it != cands.end(); ++it) {
+ left_id &cand_1 = *it;
+ vector<RoseVertex> &verts_1 = eng_verts[cand_1];
+ if (verts_1.empty()) {
+ continue;
+ }
+
+ for (auto jt = next(it); jt != cands.end(); ++jt) {
+ const left_id &cand_2 = *jt;
+ vector<RoseVertex> &verts_2 = eng_verts[cand_2];
+ if (verts_2.empty()) {
+ continue;
+ }
+
+ assert(cand_1.castle()->reach() == cand_2.castle()->reach());
+
+ if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) {
+ DEBUG_PRINTF("not mergeable\n");
+ continue; // next cand_2
+ }
+
+ DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(),
+ cand_1.castle()->repeats.size());
+ DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(),
+ cand_2.castle()->repeats.size());
+
+ map<u32, u32> top_map;
+ if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) {
+ DEBUG_PRINTF("couldn't merge\n");
+ continue; // next cand_2
+ }
+
+ // Update castle2's roses to point to castle1 now.
+ shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle;
+ for (auto v : verts_2) {
+ assert(g[v].left.castle.get() == cand_2.castle());
+ g[v].left.castle = winner;
+ for (const auto &e : in_edges_range(v, g)) {
+ g[e].rose_top = top_map.at(g[e].rose_top);
+ }
+ }
+
+ insert(&verts_1, verts_1.end(), verts_2);
+ verts_2.clear();
+ }
+ }
+}
+
+/**
+ * Merges castles with the same reach together regardless of where in the rose
+ * graph they are. Note: there is no requirement for the castles to share
+ * parent or target vertices.
+ *
+ * There are no heuristics for reducing block mode merges as castle speed
+ * mainly depends on the reach being scanned.
+ */
+void mergeCastleLeftfixes(RoseBuildImpl &build) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses
+ || !build.cc.grey.allowCastle) {
+ return;
+ }
+
+ RoseGraph &g = build.g;
+
+ insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left.castle) {
+ continue;
+ }
+
+ // Handle infixes only.
+ if (build.isRootSuccessor(v)) {
+ continue;
+ }
+
+ eng_verts[g[v].left].push_back(v);
+ }
+
+ map<CharReach, vector<left_id>> by_reach;
+ for (const auto &left : eng_verts | map_keys) {
+ by_reach[left.castle()->reach()].push_back(left);
+ }
+
+ vector<vector<left_id>> chunks;
+ for (auto &raw_group : by_reach | map_values) {
+ chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX);
+ }
+ by_reach.clear();
+
+ DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size());
+
+ for (auto &chunk : chunks) {
+ mergeCastleChunk(build, chunk, eng_verts);
+ }
+}
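+
+/*
+ * Sketch: castles are bucketed by reach before any merging because
+ * mergeCastle() requires both protos to scan the same reach. For example
+ * (hypothetical repeats), [^a]{2,5} and [^a]{10} can share one castle
+ * scanning [^a], while [^b]{2,5} must land in a different bucket.
+ */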
+
+static
+void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
+ const bool acyclic) {
+ RoseGraph &g = tbi.g;
+
+ DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size());
+
+ // If this isn't an acyclic case, we track the number of accelerable states
+ // for each graph in a map and only recompute them when the graph is
+ // modified.
+ unordered_map<suffix_id, u32> accel_count;
+ if (!acyclic) {
+ for (const auto &suffix : suffixes) {
+ assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX);
+ accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph());
+ }
+ }
+
+ for (auto it = suffixes.begin(); it != suffixes.end(); ++it) {
+ suffix_id s1 = *it;
+ const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
+ assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
+
+ // Caller should ensure that we don't propose merges of graphs that are
+ // already too big.
+ assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc));
+
+ deque<suffix_id> merged;
+ for (auto jt = next(it); jt != suffixes.end(); ++jt) {
+ suffix_id s2 = *jt;
+ const deque<RoseVertex> &verts2 = suffixes.vertices(s2);
+ assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX);
+
+ if (!acyclic) {
+ u32 accel1 = accel_count[s1];
+ if (accel1 >= NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("h1 has hit max accel\n");
+ break; // next h1
+ }
+
+ u32 accel2 = accel_count[s2];
+ if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+ "accel2=%u)\n",
+ accel1, accel2);
+ continue; // next h2
+ }
+ }
+
+ // Attempt to merge h2 into h1.
+
+ NGHolder victim;
+ cloneHolder(victim, *s2.graph());
+
+ // Store a copy of the suffix tops in case we have to roll back.
+ map<RoseVertex, u32> old_tops;
+ for (auto v : verts2) {
+ old_tops[v] = g[v].suffix.top;
+ }
+
+ if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) {
+ DEBUG_PRINTF("can't set distinct tops\n");
+ continue; // next h2
+ }
+
+ if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ // Roll back suffix tops.
+ for (const auto &m : old_tops) {
+ g[m.first].suffix.top = m.second;
+ }
+ continue; // next h2
+ }
+
+ // Update h2's roses to point to h1 now
+ shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph;
+ for (auto v : verts2) {
+ g[v].suffix.graph = winner;
+ }
+ suffixes.insert(s1, verts2);
+ merged.push_back(s2);
+
+ if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) {
+ DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+ num_vertices(*s1.graph()));
+ break; // next h1
+ }
+
+ if (!acyclic) {
+ // Update h1's accel count estimate.
+ accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
+ }
+ }
+
+ DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
+ suffixes.erase_all(merged.begin(), merged.end());
+ }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to merge each pairwise with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or loses the ability to be accelerated. A successful merge modifies
+ * the existing suffix graph of one member (g1); the other member updates its
+ * graph to refer to g1 instead of its previous graph (g2) and uses the newly
+ * created tops. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected, as g2 will continue to exist while it still has
+ * roles triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ SuffixBouquet suffixes;
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h || tbi.isInETable(v)) {
+ continue;
+ }
+
+ assert(!g[v].suffix.haig);
+
+ if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
+ continue;
+ }
+
+ if (!isAcyclic(*h)) {
+ continue;
+ }
+
+ suffixes.insert(g[v].suffix, v);
+ }
+
+ deque<SuffixBouquet> suff_groups;
+ chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+ DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+ suff_groups.size());
+ suffixes.clear();
+
+ for (auto &group : suff_groups) {
+ mergeSuffixes(tbi, group, true);
+ }
+}
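+
+/*
+ * Illustrative example (hypothetical roles): u and v have suffixes g1 and
+ * g2, each triggered with top 0. If g2 is merged into g1 and its events
+ * are remapped to top 1, v becomes (suffix.graph = g1, suffix.top = 1)
+ * while u keeps top 0. Any role outside the bouquet that still points at
+ * g2 is untouched and keeps triggering the old graph.
+ */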
+
+/**
+ * See \ref mergeAcyclicSuffixes for a full description of this merge
+ * procedure; this pass applies the same approach to small non-acyclic
+ * suffixes.
+ */
+void mergeSmallSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+ SuffixBouquet suffixes;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h || tbi.isInETable(v)) {
+ continue;
+ }
+ assert(!g[v].suffix.haig);
+
+ // Leave acyclics out for the moment.
+ if (isAcyclic(*h)) {
+ continue;
+ }
+
+ // Small-ish suffixes only.
+ if (num_vertices(*h) > 32) {
+ continue;
+ }
+
+ suffixes.insert(g[v].suffix, v);
+ }
+
+ deque<SuffixBouquet> suff_groups;
+ chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+ DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+ suff_groups.size());
+ suffixes.clear();
+
+ for (auto &group : suff_groups) {
+ mergeSuffixes(tbi, group, false);
+ }
+}
+
+static
+void removeDeadOutfixes(vector<OutfixInfo> &outfixes) {
+ auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); };
+ outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead),
+ end(outfixes));
+}
+
+static
+void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) {
+ assert(!winner.is_dead());
+
+ winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth);
+ winner.minWidth = min(winner.minWidth, victim.minWidth);
+ winner.maxWidth = max(winner.maxWidth, victim.maxWidth);
+ winner.maxOffset = max(winner.maxOffset, victim.maxOffset);
+ mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info);
+
+ // This outfix can be ignored in small block mode if both were. The dedupe
+ // layer at runtime will protect us from extra matches if only one was in
+ // the small block matcher.
+ winner.in_sbmatcher &= victim.in_sbmatcher;
+}
+
+static
+map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build,
+ const vector<NGHolder *> &nfas) {
+ map<NGHolder *, NGHolder *> merged;
+
+ vector<NGHolder *> batch;
+ for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) {
+ batch.push_back(*it);
+ assert((*it)->kind == NFA_OUTFIX);
+ if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) {
+ auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc);
+ insert(&merged, batch_merged);
+ batch.clear();
+ }
+ }
+
+ return merged;
+}
+
+static
+void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) {
+ DEBUG_PRINTF("merging %zu nfas\n", nfas.size());
+ if (nfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ map<NGHolder *, size_t> nfa_mapping;
+ for (size_t i = 0; i < outfixes.size(); i++) {
+ auto *holder = outfixes[i].holder();
+ if (holder) {
+ nfa_mapping[holder] = i;
+ }
+ }
+
+ map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas);
+ if (merged.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("%zu nfas merged\n", merged.size());
+
+ // Update the outfix info for merged holders.
+ for (const auto &m : merged) {
+ OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]);
+ OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]);
+ mergeOutfixInfo(winner, victim);
+ victim.clear();
+ }
+
+ removeDeadOutfixes(outfixes);
+}
+
+namespace {
+struct MergeMcClellan {
+ MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in)
+ : rm(rm_in), grey(grey_in) {}
+
+ unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const {
+ assert(d1 && d2);
+ return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey);
+ }
+
+private:
+ const ReportManager &rm;
+ const Grey &grey;
+};
+
+struct MergeHaig {
+ explicit MergeHaig(u32 limit_in) : limit(limit_in) {}
+
+ unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1,
+ const raw_som_dfa *d2) const {
+ assert(d1 && d2);
+ return attemptToMergeHaig({d1, d2}, limit);
+ }
+
+private:
+ const u32 limit; //!< state limit for merged result.
+};
+}
+
+/**
+ * Generic pairwise merge algorithm that can be used for either McClellan
+ * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge
+ * operation to a merge functor, which allows the caller to set some policy
+ * (state limits, etc).
+ *
+ * This is currently astonishingly simple and just considers every pair of
+ * DFAs, slow and steady. We may wish to actually apply a merge ordering
+ * strategy in the future.
+ */
+template<class RawDfa, class MergeFunctor>
+static
+void pairwiseDfaMerge(vector<RawDfa *> &dfas,
+ unordered_map<RawDfa *, size_t> &dfa_mapping,
+ vector<OutfixInfo> &outfixes,
+ MergeFunctor merge_func) {
+ DEBUG_PRINTF("merging group of size %zu\n", dfas.size());
+
+ for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) {
+ if (!*it) {
+ continue;
+ }
+ for (auto jt = next(it); jt != ite; ++jt) {
+ if (!*jt) {
+ continue;
+ }
+
+ DEBUG_PRINTF("try merge %p and %p\n", *it, *jt);
+ unique_ptr<RawDfa> rdfa = merge_func(*it, *jt);
+ if (!rdfa) {
+ continue; // Merge failed.
+ }
+
+ DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get());
+ OutfixInfo &winner = outfixes.at(dfa_mapping[*it]);
+ OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]);
+ assert(!winner.is_dead() && !victim.is_dead());
+
+ RawDfa *dfa_ptr = rdfa.get();
+ dfa_mapping[dfa_ptr] = dfa_mapping[*it];
+ dfa_mapping.erase(*it);
+ winner.proto = move(rdfa);
+
+ mergeOutfixInfo(winner, victim);
+
+ victim.clear();
+ *jt = nullptr; // to be deleted.
+ *it = dfa_ptr;
+ }
+ }
+}
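+
+/*
+ * The functor contract is simply "take two raw DFA pointers, return a
+ * merged DFA or nullptr on failure", so callers can plug in their own
+ * policy. A sketch of a hypothetical functor with a tighter state cap than
+ * DFA_MERGE_MAX_STATES (SmallMergeMcClellan is not part of this file):
+ *
+ *     struct SmallMergeMcClellan {
+ *         const ReportManager &rm;
+ *         const Grey &grey;
+ *         unique_ptr<raw_dfa> operator()(const raw_dfa *d1,
+ *                                        const raw_dfa *d2) const {
+ *             // Cap the merged result at 1000 states (illustrative).
+ *             return mergeTwoDfas(d1, d2, 1000, &rm, grey);
+ *         }
+ *     };
+ */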
+
+template<class RawDfa, class MergeFunctor>
+static
+void chunkedDfaMerge(vector<RawDfa *> &dfas,
+ unordered_map<RawDfa *, size_t> &dfa_mapping,
+ vector<OutfixInfo> &outfixes,
+ MergeFunctor merge_func) {
+ DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size());
+
+ vector<RawDfa *> out_dfas;
+ vector<RawDfa *> chunk;
+ for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) {
+ chunk.push_back(*it);
+ if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) {
+ pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func);
+ out_dfas.insert(end(out_dfas), begin(chunk), end(chunk));
+ chunk.clear();
+ }
+ }
+
+ // Remove null (merged) DFAs and update vector for subsequent use.
+ out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
+ out_dfas.end());
+ dfas.swap(out_dfas);
+ DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
+}
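+
+/*
+ * For example (assuming a hypothetical chunk limit of 64): 150 candidate
+ * DFAs are processed as chunks of 64, 64 and 22; the survivors of each
+ * chunk's pairwise merge are collected into out_dfas and handed back to
+ * the caller through dfas.
+ */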
+
+static
+void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
+ DEBUG_PRINTF("merging %zu nfas\n", dfas.size());
+ if (dfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ /* key is index into outfix array, as iterators etc. may be invalidated by
+ * element addition. */
+ unordered_map<raw_dfa *, size_t> dfa_mapping;
+ for (size_t i = 0; i < outfixes.size(); i++) {
+ auto *rdfa = outfixes[i].rdfa();
+ if (rdfa) {
+ dfa_mapping[rdfa] = i;
+ }
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, outfixes,
+ MergeMcClellan(tbi.rm, tbi.cc.grey));
+ removeDeadOutfixes(outfixes);
+}
+
+static
+void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
+ const Grey &grey) {
+ if (!grey.roseMcClellanOutfix) {
+ return;
+ }
+
+ DEBUG_PRINTF("merge combo\n");
+
+ bool seen_dfa = false;
+ u32 nfa_count = 0;
+ for (const auto &outfix : tbi.outfixes) {
+ if (outfix.holder()) {
+ DEBUG_PRINTF("nfa\n");
+ nfa_count++;
+ } else if (outfix.rdfa()) {
+ DEBUG_PRINTF("dfa\n");
+ seen_dfa = true;
+ }
+ }
+
+ DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count,
+ (int)seen_dfa);
+ if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
+ DEBUG_PRINTF("no combo merges possible\n");
+ return;
+ }
+
+ /* key is index into outfix array, as iterators etc. may be invalidated by
+ * element addition. */
+ size_t new_dfas = 0;
+ unordered_map<raw_dfa *, size_t> dfa_mapping;
+ vector<raw_dfa *> dfas;
+
+ for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
+ auto &outfix = *it;
+ assert(!outfix.is_dead());
+
+ if (outfix.rdfa()) {
+ auto *rdfa = outfix.rdfa();
+ dfas.push_back(rdfa);
+ dfa_mapping[rdfa] = it - tbi.outfixes.begin();
+ continue;
+ }
+
+ if (!outfix.holder()) {
+ continue;
+ }
+
+ NGHolder *h = outfix.holder();
+ assert(h->kind == NFA_OUTFIX);
+ auto rdfa = buildMcClellan(*h, &rm, grey);
+ if (rdfa) {
+ // Transform this outfix into a DFA and add it to the merge set.
+ dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
+ dfas.push_back(rdfa.get());
+ outfix.proto = move(rdfa);
+ new_dfas++;
+ }
+ }
+
+ DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
+
+ if (!new_dfas) {
+ /* assumes normal dfas have already been fully merged */
+ return;
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
+ MergeMcClellan(tbi.rm, tbi.cc.grey));
+ removeDeadOutfixes(tbi.outfixes);
+}
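+
+/*
+ * The conversion step used above, in isolation (sketch): an outfix holder
+ * is determinised into a raw_dfa prototype so it can take part in
+ * DFA-level merges. buildMcClellan() returns nullptr when it cannot
+ * produce a suitable DFA, in which case the outfix simply stays an NFA.
+ *
+ *     auto rdfa = buildMcClellan(*h, &rm, grey);
+ *     if (rdfa) {
+ *         outfix.proto = move(rdfa);
+ *     }
+ */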
+
+static
+void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
+ u32 limit) {
+ if (dfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ unordered_map<raw_som_dfa *, size_t> dfa_mapping;
+ for (size_t i = 0; i < outfixes.size(); i++) {
+ auto *haig = outfixes[i].haig();
+ if (haig) {
+ dfa_mapping[haig] = i;
+ }
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
+ removeDeadOutfixes(outfixes);
+}
+
+/**
+ * This pass attempts to merge outfix engines together. At this point in time,
+ * the engine type (NFA, DFA, Haig) has already been decided for each outfix
+ * and outfixes can only be merged with others of the same type. NFAs are merged
+ * in a priority order based on common prefix length. The other types are
+ * merged blindly. Engines are merged to the extent that they can still be
+ * implemented efficiently.
+ */
+void mergeOutfixes(RoseBuildImpl &tbi) {
+ if (!tbi.cc.grey.mergeOutfixes) {
+ return;
+ }
+
+ vector<NGHolder *> nfas;
+ vector<raw_dfa *> dfas;
+ vector<raw_som_dfa *> som_dfas;
+
+ for (auto &outfix : tbi.outfixes) {
+ if (outfix.rdfa()) {
+ dfas.push_back(outfix.rdfa());
+ } else if (outfix.holder()) {
+ nfas.push_back(outfix.holder());
+ } else if (outfix.haig()) {
+ som_dfas.push_back(outfix.haig());
+ }
+ }
+
+ DEBUG_PRINTF("merging %zu dfas, %zu nfas\n",
+ dfas.size(), nfas.size());
+
+ mergeOutfixNfas(tbi, nfas);
+ mergeOutfixDfas(tbi, dfas);
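+ // Haig merging runs twice over the survivors: a first pass with a small
+ // state limit (likely to keep results in a compact state space), then a
+ // more permissive pass for whatever remains.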
+ mergeOutfixHaigs(tbi, som_dfas, 255);
+ mergeOutfixHaigs(tbi, som_dfas, 8192);
+ mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey);
+}
+
+static
+u32 allowedSquashDistance(const CharReach &cr, u32 min_width,
+ const RoseBuildImpl &tbi,
+ RoseVertex tv) {
+ CharReach accept_cr;
+ DEBUG_PRINTF("hello |cr|=%zu\n", cr.count());
+
+ const RoseGraph &g = tbi.g;
+
+ /* TODO: inspect further back in the pattern */
+ for (u32 lit_id : g[tv].literals) {
+ const rose_literal_id &lit = tbi.literals.at(lit_id);
+ if (lit.delay) {
+ return 0; /* TODO: better */
+ }
+ if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) {
+ return 0;
+ }
+ assert(!lit.s.empty());
+ accept_cr |= *lit.s.rbegin();
+ }
+
+ DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
+
+ if ((accept_cr & cr).any()) {
+ DEBUG_PRINTF("no squash\n");
+ return 0; /* the accept byte doesn't always kill the puffette. TODO:
+ * maybe if we look further back we could find something that
+ * would kill the puffette... */
+ }
+
+ DEBUG_PRINTF("allowed to squash %u\n", min_width);
+ return min_width;
+}
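+
+/*
+ * Worked example (hypothetical pattern): for a role whose literal is "abc"
+ * triggering a puffette [^c]{5,}, accept_cr is {'c'} and cr is [^c]; the
+ * intersection is empty, so the 'c' completing the literal always kills
+ * the repeat and we may squash up to min_width (5). For "abc" triggering
+ * [^x]{5,}, 'c' lies inside the reach, so no squash is allowed.
+ */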
+
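+/**
+ * Replaces suffixes that are pure repeats ("puffettes") with triggers on
+ * the MPV chain rather than leaving them as NFA suffixes. Note: despite
+ * living in this file, this is a rewrite of individual roles rather than a
+ * pairwise merge.
+ */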
+void mergePuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h) {
+ continue;
+ }
+ assert(!g[v].suffix.haig);
+ assert(!g[v].eod_accept);
+
+ assert(onlyOneTop(*h)); /* we should not have merged yet */
+ bool fixed_depth = g[v].min_offset == g[v].max_offset;
+
+ if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) {
+ continue;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(*h, repeat)) {
+ assert(0);
+ continue;
+ }
+
+ if (repeat.bounds.min == depth(0)) {
+ assert(0); // No vacuous puffs allowed.
+ continue;
+ }
+
+ assert(repeat.bounds.min.is_finite() &&
+ repeat.bounds.max.is_reachable());
+ assert(repeat.bounds.max == repeat.bounds.min ||
+ repeat.bounds.max.is_infinite());
+
+ const bool unbounded = repeat.bounds.max.is_infinite();
+ const set<ReportID> reports = all_reports(*h);
+ assert(reports.size() == 1);
+ ReportID report = *reports.begin();
+
+ DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
+ repeat.bounds.str().c_str());
+
+ raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
+
+ u32 queue;
+ u32 event;
+ tbi.addChainTail(rp, &queue, &event);
+ u32 squashDistance =
+ allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
+
+ Report ir = makeMpvTrigger(event, squashDistance);
+ ReportID id = tbi.rm.getInternalId(ir);
+
+ DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
+ g[v].suffix.reset();
+ g[v].reports.insert(id);
+ }
+}
+
+static
+void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
+ u32 top, const vector<RoseVertex> &verts) {
+ DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top,
+ m.get(), verts.size());
+
+ for (auto v : verts) {
+ assert(g[v].suffix.castle);
+ g[v].suffix.castle = m;
+ g[v].suffix.top = top;
+ }
+}
+
+static
+void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles,
+ const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) {
+ if (castles.size() <= 1) {
+ return;
+ }
+
+ DEBUG_PRINTF("merging reach %s, %zu elements\n",
+ describeClass(castles[0]->reach()).c_str(), castles.size());
+
+ CastleProto *m = nullptr;
+
+ for (CastleProto *c : castles) {
+ assert(c->repeats.size() == 1); // Not yet merged.
+ assert(g[eng_verts.at(c).front()].suffix.castle.get() == c);
+ if (!m) {
+ m = c;
+ continue;
+ }
+
+ u32 top = m->merge(c->repeats[0]);
+ if (top == CastleProto::max_occupancy) {
+ // No room left to merge into 'm'. This one becomes the new 'm'.
+ DEBUG_PRINTF("next mergee\n");
+ m = c;
+ continue;
+ }
+ updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top,
+ eng_verts.at(c));
+ DEBUG_PRINTF("added to %p, top %u\n", m, top);
+ }
+}
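+
+/*
+ * Sketch of the greedy fill above (hypothetical occupancy limit of 4):
+ * castles c1..c6, one repeat each and a common reach, arrive in order. c1
+ * becomes 'm' and absorbs c2..c4 as tops 1..3; m->merge(c5) then returns
+ * CastleProto::max_occupancy, so c5 becomes the new 'm' and later absorbs
+ * c6, leaving two merged castles.
+ */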
+
+void mergeCastleSuffixes(RoseBuildImpl &build) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ unordered_map<CastleProto *, vector<RoseVertex>> eng_verts;
+ map<CharReach, vector<CastleProto *>> by_reach;
+
+ RoseGraph &g = build.g;
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix.castle) {
+ continue;
+ }
+
+ CastleProto *c = g[v].suffix.castle.get();
+
+ if (c->repeats.size() != 1) {
+ // This code assumes it's the only place merging is being done.
+ assert(0);
+ continue;
+ }
+
+ if (!contains(eng_verts, c)) {
+ by_reach[c->reach()].push_back(c);
+ }
+ eng_verts[c].push_back(v);
+ }
+
+ for (auto &chunk : by_reach | map_values) {
+ mergeCastleSuffixChunk(g, chunk, eng_verts);
+ }
+}
+
+} // namespace ue2