author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_merge.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | 2818 |
1 file changed, 2818 insertions, 0 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
new file mode 100644
index 0000000000..5066dbd578
--- /dev/null
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
@@ -0,0 +1,2818 @@
+/*
+ * Copyright (c) 2015-2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build: functions for reducing the size of the Rose graph
+ * through merging.
+ */
+#include "rose_build_merge.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_haig.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_puff.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/hash.h"
+#include "util/insertion_ordered.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/unordered.h"
+
+#include <algorithm>
+#include <functional>
+#include <list>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+static const size_t NARROW_START_MAX = 10;
+static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
+static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
+static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
+static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
+static const size_t MERGE_GROUP_SIZE_MAX = 200;
+static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
+
+/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
+static const size_t DFA_CHUNK_SIZE_MAX = 200;
+
+/** \brief Max DFA states in a merged DFA. */
+static const size_t DFA_MERGE_MAX_STATES = 8000;
+
+/** \brief In block mode, merge two prefixes even if they don't have identical
+ * literal sets if they have fewer than this many states and the merged graph
+ * is also small. */
+static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32;
+
+static
+size_t small_merge_max_vertices(const CompileContext &cc) {
+    return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
+                        : SMALL_MERGE_MAX_VERTICES_BLOCK;
+}
+
+static
+size_t small_rose_threshold(const CompileContext &cc) {
+    return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
+                        : SMALL_ROSE_THRESHOLD_BLOCK;
+}
+
+/**
+ * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
+ * reports should not contribute to the hash.
+ */
+static
+size_t hashLeftfix(const left_id &left) {
+    size_t val = 0;
+
+    if (left.castle()) {
+        hash_combine(val, left.castle()->reach());
+        for (const auto &pr : left.castle()->repeats) {
+            hash_combine(val, pr.first); // top
+            hash_combine(val, pr.second.bounds);
+        }
+    } else if (left.graph()) {
+        hash_combine(val, hash_holder(*left.graph()));
+    }
+
+    return val;
+}
+
+namespace {
+
+/** Key used to group sets of leftfixes by the dedupeLeftfixes path.
+ */
+struct RoseGroup {
+    RoseGroup(const RoseBuildImpl &build, RoseVertex v)
+        : left_hash(hashLeftfix(build.g[v].left)),
+          lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
+        const RoseGraph &g = build.g;
+        assert(in_degree(v, g) == 1);
+        RoseVertex u = *inv_adjacent_vertices(v, g).first;
+        parent = g[u].index;
+    }
+
+    bool operator<(const RoseGroup &b) const {
+        const RoseGroup &a = *this;
+        ORDER_CHECK(parent);
+        ORDER_CHECK(left_hash);
+        ORDER_CHECK(lag);
+        ORDER_CHECK(eod_table);
+        return false;
+    }
+
+private:
+    /** Parent vertex index. We must use the index, rather than the descriptor,
+     * for compile determinism. */
+    size_t parent;
+
+    /** Quick hash of the leftfix itself. Must be identical for a given pair of
+     * graphs if is_equal would return true. */
+    size_t left_hash;
+
+    /** Leftfix lag value. */
+    u32 lag;
+
+    /** True if the associated vertex (successor) is in the EOD table. We don't
+     * allow sharing of leftfix engines between "normal" and EOD operation. */
+    bool eod_table;
+};
+
+/**
+ * Intended to find graphs that are identical except for their report
+ * IDs. Relies on vertex and edge indices to pick up graphs that have been
+ * messily put together in different orderings. Only implemented for castles
+ * and holders.
+ */
+static
+bool is_equal(const left_id &u_left, ReportID u_report,
+              const left_id &v_left, ReportID v_report) {
+    if (u_left.castle() && v_left.castle()) {
+        return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report);
+    }
+
+    if (!u_left.graph() || !v_left.graph()) {
+        return false;
+    }
+
+    return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
+}
+
+} // namespace
+
+/**
+ * This pass performs work similar to \ref dedupeSuffixes - it removes
+ * duplicate prefixes/infixes (that is, leftfixes) which are identical graphs
+ * and share the same trigger vertex and lag. Leftfixes are first grouped by
+ * parent role and lag to reduce the number of candidates to be inspected
+ * for each leftfix. The graphs in each cluster are then compared with each
+ * other and the graph is updated to only refer to a canonical version of each
+ * graph.
+ *
+ * Note: only roles with a single predecessor vertex are considered for this
+ * transform - it should probably be generalised to work for roles which share
+ * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
+ * it should be retired entirely.
+ */
+bool dedupeLeftfixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("deduping leftfixes\n");
+    map<RoseGroup, deque<RoseVertex>> roses;
+    bool work_done = false;
+
+    /* Note: a leftfix's transientness will not be altered by deduping */
+
+    // Collect leftfixes into groups.
+    RoseGraph &g = tbi.g;
+    for (auto v : vertices_range(g)) {
+        if (!g[v].left) {
+            continue;
+        }
+        const left_id left(g[v].left);
+
+        if (left.haig()) {
+            /* TODO: allow merging of identical haigs */
+            continue;
+        }
+
+        if (in_degree(v, g) != 1) {
+            continue;
+        }
+
+        roses[RoseGroup(tbi, v)].push_back(v);
+    }
+
+    DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
+
+    // Walk groups and dedupe the roses therein.
+    for (deque<RoseVertex> &verts : roses | map_values) {
+        DEBUG_PRINTF("group has %zu vertices\n", verts.size());
+
+        unordered_set<left_id> seen;
+
+        for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
+            RoseVertex v = *jt;
+            left_id left(g[v].left);
+
+            // Skip cases we've already handled, and mark as seen otherwise.
+            if (!seen.insert(left).second) {
+                continue;
+            }
+
+            // Scan the rest of the list for dupes.
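+            // (Editor's note: candidates in this group already share a
+            // parent, lag, structural hash and EOD-table flag, so this
+            // quadratic scan runs over a small set; is_equal() below
+            // confirms true structural equality, including report IDs.)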
+            for (auto kt = std::next(jt); kt != jte; ++kt) {
+                if (g[v].left == g[*kt].left
+                    || !is_equal(g[v].left, g[v].left.leftfix_report,
+                                 g[*kt].left, g[*kt].left.leftfix_report)) {
+                    continue;
+                }
+
+                // Dupe found.
+                DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
+                             g[*kt].index, g[v].index);
+                assert(g[v].left.lag == g[*kt].left.lag);
+                g[*kt].left = g[v].left;
+                work_done = true;
+            }
+        }
+    }
+
+    return work_done;
+}
+
+/**
+ * \brief Returns a numeric key that can be used to group this suffix with
+ * others that may be its duplicate.
+ */
+static
+size_t suffix_size_key(const suffix_id &s) {
+    if (s.graph()) {
+        return num_vertices(*s.graph());
+    }
+    if (s.castle()) {
+        return s.castle()->repeats.size();
+    }
+    return 0;
+}
+
+static
+bool is_equal(const suffix_id &s1, const suffix_id &s2) {
+    if (s1.graph() && s2.graph()) {
+        return is_equal(*s1.graph(), *s2.graph());
+    } else if (s1.castle() && s2.castle()) {
+        return is_equal(*s1.castle(), *s2.castle());
+    }
+    return false;
+}
+
+/**
+ * This function simply looks for suffix NGHolder graphs which are identical
+ * and updates the roles in the RoseGraph to refer to only a single copy. This
+ * obviously has benefits in terms of both performance (as we don't run
+ * multiple engines doing the same work) and stream state. This function first
+ * groups all suffixes by number of vertices and report set to restrict the
+ * set of possible candidates. Each group is then walked to find duplicates
+ * using the \ref is_equal comparator for NGHolders and updating the RoseGraph
+ * as it goes.
+ *
+ * Note: does not dedupe suffixes of vertices in the EOD table.
+ */
+void dedupeSuffixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("deduping suffixes\n");
+
+    unordered_map<suffix_id, set<RoseVertex>> suffix_map;
+    map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
+
+    // Collect suffixes into groups.
+    RoseGraph &g = tbi.g;
+    for (auto v : vertices_range(g)) {
+        if (!g[v].suffix || tbi.isInETable(v)) {
+            continue;
+        }
+
+        const suffix_id s(g[v].suffix);
+
+        if (!(s.graph() || s.castle())) {
+            continue; // e.g. Haig
+        }
+
+        set<RoseVertex> &verts = suffix_map[s];
+        if (verts.empty()) {
+            part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
+        }
+        verts.insert(v);
+    }
+
+    DEBUG_PRINTF("collected %zu groups\n", part.size());
+
+    for (const auto &cand : part | map_values) {
+        if (cand.size() <= 1) {
+            continue;
+        }
+        DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
+
+        for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
+            if (suffix_map[*jt].empty()) {
+                continue;
+            }
+            for (auto kt = next(jt); kt != cand.end(); ++kt) {
+                if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
+                    continue;
+                }
+                DEBUG_PRINTF("found dupe\n");
+                for (auto v : suffix_map[*kt]) {
+                    RoseVertex dupe = *suffix_map[*jt].begin();
+                    assert(dupe != v);
+                    g[v].suffix.graph = g[dupe].suffix.graph;
+                    g[v].suffix.castle = g[dupe].suffix.castle;
+                    assert(suffix_id(g[v].suffix) ==
+                           suffix_id(g[dupe].suffix));
+                    suffix_map[*jt].insert(v);
+                }
+                suffix_map[*kt].clear();
+            }
+        }
+    }
+}
+
+namespace {
+
+/**
+ * This class stores a mapping from an engine reference (left_id, suffix_id,
+ * etc) to a list of vertices, and also allows us to iterate over the set of
+ * engine references in insertion order -- we add to the mapping in vertex
+ * iteration order, so this allows us to provide a consistent ordering.
+ */
+template<class EngineRef>
+class Bouquet {
+private:
+    list<EngineRef> ordering; // Unique list in insert order.
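+    // (Editor's note: iteration goes through 'ordering' rather than the
+    // unordered map below, so that merge passes visit engines in insertion
+    // order and compiles stay deterministic across runs.)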
+    using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>;
+    BouquetMap bouquet;
+public:
+    void insert(const EngineRef &h, RoseVertex v) {
+        typename BouquetMap::iterator f = bouquet.find(h);
+        if (f == bouquet.end()) {
+            ordering.push_back(h);
+            bouquet[h].push_back(v);
+        } else {
+            f->second.push_back(v);
+        }
+    }
+
+    void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
+        typename BouquetMap::iterator f = bouquet.find(h);
+        if (f == bouquet.end()) {
+            ordering.push_back(h);
+            bouquet.insert(make_pair(h, verts));
+        } else {
+            f->second.insert(f->second.end(), verts.begin(), verts.end());
+        }
+    }
+
+    const deque<RoseVertex> &vertices(const EngineRef &h) const {
+        typename BouquetMap::const_iterator it = bouquet.find(h);
+        assert(it != bouquet.end()); // must be present
+        return it->second;
+    }
+
+    void erase(const EngineRef &h) {
+        assert(bouquet.find(h) != bouquet.end());
+        bouquet.erase(h);
+        ordering.remove(h);
+    }
+
+    /** Remove all the elements in the given iterator range. */
+    template <class Iter>
+    void erase_all(Iter erase_begin, Iter erase_end) {
+        for (Iter it = erase_begin; it != erase_end; ++it) {
+            bouquet.erase(*it);
+        }
+
+        // Use a quick-lookup container so that we only have to traverse the
+        // 'ordering' list once.
+        const set<EngineRef> dead(erase_begin, erase_end);
+        for (iterator it = begin(); it != end(); /* incremented inside */) {
+            if (contains(dead, *it)) {
+                ordering.erase(it++);
+            } else {
+                ++it;
+            }
+        }
+    }
+
+    void clear() {
+        ordering.clear();
+        bouquet.clear();
+    }
+
+    size_t size() const { return bouquet.size(); }
+
+    // iterate over holders in insert order
+    typedef typename list<EngineRef>::iterator iterator;
+    iterator begin() { return ordering.begin(); }
+    iterator end() { return ordering.end(); }
+
+    // const iterate over holders in insert order
+    typedef typename list<EngineRef>::const_iterator const_iterator;
+    const_iterator begin() const { return ordering.begin(); }
+    const_iterator end() const { return ordering.end(); }
+};
+
+typedef Bouquet<left_id> LeftfixBouquet;
+typedef Bouquet<suffix_id> SuffixBouquet;
+
+} // namespace
+
+/**
+ * Split a \ref Bouquet of some type into several smaller ones.
+ */
+template <class EngineRef>
+static void chunkBouquets(const Bouquet<EngineRef> &in,
+                          deque<Bouquet<EngineRef>> &out,
+                          const size_t chunk_size) {
+    if (in.size() <= chunk_size) {
+        out.push_back(in);
+        return;
+    }
+
+    out.push_back(Bouquet<EngineRef>());
+    for (const auto &engine : in) {
+        if (out.back().size() >= chunk_size) {
+            out.push_back(Bouquet<EngineRef>());
+        }
+        out.back().insert(engine, in.vertices(engine));
+    }
+}
+
+static
+bool stringsCanFinishAtSameSpot(const ue2_literal &u,
+                                ue2_literal::const_iterator v_b,
+                                ue2_literal::const_iterator v_e) {
+    ue2_literal::const_iterator u_e = u.end();
+    ue2_literal::const_iterator u_b = u.begin();
+
+    while (u_e != u_b && v_e != v_b) {
+        --u_e;
+        --v_e;
+
+        if (!overlaps(*u_e, *v_e)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Check that, once u has been seen, it is impossible for the arrival of v to
+ * require the inspection of an engine earlier than u did.
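+ *
+ * (Editor's illustrative example: with u = "abc" at lag 0 and v = "bc" at
+ * lag 2, v may finish at the same offset as u; v's engine would then need
+ * checking at v_loc - 2, earlier than u's check at u_loc, so the pair must
+ * be rejected.)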
+ *
+ * Let delta be the earliest that v can be seen after u (may be zero).
+ *
+ * i.e., we require u_loc - ulag <= v_loc - vlag (with v_loc = u_loc + delta)
+ *   ==>        - ulag <= delta - vlag
+ *   ==>  vlag - ulag <= delta
+ */
+static
+bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
+                 const rose_literal_id &vl, const u32 vlag) {
+    DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
+                 escapeString(vl.s).c_str(), vlag);
+
+    if (vl.delay || ul.delay) {
+        /* engine related literals should not be delayed anyway */
+        return false;
+    }
+
+    if (ulag >= vlag) {
+        assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
+        return true;
+    }
+
+    size_t min_allowed_delta = vlag - ulag;
+    DEBUG_PRINTF("min allowed distance %zu\n", min_allowed_delta);
+
+    for (size_t i = 0; i < min_allowed_delta; i++) {
+        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n",
+                         i);
+            return false;
+        }
+    }
+
+    DEBUG_PRINTF("OK\n");
+    return true;
+}
+
+static
+bool hasSameEngineType(const RoseVertexProps &u_prop,
+                       const RoseVertexProps &v_prop) {
+    const left_id u_left = u_prop.left;
+    const left_id v_left = v_prop.left;
+
+    return !u_left.haig() == !v_left.haig()
+        && !u_left.dfa() == !v_left.dfa()
+        && !u_left.castle() == !v_left.castle()
+        && !u_left.graph() == !v_left.graph();
+}
+
+/**
+ * Verifies that merging the leftfixes of the given vertices does not cause
+ * conflicts due to the literals on the right.
+ *
+ * The main concern is that the lags of the literals and the overlap between
+ * them could allow the engine check offset to regress.
+ *
+ * Parameters are vectors of literal + lag pairs.
+ *
+ * Note: if more constraints on when the leftfixes are going to be checked
+ * (mandatory lookarounds passing, offset checks) were available, more merges
+ * could be allowed.
+ */
+static
+bool compatibleLiteralsForMerge(
+        const vector<pair<const rose_literal_id *, u32>> &ulits,
+        const vector<pair<const rose_literal_id *, u32>> &vlits) {
+    assert(!ulits.empty());
+    assert(!vlits.empty());
+
+    // We cannot merge engines that prefix literals in different tables.
+    if (ulits[0].first->table != vlits[0].first->table) {
+        DEBUG_PRINTF("literals in different tables\n");
+        return false;
+    }
+
+    // We don't handle delayed cases yet.
+    for (const auto &ue : ulits) {
+        const rose_literal_id &ul = *ue.first;
+        if (ul.delay) {
+            return false;
+        }
+    }
+
+    for (const auto &ve : vlits) {
+        const rose_literal_id &vl = *ve.first;
+        if (vl.delay) {
+            return false;
+        }
+    }
+
+    /* An engine requires that all accesses to it are ordered by offsets. (ie,
+       we can not check an engine's state at offset Y, if we have already
+       checked its status at offset X and X > Y). If we can not establish that
+       the literals used for triggering will satisfy this property, then it is
+       not safe to merge the engine. */
+    for (const auto &ue : ulits) {
+        const rose_literal_id &ul = *ue.first;
+        u32 ulag = ue.second;
+
+        for (const auto &ve : vlits) {
+            const rose_literal_id &vl = *ve.first;
+            u32 vlag = ve.second;
+
+            if (!checkPrefix(ul, ulag, vl, vlag)
+                || !checkPrefix(vl, vlag, ul, ulag)) {
+                DEBUG_PRINTF("prefix check failed\n");
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/**
+ * True if this graph has few enough accel states to be implemented as an NFA
+ * with all of those states actually becoming accel schemes.
+ */
+static
+bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) {
+    u32 num = countAccelStates(g, &build.rm, build.cc);
+    DEBUG_PRINTF("graph with %zu vertices has %u accel states\n",
+                 num_vertices(g), num);
+    return num <= NFA_MAX_ACCEL_STATES;
+}
+
+/**
+ * In block mode, we want to be a little more selective -- we will only merge
+ * prefix engines when the literal sets are the same or if the merged graph
+ * has only grown by a small amount.
+ */
+static
+bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u,
+                        RoseVertex v) {
+    assert(!build.cc.streaming);
+    assert(build.isRootSuccessor(u) == build.isRootSuccessor(v));
+
+    // Always merge infixes if we can (subject to the other criteria in
+    // mergeableRoseVertices).
+    if (!build.isRootSuccessor(u)) {
+        return true;
+    }
+
+    const RoseGraph &g = build.g;
+
+    // Merge prefixes with identical literal sets (as we'd have to run them
+    // both when we see those literals anyway).
+    if (g[u].literals == g[v].literals) {
+        return true;
+    }
+
+    // The rest of this function only deals with the case when both vertices
+    // have graph leftfixes.
+    if (!g[u].left.graph || !g[v].left.graph) {
+        return false;
+    }
+
+    const size_t u_count = num_vertices(*g[u].left.graph);
+    const size_t v_count = num_vertices(*g[v].left.graph);
+    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
+                 u_count, v_count);
+    if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
+        v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("prefixes too big already\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("trying merge\n");
+    NGHolder h;
+    cloneHolder(h, *g[v].left.graph);
+    if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) {
+        DEBUG_PRINTF("couldn't merge\n");
+        return false;
+    }
+
+    const size_t merged_count = num_vertices(h);
+    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
+    if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("exceeded limit\n");
+        return false;
+    }
+
+    // We want to only perform merges that take advantage of some
+    // commonality in the two input graphs, so we check that the number of
+    // vertices has only grown a small amount: somewhere between the sum
+    // (no commonality) and the max (no growth at all) of the vertex counts
+    // of the input graphs.
+    const size_t max_size = u_count + v_count;
+    const size_t min_size = max(u_count, v_count);
+    const size_t max_growth = ((max_size - min_size) * 25) / 100;
+    if (merged_count > min_size + max_growth) {
+        DEBUG_PRINTF("grew too much\n");
+        return false;
+    }
+
+    // We don't want to squander any chances at accelerating.
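+    // (Editor's note: NFA acceleration depends on keeping the accel state
+    // count small; countAccelStates() is re-run on the merged holder via
+    // isAccelerableLeftfix() because merging can push a graph past
+    // NFA_MAX_ACCEL_STATES even when both inputs were accelerable.)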
+    if (!isAccelerableLeftfix(build, h) &&
+        (isAccelerableLeftfix(build, *g[u].left.graph) ||
+         isAccelerableLeftfix(build, *g[v].left.graph))) {
+        DEBUG_PRINTF("would lose accel property\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("safe to merge\n");
+    return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+                           RoseVertex v) {
+    assert(u != v);
+
+    if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
+        return false;
+    }
+
+    if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) {
+        return false;
+    }
+
+    /* We cannot merge prefixes/vertices if they are successors of different
+     * root vertices */
+    if (tbi.isRootSuccessor(u)) {
+        assert(tbi.isRootSuccessor(v));
+        set<RoseVertex> u_preds;
+        set<RoseVertex> v_preds;
+        insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
+        insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
+
+        if (u_preds != v_preds) {
+            return false;
+        }
+    }
+
+    u32 ulag = tbi.g[u].left.lag;
+    vector<pair<const rose_literal_id *, u32>> ulits;
+    ulits.reserve(tbi.g[u].literals.size());
+    for (u32 id : tbi.g[u].literals) {
+        ulits.emplace_back(&tbi.literals.at(id), ulag);
+    }
+
+    u32 vlag = tbi.g[v].left.lag;
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    vlits.reserve(tbi.g[v].literals.size());
+    for (u32 id : tbi.g[v].literals) {
+        vlits.emplace_back(&tbi.literals.at(id), vlag);
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
+                 tbi.g[v].index);
+    return true;
+}
+
+/* We cannot merge an engine if a trigger literal and a post literal overlap
+ * in such a way that the engine status needs to be checked at a point before
+ * the engine's current location.
+ *
+ * i.e., for a trigger literal u and a post literal v,
+ * where delta is the earliest v can appear after u,
+ * we require that v_loc - v_lag >= u_loc
+ *   ==>  u_loc + delta - v_lag >= u_loc
+ *   ==>  delta >= v_lag
+ *
+ */
+static
+bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
+                    u32 vlag) {
+    DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
+                 escapeString(vl.s).c_str(), vlag);
+
+    for (size_t i = 0; i < vlag; i++) {
+        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n",
+                         i);
+            return false;
+        }
+    }
+
+    DEBUG_PRINTF("OK\n");
+    return true;
+}
+
+template<typename VertexCont>
+static never_inline
+bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
+                     const VertexCont &v2) {
+    flat_set<RoseVertex> preds;
+    for (auto v : v1) {
+        insert(&preds, inv_adjacent_vertices(v, build.g));
+    }
+
+    flat_set<u32> pred_lits;
+
+    /* No need to examine delays of a common pred - as it must already have
+     * survived the delay checks.
+     *
+     * This is important when the pred is in the anchored table as
+     * the literal is no longer available.
+     */
+    flat_set<RoseVertex> known_good_preds;
+    for (auto v : v2) {
+        insert(&known_good_preds, inv_adjacent_vertices(v, build.g));
+    }
+
+    for (auto u : preds) {
+        if (!contains(known_good_preds, u)) {
+            insert(&pred_lits, build.g[u].literals);
+        }
+    }
+
+    vector<const rose_literal_id *> pred_rose_lits;
+    pred_rose_lits.reserve(pred_lits.size());
+    for (const auto &p : pred_lits) {
+        pred_rose_lits.push_back(&build.literals.at(p));
+    }
+
+    for (auto v : v2) {
+        u32 vlag = build.g[v].left.lag;
+        if (!vlag) {
+            continue;
+        }
+
+        for (const u32 vlit : build.g[v].literals) {
+            const rose_literal_id &vl = build.literals.at(vlit);
+            assert(!vl.delay); // this should never have got this far?
+            for (const auto &ul : pred_rose_lits) {
+                assert(!ul->delay); // this should never have got this far?
+
+                if (!checkPredDelay(*ul, vl, vlag)) {
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+static
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+                           const deque<RoseVertex> &verts1,
+                           const deque<RoseVertex> &verts2) {
+    assert(!verts1.empty());
+    assert(!verts2.empty());
+
+    RoseVertex u_front = verts1.front();
+    RoseVertex v_front = verts2.front();
+
+    /* all vertices must have the same engine type: assume all verts in each
+     * group are already of the same type */
+    if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
+        return false;
+    }
+
+    bool is_prefix = tbi.isRootSuccessor(u_front);
+
+    /* We cannot merge prefixes/vertices if they are successors of different
+     * root vertices: similarly, assume the grouped vertices are compatible */
+    if (is_prefix) {
+        assert(tbi.isRootSuccessor(v_front));
+        set<RoseVertex> u_preds;
+        set<RoseVertex> v_preds;
+        insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
+        insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
+
+        if (u_preds != v_preds) {
+            return false;
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+    for (auto a : verts1) {
+        if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) {
+            return false;
+        }
+
+        u32 ulag = tbi.g[a].left.lag;
+        for (u32 id : tbi.g[a].literals) {
+            ulits.emplace_back(&tbi.literals.at(id), ulag);
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    for (auto a : verts2) {
+        if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) {
+            return false;
+        }
+
+        u32 vlag = tbi.g[a].left.lag;
+        for (u32 id : tbi.g[a].literals) {
+            vlits.emplace_back(&tbi.literals.at(id), vlag);
+        }
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
+    }
+
+    // Check preds are compatible as well.
+    if (!checkPredDelays(tbi, verts1, verts2)
+        || !checkPredDelays(tbi, verts2, verts1)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("vertex sets are mergeable\n");
+    return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
+                           const set<RoseVertex> &v2) {
+    const deque<RoseVertex> vv1(v1.begin(), v1.end());
+    const deque<RoseVertex> vv2(v2.begin(), v2.end());
+    return mergeableRoseVertices(tbi, vv1, vv2);
+}
+
+/** \brief Priority queue element for Rose merges.
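+ *
+ * (Editor's note: operator< below is arranged so that the top of the
+ * priority queue is the pair with the smallest stop-alphabet XOR, then the
+ * longest common prefix, then the fewest estimated states; tie_breaker keeps
+ * the ordering deterministic.)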
+ */
+namespace {
+struct RoseMergeCandidate {
+    RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
+                       u32 tb)
+        : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
+          tie_breaker(tb) {
+        if (r1.graph() && r2.graph()) {
+            const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
+            /* som_none as haigs don't merge and just a guiding heuristic */
+            CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
+            CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
+            stopxor = (stop1 ^ stop2).count();
+
+            // We use the number of vertices as an approximation of the state
+            // count here, as this is just feeding a comparison.
+            u32 vertex_count = num_vertices(h1) + num_vertices(h2);
+            states = vertex_count - min(vertex_count, cpl);
+        } else if (r1.castle() && r2.castle()) {
+            // FIXME
+        }
+    }
+
+    bool operator<(const RoseMergeCandidate &a) const {
+        if (stopxor != a.stopxor) {
+            return stopxor > a.stopxor;
+        }
+        if (cpl != a.cpl) {
+            return cpl < a.cpl;
+        }
+        if (states != a.states) {
+            return states > a.states;
+        }
+        return tie_breaker < a.tie_breaker;
+    }
+
+    left_id r1;
+    left_id r2;
+    u32 stopxor;
+    u32 cpl; //!< common prefix length
+    u32 states;
+    u32 tie_breaker; //!< determinism
+};
+}
+
+static
+bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
+                      const vector<RoseVertex> &verts1,
+                      const vector<RoseVertex> &verts2) {
+    assert(!verts1.empty() && !verts2.empty());
+
+    DEBUG_PRINTF("merging pair of leftfixes:\n");
+    DEBUG_PRINTF("  A:%016zx: tops %s\n", r1.hash(),
+                 as_string_list(all_tops(r1)).c_str());
+    DEBUG_PRINTF("  B:%016zx: tops %s\n", r2.hash(),
+                 as_string_list(all_tops(r2)).c_str());
+
+    RoseGraph &g = build.g;
+
+    if (r1.graph()) {
+        assert(r2.graph());
+        assert(r1.graph()->kind == r2.graph()->kind);
+        if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) {
+            DEBUG_PRINTF("nfa merge failed\n");
+            return false;
+        }
+
+        /* The graph in r1 has been merged into the graph in r2. Update r1's
+         * vertices with the new graph ptr. mergeNfaPair() does not alter the
+         * tops from the input graph so no need to update top values.
+         *
+         * It is the responsibility of the caller to ensure that the tops are
+         * distinct when they have different trigger conditions.
+         * [Note: mergeLeftfixesVariableLag() should have a common parent set]
+         */
+        shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
+        for (RoseVertex v : verts1) {
+            g[v].left.graph = h;
+        }
+
+        return true;
+    } else if (r1.castle()) {
+        assert(r2.castle());
+        assert(build.cc.grey.allowCastle);
+
+        map<u32, u32> top_map;
+        if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
+            DEBUG_PRINTF("castle merge failed\n");
+            return false;
+        }
+
+        // The castle in r1 has been merged into the castle in r2, with tops
+        // remapped as per top_map.
+        const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
+        for (RoseVertex v : verts1) {
+            g[v].left.castle = c;
+            for (const auto &e : in_edges_range(v, g)) {
+                g[e].rose_top = top_map.at(g[e].rose_top);
+            }
+        }
+        return true;
+    }
+
+    assert(0);
+    return false;
+}
+
+/**
+ * Checks that there is no problem due to the involved vertices if we merge
+ * two leftfix engines.
+ *
+ * This function takes the vertices on the right of the two engines.
+ *
+ * Unlike mergeableRoseVertices(), this does not:
+ * - check that the engines themselves can be merged
+ * - use heuristics to find out if merging the engines is wise.
+ */
+static
+bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
+                                    const vector<RoseVertex> &targets_1,
+                                    const vector<RoseVertex> &targets_2) {
+    assert(!targets_1.empty());
+    assert(!targets_2.empty());
+
+    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+    for (auto a : targets_1) {
+        u32 ulag = build.g[a].left.lag;
+        for (u32 id : build.g[a].literals) {
+            ulits.emplace_back(&build.literals.at(id), ulag);
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    for (auto a : targets_2) {
+        u32 vlag = build.g[a].left.lag;
+        for (u32 id : build.g[a].literals) {
+            vlits.emplace_back(&build.literals.at(id), vlag);
+        }
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
+    }
+
+    // Check preds are compatible as well.
+    if (!checkPredDelays(build, targets_1, targets_2)
+        || !checkPredDelays(build, targets_2, targets_1)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("vertex sets are mergeable\n");
+    return true;
+}
+
+/**
+ * In block mode, we want to be a little more selective -- we will only merge
+ * prefix engines when the literal sets are the same or if the merged graph
+ * has only grown by a small amount.
+ */
+static
+bool goodBlockModeMerge(const RoseBuildImpl &build,
+                        const vector<RoseVertex> &u_verts, const left_id &u_eng,
+                        const vector<RoseVertex> &v_verts,
+                        const left_id &v_eng) {
+    assert(!build.cc.streaming);
+
+    // Always merge infixes if we can (subject to the other criteria in
+    // mergeableRoseVertices).
+    if (!build.isRootSuccessor(u_verts.front())) {
+        return true;
+    }
+
+    const RoseGraph &g = build.g;
+
+    flat_set<u32> u_lits;
+    for (RoseVertex u : u_verts) {
+        insert(&u_lits, g[u].literals);
+    }
+
+    flat_set<u32> v_lits;
+    for (RoseVertex v : v_verts) {
+        insert(&v_lits, g[v].literals);
+    }
+
+    // Merge prefixes with identical literal sets (as we'd have to run them
+    // both when we see those literals anyway).
+    if (u_lits == v_lits) {
+        return true;
+    }
+
+    // The rest of this function only deals with the case when we have graph
+    // leftfixes.
+    if (!u_eng.graph()) {
+        return false;
+    }
+    assert(v_eng.graph());
+    const NGHolder &ug = *u_eng.graph();
+    const NGHolder &vg = *v_eng.graph();
+
+    size_t u_count = num_vertices(ug);
+    size_t v_count = num_vertices(vg);
+    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
+                 u_count, v_count);
+    if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
+        v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("prefixes too big already\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("trying merge\n");
+    NGHolder h;
+    cloneHolder(h, vg);
+    if (!mergeNfaPair(ug, h, nullptr, build.cc)) {
+        DEBUG_PRINTF("couldn't merge\n");
+        return false;
+    }
+
+    const size_t merged_count = num_vertices(h);
+    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
+    if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("exceeded limit\n");
+        return false;
+    }
+
+    // We want to only perform merges that take advantage of some
+    // commonality in the two input graphs, so we check that the number of
+    // vertices has only grown a small amount: somewhere between the sum
+    // (no commonality) and the max (no growth at all) of the vertex counts
+    // of the input graphs.
+    size_t max_size = u_count + v_count;
+    size_t min_size = max(u_count, v_count);
+    size_t max_growth = ((max_size - min_size) * 25) / 100;
+    if (merged_count > min_size + max_growth) {
+        DEBUG_PRINTF("grew too much\n");
+        return false;
+    }
+
+    // We don't want to squander any chances at accelerating.
+    if (!isAccelerableLeftfix(build, h)
+        && (isAccelerableLeftfix(build, ug)
+            || isAccelerableLeftfix(build, vg))) {
+        DEBUG_PRINTF("would lose accel property\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("safe to merge\n");
+    return true;
+}
+
+/**
+ * Merge r1 into r2 if safe and appropriate. Returns true on success.
+ */
+static
+bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
+                                   const vector<RoseVertex> &targets_1,
+                                   left_id &r2,
+                                   const vector<RoseVertex> &targets_2) {
+    if (targets_1.empty() || targets_2.empty()) {
+        /* one of the engines has already been merged away */
+        return false;
+    }
+
+    assert(!r1.graph() == !r2.graph());
+    if (r1.graph()) {
+        NGHolder *h1 = r1.graph();
+        NGHolder *h2 = r2.graph();
+        CharReach stop1 = findStopAlphabet(*h1, SOM_NONE);
+        CharReach stop2 = findStopAlphabet(*h2, SOM_NONE);
+        CharReach stopboth = stop1 & stop2;
+        DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(),
+                     stop2.count(), stopboth.count());
+        if (stopboth.count() < 10
+            && (stop1.count() > 10 || stop2.count() > 10)) {
+            DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
+            return false;
+        }
+        size_t maxstop = max(stop1.count(), stop2.count());
+        if (maxstop > 200 && stopboth.count() < 200) {
+            DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
+            return false;
+        }
+    }
+
+    /* Rechecking that the targets are compatible, as we may have already
+     * merged new states into r1 or r2 and we need to verify that this
+     * candidate is still ok. */
+    if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) {
+        return false;
+    }
+
+    if (!build.cc.streaming
+        && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) {
+        return false;
+    }
+
+    return mergeLeftfixPair(build, r1, r2, targets_1, targets_2);
+}
+
+static
+bool nfaHasNarrowStart(const NGHolder &g) {
+    if (out_degree(g.startDs, g) > 1) {
+        return false; // unanchored
+    }
+
+    CharReach cr;
+
+    for (auto v : adjacent_vertices_range(g.start, g)) {
+        if (v == g.startDs) {
+            continue;
+        }
+        cr |= g[v].char_reach;
+    }
+    return cr.count() <= NARROW_START_MAX;
+}
+
+static
+bool nfaHasFiniteMaxWidth(const NGHolder &g) {
+    return findMaxWidth(g).is_finite();
+}
+
+static
+bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
+    if (!proper_out_degree(h.startDs, h)) {
+        return false;
+    }
+
+    assert(!is_triggered(h));
+
+    NGHolder h_temp;
+    cloneHolder(h_temp, h);
+
+    vector<BoundedRepeatData> repeats;
+    bool suitable_for_sds_reforming = false;
+    const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
+    const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
+    const bool simple_model_selection = true; // FIRST is considered simple
+    analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
+                   simple_model_selection, grey, &suitable_for_sds_reforming);
+
+    return suitable_for_sds_reforming;
+}
+
+static
+u32 commonPrefixLength(left_id &r1, left_id &r2) {
+    if (r1.graph() && r2.graph()) {
+        return commonPrefixLength(*r1.graph(), *r2.graph());
+    } else if (r1.castle() && r2.castle()) {
+        return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
+    }
+    return 0;
+}
+
+namespace {
+struct MergeKey {
+    MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) :
+        parents(std::move(parents_in)) {
+
+        // We want to distinguish prefixes (but not infixes) on whether they
+        // have a narrow start or max width.
+        if (left.graph() && !is_triggered(*left.graph())) {
+            const NGHolder &h = *left.graph();
+            narrowStart = nfaHasNarrowStart(h);
+            hasMaxWidth = nfaHasFiniteMaxWidth(h);
+        } else {
+            narrowStart = false;
+            hasMaxWidth = false;
+        }
+
+        if (left.castle()) {
+            /* castles should have a non-empty reach */
+            assert(left.castle()->reach().any());
+            castle_cr = left.castle()->reach();
+        } else {
+            assert(left.graph());
+        }
+    }
+
+    bool operator<(const MergeKey &b) const {
+        const MergeKey &a = *this;
+        ORDER_CHECK(narrowStart);
+        ORDER_CHECK(hasMaxWidth);
+        ORDER_CHECK(castle_cr);
+        ORDER_CHECK(parents);
+        return false;
+    }
+
+    // NOTE: these two bool discriminators are only used for prefixes, not
+    // infixes.
+    bool narrowStart;
+    bool hasMaxWidth;
+    CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */
+
+    flat_set<RoseVertex> parents;
+};
+}
+
+template <typename T>
+static
+void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) {
+    if (in.size() <= chunk_size) {
+        out->push_back(std::move(in));
+        return;
+    }
+
+    out->push_back(vector<T>());
+    out->back().reserve(chunk_size);
+    for (const auto &t : in) {
+        if (out->back().size() >= chunk_size) {
+            out->push_back(vector<T>());
+            out->back().reserve(chunk_size);
+        }
+        out->back().push_back(std::move(t));
+    }
+}
+
+static
+insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
+    insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
+    for (auto v : vertices_range(g)) {
+        const auto &left = g[v].left;
+        if (!left) {
+            continue;
+        }
+        assert(contains(all_reports(left), left.leftfix_report));
+        eng_verts[left].push_back(v);
+    }
+
+    return eng_verts;
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines which share a common set
+ * of parent vertices.
+ *
+ * Engines are greedily merged pairwise by this process based on a priority
+ * queue keyed off the common prefix length.
+ *
+ * Engines are not merged if the lags are not compatible or if it would damage
+ * the stop alphabet.
+ *
+ * Infixes:
+ * - It is expected that when this is run all infixes are still at the single
+ *   top stage as we have not yet merged unrelated infixes together. After
+ *   execution, castles may have multiple (but equivalent) tops.
+ *
+ * Prefixes:
+ * - transient prefixes are not considered.
+ * - prefixes with a max width or a narrow start are kept segregated by
+ *   this phase and can only be merged with similar prefixes.
+ * - in block mode, merges are only performed if literal sets are the same.
+ * - merges are not considered in cases where the dot star start state will
+ *   be reformed to optimise a leading repeat.
+ */
+void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
+    if (!build.cc.grey.mergeRose) {
+        return;
+    }
+    assert(!hasOrphanedTops(build));
+
+    RoseGraph &g = build.g;
+
+    DEBUG_PRINTF("-----\n");
+    DEBUG_PRINTF("entry\n");
+    DEBUG_PRINTF("-----\n");
+
+    auto eng_verts = get_eng_verts(g);
+
+    map<MergeKey, vector<left_id>> engine_groups;
+    for (const auto &e : eng_verts) {
+        const left_id &left = e.first;
+        const auto &verts = e.second;
+        // Only non-transient for the moment.
+        if (contains(build.transient, left)) {
+            continue;
+        }
+
+        // No forced McClellan or Haig infix merges.
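+        // (Editor's note: DFA (McClellan) and Haig leftfixes are excluded
+        // here and merged by separate DFA-specific logic; see the
+        // DFA_CHUNK_SIZE_MAX / DFA_MERGE_MAX_STATES limits near the top of
+        // this file. The assert below records that only graph and castle
+        // engines reach this point.)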
+        if (left.dfa() || left.haig()) {
+            continue;
+        }
+        assert(left.graph() || left.castle());
+
+        if (left.graph()) {
+            const NGHolder &h = *left.graph();
+            /* we should not have merged yet */
+            assert(!is_triggered(h) || onlyOneTop(h));
+
+            if (hasReformedStartDotStar(h, build.cc.grey)) {
+                continue; // preserve the optimisation of the leading repeat
+            }
+        } else {
+            assert(left.castle());
+
+            if (!build.cc.grey.allowCastle) {
+                DEBUG_PRINTF("castle merging disallowed by greybox\n");
+                continue;
+            }
+        }
+
+        // We collapse the anchored root into the root vertex when calculating
+        // parents, so that we can merge differently-anchored prefix roses
+        // together. (Prompted by UE-2100)
+
+        flat_set<RoseVertex> parents;
+        for (RoseVertex v : verts) {
+            insert(&parents, inv_adjacent_vertices_range(v, g));
+        }
+
+        if (contains(parents, build.anchored_root)) {
+            parents.erase(build.anchored_root);
+            parents.insert(build.root);
+        }
+
+        assert(!parents.empty());
+
+#ifndef _WIN32
+        engine_groups[MergeKey(left, parents)].push_back(left);
+#else
+        // On Windows, the MergeKey passed into the 'engine_groups' map is
+        // not copied; it is freed along with engine_groups.clear(). If the
+        // MergeKey were constructed on the stack, it would also be destroyed
+        // at the end of its own lifetime, and the second destruction would
+        // trigger the is_block_type_valid() assertion in the MergeKey
+        // destructor. So heap-allocate it instead.
+        MergeKey *mk = new MergeKey(left, parents);
+        engine_groups[*mk].push_back(left);
+#endif
+    }
+
+    vector<vector<left_id>> chunks;
+    for (auto &raw_group : engine_groups | map_values) {
+        chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX);
+    }
+    engine_groups.clear();
+
+    DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size());
+
+    for (auto &roses : chunks) {
+        if (roses.size() < 2) {
+            continue;
+        }
+        // All pairs on the prio queue.
+        u32 tie_breaker = 0;
+        priority_queue<RoseMergeCandidate> pq;
+        for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) {
+            left_id r1 = *it;
+            const vector<RoseVertex> &targets_1 = eng_verts[r1];
+
+            for (auto jt = next(it); jt != ite; ++jt) {
+                left_id r2 = *jt;
+
+                /* we should have already split on engine types and reach */
+                assert(!r1.castle() == !r2.castle());
+                assert(!r1.graph() == !r2.graph());
+                assert(!r1.castle()
+                       || r1.castle()->reach() == r2.castle()->reach());
+
+                const vector<RoseVertex> &targets_2 = eng_verts[r2];
+                if (!checkVerticesOkForLeftfixMerge(build, targets_1,
+                                                    targets_2)) {
+                    continue; // No point queueing unmergeable cases.
+                }
+
+                u32 cpl = commonPrefixLength(r1, r2);
+                pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++));
+            }
+        }
+
+        DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
+
+        while (!pq.empty()) {
+            left_id r1 = pq.top().r1;
+            left_id r2 = pq.top().r2;
+            DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n",
+                         r1.graph(), r2.graph(), pq.top().cpl,
+                         pq.top().states);
+            pq.pop();
+            vector<RoseVertex> &targets_1 = eng_verts[r1];
+            vector<RoseVertex> &targets_2 = eng_verts[r2];
+            if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2,
+                                              targets_2)) {
+                insert(&targets_2, targets_2.end(), targets_1);
+                targets_1.clear();
+            }
+        }
+    }
+
+    DEBUG_PRINTF("-----\n");
+    DEBUG_PRINTF("exit\n");
+    DEBUG_PRINTF("-----\n");
+    assert(!hasOrphanedTops(build));
+}
+
+namespace {
+
+/**
+ * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
+ */
+struct DedupeLeftKey {
+    DedupeLeftKey(const RoseBuildImpl &build,
+                  flat_set<pair<size_t, u32>> preds_in, const left_id &left)
+        : left_hash(hashLeftfix(left)), preds(move(preds_in)),
+          transient(contains(build.transient, left)) {
+    }
+
+    bool operator<(const DedupeLeftKey &b) const {
+        return tie(left_hash, preds, transient)
+             < tie(b.left_hash, b.preds, b.transient);
+    }
+
+private:
+    /** Quick hash of the leftfix itself. Must be identical for a given pair of
+     * graphs if is_equal would return true. */
+    size_t left_hash;
+
+    /** For each in-edge, the pair of (parent index, edge top). */
+    flat_set<pair<size_t, u32>> preds;
+
+    /** We don't want to combine transient with non-transient. */
+    bool transient;
+};
+
+} // namespace
+
+static
+flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
+    flat_set<pair<size_t, u32>> preds;
+    for (const auto &e : in_edges_range(v, g)) {
+        preds.emplace(g[source(e, g)].index, g[e].rose_top);
+    }
+    return preds;
+}
+
+/**
+ * This is a generalisation of \ref dedupeLeftfixes which relaxes two
+ * restrictions: multiple predecessor roles are allowed and the delay used by
+ * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
+ * the leftfixes' successor vertices are first grouped to reduce the number of
+ * potential candidates - the grouping in this case is by the set of
+ * predecessor roles with their associated top events. For the dedupe to be
+ * possible, it is required that:
+ *
+ * 1. the nfa graphs with respect to the relevant reports are identical
+ * 2. the nfa graphs are triggered by the same roles with same events (ensured
+ *    by the initial grouping pass)
+ * 3. all the successor roles of either graph can inspect the combined leftfix
+ *    without advancing the state of the leftfix past the point that another
+ *    successor may want to inspect it; the overlap relationships between the
+ *    involved literals are examined to ensure that this property holds.
+ *
+ * Note: this is unable to dedupe when delayed literals are involved unlike
+ * dedupeLeftfixes.
+ */
+void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
+    DEBUG_PRINTF("entry\n");
+
+    RoseGraph &g = build.g;
+    auto eng_verts = get_eng_verts(g);
+
+    map<DedupeLeftKey, vector<left_id>> engine_groups;
+    for (const auto &e : eng_verts) {
+        const left_id &left = e.first;
+        const auto &verts = e.second;
+
+        /* There should only be one report on an engine as no merges have
+         * happened yet.
+         * (aside from eod prefixes) */
+        if (all_reports(left).size() != 1) {
+            assert(any_of_in(adjacent_vertices_range(verts.front(), g),
+                             [&](RoseVertex w) { return g[w].eod_accept; }));
+            continue;
+        }
+
+        if (left.haig()) {
+            /* TODO: allow deduping of identical haigs */
+            continue;
+        }
+
+        if (left.graph()) {
+            /* we should not have merged yet */
+            assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
+        }
+
+        auto preds = get_pred_tops(verts.front(), g);
+        for (RoseVertex v : verts) {
+            if (preds != get_pred_tops(v, g)) {
+                DEBUG_PRINTF("distinct pred sets\n");
+                continue;
+            }
+        }
+        engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
+    }
+
+    /* We don't bother chunking as we expect deduping to be successful if the
+     * hashes match */
+
+    for (auto &group : engine_groups | map_values) {
+        DEBUG_PRINTF("group of %zu roses\n", group.size());
+
+        if (group.size() < 2) {
+            continue;
+        }
+
+        for (auto it = group.begin(); it != group.end(); ++it) {
+            left_id r1 = *it;
+            vector<RoseVertex> &verts1 = eng_verts[r1];
+            assert(!verts1.empty()); /* cleared engines should be behind us */
+
+            assert(all_reports(r1).size() == 1);
+            ReportID r1_report = *all_reports(r1).begin();
+
+            for (auto jt = next(it); jt != group.end(); ++jt) {
+                left_id r2 = *jt;
+                vector<RoseVertex> &verts2 = eng_verts[r2];
+                assert(!verts2.empty());
+                assert(all_reports(r2).size() == 1);
+                ReportID r2_report = *all_reports(r2).begin();
+
+                if (!is_equal(r1, r1_report, r2, r2_report)) {
+                    continue;
+                }
+
+                if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
+                    continue;
+                }
+
+                DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
+
+                // Replace r1 with r2.
+
+                for (auto v : verts1) {
+                    DEBUG_PRINTF("replacing report %u with %u on %zu\n",
+                                 r2_report, r1_report, g[v].index);
+                    u32 orig_lag = g[v].left.lag;
+                    g[v].left = g[verts2.front()].left;
+                    g[v].left.lag = orig_lag;
+                }
+
+                insert(&verts2, verts2.end(), verts1);
+                verts1.clear();
+
+                /* remove stale entry from transient set, if present */
+                build.transient.erase(r1);
+
+                break;
+            }
+        }
+    }
+}
+
+static
+u32 findUnusedTop(const flat_set<u32> &tops) {
+    u32 i = 0;
+    while (contains(tops, i)) {
+        i++;
+    }
+    return i;
+}
+
+// Replace top 't' on edges with new top 'u'.
+static
+void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
+    for (const auto &e : out_edges_range(h.start, h)) {
+        NFAVertex v = target(e, h);
+        if (v == h.startDs) {
+            continue;
+        }
+        flat_set<u32> new_tops;
+        for (u32 t : h[e].tops) {
+            DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t);
+            new_tops.insert(top_mapping.at(t));
+        }
+        h[e].tops = std::move(new_tops);
+    }
+}
+
+static
+bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
+                     map<u32, u32> &top_mapping) {
+    flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2);
+
+    DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
+                 tops2.size());
+
+    // If our tops don't intersect, we're OK to merge with no changes.
+    if (!has_intersection(tops1, tops2)) {
+        DEBUG_PRINTF("tops don't intersect\n");
+        return true;
+    }
+
+    // Otherwise, we have to renumber the tops in h1 so that they don't
+    // overlap with the tops in h2.
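+    // (Editor's illustrative example: with tops1 = {0, 1} and tops2 = {0, 2},
+    // the loop below maps 0 -> 1 and 1 -> 3, inserting each newly chosen top
+    // into tops2 so that later picks stay distinct.)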
+    top_mapping.clear();
+    for (u32 t : tops1) {
+        u32 u = findUnusedTop(tops2);
+        DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
+        top_mapping.insert(make_pair(t, u));
+        assert(!contains(tops2, u));
+        tops2.insert(u);
+    }
+
+    replaceTops(h1, top_mapping);
+    return true;
+}
+
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+                         const deque<RoseVertex> &verts1) {
+    map<u32, u32> top_mapping;
+    if (!setDistinctTops(h1, h2, top_mapping)) {
+        return false;
+    }
+
+    if (top_mapping.empty()) {
+        return true; // No remapping necessary.
+    }
+
+    for (auto v : verts1) {
+        DEBUG_PRINTF("vertex %zu\n", g[v].index);
+        assert(!g[v].left.haig);
+        assert(!g[v].left.dfa);
+        for (const auto &e : in_edges_range(v, g)) {
+            u32 t = g[e].rose_top;
+            DEBUG_PRINTF("t=%u\n", t);
+            assert(contains(top_mapping, t));
+            g[e].rose_top = top_mapping[t];
+            DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
+                         g[source(e, g)].index, g[target(e, g)].index, t,
+                         top_mapping[t]);
+        }
+    }
+
+    return true;
+}
+
+static
+bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+                           const deque<RoseVertex> &verts1) {
+    map<u32, u32> top_mapping;
+    if (!setDistinctTops(h1, h2, top_mapping)) {
+        return false;
+    }
+
+    if (top_mapping.empty()) {
+        return true; // No remapping necessary.
+    }
+
+    for (auto v : verts1) {
+        DEBUG_PRINTF("vertex %zu\n", g[v].index);
+        u32 t = g[v].suffix.top;
+        assert(contains(top_mapping, t));
+        g[v].suffix.top = top_mapping[t];
+    }
+
+    return true;
+}
+
+/** \brief Estimate the number of accel states in the given graph when built
+ * as an NFA.
+ *
+ * (The easiest way to estimate something like this is to actually build it:
+ * the criteria for NFA acceleration are quite complicated and buried in
+ * limex_compile.)
+ */
+static
+u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
+    return countAccelStates(h, &tbi.rm, tbi.cc);
+}
+
+static
+void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
+    RoseGraph &g = tbi.g;
+    DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
+
+    // We track the number of accelerable states for each graph in a map and
+    // only recompute them when the graph is modified.
+    unordered_map<left_id, u32> accel_count;
+    for (const auto &rose : roses) {
+        assert(rose.graph()->kind == NFA_INFIX);
+        accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
+    }
+
+    for (auto it = roses.begin(); it != roses.end(); ++it) {
+        left_id r1 = *it;
+        const deque<RoseVertex> &verts1 = roses.vertices(r1);
+
+        deque<left_id> merged;
+        for (auto jt = next(it); jt != roses.end(); ++jt) {
+            left_id r2 = *jt;
+            const deque<RoseVertex> &verts2 = roses.vertices(r2);
+
+            DEBUG_PRINTF("consider merging rose %p (%zu verts) "
+                         "with %p (%zu verts)\n",
+                         r1.graph(), verts1.size(), r2.graph(), verts2.size());
+
+            u32 accel1 = accel_count[r1];
+            if (accel1 >= NFA_MAX_ACCEL_STATES) {
+                DEBUG_PRINTF("h1 has hit max accel\n");
+                break; // next h1
+            }
+
+            u32 accel2 = accel_count[r2];
+            if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+                DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+                             "accel2=%u)\n",
+                             accel1, accel2);
+                continue; // next h2
+            }
+
+            if (!mergeableRoseVertices(tbi, verts1, verts2)) {
+                DEBUG_PRINTF("not mergeable\n");
+                continue; // next h2
+            }
+
+            // Attempt to merge h2 into h1.
+
+            NGHolder victim;
+            cloneHolder(victim, *r2.graph());
+
+            // Store a copy of the in-edge properties in case we have to roll
+            // back.
+            map<RoseEdge, RoseEdgeProps> edge_props;
+            for (auto v : verts2) {
+                for (const auto &e : in_edges_range(v, g)) {
+                    edge_props[e] = g[e];
+                }
+            }
+
+            if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
+                DEBUG_PRINTF("can't set distinct tops\n");
+                continue; // next h2
+            }
+
+            assert(victim.kind == r1.graph()->kind);
+            assert(!generates_callbacks(*r1.graph()));
+            if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
+                DEBUG_PRINTF("merge failed\n");
+                // Roll back in-edge properties.
+                for (const auto &m : edge_props) {
+                    g[m.first] = m.second;
+                }
+                continue; // next h2
+            }
+
+            // Update h2's roses to point to h1 now
+            shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
+            for (auto v : verts2) {
+                g[v].left.graph = winner;
+            }
+            roses.insert(r1, verts2);
+
+            merged.push_back(r2);
+
+            if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
+                DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+                             num_vertices(*winner));
+                break; // next h1
+            }
+
+            // Update h1's accel count estimate.
+            accel_count[r1] = estimatedAccelStates(tbi, *winner);
+        }
+
+        DEBUG_PRINTF("%zu roses merged\n", merged.size());
+        roses.erase_all(merged.begin(), merged.end());
+    }
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines with a small number of
+ * vertices together into larger engines. The engines must not have a
+ * reformed start dot star (due to a leading repeat) nor be an infix LBR.
+ * Engines that have compatible lag are greedily grouped such that they
+ * remain accelerable and only have a small number of states. Note: if a role
+ * has an infix with multiple trigger vertices, the role will be left
+ * unchanged by this pass and will remain using an unmerged graph.
+ */
+void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("entry\n");
+
+    if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
+        return;
+    }
+
+    RoseGraph &g = tbi.g;
+
+    LeftfixBouquet nfa_leftfixes;
+
+    for (auto v : vertices_range(g)) {
+        if (!g[v].left) {
+            continue;
+        }
+
+        // Handle single-parent infixes only.
+        if (tbi.isRootSuccessor(v)) {
+            continue;
+        }
+
+        left_id left(g[v].left);
+
+        // Only non-transient for the moment.
+        if (contains(tbi.transient, left)) {
+            continue;
+        }
+
+        // No DFAs or Haigs right now.
+        if (left.dfa() || left.haig()) {
+            continue;
+        }
+
+        // Castles are handled by a different pass.
+        if (left.castle()) {
+            continue;
+        }
+
+        assert(left.graph());
+        NGHolder &h = *left.graph();
+
+        /* Ensure that kind on the graph is correct */
+        assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
+
+        if (hasReformedStartDotStar(h, tbi.cc.grey)) {
+            /* We would lose optimisations of the leading repeat by merging. */
+            continue;
+        }
+
+        // Small roses only.
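+        // (Editor's note: "small" here is SMALL_ROSE_THRESHOLD_STREAM (32)
+        // in streaming mode and SMALL_ROSE_THRESHOLD_BLOCK (10) in block
+        // mode, via small_rose_threshold() defined near the top of this
+        // file.)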
+ if (num_vertices(h) > small_rose_threshold(tbi.cc)) { + continue; + } + + nfa_leftfixes.insert(left, v); + } + + deque<LeftfixBouquet> leftfix_groups; + chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX); + nfa_leftfixes.clear(); + DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n", + leftfix_groups.size()); + + for (auto &group : leftfix_groups) { + mergeNfaLeftfixes(tbi, group); + } +} + +static +void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, + insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { + /* caller must have already ensured that candidates have the same reach */ + RoseGraph &g = build.g; + DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); + + for (auto it = cands.begin(); it != cands.end(); ++it) { + left_id &cand_1 = *it; + vector<RoseVertex> &verts_1 = eng_verts[cand_1]; + if (verts_1.empty()) { + continue; + } + + for (auto jt = next(it); jt != cands.end(); ++jt) { + const left_id &cand_2 = *jt; + vector<RoseVertex> &verts_2 = eng_verts[cand_2]; + if (verts_2.empty()) { + continue; + } + + assert(cand_1.castle()->reach() == cand_2.castle()->reach()); + + if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { + DEBUG_PRINTF("not mergeable\n"); + continue; // next cand_2 + } + + DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), + cand_1.castle()->repeats.size()); + DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), + cand_2.castle()->repeats.size()); + + map<u32, u32> top_map; + if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { + DEBUG_PRINTF("couldn't merge\n"); + continue; // next cand_2 + } + + // Update castle2's roses to point to castle1 now. + shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; + for (auto v : verts_2) { + assert(g[v].left.castle.get() == cand_2.castle()); + g[v].left.castle = winner; + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + + insert(&verts_1, verts_1.end(), verts_2); + verts_2.clear(); + } + } +} + +/** + * Merges castles with the same reach together regardless of where in the rose + * graph they are. Note: there is no requirement for the castles to have common + * parent or target vertices. + * + * There are no heuristics for reducing block mode merges as castle speed + * mainly depends on the reach being scanned. + */ +void mergeCastleLeftfixes(RoseBuildImpl &build) { + DEBUG_PRINTF("entry\n"); + + if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses + || !build.cc.grey.allowCastle) { + return; + } + + RoseGraph &g = build.g; + + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; + + for (auto v : vertices_range(g)) { + if (!g[v].left.castle) { + continue; + } + + // Handle infixes only. 
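+        // Root successors are prefixes rather than infixes, so they are
+        // skipped by this pass; prefixes are dealt with elsewhere.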
+ if (build.isRootSuccessor(v)) { + continue; + } + + eng_verts[g[v].left].push_back(v); + } + + map<CharReach, vector<left_id>> by_reach; + for (const auto &left : eng_verts | map_keys) { + by_reach[left.castle()->reach()].push_back(left); + } + + vector<vector<left_id>> chunks; + for (auto &raw_group : by_reach | map_values) { + chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); + } + by_reach.clear(); + + DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); + + for (auto &chunk : chunks) { + mergeCastleChunk(build, chunk, eng_verts); + } +} + +static +void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, + const bool acyclic) { + RoseGraph &g = tbi.g; + + DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size()); + + // If this isn't an acyclic case, we track the number of accelerable states + // for each graph in a map and only recompute them when the graph is + // modified. + unordered_map<suffix_id, u32> accel_count; + if (!acyclic) { + for (const auto &suffix : suffixes) { + assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); + accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph()); + } + } + + for (auto it = suffixes.begin(); it != suffixes.end(); ++it) { + suffix_id s1 = *it; + const deque<RoseVertex> &verts1 = suffixes.vertices(s1); + assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); + + // Caller should ensure that we don't propose merges of graphs that are + // already too big. + assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); + + deque<suffix_id> merged; + for (auto jt = next(it); jt != suffixes.end(); ++jt) { + suffix_id s2 = *jt; + const deque<RoseVertex> &verts2 = suffixes.vertices(s2); + assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); + + if (!acyclic) { + u32 accel1 = accel_count[s1]; + if (accel1 >= NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("h1 has hit max accel\n"); + break; // next h1 + } + + u32 accel2 = accel_count[s2]; + if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " + "accel2=%u)\n", + accel1, accel2); + continue; // next h2 + } + } + + // Attempt to merge h2 into h1. + + NGHolder victim; + cloneHolder(victim, *s2.graph()); + + // Store a copy of the suffix tops in case we have to roll back. + map<RoseVertex, u32> old_tops; + for (auto v : verts2) { + old_tops[v] = g[v].suffix.top; + } + + if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { + DEBUG_PRINTF("can't set distinct tops\n"); + continue; // next h2 + } + + if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { + DEBUG_PRINTF("merge failed\n"); + // Roll back in-edge properties. + for (const auto &m : old_tops) { + g[m.first].suffix.top = m.second; + } + continue; // next h2 + } + + // Update h2's roses to point to h1 now + shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph; + for (auto v : verts2) { + g[v].suffix.graph = winner; + } + suffixes.insert(s1, verts2); + merged.push_back(s2); + + if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) { + DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n", + num_vertices(*s1.graph())); + break; // next h1 + } + + if (!acyclic) { + // Update h1's accel count estimate. 
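+                // The merge changed s1's graph, so the cached estimate is
+                // stale and must be recomputed.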
+                accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
+            }
+        }
+
+        DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
+        suffixes.erase_all(merged.begin(), merged.end());
+    }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to pairwise merge each with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or if it loses the ability to be accelerated. The merge will modify
+ * the existing suffix graph of one member (g1); the other member updates its
+ * graph to refer to g1 instead of its previous graph (g2) and uses the new
+ * tops created. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected, as g2 will continue to exist while it has any roles
+ * triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("entry\n");
+
+    if (!tbi.cc.grey.mergeSuffixes) {
+        return;
+    }
+
+    SuffixBouquet suffixes;
+
+    RoseGraph &g = tbi.g;
+
+    for (auto v : vertices_range(g)) {
+        shared_ptr<NGHolder> h = g[v].suffix.graph;
+        if (!h || tbi.isInETable(v)) {
+            continue;
+        }
+
+        assert(!g[v].suffix.haig);
+
+        if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
+            continue;
+        }
+
+        if (!isAcyclic(*h)) {
+            continue;
+        }
+
+        suffixes.insert(g[v].suffix, v);
+    }
+
+    deque<SuffixBouquet> suff_groups;
+    chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+    DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+                 suff_groups.size());
+    suffixes.clear();
+
+    for (auto &group : suff_groups) {
+        mergeSuffixes(tbi, group, true);
+    }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to pairwise merge each with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or if it loses the ability to be accelerated. The merge will modify
+ * the existing suffix graph of one member (g1); the other member updates its
+ * graph to refer to g1 instead of its previous graph (g2) and uses the new
+ * tops created. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected, as g2 will continue to exist while it has any roles
+ * triggering it.
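+ *
+ * For example, if role A uses suffix graph g1 (triggered by top 0) and role B
+ * uses g2, a successful merge folds g2's states into g1 under a fresh top
+ * (say top 1) and repoints B at g1; A is unaffected, since the behaviour of
+ * top 0 in g1 is unchanged.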
+ * + * Note: suffixes destined for the LBR are not considered for these merges as + * the LBR can only handle a single repeat and this type of repeat is ideally + * handled outside of an NFA or DFA. + */ +void mergeSmallSuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + RoseGraph &g = tbi.g; + SuffixBouquet suffixes; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h || tbi.isInETable(v)) { + continue; + } + assert(!g[v].suffix.haig); + + // Leave acyclics out for the moment. + if (isAcyclic(*h)) { + continue; + } + + // Small-ish suffixes only. + if (num_vertices(*h) > 32) { + continue; + } + + suffixes.insert(g[v].suffix, v); + } + + deque<SuffixBouquet> suff_groups; + chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); + DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(), + suff_groups.size()); + suffixes.clear(); + + for (auto &group : suff_groups) { + mergeSuffixes(tbi, group, false); + } +} + +static +void removeDeadOutfixes(vector<OutfixInfo> &outfixes) { + auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); }; + outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead), + end(outfixes)); +} + +static +void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { + assert(!winner.is_dead()); + + winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth); + winner.minWidth = min(winner.minWidth, victim.minWidth); + winner.maxWidth = max(winner.maxWidth, victim.maxWidth); + winner.maxOffset = max(winner.maxOffset, victim.maxOffset); + mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info); + + // This outfix can be ignored in small block mode if both were. The dedupe + // layer at runtime will protect us from extra matches if only one was in + // the small block matcher. + winner.in_sbmatcher &= victim.in_sbmatcher; +} + +static +map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, + const vector<NGHolder *> &nfas) { + map<NGHolder *, NGHolder *> merged; + + vector<NGHolder *> batch; + for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) { + batch.push_back(*it); + assert((*it)->kind == NFA_OUTFIX); + if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { + auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); + insert(&merged, batch_merged); + batch.clear(); + } + } + + return merged; +} + +static +void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { + DEBUG_PRINTF("merging %zu nfas\n", nfas.size()); + if (nfas.size() < 2) { + return; + } + + vector<OutfixInfo> &outfixes = tbi.outfixes; + + map<NGHolder *, size_t> nfa_mapping; + for (size_t i = 0; i < outfixes.size(); i++) { + auto *holder = outfixes[i].holder(); + if (holder) { + nfa_mapping[holder] = i; + } + } + + map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas); + if (merged.empty()) { + return; + } + + DEBUG_PRINTF("%zu nfas merged\n", merged.size()); + + // Update the outfix info for merged holders. 
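+    // The map returned by chunkedNfaMerge() takes each absorbed (victim)
+    // holder to the holder that absorbed it (the winner).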
+ for (const auto &m : merged) { + OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]); + OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]); + mergeOutfixInfo(winner, victim); + victim.clear(); + } + + removeDeadOutfixes(outfixes); +} + +namespace { +struct MergeMcClellan { + MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in) + : rm(rm_in), grey(grey_in) {} + + unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const { + assert(d1 && d2); + return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); + } + +private: + const ReportManager &rm; + const Grey &grey; +}; + +struct MergeHaig { + explicit MergeHaig(u32 limit_in) : limit(limit_in) {} + + unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1, + const raw_som_dfa *d2) const { + assert(d1 && d2); + return attemptToMergeHaig({d1, d2}, limit); + } + +private: + const u32 limit; //!< state limit for merged result. +}; +} + +/** + * Generic pairwise merge algorithm that can be used for either McClellan + * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge + * operation to a merge functor, which allows the caller to set some policy + * (state limits, etc). + * + * This is currently astonishingly simple and just considers every pair of + * DFAs, slow and steady. We may wish to actually apply a merge ordering + * strategy in the future. + */ +template<class RawDfa, class MergeFunctor> +static +void pairwiseDfaMerge(vector<RawDfa *> &dfas, + unordered_map<RawDfa *, size_t> &dfa_mapping, + vector<OutfixInfo> &outfixes, + MergeFunctor merge_func) { + DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); + + for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) { + if (!*it) { + continue; + } + for (auto jt = next(it); jt != ite; ++jt) { + if (!*jt) { + continue; + } + + DEBUG_PRINTF("try merge %p and %p\n", *it, *jt); + unique_ptr<RawDfa> rdfa = merge_func(*it, *jt); + if (!rdfa) { + continue; // Merge failed. + } + + DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get()); + OutfixInfo &winner = outfixes.at(dfa_mapping[*it]); + OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]); + assert(!winner.is_dead() && !victim.is_dead()); + + RawDfa *dfa_ptr = rdfa.get(); + dfa_mapping[dfa_ptr] = dfa_mapping[*it]; + dfa_mapping.erase(*it); + winner.proto = move(rdfa); + + mergeOutfixInfo(winner, victim); + + victim.clear(); + *jt = nullptr; // to be deleted. + *it = dfa_ptr; + } + } +} + +template<class RawDfa, class MergeFunctor> +static +void chunkedDfaMerge(vector<RawDfa *> &dfas, + unordered_map<RawDfa *, size_t> &dfa_mapping, + vector<OutfixInfo> &outfixes, + MergeFunctor merge_func) { + DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); + + vector<RawDfa *> out_dfas; + vector<RawDfa *> chunk; + for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { + chunk.push_back(*it); + if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { + pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); + out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); + chunk.clear(); + } + } + + // Remove null (merged) DFAs and update vector for subsequent use. 
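+    // pairwiseDfaMerge() nulls out the entries for DFAs that were absorbed
+    // into a winner, so a standard erase-remove leaves only the live DFAs.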
+    out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
+                   out_dfas.end());
+    dfas.swap(out_dfas);
+    DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
+}
+
+static
+void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
+    DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
+    if (dfas.size() < 2) {
+        return;
+    }
+
+    vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+    /* key is index into outfix array as iterators, etc may be invalidated by
+     * element addition. */
+    unordered_map<raw_dfa *, size_t> dfa_mapping;
+    for (size_t i = 0; i < outfixes.size(); i++) {
+        auto *rdfa = outfixes[i].rdfa();
+        if (rdfa) {
+            dfa_mapping[rdfa] = i;
+        }
+    }
+
+    chunkedDfaMerge(dfas, dfa_mapping, outfixes,
+                    MergeMcClellan(tbi.rm, tbi.cc.grey));
+    removeDeadOutfixes(outfixes);
+}
+
+static
+void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
+                      const Grey &grey) {
+    if (!grey.roseMcClellanOutfix) {
+        return;
+    }
+
+    DEBUG_PRINTF("merge combo\n");
+
+    bool seen_dfa = false;
+    u32 nfa_count = 0;
+    for (const auto &outfix : tbi.outfixes) {
+        if (outfix.holder()) {
+            DEBUG_PRINTF("nfa\n");
+            nfa_count++;
+        } else if (outfix.rdfa()) {
+            DEBUG_PRINTF("dfa\n");
+            seen_dfa = true;
+        }
+    }
+
+    DEBUG_PRINTF("%u nfas, dfas present: %d\n", nfa_count, (int)seen_dfa);
+    if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
+        DEBUG_PRINTF("no combo merges possible\n");
+        return;
+    }
+
+    /* key is index into outfix array as iterators, etc may be invalidated by
+     * element addition. */
+    size_t new_dfas = 0;
+    unordered_map<raw_dfa *, size_t> dfa_mapping;
+    vector<raw_dfa *> dfas;
+
+    for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
+        auto &outfix = *it;
+        assert(!outfix.is_dead());
+
+        if (outfix.rdfa()) {
+            auto *rdfa = outfix.rdfa();
+            dfas.push_back(rdfa);
+            dfa_mapping[rdfa] = it - tbi.outfixes.begin();
+            continue;
+        }
+
+        if (!outfix.holder()) {
+            continue;
+        }
+
+        NGHolder *h = outfix.holder();
+        assert(h->kind == NFA_OUTFIX);
+        auto rdfa = buildMcClellan(*h, &rm, grey);
+        if (rdfa) {
+            // Transform this outfix into a DFA and add it to the merge set.
+            dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
+            dfas.push_back(rdfa.get());
+            outfix.proto = move(rdfa);
+            new_dfas++;
+        }
+    }
+
+    DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
+
+    if (!new_dfas) {
+        /* assumes normal dfas have already been fully merged */
+        return;
+    }
+
+    chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
+                    MergeMcClellan(tbi.rm, tbi.cc.grey));
+    removeDeadOutfixes(tbi.outfixes);
+}
+
+static
+void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
+                      u32 limit) {
+    if (dfas.size() < 2) {
+        return;
+    }
+
+    vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+    unordered_map<raw_som_dfa *, size_t> dfa_mapping;
+    for (size_t i = 0; i < outfixes.size(); i++) {
+        auto *haig = outfixes[i].haig();
+        if (haig) {
+            dfa_mapping[haig] = i;
+        }
+    }
+
+    chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
+    removeDeadOutfixes(outfixes);
+}
+
+/**
+ * This pass attempts to merge outfix engines together. At this point in time,
+ * the engine type (NFA, DFA, Haig) has already been decided for each outfix
+ * and outfixes can only be merged with others of the same type. NFAs are
+ * merged in a priority order based on common prefix length. The other types
+ * are merged blindly. Engines are merged to the extent that they can still be
+ * implemented efficiently.
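+ *
+ * Haig merging is attempted twice: first with a small state limit, then again
+ * with a larger limit for the engines that remain after the first pass.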
+ */ +void mergeOutfixes(RoseBuildImpl &tbi) { + if (!tbi.cc.grey.mergeOutfixes) { + return; + } + + vector<NGHolder *> nfas; + vector<raw_dfa *> dfas; + vector<raw_som_dfa *> som_dfas; + + for (auto &outfix : tbi.outfixes) { + if (outfix.rdfa()) { + dfas.push_back(outfix.rdfa()); + } else if (outfix.holder()) { + nfas.push_back(outfix.holder()); + } else if (outfix.haig()) { + som_dfas.push_back(outfix.haig()); + } + } + + DEBUG_PRINTF("merging %zu dfas, %zu nfas\n", + dfas.size(), nfas.size()); + + mergeOutfixNfas(tbi, nfas); + mergeOutfixDfas(tbi, dfas); + mergeOutfixHaigs(tbi, som_dfas, 255); + mergeOutfixHaigs(tbi, som_dfas, 8192); + mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey); +} + +static +u32 allowedSquashDistance(const CharReach &cr, u32 min_width, + const RoseBuildImpl &tbi, + RoseVertex tv) { + CharReach accept_cr; + DEBUG_PRINTF("hello |cr|=%zu\n", cr.count()); + + const RoseGraph &g = tbi.g; + + /* TODO: inspect further back in the pattern */ + for (u32 lit_id : g[tv].literals) { + const rose_literal_id &lit = tbi.literals.at(lit_id); + if (lit.delay) { + return 0; /* TODO: better */ + } + if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) { + return 0; + } + assert(!lit.s.empty()); + accept_cr |= *lit.s.rbegin(); + } + + DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count()); + + if ((accept_cr & cr).any()) { + DEBUG_PRINTF("no squash\n"); + return 0; /* the accept byte doesn't always kill the puffette. TODO: + * maybe if we look further back we could find something that + * would kill the puffette... */ + } + + DEBUG_PRINTF("allowed to squash %u\n", min_width); + return min_width; +} + +void mergePuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + RoseGraph &g = tbi.g; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h) { + continue; + } + assert(!g[v].suffix.haig); + assert(!g[v].eod_accept); + + assert(onlyOneTop(*h)); /* we should not have merged yet */ + bool fixed_depth = g[v].min_offset == g[v].max_offset; + + if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) { + continue; + } + + PureRepeat repeat; + if (!isPureRepeat(*h, repeat)) { + assert(0); + continue; + } + + if (repeat.bounds.min == depth(0)) { + assert(0); // No vacuous puffs allowed. 
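+            // A repeat with a zero minimum bound would match vacuously and
+            // is expected to have been eliminated earlier in compilation.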
+            continue;
+        }
+
+        assert(repeat.bounds.min.is_finite() &&
+               repeat.bounds.max.is_reachable());
+        assert(repeat.bounds.max == repeat.bounds.min ||
+               repeat.bounds.max.is_infinite());
+
+        const bool unbounded = repeat.bounds.max.is_infinite();
+        const set<ReportID> reports = all_reports(*h);
+        assert(reports.size() == 1);
+        ReportID report = *reports.begin();
+
+        DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
+                     repeat.bounds.str().c_str());
+
+        raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
+
+        u32 queue;
+        u32 event;
+        tbi.addChainTail(rp, &queue, &event);
+        u32 squashDistance =
+            allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
+
+        Report ir = makeMpvTrigger(event, squashDistance);
+        ReportID id = tbi.rm.getInternalId(ir);
+
+        DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
+        g[v].suffix.reset();
+        g[v].reports.insert(id);
+    }
+}
+
+static
+void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
+                        u32 top, const vector<RoseVertex> &verts) {
+    DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top,
+                 m.get(), verts.size());
+
+    for (auto v : verts) {
+        assert(g[v].suffix.castle);
+        g[v].suffix.castle = m;
+        g[v].suffix.top = top;
+    }
+}
+
+static
+void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles,
+                            const unordered_map<CastleProto *,
+                                                vector<RoseVertex>> &eng_verts) {
+    if (castles.size() <= 1) {
+        return;
+    }
+
+    DEBUG_PRINTF("merging reach %s, %zu elements\n",
+                 describeClass(castles[0]->reach()).c_str(), castles.size());
+
+    CastleProto *m = nullptr;
+
+    for (CastleProto *c : castles) {
+        assert(c->repeats.size() == 1); // Not yet merged.
+        assert(g[eng_verts.at(c).front()].suffix.castle.get() == c);
+        if (!m) {
+            m = c;
+            continue;
+        }
+
+        u32 top = m->merge(c->repeats[0]);
+        if (top == CastleProto::max_occupancy) {
+            // No room left to merge into 'm'. This one becomes the new 'm'.
+            DEBUG_PRINTF("next mergee\n");
+            m = c;
+            continue;
+        }
+        updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top,
+                           eng_verts.at(c));
+        DEBUG_PRINTF("added to %p, top %u\n", m, top);
+    }
+}
+
+void mergeCastleSuffixes(RoseBuildImpl &build) {
+    DEBUG_PRINTF("entry\n");
+
+    if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) {
+        return;
+    }
+
+    unordered_map<CastleProto *, vector<RoseVertex>> eng_verts;
+    map<CharReach, vector<CastleProto *>> by_reach;
+
+    RoseGraph &g = build.g;
+
+    for (auto v : vertices_range(g)) {
+        if (!g[v].suffix.castle) {
+            continue;
+        }
+
+        CastleProto *c = g[v].suffix.castle.get();
+
+        if (c->repeats.size() != 1) {
+            // This code assumes it's the only place merging is being done.
+            assert(0);
+            continue;
+        }
+
+        if (!contains(eng_verts, c)) {
+            by_reach[c->reach()].push_back(c);
+        }
+        eng_verts[c].push_back(v);
+    }
+
+    for (auto &chunk : by_reach | map_values) {
+        mergeCastleSuffixChunk(g, chunk, eng_verts);
+    }
+}
+
+} // namespace ue2