author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_merge.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | 2818 |
1 file changed, 2818 insertions, 0 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
new file mode 100644
index 0000000000..5066dbd578
--- /dev/null
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
@@ -0,0 +1,2818 @@
+/*
+ * Copyright (c) 2015-2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build: functions for reducing the size of the Rose graph
+ * through merging.
+ */
+#include "rose_build_merge.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_haig.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_puff.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/hash.h"
+#include "util/insertion_ordered.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/unordered.h"
+
+#include <algorithm>
+#include <functional>
+#include <list>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+static const size_t NARROW_START_MAX = 10;
+static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
+static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
+static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
+static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
+static const size_t MERGE_GROUP_SIZE_MAX = 200;
+static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
+
+/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
+static const size_t DFA_CHUNK_SIZE_MAX = 200;
+
+/** \brief Max DFA states in a merged DFA. */
+static const size_t DFA_MERGE_MAX_STATES = 8000;
+
+/** \brief In block mode, merge two prefixes even if they don't have identical
+ * literal sets if they have fewer than this many states and the merged graph
+ * is also small. */
+static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32;
+
+static
+size_t small_merge_max_vertices(const CompileContext &cc) {
+    return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
+                        : SMALL_MERGE_MAX_VERTICES_BLOCK;
+}
+
+static
+size_t small_rose_threshold(const CompileContext &cc) {
+    return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
+                        : SMALL_ROSE_THRESHOLD_BLOCK;
+}
+
+/**
+ * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
+ * reports should not contribute to the hash.
+ */
+static
+size_t hashLeftfix(const left_id &left) {
+    size_t val = 0;
+
+    if (left.castle()) {
+        hash_combine(val, left.castle()->reach());
+        for (const auto &pr : left.castle()->repeats) {
+            hash_combine(val, pr.first); // top
+            hash_combine(val, pr.second.bounds);
+        }
+    } else if (left.graph()) {
+        hash_combine(val, hash_holder(*left.graph()));
+    }
+
+    return val;
+}
+
+namespace {
+
+/** Key used to group sets of leftfixes by the dedupeLeftfixes path.
+ */
+struct RoseGroup {
+    RoseGroup(const RoseBuildImpl &build, RoseVertex v)
+        : left_hash(hashLeftfix(build.g[v].left)),
+          lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
+        const RoseGraph &g = build.g;
+        assert(in_degree(v, g) == 1);
+        RoseVertex u = *inv_adjacent_vertices(v, g).first;
+        parent = g[u].index;
+    }
+
+    bool operator<(const RoseGroup &b) const {
+        const RoseGroup &a = *this;
+        ORDER_CHECK(parent);
+        ORDER_CHECK(left_hash);
+        ORDER_CHECK(lag);
+        ORDER_CHECK(eod_table);
+        return false;
+    }
+
+private:
+    /** Parent vertex index. We must use the index, rather than the descriptor,
+     * for compile determinism. */
+    size_t parent;
+
+    /** Quick hash of the leftfix itself. Must be identical for a given pair of
+     * graphs if is_equal would return true. */
+    size_t left_hash;
+
+    /** Leftfix lag value. */
+    u32 lag;
+
+    /** True if the associated vertex (successor) is in the EOD table. We don't
+     * allow sharing of leftfix engines between "normal" and EOD operation. */
+    bool eod_table;
+};
+
+/**
+ * Intended to find graphs that are identical except for their report
+ * IDs. Relies on vertex and edge indices to pick up graphs that have been
+ * messily put together in different orderings. Only implemented for castles
+ * and holders.
+ */
+static
+bool is_equal(const left_id &u_left, ReportID u_report,
+              const left_id &v_left, ReportID v_report) {
+    if (u_left.castle() && v_left.castle()) {
+        return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report);
+    }
+
+    if (!u_left.graph() || !v_left.graph()) {
+        return false;
+    }
+
+    return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
+}
+
+} // namespace
+
+/**
+ * This pass performs work similar to \ref dedupeSuffixes - it removes
+ * duplicate prefixes/infixes (that is, leftfixes) which are identical graphs
+ * and share the same trigger vertex and lag. Leftfixes are first grouped by
+ * parent role and lag to reduce the number of candidates to be inspected
+ * for each leftfix. The graphs in each cluster are then compared with each
+ * other and the graph is updated to only refer to a canonical version of each
+ * graph.
+ *
+ * Note: only roles with a single predecessor vertex are considered for this
+ * transform - it should probably be generalised to work for roles which share
+ * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
+ * it should be retired entirely.
+ */
+bool dedupeLeftfixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("deduping leftfixes\n");
+    map<RoseGroup, deque<RoseVertex>> roses;
+    bool work_done = false;
+
+    /* Note: a leftfix's transientness will not be altered by deduping */
+
+    // Collect leftfixes into groups.
+    RoseGraph &g = tbi.g;
+    for (auto v : vertices_range(g)) {
+        if (!g[v].left) {
+            continue;
+        }
+        const left_id left(g[v].left);
+
+        if (left.haig()) {
+            /* TODO: allow merging of identical haigs */
+            continue;
+        }
+
+        if (in_degree(v, g) != 1) {
+            continue;
+        }
+
+        roses[RoseGroup(tbi, v)].push_back(v);
+    }
+
+    DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
+
+    // Walk groups and dedupe the roses therein.
+    for (deque<RoseVertex> &verts : roses | map_values) {
+        DEBUG_PRINTF("group has %zu vertices\n", verts.size());
+
+        unordered_set<left_id> seen;
+
+        for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
+            RoseVertex v = *jt;
+            left_id left(g[v].left);
+
+            // Skip cases we've already handled, and mark as seen otherwise.
+            if (!seen.insert(left).second) {
+                continue;
+            }
+
+            // Scan the rest of the list for dupes.
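+            // (Editor's note: candidates in this group already share a
+            // parent, lag, structural hash and EOD-table flag, so this
+            // quadratic scan runs over a small set; is_equal() below
+            // confirms true structural equality, including report IDs.)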
+            for (auto kt = std::next(jt); kt != jte; ++kt) {
+                if (g[v].left == g[*kt].left
+                    || !is_equal(g[v].left, g[v].left.leftfix_report,
+                                 g[*kt].left, g[*kt].left.leftfix_report)) {
+                    continue;
+                }
+
+                // Dupe found.
+                DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
+                             g[*kt].index, g[v].index);
+                assert(g[v].left.lag == g[*kt].left.lag);
+                g[*kt].left = g[v].left;
+                work_done = true;
+            }
+        }
+    }
+
+    return work_done;
+}
+
+/**
+ * \brief Returns a numeric key that can be used to group this suffix with
+ * others that may be its duplicate.
+ */
+static
+size_t suffix_size_key(const suffix_id &s) {
+    if (s.graph()) {
+        return num_vertices(*s.graph());
+    }
+    if (s.castle()) {
+        return s.castle()->repeats.size();
+    }
+    return 0;
+}
+
+static
+bool is_equal(const suffix_id &s1, const suffix_id &s2) {
+    if (s1.graph() && s2.graph()) {
+        return is_equal(*s1.graph(), *s2.graph());
+    } else if (s1.castle() && s2.castle()) {
+        return is_equal(*s1.castle(), *s2.castle());
+    }
+    return false;
+}
+
+/**
+ * This function simply looks for suffix NGHolder graphs which are identical
+ * and updates the roles in the RoseGraph to refer to only a single copy. This
+ * obviously has benefits in terms of both performance (as we don't run
+ * multiple engines doing the same work) and stream state. This function first
+ * groups all suffixes by number of vertices and report set to restrict the
+ * set of possible candidates. Each group is then walked to find duplicates
+ * using the \ref is_equal comparator for NGHolders and updating the RoseGraph
+ * as it goes.
+ *
+ * Note: does not dedupe suffixes of vertices in the EOD table.
+ */
+void dedupeSuffixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("deduping suffixes\n");
+
+    unordered_map<suffix_id, set<RoseVertex>> suffix_map;
+    map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
+
+    // Collect suffixes into groups.
+    RoseGraph &g = tbi.g;
+    for (auto v : vertices_range(g)) {
+        if (!g[v].suffix || tbi.isInETable(v)) {
+            continue;
+        }
+
+        const suffix_id s(g[v].suffix);
+
+        if (!(s.graph() || s.castle())) {
+            continue; // e.g. Haig
+        }
+
+        set<RoseVertex> &verts = suffix_map[s];
+        if (verts.empty()) {
+            part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
+        }
+        verts.insert(v);
+    }
+
+    DEBUG_PRINTF("collected %zu groups\n", part.size());
+
+    for (const auto &cand : part | map_values) {
+        if (cand.size() <= 1) {
+            continue;
+        }
+        DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
+
+        for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
+            if (suffix_map[*jt].empty()) {
+                continue;
+            }
+            for (auto kt = next(jt); kt != cand.end(); ++kt) {
+                if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
+                    continue;
+                }
+                DEBUG_PRINTF("found dupe\n");
+                for (auto v : suffix_map[*kt]) {
+                    RoseVertex dupe = *suffix_map[*jt].begin();
+                    assert(dupe != v);
+                    g[v].suffix.graph = g[dupe].suffix.graph;
+                    g[v].suffix.castle = g[dupe].suffix.castle;
+                    assert(suffix_id(g[v].suffix) ==
+                           suffix_id(g[dupe].suffix));
+                    suffix_map[*jt].insert(v);
+                }
+                suffix_map[*kt].clear();
+            }
+        }
+    }
+}
+
+namespace {
+
+/**
+ * This class stores a mapping from an engine reference (left_id, suffix_id,
+ * etc) to a list of vertices, and also allows us to iterate over the set of
+ * engine references in insertion order -- we add to the mapping in vertex
+ * iteration order, so this allows us to provide a consistent ordering.
+ */
+template<class EngineRef>
+class Bouquet {
+private:
+    list<EngineRef> ordering; // Unique list in insert order.
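+    // (Editor's note: iteration goes through 'ordering' rather than the
+    // unordered map below, so that merge passes visit engines in insertion
+    // order and compiles stay deterministic across runs.)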
+    using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>;
+    BouquetMap bouquet;
+public:
+    void insert(const EngineRef &h, RoseVertex v) {
+        typename BouquetMap::iterator f = bouquet.find(h);
+        if (f == bouquet.end()) {
+            ordering.push_back(h);
+            bouquet[h].push_back(v);
+        } else {
+            f->second.push_back(v);
+        }
+    }
+
+    void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
+        typename BouquetMap::iterator f = bouquet.find(h);
+        if (f == bouquet.end()) {
+            ordering.push_back(h);
+            bouquet.insert(make_pair(h, verts));
+        } else {
+            f->second.insert(f->second.end(), verts.begin(), verts.end());
+        }
+    }
+
+    const deque<RoseVertex> &vertices(const EngineRef &h) const {
+        typename BouquetMap::const_iterator it = bouquet.find(h);
+        assert(it != bouquet.end()); // must be present
+        return it->second;
+    }
+
+    void erase(const EngineRef &h) {
+        assert(bouquet.find(h) != bouquet.end());
+        bouquet.erase(h);
+        ordering.remove(h);
+    }
+
+    /** Remove all the elements in the given iterator range. */
+    template <class Iter>
+    void erase_all(Iter erase_begin, Iter erase_end) {
+        for (Iter it = erase_begin; it != erase_end; ++it) {
+            bouquet.erase(*it);
+        }
+
+        // Use a quick-lookup container so that we only have to traverse the
+        // 'ordering' list once.
+        const set<EngineRef> dead(erase_begin, erase_end);
+        for (iterator it = begin(); it != end(); /* incremented inside */) {
+            if (contains(dead, *it)) {
+                ordering.erase(it++);
+            } else {
+                ++it;
+            }
+        }
+    }
+
+    void clear() {
+        ordering.clear();
+        bouquet.clear();
+    }
+
+    size_t size() const { return bouquet.size(); }
+
+    // iterate over holders in insert order
+    typedef typename list<EngineRef>::iterator iterator;
+    iterator begin() { return ordering.begin(); }
+    iterator end() { return ordering.end(); }
+
+    // const iterate over holders in insert order
+    typedef typename list<EngineRef>::const_iterator const_iterator;
+    const_iterator begin() const { return ordering.begin(); }
+    const_iterator end() const { return ordering.end(); }
+};
+
+typedef Bouquet<left_id> LeftfixBouquet;
+typedef Bouquet<suffix_id> SuffixBouquet;
+
+} // namespace
+
+/**
+ * Split a \ref Bouquet of some type into several smaller ones.
+ */
+template <class EngineRef>
+static void chunkBouquets(const Bouquet<EngineRef> &in,
+                          deque<Bouquet<EngineRef>> &out,
+                          const size_t chunk_size) {
+    if (in.size() <= chunk_size) {
+        out.push_back(in);
+        return;
+    }
+
+    out.push_back(Bouquet<EngineRef>());
+    for (const auto &engine : in) {
+        if (out.back().size() >= chunk_size) {
+            out.push_back(Bouquet<EngineRef>());
+        }
+        out.back().insert(engine, in.vertices(engine));
+    }
+}
+
+static
+bool stringsCanFinishAtSameSpot(const ue2_literal &u,
+                                ue2_literal::const_iterator v_b,
+                                ue2_literal::const_iterator v_e) {
+    ue2_literal::const_iterator u_e = u.end();
+    ue2_literal::const_iterator u_b = u.begin();
+
+    while (u_e != u_b && v_e != v_b) {
+        --u_e;
+        --v_e;
+
+        if (!overlaps(*u_e, *v_e)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Check that, once u has been seen, it is impossible for the arrival of v to
+ * require the inspection of an engine earlier than u did.
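+ *
+ * (Editor's illustrative example: with u = "abc" at lag 0 and v = "bc" at
+ * lag 2, v may finish at the same offset as u; v's engine would then need
+ * checking at v_loc - 2, earlier than u's check at u_loc, so the pair must
+ * be rejected.)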
+ *
+ * Let delta be the earliest that v can be seen after u (may be zero).
+ *
+ * i.e., we require u_loc - ulag <= v_loc - vlag (with v_loc = u_loc + delta)
+ *   ==>        - ulag <= delta - vlag
+ *   ==>  vlag - ulag <= delta
+ */
+static
+bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
+                 const rose_literal_id &vl, const u32 vlag) {
+    DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
+                 escapeString(vl.s).c_str(), vlag);
+
+    if (vl.delay || ul.delay) {
+        /* engine related literals should not be delayed anyway */
+        return false;
+    }
+
+    if (ulag >= vlag) {
+        assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
+        return true;
+    }
+
+    size_t min_allowed_delta = vlag - ulag;
+    DEBUG_PRINTF("min allowed distance %zu\n", min_allowed_delta);
+
+    for (size_t i = 0; i < min_allowed_delta; i++) {
+        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n",
+                         i);
+            return false;
+        }
+    }
+
+    DEBUG_PRINTF("OK\n");
+    return true;
+}
+
+static
+bool hasSameEngineType(const RoseVertexProps &u_prop,
+                       const RoseVertexProps &v_prop) {
+    const left_id u_left = u_prop.left;
+    const left_id v_left = v_prop.left;
+
+    return !u_left.haig() == !v_left.haig()
+        && !u_left.dfa() == !v_left.dfa()
+        && !u_left.castle() == !v_left.castle()
+        && !u_left.graph() == !v_left.graph();
+}
+
+/**
+ * Verifies that merging the leftfixes of the given vertices does not cause
+ * conflicts due to the literals on the right.
+ *
+ * The main concern is that the lags of the literals and the overlap between
+ * them could allow the engine check offset to regress.
+ *
+ * Parameters are vectors of literal + lag pairs.
+ *
+ * Note: if more constraints on when the leftfixes are going to be checked
+ * (mandatory lookarounds passing, offset checks) were available, more merges
+ * could be allowed.
+ */
+static
+bool compatibleLiteralsForMerge(
+        const vector<pair<const rose_literal_id *, u32>> &ulits,
+        const vector<pair<const rose_literal_id *, u32>> &vlits) {
+    assert(!ulits.empty());
+    assert(!vlits.empty());
+
+    // We cannot merge engines that prefix literals in different tables.
+    if (ulits[0].first->table != vlits[0].first->table) {
+        DEBUG_PRINTF("literals in different tables\n");
+        return false;
+    }
+
+    // We don't handle delayed cases yet.
+    for (const auto &ue : ulits) {
+        const rose_literal_id &ul = *ue.first;
+        if (ul.delay) {
+            return false;
+        }
+    }
+
+    for (const auto &ve : vlits) {
+        const rose_literal_id &vl = *ve.first;
+        if (vl.delay) {
+            return false;
+        }
+    }
+
+    /* An engine requires that all accesses to it are ordered by offsets. (ie,
+       we can not check an engine's state at offset Y, if we have already
+       checked its status at offset X and X > Y). If we can not establish that
+       the literals used for triggering will satisfy this property, then it is
+       not safe to merge the engine. */
+    for (const auto &ue : ulits) {
+        const rose_literal_id &ul = *ue.first;
+        u32 ulag = ue.second;
+
+        for (const auto &ve : vlits) {
+            const rose_literal_id &vl = *ve.first;
+            u32 vlag = ve.second;
+
+            if (!checkPrefix(ul, ulag, vl, vlag)
+                || !checkPrefix(vl, vlag, ul, ulag)) {
+                DEBUG_PRINTF("prefix check failed\n");
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/**
+ * True if this graph has few enough accel states to be implemented as an NFA
+ * with all of those states actually becoming accel schemes.
+ */
+static
+bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) {
+    u32 num = countAccelStates(g, &build.rm, build.cc);
+    DEBUG_PRINTF("graph with %zu vertices has %u accel states\n",
+                 num_vertices(g), num);
+    return num <= NFA_MAX_ACCEL_STATES;
+}
+
+/**
+ * In block mode, we want to be a little more selective -- we will only merge
+ * prefix engines when the literal sets are the same or if the merged graph
+ * has only grown by a small amount.
+ */
+static
+bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u,
+                        RoseVertex v) {
+    assert(!build.cc.streaming);
+    assert(build.isRootSuccessor(u) == build.isRootSuccessor(v));
+
+    // Always merge infixes if we can (subject to the other criteria in
+    // mergeableRoseVertices).
+    if (!build.isRootSuccessor(u)) {
+        return true;
+    }
+
+    const RoseGraph &g = build.g;
+
+    // Merge prefixes with identical literal sets (as we'd have to run them
+    // both when we see those literals anyway).
+    if (g[u].literals == g[v].literals) {
+        return true;
+    }
+
+    // The rest of this function only deals with the case when both vertices
+    // have graph leftfixes.
+    if (!g[u].left.graph || !g[v].left.graph) {
+        return false;
+    }
+
+    const size_t u_count = num_vertices(*g[u].left.graph);
+    const size_t v_count = num_vertices(*g[v].left.graph);
+    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
+                 u_count, v_count);
+    if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
+        v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("prefixes too big already\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("trying merge\n");
+    NGHolder h;
+    cloneHolder(h, *g[v].left.graph);
+    if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) {
+        DEBUG_PRINTF("couldn't merge\n");
+        return false;
+    }
+
+    const size_t merged_count = num_vertices(h);
+    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
+    if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("exceeded limit\n");
+        return false;
+    }
+
+    // We want to only perform merges that take advantage of some
+    // commonality in the two input graphs, so we check that the number of
+    // vertices has only grown a small amount: somewhere between the sum
+    // (no commonality) and the max (no growth at all) of the vertex counts
+    // of the input graphs.
+    const size_t max_size = u_count + v_count;
+    const size_t min_size = max(u_count, v_count);
+    const size_t max_growth = ((max_size - min_size) * 25) / 100;
+    if (merged_count > min_size + max_growth) {
+        DEBUG_PRINTF("grew too much\n");
+        return false;
+    }
+
+    // We don't want to squander any chances at accelerating.
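+    // (Editor's note: NFA acceleration depends on keeping the accel state
+    // count small; countAccelStates() is re-run on the merged holder via
+    // isAccelerableLeftfix() because merging can push a graph past
+    // NFA_MAX_ACCEL_STATES even when both inputs were accelerable.)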
+    if (!isAccelerableLeftfix(build, h) &&
+        (isAccelerableLeftfix(build, *g[u].left.graph) ||
+         isAccelerableLeftfix(build, *g[v].left.graph))) {
+        DEBUG_PRINTF("would lose accel property\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("safe to merge\n");
+    return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+                           RoseVertex v) {
+    assert(u != v);
+
+    if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
+        return false;
+    }
+
+    if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) {
+        return false;
+    }
+
+    /* We cannot merge prefixes/vertices if they are successors of different
+     * root vertices */
+    if (tbi.isRootSuccessor(u)) {
+        assert(tbi.isRootSuccessor(v));
+        set<RoseVertex> u_preds;
+        set<RoseVertex> v_preds;
+        insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
+        insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
+
+        if (u_preds != v_preds) {
+            return false;
+        }
+    }
+
+    u32 ulag = tbi.g[u].left.lag;
+    vector<pair<const rose_literal_id *, u32>> ulits;
+    ulits.reserve(tbi.g[u].literals.size());
+    for (u32 id : tbi.g[u].literals) {
+        ulits.emplace_back(&tbi.literals.at(id), ulag);
+    }
+
+    u32 vlag = tbi.g[v].left.lag;
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    vlits.reserve(tbi.g[v].literals.size());
+    for (u32 id : tbi.g[v].literals) {
+        vlits.emplace_back(&tbi.literals.at(id), vlag);
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
+                 tbi.g[v].index);
+    return true;
+}
+
+/* We cannot merge an engine if a trigger literal and a post literal overlap
+ * in such a way that the engine status needs to be checked at a point before
+ * the engine's current location.
+ *
+ * i.e., for a trigger literal u and a post literal v,
+ * where delta is the earliest v can appear after u,
+ * we require that v_loc - v_lag >= u_loc
+ *   ==>  u_loc + delta - v_lag >= u_loc
+ *   ==>  delta >= v_lag
+ *
+ */
+static
+bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
+                    u32 vlag) {
+    DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
+                 escapeString(vl.s).c_str(), vlag);
+
+    for (size_t i = 0; i < vlag; i++) {
+        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n",
+                         i);
+            return false;
+        }
+    }
+
+    DEBUG_PRINTF("OK\n");
+    return true;
+}
+
+template<typename VertexCont>
+static never_inline
+bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
+                     const VertexCont &v2) {
+    flat_set<RoseVertex> preds;
+    for (auto v : v1) {
+        insert(&preds, inv_adjacent_vertices(v, build.g));
+    }
+
+    flat_set<u32> pred_lits;
+
+    /* No need to examine delays of a common pred - as it must already have
+     * survived the delay checks.
+     *
+     * This is important when the pred is in the anchored table as
+     * the literal is no longer available.
+     */
+    flat_set<RoseVertex> known_good_preds;
+    for (auto v : v2) {
+        insert(&known_good_preds, inv_adjacent_vertices(v, build.g));
+    }
+
+    for (auto u : preds) {
+        if (!contains(known_good_preds, u)) {
+            insert(&pred_lits, build.g[u].literals);
+        }
+    }
+
+    vector<const rose_literal_id *> pred_rose_lits;
+    pred_rose_lits.reserve(pred_lits.size());
+    for (const auto &p : pred_lits) {
+        pred_rose_lits.push_back(&build.literals.at(p));
+    }
+
+    for (auto v : v2) {
+        u32 vlag = build.g[v].left.lag;
+        if (!vlag) {
+            continue;
+        }
+
+        for (const u32 vlit : build.g[v].literals) {
+            const rose_literal_id &vl = build.literals.at(vlit);
+            assert(!vl.delay); // this should never have got this far?
+            for (const auto &ul : pred_rose_lits) {
+                assert(!ul->delay); // this should never have got this far?
+
+                if (!checkPredDelay(*ul, vl, vlag)) {
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+static
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+                           const deque<RoseVertex> &verts1,
+                           const deque<RoseVertex> &verts2) {
+    assert(!verts1.empty());
+    assert(!verts2.empty());
+
+    RoseVertex u_front = verts1.front();
+    RoseVertex v_front = verts2.front();
+
+    /* all vertices must have the same engine type: assume all verts in each
+     * group are already of the same type */
+    if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
+        return false;
+    }
+
+    bool is_prefix = tbi.isRootSuccessor(u_front);
+
+    /* We cannot merge prefixes/vertices if they are successors of different
+     * root vertices: similarly, assume the grouped vertices are compatible */
+    if (is_prefix) {
+        assert(tbi.isRootSuccessor(v_front));
+        set<RoseVertex> u_preds;
+        set<RoseVertex> v_preds;
+        insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
+        insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
+
+        if (u_preds != v_preds) {
+            return false;
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+    for (auto a : verts1) {
+        if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) {
+            return false;
+        }
+
+        u32 ulag = tbi.g[a].left.lag;
+        for (u32 id : tbi.g[a].literals) {
+            ulits.emplace_back(&tbi.literals.at(id), ulag);
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    for (auto a : verts2) {
+        if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) {
+            return false;
+        }
+
+        u32 vlag = tbi.g[a].left.lag;
+        for (u32 id : tbi.g[a].literals) {
+            vlits.emplace_back(&tbi.literals.at(id), vlag);
+        }
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
+    }
+
+    // Check preds are compatible as well.
+    if (!checkPredDelays(tbi, verts1, verts2)
+        || !checkPredDelays(tbi, verts2, verts1)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("vertex sets are mergeable\n");
+    return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
+                           const set<RoseVertex> &v2) {
+    const deque<RoseVertex> vv1(v1.begin(), v1.end());
+    const deque<RoseVertex> vv2(v2.begin(), v2.end());
+    return mergeableRoseVertices(tbi, vv1, vv2);
+}
+
+/** \brief Priority queue element for Rose merges.
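+ *
+ * (Editor's note: operator< below is arranged so that the top of the
+ * priority queue is the pair with the smallest stop-alphabet XOR, then the
+ * longest common prefix, then the fewest estimated states; tie_breaker keeps
+ * the ordering deterministic.)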
+ */
+namespace {
+struct RoseMergeCandidate {
+    RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
+                       u32 tb)
+        : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
+          tie_breaker(tb) {
+        if (r1.graph() && r2.graph()) {
+            const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
+            /* som_none as haigs don't merge and just a guiding heuristic */
+            CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
+            CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
+            stopxor = (stop1 ^ stop2).count();
+
+            // We use the number of vertices as an approximation of the state
+            // count here, as this is just feeding a comparison.
+            u32 vertex_count = num_vertices(h1) + num_vertices(h2);
+            states = vertex_count - min(vertex_count, cpl);
+        } else if (r1.castle() && r2.castle()) {
+            // FIXME
+        }
+    }
+
+    bool operator<(const RoseMergeCandidate &a) const {
+        if (stopxor != a.stopxor) {
+            return stopxor > a.stopxor;
+        }
+        if (cpl != a.cpl) {
+            return cpl < a.cpl;
+        }
+        if (states != a.states) {
+            return states > a.states;
+        }
+        return tie_breaker < a.tie_breaker;
+    }
+
+    left_id r1;
+    left_id r2;
+    u32 stopxor;
+    u32 cpl; //!< common prefix length
+    u32 states;
+    u32 tie_breaker; //!< determinism
+};
+}
+
+static
+bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
+                      const vector<RoseVertex> &verts1,
+                      const vector<RoseVertex> &verts2) {
+    assert(!verts1.empty() && !verts2.empty());
+
+    DEBUG_PRINTF("merging pair of leftfixes:\n");
+    DEBUG_PRINTF("  A:%016zx: tops %s\n", r1.hash(),
+                 as_string_list(all_tops(r1)).c_str());
+    DEBUG_PRINTF("  B:%016zx: tops %s\n", r2.hash(),
+                 as_string_list(all_tops(r2)).c_str());
+
+    RoseGraph &g = build.g;
+
+    if (r1.graph()) {
+        assert(r2.graph());
+        assert(r1.graph()->kind == r2.graph()->kind);
+        if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) {
+            DEBUG_PRINTF("nfa merge failed\n");
+            return false;
+        }
+
+        /* The graph in r1 has been merged into the graph in r2. Update r1's
+         * vertices with the new graph ptr. mergeNfaPair() does not alter the
+         * tops from the input graph so no need to update top values.
+         *
+         * It is the responsibility of the caller to ensure that the tops are
+         * distinct when they have different trigger conditions.
+         * [Note: mergeLeftfixesVariableLag() should have a common parent set]
+         */
+        shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
+        for (RoseVertex v : verts1) {
+            g[v].left.graph = h;
+        }
+
+        return true;
+    } else if (r1.castle()) {
+        assert(r2.castle());
+        assert(build.cc.grey.allowCastle);
+
+        map<u32, u32> top_map;
+        if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
+            DEBUG_PRINTF("castle merge failed\n");
+            return false;
+        }
+
+        // The castle in r1 has been merged into the castle in r2, with tops
+        // remapped as per top_map.
+        const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
+        for (RoseVertex v : verts1) {
+            g[v].left.castle = c;
+            for (const auto &e : in_edges_range(v, g)) {
+                g[e].rose_top = top_map.at(g[e].rose_top);
+            }
+        }
+        return true;
+    }
+
+    assert(0);
+    return false;
+}
+
+/**
+ * Checks that there is no problem due to the involved vertices if we merge
+ * two leftfix engines.
+ *
+ * This function takes the vertices on the right of the two engines.
+ *
+ * Unlike mergeableRoseVertices(), this does not:
+ * - check that the engines themselves can be merged
+ * - use heuristics to find out if merging the engines is wise.
+ */
+static
+bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
+                                    const vector<RoseVertex> &targets_1,
+                                    const vector<RoseVertex> &targets_2) {
+    assert(!targets_1.empty());
+    assert(!targets_2.empty());
+
+    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+    for (auto a : targets_1) {
+        u32 ulag = build.g[a].left.lag;
+        for (u32 id : build.g[a].literals) {
+            ulits.emplace_back(&build.literals.at(id), ulag);
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    for (auto a : targets_2) {
+        u32 vlag = build.g[a].left.lag;
+        for (u32 id : build.g[a].literals) {
+            vlits.emplace_back(&build.literals.at(id), vlag);
+        }
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
+    }
+
+    // Check preds are compatible as well.
+    if (!checkPredDelays(build, targets_1, targets_2)
+        || !checkPredDelays(build, targets_2, targets_1)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("vertex sets are mergeable\n");
+    return true;
+}
+
+/**
+ * In block mode, we want to be a little more selective -- we will only merge
+ * prefix engines when the literal sets are the same or if the merged graph
+ * has only grown by a small amount.
+ */
+static
+bool goodBlockModeMerge(const RoseBuildImpl &build,
+                        const vector<RoseVertex> &u_verts, const left_id &u_eng,
+                        const vector<RoseVertex> &v_verts,
+                        const left_id &v_eng) {
+    assert(!build.cc.streaming);
+
+    // Always merge infixes if we can (subject to the other criteria in
+    // mergeableRoseVertices).
+    if (!build.isRootSuccessor(u_verts.front())) {
+        return true;
+    }
+
+    const RoseGraph &g = build.g;
+
+    flat_set<u32> u_lits;
+    for (RoseVertex u : u_verts) {
+        insert(&u_lits, g[u].literals);
+    }
+
+    flat_set<u32> v_lits;
+    for (RoseVertex v : v_verts) {
+        insert(&v_lits, g[v].literals);
+    }
+
+    // Merge prefixes with identical literal sets (as we'd have to run them
+    // both when we see those literals anyway).
+    if (u_lits == v_lits) {
+        return true;
+    }
+
+    // The rest of this function only deals with the case when we have graph
+    // leftfixes.
+    if (!u_eng.graph()) {
+        return false;
+    }
+    assert(v_eng.graph());
+    const NGHolder &ug = *u_eng.graph();
+    const NGHolder &vg = *v_eng.graph();
+
+    size_t u_count = num_vertices(ug);
+    size_t v_count = num_vertices(vg);
+    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
+                 u_count, v_count);
+    if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
+        v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("prefixes too big already\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("trying merge\n");
+    NGHolder h;
+    cloneHolder(h, vg);
+    if (!mergeNfaPair(ug, h, nullptr, build.cc)) {
+        DEBUG_PRINTF("couldn't merge\n");
+        return false;
+    }
+
+    const size_t merged_count = num_vertices(h);
+    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
+    if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
+        DEBUG_PRINTF("exceeded limit\n");
+        return false;
+    }
+
+    // We want to only perform merges that take advantage of some
+    // commonality in the two input graphs, so we check that the number of
+    // vertices has only grown a small amount: somewhere between the sum
+    // (no commonality) and the max (no growth at all) of the vertex counts
+    // of the input graphs.
+    size_t max_size = u_count + v_count;
+    size_t min_size = max(u_count, v_count);
+    size_t max_growth = ((max_size - min_size) * 25) / 100;
+    if (merged_count > min_size + max_growth) {
+        DEBUG_PRINTF("grew too much\n");
+        return false;
+    }
+
+    // We don't want to squander any chances at accelerating.
+    if (!isAccelerableLeftfix(build, h)
+        && (isAccelerableLeftfix(build, ug)
+            || isAccelerableLeftfix(build, vg))) {
+        DEBUG_PRINTF("would lose accel property\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("safe to merge\n");
+    return true;
+}
+
+/**
+ * Merge r1 into r2 if safe and appropriate. Returns true on success.
+ */
+static
+bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
+                                   const vector<RoseVertex> &targets_1,
+                                   left_id &r2,
+                                   const vector<RoseVertex> &targets_2) {
+    if (targets_1.empty() || targets_2.empty()) {
+        /* one of the engines has already been merged away */
+        return false;
+    }
+
+    assert(!r1.graph() == !r2.graph());
+    if (r1.graph()) {
+        NGHolder *h1 = r1.graph();
+        NGHolder *h2 = r2.graph();
+        CharReach stop1 = findStopAlphabet(*h1, SOM_NONE);
+        CharReach stop2 = findStopAlphabet(*h2, SOM_NONE);
+        CharReach stopboth = stop1 & stop2;
+        DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(),
+                     stop2.count(), stopboth.count());
+        if (stopboth.count() < 10
+            && (stop1.count() > 10 || stop2.count() > 10)) {
+            DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
+            return false;
+        }
+        size_t maxstop = max(stop1.count(), stop2.count());
+        if (maxstop > 200 && stopboth.count() < 200) {
+            DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
+            return false;
+        }
+    }
+
+    /* Rechecking that the targets are compatible, as we may have already
+     * merged new states into r1 or r2 and we need to verify that this
+     * candidate is still ok. */
+    if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) {
+        return false;
+    }
+
+    if (!build.cc.streaming
+        && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) {
+        return false;
+    }
+
+    return mergeLeftfixPair(build, r1, r2, targets_1, targets_2);
+}
+
+static
+bool nfaHasNarrowStart(const NGHolder &g) {
+    if (out_degree(g.startDs, g) > 1) {
+        return false; // unanchored
+    }
+
+    CharReach cr;
+
+    for (auto v : adjacent_vertices_range(g.start, g)) {
+        if (v == g.startDs) {
+            continue;
+        }
+        cr |= g[v].char_reach;
+    }
+    return cr.count() <= NARROW_START_MAX;
+}
+
+static
+bool nfaHasFiniteMaxWidth(const NGHolder &g) {
+    return findMaxWidth(g).is_finite();
+}
+
+static
+bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
+    if (!proper_out_degree(h.startDs, h)) {
+        return false;
+    }
+
+    assert(!is_triggered(h));
+
+    NGHolder h_temp;
+    cloneHolder(h_temp, h);
+
+    vector<BoundedRepeatData> repeats;
+    bool suitable_for_sds_reforming = false;
+    const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
+    const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
+    const bool simple_model_selection = true; // FIRST is considered simple
+    analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
+                   simple_model_selection, grey, &suitable_for_sds_reforming);
+
+    return suitable_for_sds_reforming;
+}
+
+static
+u32 commonPrefixLength(left_id &r1, left_id &r2) {
+    if (r1.graph() && r2.graph()) {
+        return commonPrefixLength(*r1.graph(), *r2.graph());
+    } else if (r1.castle() && r2.castle()) {
+        return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
+    }
+    return 0;
+}
+
+namespace {
+struct MergeKey {
+    MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) :
+        parents(std::move(parents_in)) {
+
+        // We want to distinguish prefixes (but not infixes) on whether they
+        // have a narrow start or max width.
+        if (left.graph() && !is_triggered(*left.graph())) {
+            const NGHolder &h = *left.graph();
+            narrowStart = nfaHasNarrowStart(h);
+            hasMaxWidth = nfaHasFiniteMaxWidth(h);
+        } else {
+            narrowStart = false;
+            hasMaxWidth = false;
+        }
+
+        if (left.castle()) {
+            /* castles should have a non-empty reach */
+            assert(left.castle()->reach().any());
+            castle_cr = left.castle()->reach();
+        } else {
+            assert(left.graph());
+        }
+    }
+
+    bool operator<(const MergeKey &b) const {
+        const MergeKey &a = *this;
+        ORDER_CHECK(narrowStart);
+        ORDER_CHECK(hasMaxWidth);
+        ORDER_CHECK(castle_cr);
+        ORDER_CHECK(parents);
+        return false;
+    }
+
+    // NOTE: these two bool discriminators are only used for prefixes, not
+    // infixes.
+    bool narrowStart;
+    bool hasMaxWidth;
+    CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */
+
+    flat_set<RoseVertex> parents;
+};
+}
+
+template <typename T>
+static
+void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) {
+    if (in.size() <= chunk_size) {
+        out->push_back(std::move(in));
+        return;
+    }
+
+    out->push_back(vector<T>());
+    out->back().reserve(chunk_size);
+    for (const auto &t : in) {
+        if (out->back().size() >= chunk_size) {
+            out->push_back(vector<T>());
+            out->back().reserve(chunk_size);
+        }
+        out->back().push_back(std::move(t));
+    }
+}
+
+static
+insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
+    insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
+    for (auto v : vertices_range(g)) {
+        const auto &left = g[v].left;
+        if (!left) {
+            continue;
+        }
+        assert(contains(all_reports(left), left.leftfix_report));
+        eng_verts[left].push_back(v);
+    }
+
+    return eng_verts;
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines which share a common set
+ * of parent vertices.
+ *
+ * Engines are greedily merged pairwise by this process based on a priority
+ * queue keyed off the common prefix length.
+ *
+ * Engines are not merged if the lags are not compatible or if it would damage
+ * the stop alphabet.
+ *
+ * Infixes:
+ * - It is expected that when this is run all infixes are still at the single
+ *   top stage as we have not yet merged unrelated infixes together. After
+ *   execution, castles may have multiple (but equivalent) tops.
+ *
+ * Prefixes:
+ * - transient prefixes are not considered.
+ * - prefixes with a max width or a narrow start are kept segregated by
+ *   this phase and can only be merged with similar prefixes.
+ * - in block mode, merges are only performed if literal sets are the same.
+ * - merges are not considered in cases where the dot star start state will
+ *   be reformed to optimise a leading repeat.
+ */
+void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
+    if (!build.cc.grey.mergeRose) {
+        return;
+    }
+    assert(!hasOrphanedTops(build));
+
+    RoseGraph &g = build.g;
+
+    DEBUG_PRINTF("-----\n");
+    DEBUG_PRINTF("entry\n");
+    DEBUG_PRINTF("-----\n");
+
+    auto eng_verts = get_eng_verts(g);
+
+    map<MergeKey, vector<left_id>> engine_groups;
+    for (const auto &e : eng_verts) {
+        const left_id &left = e.first;
+        const auto &verts = e.second;
+        // Only non-transient for the moment.
+        if (contains(build.transient, left)) {
+            continue;
+        }
+
+        // No forced McClellan or Haig infix merges.
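+        // (Editor's note: DFA (McClellan) and Haig leftfixes are excluded
+        // here and merged by separate DFA-specific logic; see the
+        // DFA_CHUNK_SIZE_MAX / DFA_MERGE_MAX_STATES limits near the top of
+        // this file. The assert below records that only graph and castle
+        // engines reach this point.)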
+        if (left.dfa() || left.haig()) {
+            continue;
+        }
+        assert(left.graph() || left.castle());
+
+        if (left.graph()) {
+            const NGHolder &h = *left.graph();
+            /* we should not have merged yet */
+            assert(!is_triggered(h) || onlyOneTop(h));
+
+            if (hasReformedStartDotStar(h, build.cc.grey)) {
+                continue; // preserve the optimisation of the leading repeat
+            }
+        } else {
+            assert(left.castle());
+
+            if (!build.cc.grey.allowCastle) {
+                DEBUG_PRINTF("castle merging disallowed by greybox\n");
+                continue;
+            }
+        }
+
+        // We collapse the anchored root into the root vertex when calculating
+        // parents, so that we can merge differently-anchored prefix roses
+        // together. (Prompted by UE-2100)
+
+        flat_set<RoseVertex> parents;
+        for (RoseVertex v : verts) {
+            insert(&parents, inv_adjacent_vertices_range(v, g));
+        }
+
+        if (contains(parents, build.anchored_root)) {
+            parents.erase(build.anchored_root);
+            parents.insert(build.root);
+        }
+
+        assert(!parents.empty());
+
+#ifndef _WIN32
+        engine_groups[MergeKey(left, parents)].push_back(left);
+#else
+        // On Windows, the MergeKey passed into the 'engine_groups' map is
+        // not copied; it is freed along with engine_groups.clear(). If the
+        // MergeKey were constructed on the stack, it would also be destroyed
+        // at the end of its own lifetime, and the second destruction would
+        // trigger the is_block_type_valid() assertion in the MergeKey
+        // destructor. So heap-allocate it instead.
+        MergeKey *mk = new MergeKey(left, parents);
+        engine_groups[*mk].push_back(left);
+#endif
+    }
+
+    vector<vector<left_id>> chunks;
+    for (auto &raw_group : engine_groups | map_values) {
+        chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX);
+    }
+    engine_groups.clear();
+
+    DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size());
+
+    for (auto &roses : chunks) {
+        if (roses.size() < 2) {
+            continue;
+        }
+        // All pairs on the prio queue.
+        u32 tie_breaker = 0;
+        priority_queue<RoseMergeCandidate> pq;
+        for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) {
+            left_id r1 = *it;
+            const vector<RoseVertex> &targets_1 = eng_verts[r1];
+
+            for (auto jt = next(it); jt != ite; ++jt) {
+                left_id r2 = *jt;
+
+                /* we should have already split on engine types and reach */
+                assert(!r1.castle() == !r2.castle());
+                assert(!r1.graph() == !r2.graph());
+                assert(!r1.castle()
+                       || r1.castle()->reach() == r2.castle()->reach());
+
+                const vector<RoseVertex> &targets_2 = eng_verts[r2];
+                if (!checkVerticesOkForLeftfixMerge(build, targets_1,
+                                                    targets_2)) {
+                    continue; // No point queueing unmergeable cases.
+                }
+
+                u32 cpl = commonPrefixLength(r1, r2);
+                pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++));
+            }
+        }
+
+        DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
+
+        while (!pq.empty()) {
+            left_id r1 = pq.top().r1;
+            left_id r2 = pq.top().r2;
+            DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n",
+                         r1.graph(), r2.graph(), pq.top().cpl,
+                         pq.top().states);
+            pq.pop();
+            vector<RoseVertex> &targets_1 = eng_verts[r1];
+            vector<RoseVertex> &targets_2 = eng_verts[r2];
+            if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2,
+                                              targets_2)) {
+                insert(&targets_2, targets_2.end(), targets_1);
+                targets_1.clear();
+            }
+        }
+    }
+
+    DEBUG_PRINTF("-----\n");
+    DEBUG_PRINTF("exit\n");
+    DEBUG_PRINTF("-----\n");
+    assert(!hasOrphanedTops(build));
+}
+
+namespace {
+
+/**
+ * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
+ */
+struct DedupeLeftKey {
+    DedupeLeftKey(const RoseBuildImpl &build,
+                  flat_set<pair<size_t, u32>> preds_in, const left_id &left)
+        : left_hash(hashLeftfix(left)), preds(move(preds_in)),
+          transient(contains(build.transient, left)) {
+    }
+
+    bool operator<(const DedupeLeftKey &b) const {
+        return tie(left_hash, preds, transient)
+             < tie(b.left_hash, b.preds, b.transient);
+    }
+
+private:
+    /** Quick hash of the leftfix itself. Must be identical for a given pair of
+     * graphs if is_equal would return true. */
+    size_t left_hash;
+
+    /** For each in-edge, the pair of (parent index, edge top). */
+    flat_set<pair<size_t, u32>> preds;
+
+    /** We don't want to combine transient with non-transient. */
+    bool transient;
+};
+
+} // namespace
+
+static
+flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
+    flat_set<pair<size_t, u32>> preds;
+    for (const auto &e : in_edges_range(v, g)) {
+        preds.emplace(g[source(e, g)].index, g[e].rose_top);
+    }
+    return preds;
+}
+
+/**
+ * This is a generalisation of \ref dedupeLeftfixes which relaxes two
+ * restrictions: multiple predecessor roles are allowed and the delay used by
+ * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
+ * the leftfixes' successor vertices are first grouped to reduce the number of
+ * potential candidates - the grouping in this case is by the set of
+ * predecessor roles with their associated top events. For the dedupe to be
+ * possible, it is required that:
+ *
+ * 1. the nfa graphs with respect to the relevant reports are identical
+ * 2. the nfa graphs are triggered by the same roles with same events (ensured
+ *    by the initial grouping pass)
+ * 3. all the successor roles of either graph can inspect the combined leftfix
+ *    without advancing the state of the leftfix past the point that another
+ *    successor may want to inspect it; the overlap relationships between the
+ *    involved literals are examined to ensure that this property holds.
+ *
+ * Note: this is unable to dedupe when delayed literals are involved unlike
+ * dedupeLeftfixes.
+ */
+void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
+    DEBUG_PRINTF("entry\n");
+
+    RoseGraph &g = build.g;
+    auto eng_verts = get_eng_verts(g);
+
+    map<DedupeLeftKey, vector<left_id>> engine_groups;
+    for (const auto &e : eng_verts) {
+        const left_id &left = e.first;
+        const auto &verts = e.second;
+
+        /* There should only be one report on an engine as no merges have
+         * happened yet.
+         * (aside from eod prefixes) */
+        if (all_reports(left).size() != 1) {
+            assert(any_of_in(adjacent_vertices_range(verts.front(), g),
+                             [&](RoseVertex w) { return g[w].eod_accept; }));
+            continue;
+        }
+
+        if (left.haig()) {
+            /* TODO: allow deduping of identical haigs */
+            continue;
+        }
+
+        if (left.graph()) {
+            /* we should not have merged yet */
+            assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
+        }
+
+        auto preds = get_pred_tops(verts.front(), g);
+        for (RoseVertex v : verts) {
+            if (preds != get_pred_tops(v, g)) {
+                DEBUG_PRINTF("distinct pred sets\n");
+                continue;
+            }
+        }
+        engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
+    }
+
+    /* We don't bother chunking as we expect deduping to be successful if the
+     * hashes match */
+
+    for (auto &group : engine_groups | map_values) {
+        DEBUG_PRINTF("group of %zu roses\n", group.size());
+
+        if (group.size() < 2) {
+            continue;
+        }
+
+        for (auto it = group.begin(); it != group.end(); ++it) {
+            left_id r1 = *it;
+            vector<RoseVertex> &verts1 = eng_verts[r1];
+            assert(!verts1.empty()); /* cleared engines should be behind us */
+
+            assert(all_reports(r1).size() == 1);
+            ReportID r1_report = *all_reports(r1).begin();
+
+            for (auto jt = next(it); jt != group.end(); ++jt) {
+                left_id r2 = *jt;
+                vector<RoseVertex> &verts2 = eng_verts[r2];
+                assert(!verts2.empty());
+                assert(all_reports(r2).size() == 1);
+                ReportID r2_report = *all_reports(r2).begin();
+
+                if (!is_equal(r1, r1_report, r2, r2_report)) {
+                    continue;
+                }
+
+                if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
+                    continue;
+                }
+
+                DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
+
+                // Replace r1 with r2.
+
+                for (auto v : verts1) {
+                    DEBUG_PRINTF("replacing report %u with %u on %zu\n",
+                                 r2_report, r1_report, g[v].index);
+                    u32 orig_lag = g[v].left.lag;
+                    g[v].left = g[verts2.front()].left;
+                    g[v].left.lag = orig_lag;
+                }
+
+                insert(&verts2, verts2.end(), verts1);
+                verts1.clear();
+
+                /* remove stale entry from transient set, if present */
+                build.transient.erase(r1);
+
+                break;
+            }
+        }
+    }
+}
+
+static
+u32 findUnusedTop(const flat_set<u32> &tops) {
+    u32 i = 0;
+    while (contains(tops, i)) {
+        i++;
+    }
+    return i;
+}
+
+// Replace top 't' on edges with new top 'u'.
+static
+void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
+    for (const auto &e : out_edges_range(h.start, h)) {
+        NFAVertex v = target(e, h);
+        if (v == h.startDs) {
+            continue;
+        }
+        flat_set<u32> new_tops;
+        for (u32 t : h[e].tops) {
+            DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t);
+            new_tops.insert(top_mapping.at(t));
+        }
+        h[e].tops = std::move(new_tops);
+    }
+}
+
+static
+bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
+                     map<u32, u32> &top_mapping) {
+    flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2);
+
+    DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
+                 tops2.size());
+
+    // If our tops don't intersect, we're OK to merge with no changes.
+    if (!has_intersection(tops1, tops2)) {
+        DEBUG_PRINTF("tops don't intersect\n");
+        return true;
+    }
+
+    // Otherwise, we have to renumber the tops in h1 so that they don't
+    // overlap with the tops in h2.
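+    // (Editor's illustrative example: with tops1 = {0, 1} and tops2 = {0, 2},
+    // the loop below maps 0 -> 1 and 1 -> 3, inserting each newly chosen top
+    // into tops2 so that later picks stay distinct.)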
+    top_mapping.clear();
+    for (u32 t : tops1) {
+        u32 u = findUnusedTop(tops2);
+        DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
+        top_mapping.insert(make_pair(t, u));
+        assert(!contains(tops2, u));
+        tops2.insert(u);
+    }
+
+    replaceTops(h1, top_mapping);
+    return true;
+}
+
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+                         const deque<RoseVertex> &verts1) {
+    map<u32, u32> top_mapping;
+    if (!setDistinctTops(h1, h2, top_mapping)) {
+        return false;
+    }
+
+    if (top_mapping.empty()) {
+        return true; // No remapping necessary.
+    }
+
+    for (auto v : verts1) {
+        DEBUG_PRINTF("vertex %zu\n", g[v].index);
+        assert(!g[v].left.haig);
+        assert(!g[v].left.dfa);
+        for (const auto &e : in_edges_range(v, g)) {
+            u32 t = g[e].rose_top;
+            DEBUG_PRINTF("t=%u\n", t);
+            assert(contains(top_mapping, t));
+            g[e].rose_top = top_mapping[t];
+            DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
+                         g[source(e, g)].index, g[target(e, g)].index, t,
+                         top_mapping[t]);
+        }
+    }
+
+    return true;
+}
+
+static
+bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+                           const deque<RoseVertex> &verts1) {
+    map<u32, u32> top_mapping;
+    if (!setDistinctTops(h1, h2, top_mapping)) {
+        return false;
+    }
+
+    if (top_mapping.empty()) {
+        return true; // No remapping necessary.
+    }
+
+    for (auto v : verts1) {
+        DEBUG_PRINTF("vertex %zu\n", g[v].index);
+        u32 t = g[v].suffix.top;
+        assert(contains(top_mapping, t));
+        g[v].suffix.top = top_mapping[t];
+    }
+
+    return true;
+}
+
+/** \brief Estimate the number of accel states in the given graph when built
+ * as an NFA.
+ *
+ * (The easiest way to estimate something like this is to actually build it:
+ * the criteria for NFA acceleration are quite complicated and buried in
+ * limex_compile.)
+ */
+static
+u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
+    return countAccelStates(h, &tbi.rm, tbi.cc);
+}
+
+static
+void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
+    RoseGraph &g = tbi.g;
+    DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
+
+    // We track the number of accelerable states for each graph in a map and
+    // only recompute them when the graph is modified.
+    unordered_map<left_id, u32> accel_count;
+    for (const auto &rose : roses) {
+        assert(rose.graph()->kind == NFA_INFIX);
+        accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
+    }
+
+    for (auto it = roses.begin(); it != roses.end(); ++it) {
+        left_id r1 = *it;
+        const deque<RoseVertex> &verts1 = roses.vertices(r1);
+
+        deque<left_id> merged;
+        for (auto jt = next(it); jt != roses.end(); ++jt) {
+            left_id r2 = *jt;
+            const deque<RoseVertex> &verts2 = roses.vertices(r2);
+
+            DEBUG_PRINTF("consider merging rose %p (%zu verts) "
+                         "with %p (%zu verts)\n",
+                         r1.graph(), verts1.size(), r2.graph(), verts2.size());
+
+            u32 accel1 = accel_count[r1];
+            if (accel1 >= NFA_MAX_ACCEL_STATES) {
+                DEBUG_PRINTF("h1 has hit max accel\n");
+                break; // next h1
+            }
+
+            u32 accel2 = accel_count[r2];
+            if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+                DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+                             "accel2=%u)\n",
+                             accel1, accel2);
+                continue; // next h2
+            }
+
+            if (!mergeableRoseVertices(tbi, verts1, verts2)) {
+                DEBUG_PRINTF("not mergeable\n");
+                continue; // next h2
+            }
+
+            // Attempt to merge h2 into h1.
+
+            NGHolder victim;
+            cloneHolder(victim, *r2.graph());
+
+            // Store a copy of the in-edge properties in case we have to roll
+            // back.
+            map<RoseEdge, RoseEdgeProps> edge_props;
+            for (auto v : verts2) {
+                for (const auto &e : in_edges_range(v, g)) {
+                    edge_props[e] = g[e];
+                }
+            }
+
+            if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
+                DEBUG_PRINTF("can't set distinct tops\n");
+                continue; // next h2
+            }
+
+            assert(victim.kind == r1.graph()->kind);
+            assert(!generates_callbacks(*r1.graph()));
+            if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
+                DEBUG_PRINTF("merge failed\n");
+                // Roll back in-edge properties.
+                for (const auto &m : edge_props) {
+                    g[m.first] = m.second;
+                }
+                continue; // next h2
+            }
+
+            // Update h2's roses to point to h1 now
+            shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
+            for (auto v : verts2) {
+                g[v].left.graph = winner;
+            }
+            roses.insert(r1, verts2);
+
+            merged.push_back(r2);
+
+            if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
+                DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+                             num_vertices(*winner));
+                break; // next h1
+            }
+
+            // Update h1's accel count estimate.
+            accel_count[r1] = estimatedAccelStates(tbi, *winner);
+        }
+
+        DEBUG_PRINTF("%zu roses merged\n", merged.size());
+        roses.erase_all(merged.begin(), merged.end());
+    }
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines with a small number of
+ * vertices together into larger engines. The engines must not have a
+ * reformed start dot star (due to a leading repeat) nor be an infix LBR.
+ * Engines that have compatible lag are greedily grouped such that they
+ * remain accelerable and only have a small number of states. Note: if a role
+ * has an infix with multiple trigger vertices, the role will be left
+ * unchanged by this pass and will remain using an unmerged graph.
+ */
+void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("entry\n");
+
+    if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
+        return;
+    }
+
+    RoseGraph &g = tbi.g;
+
+    LeftfixBouquet nfa_leftfixes;
+
+    for (auto v : vertices_range(g)) {
+        if (!g[v].left) {
+            continue;
+        }
+
+        // Handle single-parent infixes only.
+        if (tbi.isRootSuccessor(v)) {
+            continue;
+        }
+
+        left_id left(g[v].left);
+
+        // Only non-transient for the moment.
+        if (contains(tbi.transient, left)) {
+            continue;
+        }
+
+        // No DFAs or Haigs right now.
+        if (left.dfa() || left.haig()) {
+            continue;
+        }
+
+        // Castles are handled by a different pass.
+        if (left.castle()) {
+            continue;
+        }
+
+        assert(left.graph());
+        NGHolder &h = *left.graph();
+
+        /* Ensure that kind on the graph is correct */
+        assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
+
+        if (hasReformedStartDotStar(h, tbi.cc.grey)) {
+            /* We would lose optimisations of the leading repeat by merging. */
+            continue;
+        }
+
+        // Small roses only.
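+        // (Editor's note: "small" here is SMALL_ROSE_THRESHOLD_STREAM (32)
+        // in streaming mode and SMALL_ROSE_THRESHOLD_BLOCK (10) in block
+        // mode, via small_rose_threshold() defined near the top of this
+        // file.)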
+ if (num_vertices(h) > small_rose_threshold(tbi.cc)) { + continue; + } + + nfa_leftfixes.insert(left, v); + } + + deque<LeftfixBouquet> leftfix_groups; + chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX); + nfa_leftfixes.clear(); + DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n", + leftfix_groups.size()); + + for (auto &group : leftfix_groups) { + mergeNfaLeftfixes(tbi, group); + } +} + +static +void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, + insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { + /* caller must have already ensured that candidates have the same reach */ + RoseGraph &g = build.g; + DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); + + for (auto it = cands.begin(); it != cands.end(); ++it) { + left_id &cand_1 = *it; + vector<RoseVertex> &verts_1 = eng_verts[cand_1]; + if (verts_1.empty()) { + continue; + } + + for (auto jt = next(it); jt != cands.end(); ++jt) { + const left_id &cand_2 = *jt; + vector<RoseVertex> &verts_2 = eng_verts[cand_2]; + if (verts_2.empty()) { + continue; + } + + assert(cand_1.castle()->reach() == cand_2.castle()->reach()); + + if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { + DEBUG_PRINTF("not mergeable\n"); + continue; // next cand_2 + } + + DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), + cand_1.castle()->repeats.size()); + DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), + cand_2.castle()->repeats.size()); + + map<u32, u32> top_map; + if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { + DEBUG_PRINTF("couldn't merge\n"); + continue; // next cand_2 + } + + // Update castle2's roses to point to castle1 now. + shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; + for (auto v : verts_2) { + assert(g[v].left.castle.get() == cand_2.castle()); + g[v].left.castle = winner; + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + + insert(&verts_1, verts_1.end(), verts_2); + verts_2.clear(); + } + } +} + +/** + * Merges castles with the same reach together regardless of where in the rose + * graph they are. Note: there is no requirement for the castles to have common + * parent or target vertices. + * + * There are no heuristics for reducing block mode merges as castle speed + * mainly depends on the reach being scanned. + */ +void mergeCastleLeftfixes(RoseBuildImpl &build) { + DEBUG_PRINTF("entry\n"); + + if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses + || !build.cc.grey.allowCastle) { + return; + } + + RoseGraph &g = build.g; + + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; + + for (auto v : vertices_range(g)) { + if (!g[v].left.castle) { + continue; + } + + // Handle infixes only. 
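+        // Root successors are prefixes rather than infixes, so they are
+        // skipped by this pass; prefixes are dealt with elsewhere.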
+ if (build.isRootSuccessor(v)) { + continue; + } + + eng_verts[g[v].left].push_back(v); + } + + map<CharReach, vector<left_id>> by_reach; + for (const auto &left : eng_verts | map_keys) { + by_reach[left.castle()->reach()].push_back(left); + } + + vector<vector<left_id>> chunks; + for (auto &raw_group : by_reach | map_values) { + chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); + } + by_reach.clear(); + + DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); + + for (auto &chunk : chunks) { + mergeCastleChunk(build, chunk, eng_verts); + } +} + +static +void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, + const bool acyclic) { + RoseGraph &g = tbi.g; + + DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size()); + + // If this isn't an acyclic case, we track the number of accelerable states + // for each graph in a map and only recompute them when the graph is + // modified. + unordered_map<suffix_id, u32> accel_count; + if (!acyclic) { + for (const auto &suffix : suffixes) { + assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); + accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph()); + } + } + + for (auto it = suffixes.begin(); it != suffixes.end(); ++it) { + suffix_id s1 = *it; + const deque<RoseVertex> &verts1 = suffixes.vertices(s1); + assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); + + // Caller should ensure that we don't propose merges of graphs that are + // already too big. + assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); + + deque<suffix_id> merged; + for (auto jt = next(it); jt != suffixes.end(); ++jt) { + suffix_id s2 = *jt; + const deque<RoseVertex> &verts2 = suffixes.vertices(s2); + assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); + + if (!acyclic) { + u32 accel1 = accel_count[s1]; + if (accel1 >= NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("h1 has hit max accel\n"); + break; // next h1 + } + + u32 accel2 = accel_count[s2]; + if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " + "accel2=%u)\n", + accel1, accel2); + continue; // next h2 + } + } + + // Attempt to merge h2 into h1. + + NGHolder victim; + cloneHolder(victim, *s2.graph()); + + // Store a copy of the suffix tops in case we have to roll back. + map<RoseVertex, u32> old_tops; + for (auto v : verts2) { + old_tops[v] = g[v].suffix.top; + } + + if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { + DEBUG_PRINTF("can't set distinct tops\n"); + continue; // next h2 + } + + if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { + DEBUG_PRINTF("merge failed\n"); + // Roll back in-edge properties. + for (const auto &m : old_tops) { + g[m.first].suffix.top = m.second; + } + continue; // next h2 + } + + // Update h2's roses to point to h1 now + shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph; + for (auto v : verts2) { + g[v].suffix.graph = winner; + } + suffixes.insert(s1, verts2); + merged.push_back(s2); + + if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) { + DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n", + num_vertices(*s1.graph())); + break; // next h1 + } + + if (!acyclic) { + // Update h1's accel count estimate. 
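+                // The merge changed s1's graph, so the cached estimate is
+                // stale and must be recomputed.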
+                accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
+            }
+        }
+
+        DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
+        suffixes.erase_all(merged.begin(), merged.end());
+    }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to pairwise merge each with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or if it loses the ability to be accelerated. The merge will modify
+ * the existing suffix graph of one member (g1); the other member updates its
+ * graph to refer to g1 instead of its previous graph (g2) and uses the new
+ * tops created. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected, as g2 will continue to exist while it has any roles
+ * triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
+    DEBUG_PRINTF("entry\n");
+
+    if (!tbi.cc.grey.mergeSuffixes) {
+        return;
+    }
+
+    SuffixBouquet suffixes;
+
+    RoseGraph &g = tbi.g;
+
+    for (auto v : vertices_range(g)) {
+        shared_ptr<NGHolder> h = g[v].suffix.graph;
+        if (!h || tbi.isInETable(v)) {
+            continue;
+        }
+
+        assert(!g[v].suffix.haig);
+
+        if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
+            continue;
+        }
+
+        if (!isAcyclic(*h)) {
+            continue;
+        }
+
+        suffixes.insert(g[v].suffix, v);
+    }
+
+    deque<SuffixBouquet> suff_groups;
+    chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+    DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+                 suff_groups.size());
+    suffixes.clear();
+
+    for (auto &group : suff_groups) {
+        mergeSuffixes(tbi, group, true);
+    }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to pairwise merge each with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or if it loses the ability to be accelerated. The merge will modify
+ * the existing suffix graph of one member (g1); the other member updates its
+ * graph to refer to g1 instead of its previous graph (g2) and uses the new
+ * tops created. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected, as g2 will continue to exist while it has any roles
+ * triggering it.
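+ *
+ * For example, if role A uses suffix graph g1 (triggered by top 0) and role B
+ * uses g2, a successful merge folds g2's states into g1 under a fresh top
+ * (say top 1) and repoints B at g1; A is unaffected, since the behaviour of
+ * top 0 in g1 is unchanged.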
+ * + * Note: suffixes destined for the LBR are not considered for these merges as + * the LBR can only handle a single repeat and this type of repeat is ideally + * handled outside of an NFA or DFA. + */ +void mergeSmallSuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + RoseGraph &g = tbi.g; + SuffixBouquet suffixes; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h || tbi.isInETable(v)) { + continue; + } + assert(!g[v].suffix.haig); + + // Leave acyclics out for the moment. + if (isAcyclic(*h)) { + continue; + } + + // Small-ish suffixes only. + if (num_vertices(*h) > 32) { + continue; + } + + suffixes.insert(g[v].suffix, v); + } + + deque<SuffixBouquet> suff_groups; + chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); + DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(), + suff_groups.size()); + suffixes.clear(); + + for (auto &group : suff_groups) { + mergeSuffixes(tbi, group, false); + } +} + +static +void removeDeadOutfixes(vector<OutfixInfo> &outfixes) { + auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); }; + outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead), + end(outfixes)); +} + +static +void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { + assert(!winner.is_dead()); + + winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth); + winner.minWidth = min(winner.minWidth, victim.minWidth); + winner.maxWidth = max(winner.maxWidth, victim.maxWidth); + winner.maxOffset = max(winner.maxOffset, victim.maxOffset); + mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info); + + // This outfix can be ignored in small block mode if both were. The dedupe + // layer at runtime will protect us from extra matches if only one was in + // the small block matcher. + winner.in_sbmatcher &= victim.in_sbmatcher; +} + +static +map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, + const vector<NGHolder *> &nfas) { + map<NGHolder *, NGHolder *> merged; + + vector<NGHolder *> batch; + for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) { + batch.push_back(*it); + assert((*it)->kind == NFA_OUTFIX); + if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { + auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); + insert(&merged, batch_merged); + batch.clear(); + } + } + + return merged; +} + +static +void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { + DEBUG_PRINTF("merging %zu nfas\n", nfas.size()); + if (nfas.size() < 2) { + return; + } + + vector<OutfixInfo> &outfixes = tbi.outfixes; + + map<NGHolder *, size_t> nfa_mapping; + for (size_t i = 0; i < outfixes.size(); i++) { + auto *holder = outfixes[i].holder(); + if (holder) { + nfa_mapping[holder] = i; + } + } + + map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas); + if (merged.empty()) { + return; + } + + DEBUG_PRINTF("%zu nfas merged\n", merged.size()); + + // Update the outfix info for merged holders. 
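+    // The map returned by chunkedNfaMerge() takes each absorbed (victim)
+    // holder to the holder that absorbed it (the winner).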
+ for (const auto &m : merged) { + OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]); + OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]); + mergeOutfixInfo(winner, victim); + victim.clear(); + } + + removeDeadOutfixes(outfixes); +} + +namespace { +struct MergeMcClellan { + MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in) + : rm(rm_in), grey(grey_in) {} + + unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const { + assert(d1 && d2); + return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); + } + +private: + const ReportManager &rm; + const Grey &grey; +}; + +struct MergeHaig { + explicit MergeHaig(u32 limit_in) : limit(limit_in) {} + + unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1, + const raw_som_dfa *d2) const { + assert(d1 && d2); + return attemptToMergeHaig({d1, d2}, limit); + } + +private: + const u32 limit; //!< state limit for merged result. +}; +} + +/** + * Generic pairwise merge algorithm that can be used for either McClellan + * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge + * operation to a merge functor, which allows the caller to set some policy + * (state limits, etc). + * + * This is currently astonishingly simple and just considers every pair of + * DFAs, slow and steady. We may wish to actually apply a merge ordering + * strategy in the future. + */ +template<class RawDfa, class MergeFunctor> +static +void pairwiseDfaMerge(vector<RawDfa *> &dfas, + unordered_map<RawDfa *, size_t> &dfa_mapping, + vector<OutfixInfo> &outfixes, + MergeFunctor merge_func) { + DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); + + for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) { + if (!*it) { + continue; + } + for (auto jt = next(it); jt != ite; ++jt) { + if (!*jt) { + continue; + } + + DEBUG_PRINTF("try merge %p and %p\n", *it, *jt); + unique_ptr<RawDfa> rdfa = merge_func(*it, *jt); + if (!rdfa) { + continue; // Merge failed. + } + + DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get()); + OutfixInfo &winner = outfixes.at(dfa_mapping[*it]); + OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]); + assert(!winner.is_dead() && !victim.is_dead()); + + RawDfa *dfa_ptr = rdfa.get(); + dfa_mapping[dfa_ptr] = dfa_mapping[*it]; + dfa_mapping.erase(*it); + winner.proto = move(rdfa); + + mergeOutfixInfo(winner, victim); + + victim.clear(); + *jt = nullptr; // to be deleted. + *it = dfa_ptr; + } + } +} + +template<class RawDfa, class MergeFunctor> +static +void chunkedDfaMerge(vector<RawDfa *> &dfas, + unordered_map<RawDfa *, size_t> &dfa_mapping, + vector<OutfixInfo> &outfixes, + MergeFunctor merge_func) { + DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); + + vector<RawDfa *> out_dfas; + vector<RawDfa *> chunk; + for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { + chunk.push_back(*it); + if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { + pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); + out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); + chunk.clear(); + } + } + + // Remove null (merged) DFAs and update vector for subsequent use. 
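+    // pairwiseDfaMerge() nulls out the entries for DFAs that were absorbed
+    // into a winner, so a standard erase-remove leaves only the live DFAs.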
+    out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
+                   out_dfas.end());
+    dfas.swap(out_dfas);
+    DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
+}
+
+static
+void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
+    DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
+    if (dfas.size() < 2) {
+        return;
+    }
+
+    vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+    /* key is index into outfix array as iterators, etc may be invalidated by
+     * element addition. */
+    unordered_map<raw_dfa *, size_t> dfa_mapping;
+    for (size_t i = 0; i < outfixes.size(); i++) {
+        auto *rdfa = outfixes[i].rdfa();
+        if (rdfa) {
+            dfa_mapping[rdfa] = i;
+        }
+    }
+
+    chunkedDfaMerge(dfas, dfa_mapping, outfixes,
+                    MergeMcClellan(tbi.rm, tbi.cc.grey));
+    removeDeadOutfixes(outfixes);
+}
+
+static
+void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
+                      const Grey &grey) {
+    if (!grey.roseMcClellanOutfix) {
+        return;
+    }
+
+    DEBUG_PRINTF("merge combo\n");
+
+    bool seen_dfa = false;
+    u32 nfa_count = 0;
+    for (const auto &outfix : tbi.outfixes) {
+        if (outfix.holder()) {
+            DEBUG_PRINTF("nfa\n");
+            nfa_count++;
+        } else if (outfix.rdfa()) {
+            DEBUG_PRINTF("dfa\n");
+            seen_dfa = true;
+        }
+    }
+
+    DEBUG_PRINTF("%u nfas, dfas present: %d\n", nfa_count, (int)seen_dfa);
+    if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
+        DEBUG_PRINTF("no combo merges possible\n");
+        return;
+    }
+
+    /* key is index into outfix array as iterators, etc may be invalidated by
+     * element addition. */
+    size_t new_dfas = 0;
+    unordered_map<raw_dfa *, size_t> dfa_mapping;
+    vector<raw_dfa *> dfas;
+
+    for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
+        auto &outfix = *it;
+        assert(!outfix.is_dead());
+
+        if (outfix.rdfa()) {
+            auto *rdfa = outfix.rdfa();
+            dfas.push_back(rdfa);
+            dfa_mapping[rdfa] = it - tbi.outfixes.begin();
+            continue;
+        }
+
+        if (!outfix.holder()) {
+            continue;
+        }
+
+        NGHolder *h = outfix.holder();
+        assert(h->kind == NFA_OUTFIX);
+        auto rdfa = buildMcClellan(*h, &rm, grey);
+        if (rdfa) {
+            // Transform this outfix into a DFA and add it to the merge set.
+            dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
+            dfas.push_back(rdfa.get());
+            outfix.proto = move(rdfa);
+            new_dfas++;
+        }
+    }
+
+    DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
+
+    if (!new_dfas) {
+        /* assumes normal dfas have already been fully merged */
+        return;
+    }
+
+    chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
+                    MergeMcClellan(tbi.rm, tbi.cc.grey));
+    removeDeadOutfixes(tbi.outfixes);
+}
+
+static
+void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
+                      u32 limit) {
+    if (dfas.size() < 2) {
+        return;
+    }
+
+    vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+    unordered_map<raw_som_dfa *, size_t> dfa_mapping;
+    for (size_t i = 0; i < outfixes.size(); i++) {
+        auto *haig = outfixes[i].haig();
+        if (haig) {
+            dfa_mapping[haig] = i;
+        }
+    }
+
+    chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
+    removeDeadOutfixes(outfixes);
+}
+
+/**
+ * This pass attempts to merge outfix engines together. At this point in time,
+ * the engine type (NFA, DFA, Haig) has already been decided for each outfix
+ * and outfixes can only be merged with others of the same type. NFAs are
+ * merged in a priority order based on common prefix length. The other types
+ * are merged blindly. Engines are merged to the extent that they can still be
+ * implemented efficiently.
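+ *
+ * Haig merging is attempted twice: first with a small state limit, then again
+ * with a larger limit for the engines that remain after the first pass.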
+ */ +void mergeOutfixes(RoseBuildImpl &tbi) { + if (!tbi.cc.grey.mergeOutfixes) { + return; + } + + vector<NGHolder *> nfas; + vector<raw_dfa *> dfas; + vector<raw_som_dfa *> som_dfas; + + for (auto &outfix : tbi.outfixes) { + if (outfix.rdfa()) { + dfas.push_back(outfix.rdfa()); + } else if (outfix.holder()) { + nfas.push_back(outfix.holder()); + } else if (outfix.haig()) { + som_dfas.push_back(outfix.haig()); + } + } + + DEBUG_PRINTF("merging %zu dfas, %zu nfas\n", + dfas.size(), nfas.size()); + + mergeOutfixNfas(tbi, nfas); + mergeOutfixDfas(tbi, dfas); + mergeOutfixHaigs(tbi, som_dfas, 255); + mergeOutfixHaigs(tbi, som_dfas, 8192); + mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey); +} + +static +u32 allowedSquashDistance(const CharReach &cr, u32 min_width, + const RoseBuildImpl &tbi, + RoseVertex tv) { + CharReach accept_cr; + DEBUG_PRINTF("hello |cr|=%zu\n", cr.count()); + + const RoseGraph &g = tbi.g; + + /* TODO: inspect further back in the pattern */ + for (u32 lit_id : g[tv].literals) { + const rose_literal_id &lit = tbi.literals.at(lit_id); + if (lit.delay) { + return 0; /* TODO: better */ + } + if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) { + return 0; + } + assert(!lit.s.empty()); + accept_cr |= *lit.s.rbegin(); + } + + DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count()); + + if ((accept_cr & cr).any()) { + DEBUG_PRINTF("no squash\n"); + return 0; /* the accept byte doesn't always kill the puffette. TODO: + * maybe if we look further back we could find something that + * would kill the puffette... */ + } + + DEBUG_PRINTF("allowed to squash %u\n", min_width); + return min_width; +} + +void mergePuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + RoseGraph &g = tbi.g; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h) { + continue; + } + assert(!g[v].suffix.haig); + assert(!g[v].eod_accept); + + assert(onlyOneTop(*h)); /* we should not have merged yet */ + bool fixed_depth = g[v].min_offset == g[v].max_offset; + + if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) { + continue; + } + + PureRepeat repeat; + if (!isPureRepeat(*h, repeat)) { + assert(0); + continue; + } + + if (repeat.bounds.min == depth(0)) { + assert(0); // No vacuous puffs allowed. 
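+            // A repeat with a zero minimum bound would match vacuously and
+            // is expected to have been eliminated earlier in compilation.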
+            continue;
+        }
+
+        assert(repeat.bounds.min.is_finite() &&
+               repeat.bounds.max.is_reachable());
+        assert(repeat.bounds.max == repeat.bounds.min ||
+               repeat.bounds.max.is_infinite());
+
+        const bool unbounded = repeat.bounds.max.is_infinite();
+        const set<ReportID> reports = all_reports(*h);
+        assert(reports.size() == 1);
+        ReportID report = *reports.begin();
+
+        DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
+                     repeat.bounds.str().c_str());
+
+        raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
+
+        u32 queue;
+        u32 event;
+        tbi.addChainTail(rp, &queue, &event);
+        u32 squashDistance =
+            allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
+
+        Report ir = makeMpvTrigger(event, squashDistance);
+        ReportID id = tbi.rm.getInternalId(ir);
+
+        DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
+        g[v].suffix.reset();
+        g[v].reports.insert(id);
+    }
+}
+
+static
+void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
+                        u32 top, const vector<RoseVertex> &verts) {
+    DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top,
+                 m.get(), verts.size());
+
+    for (auto v : verts) {
+        assert(g[v].suffix.castle);
+        g[v].suffix.castle = m;
+        g[v].suffix.top = top;
+    }
+}
+
+static
+void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles,
+                            const unordered_map<CastleProto *,
+                                                vector<RoseVertex>> &eng_verts) {
+    if (castles.size() <= 1) {
+        return;
+    }
+
+    DEBUG_PRINTF("merging reach %s, %zu elements\n",
+                 describeClass(castles[0]->reach()).c_str(), castles.size());
+
+    CastleProto *m = nullptr;
+
+    for (CastleProto *c : castles) {
+        assert(c->repeats.size() == 1); // Not yet merged.
+        assert(g[eng_verts.at(c).front()].suffix.castle.get() == c);
+        if (!m) {
+            m = c;
+            continue;
+        }
+
+        u32 top = m->merge(c->repeats[0]);
+        if (top == CastleProto::max_occupancy) {
+            // No room left to merge into 'm'. This one becomes the new 'm'.
+            DEBUG_PRINTF("next mergee\n");
+            m = c;
+            continue;
+        }
+        updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top,
+                           eng_verts.at(c));
+        DEBUG_PRINTF("added to %p, top %u\n", m, top);
+    }
+}
+
+void mergeCastleSuffixes(RoseBuildImpl &build) {
+    DEBUG_PRINTF("entry\n");
+
+    if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) {
+        return;
+    }
+
+    unordered_map<CastleProto *, vector<RoseVertex>> eng_verts;
+    map<CharReach, vector<CastleProto *>> by_reach;
+
+    RoseGraph &g = build.g;
+
+    for (auto v : vertices_range(g)) {
+        if (!g[v].suffix.castle) {
+            continue;
+        }
+
+        CastleProto *c = g[v].suffix.castle.get();
+
+        if (c->repeats.size() != 1) {
+            // This code assumes it's the only place merging is being done.
+            assert(0);
+            continue;
+        }
+
+        if (!contains(eng_verts, c)) {
+            by_reach[c->reach()].push_back(c);
+        }
+        eng_verts[c].push_back(v);
+    }
+
+    for (auto &chunk : by_reach | map_values) {
+        mergeCastleSuffixChunk(g, chunk, eng_verts);
+    }
+}
+
+} // namespace ue2