author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_merge.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | 2042 |
1 file changed, 1021 insertions, 1021 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp index 5066dbd578..0045782cfb 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp @@ -63,12 +63,12 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/hash.h" -#include "util/insertion_ordered.h" +#include "util/hash.h" +#include "util/insertion_ordered.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" -#include "util/unordered.h" +#include "util/unordered.h" #include <algorithm> #include <functional> @@ -84,7 +84,7 @@ using namespace std; using boost::adaptors::map_values; -using boost::adaptors::map_keys; +using boost::adaptors::map_keys; namespace ue2 { @@ -94,7 +94,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; static const size_t MERGE_GROUP_SIZE_MAX = 200; -static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; +static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ static const size_t DFA_CHUNK_SIZE_MAX = 200; @@ -102,10 +102,10 @@ static const size_t DFA_CHUNK_SIZE_MAX = 200; /** \brief Max DFA states in a merged DFA. */ static const size_t DFA_MERGE_MAX_STATES = 8000; -/** \brief In block mode, merge two prefixes even if they don't have identical - * literal sets if they have fewer than this many states and the merged graph - * is also small. */ -static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; +/** \brief In block mode, merge two prefixes even if they don't have identical + * literal sets if they have fewer than this many states and the merged graph + * is also small. */ +static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; static size_t small_merge_max_vertices(const CompileContext &cc) { @@ -124,17 +124,17 @@ size_t small_rose_threshold(const CompileContext &cc) { * reports should not contribute to the hash. */ static -size_t hashLeftfix(const left_id &left) { +size_t hashLeftfix(const left_id &left) { size_t val = 0; - if (left.castle()) { - hash_combine(val, left.castle()->reach()); - for (const auto &pr : left.castle()->repeats) { + if (left.castle()) { + hash_combine(val, left.castle()->reach()); + for (const auto &pr : left.castle()->repeats) { hash_combine(val, pr.first); // top hash_combine(val, pr.second.bounds); } - } else if (left.graph()) { - hash_combine(val, hash_holder(*left.graph())); + } else if (left.graph()) { + hash_combine(val, hash_holder(*left.graph())); } return val; @@ -150,7 +150,7 @@ struct RoseGroup { const RoseGraph &g = build.g; assert(in_degree(v, g) == 1); RoseVertex u = *inv_adjacent_vertices(v, g).first; - parent = g[u].index; + parent = g[u].index; } bool operator<(const RoseGroup &b) const { @@ -180,24 +180,24 @@ private: }; /** - * Intended to find graphs that are identical except for their report - * IDs. Relies on vertex and edge indices to pick up graphs that have been - * messily put together in different orderings. Only implemented for castles and - * holders. + * Intended to find graphs that are identical except for their report + * IDs. Relies on vertex and edge indices to pick up graphs that have been + * messily put together in different orderings. Only implemented for castles and + * holders. 
*/ -static -bool is_equal(const left_id &u_left, ReportID u_report, - const left_id &v_left, ReportID v_report) { - if (u_left.castle() && v_left.castle()) { - return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); - } +static +bool is_equal(const left_id &u_left, ReportID u_report, + const left_id &v_left, ReportID v_report) { + if (u_left.castle() && v_left.castle()) { + return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); + } - if (!u_left.graph() || !v_left.graph()) { - return false; + if (!u_left.graph() || !v_left.graph()) { + return false; } - return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); -} + return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); +} } // namespace @@ -212,8 +212,8 @@ bool is_equal(const left_id &u_left, ReportID u_report, * * Note: only roles with a single predecessor vertex are considered for this * transform - it should probably be generalised to work for roles which share - * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or - * it should be retired entirely. + * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or + * it should be retired entirely. */ bool dedupeLeftfixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("deduping leftfixes\n"); @@ -248,7 +248,7 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { for (deque<RoseVertex> &verts : roses | map_values) { DEBUG_PRINTF("group has %zu vertices\n", verts.size()); - unordered_set<left_id> seen; + unordered_set<left_id> seen; for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { RoseVertex v = *jt; @@ -260,16 +260,16 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { } // Scan the rest of the list for dupes. - for (auto kt = std::next(jt); kt != jte; ++kt) { - if (g[v].left == g[*kt].left - || !is_equal(g[v].left, g[v].left.leftfix_report, - g[*kt].left, g[*kt].left.leftfix_report)) { + for (auto kt = std::next(jt); kt != jte; ++kt) { + if (g[v].left == g[*kt].left + || !is_equal(g[v].left, g[v].left.leftfix_report, + g[*kt].left, g[*kt].left.leftfix_report)) { continue; } // Dupe found. DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n", - g[*kt].index, g[v].index); + g[*kt].index, g[v].index); assert(g[v].left.lag == g[*kt].left.lag); g[*kt].left = g[v].left; work_done = true; @@ -320,7 +320,7 @@ bool is_equal(const suffix_id &s1, const suffix_id &s2) { void dedupeSuffixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("deduping suffixes\n"); - unordered_map<suffix_id, set<RoseVertex>> suffix_map; + unordered_map<suffix_id, set<RoseVertex>> suffix_map; map<pair<size_t, set<ReportID>>, vector<suffix_id>> part; // Collect suffixes into groups. @@ -387,7 +387,7 @@ template<class EngineRef> class Bouquet { private: list<EngineRef> ordering; // Unique list in insert order. 
- using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; + using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; BouquetMap bouquet; public: void insert(const EngineRef &h, RoseVertex v) { @@ -485,246 +485,246 @@ static void chunkBouquets(const Bouquet<EngineRef> &in, } } -static -bool stringsCanFinishAtSameSpot(const ue2_literal &u, - ue2_literal::const_iterator v_b, - ue2_literal::const_iterator v_e) { - ue2_literal::const_iterator u_e = u.end(); - ue2_literal::const_iterator u_b = u.begin(); - - while (u_e != u_b && v_e != v_b) { - --u_e; - --v_e; - - if (!overlaps(*u_e, *v_e)) { - return false; - } - } - - return true; -} - +static +bool stringsCanFinishAtSameSpot(const ue2_literal &u, + ue2_literal::const_iterator v_b, + ue2_literal::const_iterator v_e) { + ue2_literal::const_iterator u_e = u.end(); + ue2_literal::const_iterator u_b = u.begin(); + + while (u_e != u_b && v_e != v_b) { + --u_e; + --v_e; + + if (!overlaps(*u_e, *v_e)) { + return false; + } + } + + return true; +} + /** - * Check that if after u has been seen, that it is impossible for the arrival of - * v to require the inspection of an engine earlier than u did. - * - * Let delta be the earliest that v can be seen after u (may be zero) + * Check that if after u has been seen, that it is impossible for the arrival of + * v to require the inspection of an engine earlier than u did. * - * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) - * ==> - ulag <= delta - vlag - * ==> vlag - ulag <= delta + * Let delta be the earliest that v can be seen after u (may be zero) + * + * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) + * ==> - ulag <= delta - vlag + * ==> vlag - ulag <= delta */ static bool checkPrefix(const rose_literal_id &ul, const u32 ulag, const rose_literal_id &vl, const u32 vlag) { - DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, - escapeString(vl.s).c_str(), vlag); - - if (vl.delay || ul.delay) { - /* engine related literals should not be delayed anyway */ + DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, + escapeString(vl.s).c_str(), vlag); + + if (vl.delay || ul.delay) { + /* engine related literals should not be delayed anyway */ return false; } - if (ulag >= vlag) { - assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); - return true; - } - - size_t min_allowed_delta = vlag - ulag; - DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); - - for (size_t i = 0; i < min_allowed_delta; i++) { - if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { - DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); - return false; - } - } - - DEBUG_PRINTF("OK\n"); - return true; -} - -static -bool hasSameEngineType(const RoseVertexProps &u_prop, - const RoseVertexProps &v_prop) { - const left_id u_left = u_prop.left; - const left_id v_left = v_prop.left; - - return !u_left.haig() == !v_left.haig() - && !u_left.dfa() == !v_left.dfa() - && !u_left.castle() == !v_left.castle() - && !u_left.graph() == !v_left.graph(); -} - -/** - * Verifies that merging the leftfix of vertices does not cause conflicts due - * to the literals on the right. - * - * The main concern is that the lags of the literals and overlap between them - * allow the engine check offset to potentially regress. - * - * Parameters are vectors of literals + lag pairs. 
- * + if (ulag >= vlag) { + assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); + return true; + } + + size_t min_allowed_delta = vlag - ulag; + DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); + + for (size_t i = 0; i < min_allowed_delta; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; + } + } + + DEBUG_PRINTF("OK\n"); + return true; +} + +static +bool hasSameEngineType(const RoseVertexProps &u_prop, + const RoseVertexProps &v_prop) { + const left_id u_left = u_prop.left; + const left_id v_left = v_prop.left; + + return !u_left.haig() == !v_left.haig() + && !u_left.dfa() == !v_left.dfa() + && !u_left.castle() == !v_left.castle() + && !u_left.graph() == !v_left.graph(); +} + +/** + * Verifies that merging the leftfix of vertices does not cause conflicts due + * to the literals on the right. + * + * The main concern is that the lags of the literals and overlap between them + * allow the engine check offset to potentially regress. + * + * Parameters are vectors of literals + lag pairs. + * * Note: if more constraints of when the leftfixes were going to be checked - * (mandatory lookarounds passing, offset checks), more merges may be allowed. - */ -static -bool compatibleLiteralsForMerge( - const vector<pair<const rose_literal_id *, u32>> &ulits, - const vector<pair<const rose_literal_id *, u32>> &vlits) { - assert(!ulits.empty()); - assert(!vlits.empty()); - - // We cannot merge engines that prefix literals in different tables. - if (ulits[0].first->table != vlits[0].first->table) { + * (mandatory lookarounds passing, offset checks), more merges may be allowed. + */ +static +bool compatibleLiteralsForMerge( + const vector<pair<const rose_literal_id *, u32>> &ulits, + const vector<pair<const rose_literal_id *, u32>> &vlits) { + assert(!ulits.empty()); + assert(!vlits.empty()); + + // We cannot merge engines that prefix literals in different tables. + if (ulits[0].first->table != vlits[0].first->table) { DEBUG_PRINTF("literals in different tables\n"); return false; } - // We don't handle delayed cases yet. - for (const auto &ue : ulits) { - const rose_literal_id &ul = *ue.first; - if (ul.delay) { + // We don't handle delayed cases yet. + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + if (ul.delay) { return false; } } - for (const auto &ve : vlits) { - const rose_literal_id &vl = *ve.first; - if (vl.delay) { + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + if (vl.delay) { return false; } } - /* An engine requires that all accesses to it are ordered by offsets. (ie, - we can not check an engine's state at offset Y, if we have already - checked its status at offset X and X > Y). If we can not establish that + /* An engine requires that all accesses to it are ordered by offsets. (ie, + we can not check an engine's state at offset Y, if we have already + checked its status at offset X and X > Y). If we can not establish that the literals used for triggering will satisfy this property, then it is - not safe to merge the engine. 
*/ - for (const auto &ue : ulits) { - const rose_literal_id &ul = *ue.first; - u32 ulag = ue.second; - - for (const auto &ve : vlits) { - const rose_literal_id &vl = *ve.first; - u32 vlag = ve.second; - - if (!checkPrefix(ul, ulag, vl, vlag) - || !checkPrefix(vl, vlag, ul, ulag)) { - DEBUG_PRINTF("prefix check failed\n"); - return false; - } - } - } - - return true; -} - -/** - * True if this graph has few enough accel states to be implemented as an NFA - * with all of those states actually becoming accel schemes. - */ -static -bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { - u32 num = countAccelStates(g, &build.rm, build.cc); - DEBUG_PRINTF("graph with %zu vertices has %u accel states\n", - num_vertices(g), num); - return num <= NFA_MAX_ACCEL_STATES; -} - -/** - * In block mode, we want to be a little more selective -- We will only merge - * prefix engines when the literal sets are the same or if the merged graph - * has only grown by a small amount. - */ -static -bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, - RoseVertex v) { - assert(!build.cc.streaming); - assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); - - // Always merge infixes if we can (subject to the other criteria in - // mergeableRoseVertices). - if (!build.isRootSuccessor(u)) { - return true; - } - - const RoseGraph &g = build.g; - - // Merge prefixes with identical literal sets (as we'd have to run them - // both when we see those literals anyway). - if (g[u].literals == g[v].literals) { - return true; - } - - // The rest of this function only deals with the case when both vertices - // have graph leftfixes. - if (!g[u].left.graph || !g[v].left.graph) { - return false; - } - - const size_t u_count = num_vertices(*g[u].left.graph); - const size_t v_count = num_vertices(*g[v].left.graph); - DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", - u_count, v_count); - if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || - v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("prefixes too big already\n"); - return false; - } - - DEBUG_PRINTF("trying merge\n"); - NGHolder h; - cloneHolder(h, *g[v].left.graph); - if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { - DEBUG_PRINTF("couldn't merge\n"); - return false; - } - - const size_t merged_count = num_vertices(h); - DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); - if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("exceeded limit\n"); - return false; - } - - // We want to only perform merges that take advantage of some - // commonality in the two input graphs, so we check that the number of - // vertices has only grown a small amount: somewhere between the sum - // (no commonality) and the max (no growth at all) of the vertex counts - // of the input graphs. - const size_t max_size = u_count + v_count; - const size_t min_size = max(u_count, v_count); - const size_t max_growth = ((max_size - min_size) * 25) / 100; - if (merged_count > min_size + max_growth) { - DEBUG_PRINTF("grew too much\n"); - return false; - } - - // We don't want to squander any chances at accelerating. 
- if (!isAccelerableLeftfix(build, h) && - (isAccelerableLeftfix(build, *g[u].left.graph) || - isAccelerableLeftfix(build, *g[v].left.graph))) { - DEBUG_PRINTF("would lose accel property\n"); - return false; - } - - DEBUG_PRINTF("safe to merge\n"); - return true; -} - -bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, - RoseVertex v) { - assert(u != v); - - if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { - return false; - } - - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { - return false; - } - + not safe to merge the engine. */ + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + u32 ulag = ue.second; + + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + u32 vlag = ve.second; + + if (!checkPrefix(ul, ulag, vl, vlag) + || !checkPrefix(vl, vlag, ul, ulag)) { + DEBUG_PRINTF("prefix check failed\n"); + return false; + } + } + } + + return true; +} + +/** + * True if this graph has few enough accel states to be implemented as an NFA + * with all of those states actually becoming accel schemes. + */ +static +bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { + u32 num = countAccelStates(g, &build.rm, build.cc); + DEBUG_PRINTF("graph with %zu vertices has %u accel states\n", + num_vertices(g), num); + return num <= NFA_MAX_ACCEL_STATES; +} + +/** + * In block mode, we want to be a little more selective -- We will only merge + * prefix engines when the literal sets are the same or if the merged graph + * has only grown by a small amount. + */ +static +bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, + RoseVertex v) { + assert(!build.cc.streaming); + assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). + if (!build.isRootSuccessor(u)) { + return true; + } + + const RoseGraph &g = build.g; + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). + if (g[u].literals == g[v].literals) { + return true; + } + + // The rest of this function only deals with the case when both vertices + // have graph leftfixes. + if (!g[u].left.graph || !g[v].left.graph) { + return false; + } + + const size_t u_count = num_vertices(*g[u].left.graph); + const size_t v_count = num_vertices(*g[v].left.graph); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, *g[v].left.graph); + if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. 
+ const size_t max_size = u_count + v_count; + const size_t min_size = max(u_count, v_count); + const size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. + if (!isAccelerableLeftfix(build, h) && + (isAccelerableLeftfix(build, *g[u].left.graph) || + isAccelerableLeftfix(build, *g[v].left.graph))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + +bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, + RoseVertex v) { + assert(u != v); + + if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { + return false; + } + + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { + return false; + } + /* We cannot merge prefixes/vertices if they are successors of different * root vertices */ if (tbi.isRootSuccessor(u)) { @@ -739,105 +739,105 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, } } - u32 ulag = tbi.g[u].left.lag; - vector<pair<const rose_literal_id *, u32>> ulits; - ulits.reserve(tbi.g[u].literals.size()); - for (u32 id : tbi.g[u].literals) { - ulits.emplace_back(&tbi.literals.at(id), ulag); - } + u32 ulag = tbi.g[u].left.lag; + vector<pair<const rose_literal_id *, u32>> ulits; + ulits.reserve(tbi.g[u].literals.size()); + for (u32 id : tbi.g[u].literals) { + ulits.emplace_back(&tbi.literals.at(id), ulag); + } - u32 vlag = tbi.g[v].left.lag; - vector<pair<const rose_literal_id *, u32>> vlits; - vlits.reserve(tbi.g[v].literals.size()); - for (u32 id : tbi.g[v].literals) { - vlits.emplace_back(&tbi.literals.at(id), vlag); - } + u32 vlag = tbi.g[v].left.lag; + vector<pair<const rose_literal_id *, u32>> vlits; + vlits.reserve(tbi.g[v].literals.size()); + for (u32 id : tbi.g[v].literals) { + vlits.emplace_back(&tbi.literals.at(id), vlag); + } - if (!compatibleLiteralsForMerge(ulits, vlits)) { - return false; + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; } - DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, - tbi.g[v].index); + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, + tbi.g[v].index); return true; } -/* We cannot merge an engine, if a trigger literal and a post literal overlap - * in such a way that engine status needs to be check at a point before the - * engine's current location. - * - * i.e., for a trigger literal u and a pos literal v, - * where delta is the earliest v can appear after t, - * we require that v_loc - v_lag >= u_loc - * ==> u_loc + delta - v_lag >= u_loc - * ==> delta >= v_lag - * - */ +/* We cannot merge an engine, if a trigger literal and a post literal overlap + * in such a way that engine status needs to be check at a point before the + * engine's current location. 
+ * + * i.e., for a trigger literal u and a pos literal v, + * where delta is the earliest v can appear after t, + * we require that v_loc - v_lag >= u_loc + * ==> u_loc + delta - v_lag >= u_loc + * ==> delta >= v_lag + * + */ static -bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, - u32 vlag) { - DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), - escapeString(vl.s).c_str(), vlag); - - for (size_t i = 0; i < vlag; i++) { - if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { - DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); - return false; +bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, + u32 vlag) { + DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), + escapeString(vl.s).c_str(), vlag); + + for (size_t i = 0; i < vlag; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; } } - - DEBUG_PRINTF("OK\n"); + + DEBUG_PRINTF("OK\n"); return true; } -template<typename VertexCont> -static never_inline -bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, - const VertexCont &v2) { - flat_set<RoseVertex> preds; +template<typename VertexCont> +static never_inline +bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, + const VertexCont &v2) { + flat_set<RoseVertex> preds; for (auto v : v1) { - insert(&preds, inv_adjacent_vertices(v, build.g)); - } - - flat_set<u32> pred_lits; - - /* No need to examine delays of a common pred - as it must already have - * survived the delay checks. - * - * This is important when the pred is in the anchored table as - * the literal is no longer available. */ - flat_set<RoseVertex> known_good_preds; - for (auto v : v2) { - insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); - } - + insert(&preds, inv_adjacent_vertices(v, build.g)); + } + + flat_set<u32> pred_lits; + + /* No need to examine delays of a common pred - as it must already have + * survived the delay checks. + * + * This is important when the pred is in the anchored table as + * the literal is no longer available. */ + flat_set<RoseVertex> known_good_preds; + for (auto v : v2) { + insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); + } + for (auto u : preds) { - if (!contains(known_good_preds, u)) { - insert(&pred_lits, build.g[u].literals); - } - } - - vector<const rose_literal_id *> pred_rose_lits; - pred_rose_lits.reserve(pred_lits.size()); - for (const auto &p : pred_lits) { - pred_rose_lits.push_back(&build.literals.at(p)); - } - - for (auto v : v2) { - u32 vlag = build.g[v].left.lag; - if (!vlag) { - continue; - } - - for (const u32 vlit : build.g[v].literals) { - const rose_literal_id &vl = build.literals.at(vlit); - assert(!vl.delay); // this should never have got this far? - for (const auto &ul : pred_rose_lits) { - assert(!ul->delay); // this should never have got this far? 
- - if (!checkPredDelay(*ul, vl, vlag)) { - return false; - } + if (!contains(known_good_preds, u)) { + insert(&pred_lits, build.g[u].literals); + } + } + + vector<const rose_literal_id *> pred_rose_lits; + pred_rose_lits.reserve(pred_lits.size()); + for (const auto &p : pred_lits) { + pred_rose_lits.push_back(&build.literals.at(p)); + } + + for (auto v : v2) { + u32 vlag = build.g[v].left.lag; + if (!vlag) { + continue; + } + + for (const u32 vlit : build.g[v].literals) { + const rose_literal_id &vl = build.literals.at(vlit); + assert(!vl.delay); // this should never have got this far? + for (const auto &ul : pred_rose_lits) { + assert(!ul->delay); // this should never have got this far? + + if (!checkPredDelay(*ul, vl, vlag)) { + return false; + } } } } @@ -849,65 +849,65 @@ static bool mergeableRoseVertices(const RoseBuildImpl &tbi, const deque<RoseVertex> &verts1, const deque<RoseVertex> &verts2) { - assert(!verts1.empty()); - assert(!verts2.empty()); - - RoseVertex u_front = verts1.front(); - RoseVertex v_front = verts2.front(); - - /* all vertices must have the same engine type: assume all verts in each - * group are already of the same type */ - if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { - return false; - } - - bool is_prefix = tbi.isRootSuccessor(u_front); - - /* We cannot merge prefixes/vertices if they are successors of different - * root vertices: similarly, assume the grouped vertices are compatible */ - if (is_prefix) { - assert(tbi.isRootSuccessor(v_front)); - set<RoseVertex> u_preds; - set<RoseVertex> v_preds; - insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); - insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); - - if (u_preds != v_preds) { - return false; - } - } - - vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ - for (auto a : verts1) { - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { - return false; - } - - u32 ulag = tbi.g[a].left.lag; - for (u32 id : tbi.g[a].literals) { - ulits.emplace_back(&tbi.literals.at(id), ulag); - } - } - - vector<pair<const rose_literal_id *, u32>> vlits; - for (auto a : verts2) { - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { - return false; - } - - u32 vlag = tbi.g[a].left.lag; - for (u32 id : tbi.g[a].literals) { - vlits.emplace_back(&tbi.literals.at(id), vlag); - } - } - - if (!compatibleLiteralsForMerge(ulits, vlits)) { - return false; - } - + assert(!verts1.empty()); + assert(!verts2.empty()); + + RoseVertex u_front = verts1.front(); + RoseVertex v_front = verts2.front(); + + /* all vertices must have the same engine type: assume all verts in each + * group are already of the same type */ + if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { + return false; + } + + bool is_prefix = tbi.isRootSuccessor(u_front); + + /* We cannot merge prefixes/vertices if they are successors of different + * root vertices: similarly, assume the grouped vertices are compatible */ + if (is_prefix) { + assert(tbi.isRootSuccessor(v_front)); + set<RoseVertex> u_preds; + set<RoseVertex> v_preds; + insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); + insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); + + if (u_preds != v_preds) { + return false; + } + } + + vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ + for (auto a : verts1) { + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { + return false; + } + + u32 ulag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + ulits.emplace_back(&tbi.literals.at(id), 
ulag); + } + } + + vector<pair<const rose_literal_id *, u32>> vlits; + for (auto a : verts2) { + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + return false; + } + + u32 vlag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + vlits.emplace_back(&tbi.literals.at(id), vlag); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + // Check preds are compatible as well. - if (!checkPredDelays(tbi, verts1, verts2) - || !checkPredDelays(tbi, verts2, verts1)) { + if (!checkPredDelays(tbi, verts1, verts2) + || !checkPredDelays(tbi, verts2, verts1)) { return false; } @@ -968,35 +968,35 @@ struct RoseMergeCandidate { } static -bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, - const vector<RoseVertex> &verts1, - const vector<RoseVertex> &verts2) { +bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, + const vector<RoseVertex> &verts1, + const vector<RoseVertex> &verts2) { assert(!verts1.empty() && !verts2.empty()); - DEBUG_PRINTF("merging pair of leftfixes:\n"); - DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), - as_string_list(all_tops(r1)).c_str()); - DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), - as_string_list(all_tops(r2)).c_str()); - - RoseGraph &g = build.g; + DEBUG_PRINTF("merging pair of leftfixes:\n"); + DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), + as_string_list(all_tops(r1)).c_str()); + DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), + as_string_list(all_tops(r2)).c_str()); + RoseGraph &g = build.g; + if (r1.graph()) { assert(r2.graph()); assert(r1.graph()->kind == r2.graph()->kind); - if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { + if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { DEBUG_PRINTF("nfa merge failed\n"); return false; } - /* The graph in r1 has been merged into the graph in r2. Update r1's - * vertices with the new graph ptr. mergeNfaPair() does not alter the - * tops from the input graph so no need to update top values. - * - * It is the responsibility of the caller to ensure that the tops are - * distinct when they have different trigger conditions. - * [Note: mergeLeftfixesVariableLag() should have a common parent set] - */ + /* The graph in r1 has been merged into the graph in r2. Update r1's + * vertices with the new graph ptr. mergeNfaPair() does not alter the + * tops from the input graph so no need to update top values. + * + * It is the responsibility of the caller to ensure that the tops are + * distinct when they have different trigger conditions. + * [Note: mergeLeftfixesVariableLag() should have a common parent set] + */ shared_ptr<NGHolder> &h = g[verts2.front()].left.graph; for (RoseVertex v : verts1) { g[v].left.graph = h; @@ -1005,7 +1005,7 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, return true; } else if (r1.castle()) { assert(r2.castle()); - assert(build.cc.grey.allowCastle); + assert(build.cc.grey.allowCastle); map<u32, u32> top_map; if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { @@ -1029,200 +1029,200 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, return false; } -/** - * Checks that there is no problem due to the involved vertices if we merge two - * leftfix engines. - * - * This functions takes the vertices on the right of the two engines. - * - * Unlike mergeableRoseVertices(), this does not: - * - check that engines themselves can be merged - * - use heuristics to find out if merging the engines is wise. 
- */ +/** + * Checks that there is no problem due to the involved vertices if we merge two + * leftfix engines. + * + * This functions takes the vertices on the right of the two engines. + * + * Unlike mergeableRoseVertices(), this does not: + * - check that engines themselves can be merged + * - use heuristics to find out if merging the engines is wise. + */ static -bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, - const vector<RoseVertex> &targets_1, - const vector<RoseVertex> &targets_2) { - assert(!targets_1.empty()); - assert(!targets_2.empty()); - - vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ - for (auto a : targets_1) { - u32 ulag = build.g[a].left.lag; - for (u32 id : build.g[a].literals) { - ulits.emplace_back(&build.literals.at(id), ulag); - } - } - - vector<pair<const rose_literal_id *, u32>> vlits; - for (auto a : targets_2) { - u32 vlag = build.g[a].left.lag; - for (u32 id : build.g[a].literals) { - vlits.emplace_back(&build.literals.at(id), vlag); - } - } - - if (!compatibleLiteralsForMerge(ulits, vlits)) { - return false; - } - - // Check preds are compatible as well. - if (!checkPredDelays(build, targets_1, targets_2) - || !checkPredDelays(build, targets_2, targets_1)) { - return false; - } - - DEBUG_PRINTF("vertex sets are mergeable\n"); - return true; -} - -/** - * In block mode, we want to be a little more selective -- we will only merge - * prefix engines when the literal sets are the same or if the merged graph - * has only grown by a small amount. - */ -static -bool goodBlockModeMerge(const RoseBuildImpl &build, - const vector<RoseVertex> &u_verts, const left_id &u_eng, - const vector<RoseVertex> &v_verts, - const left_id &v_eng) { - assert(!build.cc.streaming); - - // Always merge infixes if we can (subject to the other criteria in - // mergeableRoseVertices). - if (!build.isRootSuccessor(u_verts.front())) { - return true; - } - - const RoseGraph &g = build.g; - - flat_set<u32> u_lits; - for (RoseVertex u : u_verts) { - insert(&u_lits, g[u].literals); - } - - flat_set<u32> v_lits; - for (RoseVertex v : v_verts) { - insert(&v_lits, g[v].literals); - } - - // Merge prefixes with identical literal sets (as we'd have to run them - // both when we see those literals anyway). - if (u_lits == v_lits) { - return true; - } - - // The rest of this function only deals with the case when have graph - // leftfixes. 
- if (!u_eng.graph()) { - return false; - } - assert(v_eng.graph()); - const NGHolder &ug = *u_eng.graph(); - const NGHolder &vg = *v_eng.graph(); - - size_t u_count = num_vertices(ug); - size_t v_count = num_vertices(vg); - DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", - u_count, v_count); - if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || - v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("prefixes too big already\n"); - return false; - } - - DEBUG_PRINTF("trying merge\n"); - NGHolder h; - cloneHolder(h, vg); - if (!mergeNfaPair(ug, h, nullptr, build.cc)) { - DEBUG_PRINTF("couldn't merge\n"); - return false; - } - - const size_t merged_count = num_vertices(h); - DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); - if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("exceeded limit\n"); - return false; - } - - // We want to only perform merges that take advantage of some - // commonality in the two input graphs, so we check that the number of - // vertices has only grown a small amount: somewhere between the sum - // (no commonality) and the max (no growth at all) of the vertex counts - // of the input graphs. - size_t max_size = u_count + v_count; - size_t min_size = max(u_count, v_count); - size_t max_growth = ((max_size - min_size) * 25) / 100; - if (merged_count > min_size + max_growth) { - DEBUG_PRINTF("grew too much\n"); - return false; - } - - // We don't want to squander any chances at accelerating. - if (!isAccelerableLeftfix(build, h) - && (isAccelerableLeftfix(build, ug) - || isAccelerableLeftfix(build, vg))) { - DEBUG_PRINTF("would lose accel property\n"); - return false; - } - - DEBUG_PRINTF("safe to merge\n"); - return true; -} - -/** - * Merge r1 into r2 if safe and appropriate. Returns true on success. - */ -static -bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, - const vector<RoseVertex> &targets_1, - left_id &r2, - const vector<RoseVertex> &targets_2) { - if (targets_1.empty() || targets_2.empty()) { - /* one of the engines has already been merged away */ - return false; - } - - assert(!r1.graph() == !r2.graph()); - if (r1.graph()) { - NGHolder *h1 = r1.graph(); - NGHolder *h2 = r2.graph(); - CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); - CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); - CharReach stopboth = stop1 & stop2; - DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(), - stop2.count(), stopboth.count()); - if (stopboth.count() < 10 - && (stop1.count() > 10 || stop2.count() > 10)) { - DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); - return false; - } - size_t maxstop = max(stop1.count(), stop2.count()); - if (maxstop > 200 && stopboth.count() < 200) { - DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); - return false; - } - } - - /* Rechecking that the targets are compatible, as we may have already - * merged new states into r1 or r2 and we need to verify that this - * candidate is still ok. 
*/ - if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { - return false; - } - - if (!build.cc.streaming - && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { - return false; - } - - return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); +bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, + const vector<RoseVertex> &targets_1, + const vector<RoseVertex> &targets_2) { + assert(!targets_1.empty()); + assert(!targets_2.empty()); + + vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ + for (auto a : targets_1) { + u32 ulag = build.g[a].left.lag; + for (u32 id : build.g[a].literals) { + ulits.emplace_back(&build.literals.at(id), ulag); + } + } + + vector<pair<const rose_literal_id *, u32>> vlits; + for (auto a : targets_2) { + u32 vlag = build.g[a].left.lag; + for (u32 id : build.g[a].literals) { + vlits.emplace_back(&build.literals.at(id), vlag); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + + // Check preds are compatible as well. + if (!checkPredDelays(build, targets_1, targets_2) + || !checkPredDelays(build, targets_2, targets_1)) { + return false; + } + + DEBUG_PRINTF("vertex sets are mergeable\n"); + return true; +} + +/** + * In block mode, we want to be a little more selective -- we will only merge + * prefix engines when the literal sets are the same or if the merged graph + * has only grown by a small amount. + */ +static +bool goodBlockModeMerge(const RoseBuildImpl &build, + const vector<RoseVertex> &u_verts, const left_id &u_eng, + const vector<RoseVertex> &v_verts, + const left_id &v_eng) { + assert(!build.cc.streaming); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). + if (!build.isRootSuccessor(u_verts.front())) { + return true; + } + + const RoseGraph &g = build.g; + + flat_set<u32> u_lits; + for (RoseVertex u : u_verts) { + insert(&u_lits, g[u].literals); + } + + flat_set<u32> v_lits; + for (RoseVertex v : v_verts) { + insert(&v_lits, g[v].literals); + } + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). + if (u_lits == v_lits) { + return true; + } + + // The rest of this function only deals with the case when have graph + // leftfixes. + if (!u_eng.graph()) { + return false; + } + assert(v_eng.graph()); + const NGHolder &ug = *u_eng.graph(); + const NGHolder &vg = *v_eng.graph(); + + size_t u_count = num_vertices(ug); + size_t v_count = num_vertices(vg); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, vg); + if (!mergeNfaPair(ug, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. 
+ size_t max_size = u_count + v_count; + size_t min_size = max(u_count, v_count); + size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. + if (!isAccelerableLeftfix(build, h) + && (isAccelerableLeftfix(build, ug) + || isAccelerableLeftfix(build, vg))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + +/** + * Merge r1 into r2 if safe and appropriate. Returns true on success. + */ +static +bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, + const vector<RoseVertex> &targets_1, + left_id &r2, + const vector<RoseVertex> &targets_2) { + if (targets_1.empty() || targets_2.empty()) { + /* one of the engines has already been merged away */ + return false; + } + + assert(!r1.graph() == !r2.graph()); + if (r1.graph()) { + NGHolder *h1 = r1.graph(); + NGHolder *h2 = r2.graph(); + CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); + CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); + CharReach stopboth = stop1 & stop2; + DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(), + stop2.count(), stopboth.count()); + if (stopboth.count() < 10 + && (stop1.count() > 10 || stop2.count() > 10)) { + DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); + return false; + } + size_t maxstop = max(stop1.count(), stop2.count()); + if (maxstop > 200 && stopboth.count() < 200) { + DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); + return false; + } + } + + /* Rechecking that the targets are compatible, as we may have already + * merged new states into r1 or r2 and we need to verify that this + * candidate is still ok. */ + if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { + return false; + } + + if (!build.cc.streaming + && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { + return false; + } + + return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); } static bool nfaHasNarrowStart(const NGHolder &g) { - if (out_degree(g.startDs, g) > 1) { + if (out_degree(g.startDs, g) > 1) { return false; // unanchored } @@ -1267,91 +1267,91 @@ bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { static u32 commonPrefixLength(left_id &r1, left_id &r2) { if (r1.graph() && r2.graph()) { - return commonPrefixLength(*r1.graph(), *r2.graph()); + return commonPrefixLength(*r1.graph(), *r2.graph()); } else if (r1.castle() && r2.castle()) { return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); } return 0; } -namespace { -struct MergeKey { - MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : - parents(std::move(parents_in)) { - - // We want to distinguish prefixes (but not infixes) on whether they - // have a narrow start or max width. 
- if (left.graph() && !is_triggered(*left.graph())) { - const NGHolder &h = *left.graph(); - narrowStart = nfaHasNarrowStart(h); - hasMaxWidth = nfaHasFiniteMaxWidth(h); - } else { - narrowStart = false; - hasMaxWidth = false; - } - - if (left.castle()) { - /* castles should have a non-empty reach */ - assert(left.castle()->reach().any()); - castle_cr = left.castle()->reach(); - } else { - assert(left.graph()); - } - } - - bool operator<(const MergeKey &b) const { - const MergeKey &a = *this; - ORDER_CHECK(narrowStart); - ORDER_CHECK(hasMaxWidth); - ORDER_CHECK(castle_cr); - ORDER_CHECK(parents); - return false; - } - - // NOTE: these two bool discriminators are only used for prefixes, not - // infixes. - bool narrowStart; - bool hasMaxWidth; - CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */ - - flat_set<RoseVertex> parents; -}; -} - -template <typename T> -static -void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) { - if (in.size() <= chunk_size) { - out->push_back(std::move(in)); - return; - } - - out->push_back(vector<T>()); - out->back().reserve(chunk_size); - for (const auto &t : in) { - if (out->back().size() >= chunk_size) { - out->push_back(vector<T>()); - out->back().reserve(chunk_size); - } - out->back().push_back(std::move(t)); - } -} - -static -insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { - insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; - for (auto v : vertices_range(g)) { - const auto &left = g[v].left; - if (!left) { - continue; - } - assert(contains(all_reports(left), left.leftfix_report)); - eng_verts[left].push_back(v); - } - - return eng_verts; -} - +namespace { +struct MergeKey { + MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : + parents(std::move(parents_in)) { + + // We want to distinguish prefixes (but not infixes) on whether they + // have a narrow start or max width. + if (left.graph() && !is_triggered(*left.graph())) { + const NGHolder &h = *left.graph(); + narrowStart = nfaHasNarrowStart(h); + hasMaxWidth = nfaHasFiniteMaxWidth(h); + } else { + narrowStart = false; + hasMaxWidth = false; + } + + if (left.castle()) { + /* castles should have a non-empty reach */ + assert(left.castle()->reach().any()); + castle_cr = left.castle()->reach(); + } else { + assert(left.graph()); + } + } + + bool operator<(const MergeKey &b) const { + const MergeKey &a = *this; + ORDER_CHECK(narrowStart); + ORDER_CHECK(hasMaxWidth); + ORDER_CHECK(castle_cr); + ORDER_CHECK(parents); + return false; + } + + // NOTE: these two bool discriminators are only used for prefixes, not + // infixes. + bool narrowStart; + bool hasMaxWidth; + CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. 
*/ + + flat_set<RoseVertex> parents; +}; +} + +template <typename T> +static +void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) { + if (in.size() <= chunk_size) { + out->push_back(std::move(in)); + return; + } + + out->push_back(vector<T>()); + out->back().reserve(chunk_size); + for (const auto &t : in) { + if (out->back().size() >= chunk_size) { + out->push_back(vector<T>()); + out->back().reserve(chunk_size); + } + out->back().push_back(std::move(t)); + } +} + +static +insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; + for (auto v : vertices_range(g)) { + const auto &left = g[v].left; + if (!left) { + continue; + } + assert(contains(all_reports(left), left.leftfix_report)); + eng_verts[left].push_back(v); + } + + return eng_verts; +} + /** * This pass attempts to merge prefix/infix engines which share a common set of * parent vertices. @@ -1363,9 +1363,9 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { * the stop alphabet. * * Infixes: - * - It is expected that when this is run all infixes are still at the single - * top stage as we have not yet merged unrelated infixes together. After - * execution, castles may have multiple (but equivalent) tops. + * - It is expected that when this is run all infixes are still at the single + * top stage as we have not yet merged unrelated infixes together. After + * execution, castles may have multiple (but equivalent) tops. * * Prefixes: * - transient prefixes are not considered. @@ -1375,48 +1375,48 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { * - merges are not considered in cases where dot star start state will be * reformed to optimise a leading repeat. */ -void mergeLeftfixesVariableLag(RoseBuildImpl &build) { - if (!build.cc.grey.mergeRose) { +void mergeLeftfixesVariableLag(RoseBuildImpl &build) { + if (!build.cc.grey.mergeRose) { return; } - assert(!hasOrphanedTops(build)); + assert(!hasOrphanedTops(build)); - RoseGraph &g = build.g; + RoseGraph &g = build.g; DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("entry\n"); DEBUG_PRINTF("-----\n"); - auto eng_verts = get_eng_verts(g); + auto eng_verts = get_eng_verts(g); - map<MergeKey, vector<left_id>> engine_groups; - for (const auto &e : eng_verts) { - const left_id &left = e.first; - const auto &verts = e.second; + map<MergeKey, vector<left_id>> engine_groups; + for (const auto &e : eng_verts) { + const left_id &left = e.first; + const auto &verts = e.second; // Only non-transient for the moment. - if (contains(build.transient, left)) { + if (contains(build.transient, left)) { continue; } // No forced McClellan or Haig infix merges. 
- if (left.dfa() || left.haig()) { + if (left.dfa() || left.haig()) { continue; } - assert(left.graph() || left.castle()); + assert(left.graph() || left.castle()); - if (left.graph()) { - const NGHolder &h = *left.graph(); - /* we should not have merged yet */ - assert(!is_triggered(h) || onlyOneTop(h)); + if (left.graph()) { + const NGHolder &h = *left.graph(); + /* we should not have merged yet */ + assert(!is_triggered(h) || onlyOneTop(h)); - if (hasReformedStartDotStar(h, build.cc.grey)) { + if (hasReformedStartDotStar(h, build.cc.grey)) { continue; // preserve the optimisation of the leading repeat } - } else { - assert(left.castle()); + } else { + assert(left.castle()); - if (!build.cc.grey.allowCastle) { - DEBUG_PRINTF("castle merging disallowed by greybox\n"); + if (!build.cc.grey.allowCastle) { + DEBUG_PRINTF("castle merging disallowed by greybox\n"); continue; } } @@ -1425,20 +1425,20 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { // parents, so that we can merge differently-anchored prefix roses // together. (Prompted by UE-2100) - flat_set<RoseVertex> parents; - for (RoseVertex v : verts) { - insert(&parents, inv_adjacent_vertices_range(v, g)); + flat_set<RoseVertex> parents; + for (RoseVertex v : verts) { + insert(&parents, inv_adjacent_vertices_range(v, g)); } - if (contains(parents, build.anchored_root)) { - parents.erase(build.anchored_root); - parents.insert(build.root); + if (contains(parents, build.anchored_root)) { + parents.erase(build.anchored_root); + parents.insert(build.root); } - assert(!parents.empty()); - + assert(!parents.empty()); + #ifndef _WIN32 - engine_groups[MergeKey(left, parents)].push_back(left); + engine_groups[MergeKey(left, parents)].push_back(left); #else // On windows, when passing MergeKey object into map 'engine_groups', // it will not be copied, but will be freed along with @@ -1452,59 +1452,59 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { #endif } - vector<vector<left_id>> chunks; - for (auto &raw_group : engine_groups | map_values) { - chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); - } - engine_groups.clear(); - - DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size()); - - for (auto &roses : chunks) { - if (roses.size() < 2) { + vector<vector<left_id>> chunks; + for (auto &raw_group : engine_groups | map_values) { + chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); + } + engine_groups.clear(); + + DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size()); + + for (auto &roses : chunks) { + if (roses.size() < 2) { continue; } - // All pairs on the prio queue. - u32 tie_breaker = 0; - priority_queue<RoseMergeCandidate> pq; - for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { - left_id r1 = *it; - const vector<RoseVertex> &targets_1 = eng_verts[r1]; - - for (auto jt = next(it); jt != ite; ++jt) { - left_id r2 = *jt; - - /* we should have already split on engine types and reach */ - assert(!r1.castle() == !r2.castle()); - assert(!r1.graph() == !r2.graph()); - assert(!r1.castle() - || r1.castle()->reach() == r2.castle()->reach()); - - const vector<RoseVertex> &targets_2 = eng_verts[r2]; - if (!checkVerticesOkForLeftfixMerge(build, targets_1, - targets_2)) { - continue; // No point queueing unmergeable cases. 
- } - - u32 cpl = commonPrefixLength(r1, r2); - pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); - } - } - - DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); - - while (!pq.empty()) { - left_id r1 = pq.top().r1; - left_id r2 = pq.top().r2; - DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n", - r1.graph(), r2.graph(), pq.top().cpl, pq.top().states); - pq.pop(); - vector<RoseVertex> &targets_1 = eng_verts[r1]; - vector<RoseVertex> &targets_2 = eng_verts[r2]; - if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, - targets_2)) { - insert(&targets_2, targets_2.end(), targets_1); - targets_1.clear(); + // All pairs on the prio queue. + u32 tie_breaker = 0; + priority_queue<RoseMergeCandidate> pq; + for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { + left_id r1 = *it; + const vector<RoseVertex> &targets_1 = eng_verts[r1]; + + for (auto jt = next(it); jt != ite; ++jt) { + left_id r2 = *jt; + + /* we should have already split on engine types and reach */ + assert(!r1.castle() == !r2.castle()); + assert(!r1.graph() == !r2.graph()); + assert(!r1.castle() + || r1.castle()->reach() == r2.castle()->reach()); + + const vector<RoseVertex> &targets_2 = eng_verts[r2]; + if (!checkVerticesOkForLeftfixMerge(build, targets_1, + targets_2)) { + continue; // No point queueing unmergeable cases. + } + + u32 cpl = commonPrefixLength(r1, r2); + pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); + } + } + + DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); + + while (!pq.empty()) { + left_id r1 = pq.top().r1; + left_id r2 = pq.top().r2; + DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n", + r1.graph(), r2.graph(), pq.top().cpl, pq.top().states); + pq.pop(); + vector<RoseVertex> &targets_1 = eng_verts[r1]; + vector<RoseVertex> &targets_2 = eng_verts[r2]; + if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, + targets_2)) { + insert(&targets_2, targets_2.end(), targets_1); + targets_1.clear(); } } } @@ -1512,7 +1512,7 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("exit\n"); DEBUG_PRINTF("-----\n"); - assert(!hasOrphanedTops(build)); + assert(!hasOrphanedTops(build)); } namespace { @@ -1521,15 +1521,15 @@ namespace { * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. */ struct DedupeLeftKey { - DedupeLeftKey(const RoseBuildImpl &build, - flat_set<pair<size_t, u32>> preds_in, const left_id &left) - : left_hash(hashLeftfix(left)), preds(move(preds_in)), - transient(contains(build.transient, left)) { + DedupeLeftKey(const RoseBuildImpl &build, + flat_set<pair<size_t, u32>> preds_in, const left_id &left) + : left_hash(hashLeftfix(left)), preds(move(preds_in)), + transient(contains(build.transient, left)) { } bool operator<(const DedupeLeftKey &b) const { - return tie(left_hash, preds, transient) - < tie(b.left_hash, b.preds, b.transient); + return tie(left_hash, preds, transient) + < tie(b.left_hash, b.preds, b.transient); } private: @@ -1538,23 +1538,23 @@ private: size_t left_hash; /** For each in-edge, the pair of (parent index, edge top). */ - flat_set<pair<size_t, u32>> preds; - - /** We don't want to combine transient with non-transient. */ - bool transient; + flat_set<pair<size_t, u32>> preds; + + /** We don't want to combine transient with non-transient. 
*/ + bool transient; }; } // namespace -static -flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { - flat_set<pair<size_t, u32>> preds; - for (const auto &e : in_edges_range(v, g)) { - preds.emplace(g[source(e, g)].index, g[e].rose_top); - } - return preds; -} - +static +flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { + flat_set<pair<size_t, u32>> preds; + for (const auto &e : in_edges_range(v, g)) { + preds.emplace(g[source(e, g)].index, g[e].rose_top); + } + return preds; +} + /** * This is a generalisation of \ref dedupeLeftfixes which relaxes two * restrictions: multiple predecessor roles are allowed and the delay used by @@ -1572,99 +1572,99 @@ flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { * successor may want to inspect it; the overlap relationships between the * involved literals are examined to ensure that this property holds. * - * Note: this is unable to dedupe when delayed literals are involved unlike - * dedupeLeftfixes. + * Note: this is unable to dedupe when delayed literals are involved unlike + * dedupeLeftfixes. */ -void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { +void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - RoseGraph &g = build.g; - auto eng_verts = get_eng_verts(g); + RoseGraph &g = build.g; + auto eng_verts = get_eng_verts(g); - map<DedupeLeftKey, vector<left_id>> engine_groups; - for (const auto &e : eng_verts) { - const left_id &left = e.first; - const auto &verts = e.second; + map<DedupeLeftKey, vector<left_id>> engine_groups; + for (const auto &e : eng_verts) { + const left_id &left = e.first; + const auto &verts = e.second; - /* There should only be one report on an engine as no merges have - * happened yet. (aside from eod prefixes) */ - if (all_reports(left).size() != 1) { - assert(any_of_in(adjacent_vertices_range(verts.front(), g), - [&](RoseVertex w) { return g[w].eod_accept; })); + /* There should only be one report on an engine as no merges have + * happened yet. 
(aside from eod prefixes) */ + if (all_reports(left).size() != 1) { + assert(any_of_in(adjacent_vertices_range(verts.front(), g), + [&](RoseVertex w) { return g[w].eod_accept; })); continue; } - if (left.haig()) { - /* TODO: allow deduping of identical haigs */ + if (left.haig()) { + /* TODO: allow deduping of identical haigs */ continue; } - if (left.graph()) { - /* we should not have merged yet */ - assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); - } - - auto preds = get_pred_tops(verts.front(), g); - for (RoseVertex v : verts) { - if (preds != get_pred_tops(v, g)) { - DEBUG_PRINTF("distinct pred sets\n"); - continue; - } - } - engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left); - } - - /* We don't bother chunking as we expect deduping to be successful if the - * hashes match */ - - for (auto &group : engine_groups | map_values) { - DEBUG_PRINTF("group of %zu roses\n", group.size()); - - if (group.size() < 2) { + if (left.graph()) { + /* we should not have merged yet */ + assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); + } + + auto preds = get_pred_tops(verts.front(), g); + for (RoseVertex v : verts) { + if (preds != get_pred_tops(v, g)) { + DEBUG_PRINTF("distinct pred sets\n"); + continue; + } + } + engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left); + } + + /* We don't bother chunking as we expect deduping to be successful if the + * hashes match */ + + for (auto &group : engine_groups | map_values) { + DEBUG_PRINTF("group of %zu roses\n", group.size()); + + if (group.size() < 2) { continue; } - for (auto it = group.begin(); it != group.end(); ++it) { + for (auto it = group.begin(); it != group.end(); ++it) { left_id r1 = *it; - vector<RoseVertex> &verts1 = eng_verts[r1]; - assert(!verts1.empty()); /* cleared engines should be behind us */ - - assert(all_reports(r1).size() == 1); - ReportID r1_report = *all_reports(r1).begin(); + vector<RoseVertex> &verts1 = eng_verts[r1]; + assert(!verts1.empty()); /* cleared engines should be behind us */ - for (auto jt = next(it); jt != group.end(); ++jt) { + assert(all_reports(r1).size() == 1); + ReportID r1_report = *all_reports(r1).begin(); + + for (auto jt = next(it); jt != group.end(); ++jt) { left_id r2 = *jt; - vector<RoseVertex> &verts2 = eng_verts[r2]; - assert(!verts2.empty()); - assert(all_reports(r2).size() == 1); - ReportID r2_report = *all_reports(r2).begin(); + vector<RoseVertex> &verts2 = eng_verts[r2]; + assert(!verts2.empty()); + assert(all_reports(r2).size() == 1); + ReportID r2_report = *all_reports(r2).begin(); - if (!is_equal(r1, r1_report, r2, r2_report)) { + if (!is_equal(r1, r1_report, r2, r2_report)) { continue; } - if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { + if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { continue; } DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph()); - // Replace r1 with r2. + // Replace r1 with r2. 
for (auto v : verts1) { DEBUG_PRINTF("replacing report %u with %u on %zu\n", - r2_report, r1_report, g[v].index); + r2_report, r1_report, g[v].index); u32 orig_lag = g[v].left.lag; - g[v].left = g[verts2.front()].left; + g[v].left = g[verts2.front()].left; g[v].left.lag = orig_lag; } - - insert(&verts2, verts2.end(), verts1); - verts1.clear(); - - /* remove stale entry from transient set, if present */ - build.transient.erase(r1); - + + insert(&verts2, verts2.end(), verts1); + verts1.clear(); + + /* remove stale entry from transient set, if present */ + build.transient.erase(r1); + break; } } @@ -1672,7 +1672,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { } static -u32 findUnusedTop(const flat_set<u32> &tops) { +u32 findUnusedTop(const flat_set<u32> &tops) { u32 i = 0; while (contains(tops, i)) { i++; @@ -1688,19 +1688,19 @@ void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { if (v == h.startDs) { continue; } - flat_set<u32> new_tops; - for (u32 t : h[e].tops) { - DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); - new_tops.insert(top_mapping.at(t)); - } - h[e].tops = std::move(new_tops); + flat_set<u32> new_tops; + for (u32 t : h[e].tops) { + DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); + new_tops.insert(top_mapping.at(t)); + } + h[e].tops = std::move(new_tops); } } static bool setDistinctTops(NGHolder &h1, const NGHolder &h2, map<u32, u32> &top_mapping) { - flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); + flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), tops2.size()); @@ -1738,7 +1738,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); assert(!g[v].left.haig); assert(!g[v].left.dfa); for (const auto &e : in_edges_range(v, g)) { @@ -1747,7 +1747,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, assert(contains(top_mapping, t)); g[e].rose_top = top_mapping[t]; DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n", - g[source(e, g)].index, g[target(e, g)].index, t, + g[source(e, g)].index, g[target(e, g)].index, t, top_mapping[t]); } } @@ -1768,7 +1768,7 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); u32 t = g[v].suffix.top; assert(contains(top_mapping, t)); g[v].suffix.top = top_mapping[t]; @@ -1796,7 +1796,7 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { // We track the number of accelerable states for each graph in a map and // only recompute them when the graph is modified. 
- unordered_map<left_id, u32> accel_count; + unordered_map<left_id, u32> accel_count; for (const auto &rose : roses) { assert(rose.graph()->kind == NFA_INFIX); accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); @@ -1965,109 +1965,109 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { } } -static -void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, - insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { - /* caller must have already ensured that candidates have the same reach */ - RoseGraph &g = build.g; - DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); - - for (auto it = cands.begin(); it != cands.end(); ++it) { - left_id &cand_1 = *it; - vector<RoseVertex> &verts_1 = eng_verts[cand_1]; - if (verts_1.empty()) { - continue; - } - - for (auto jt = next(it); jt != cands.end(); ++jt) { - const left_id &cand_2 = *jt; - vector<RoseVertex> &verts_2 = eng_verts[cand_2]; - if (verts_2.empty()) { - continue; - } - - assert(cand_1.castle()->reach() == cand_2.castle()->reach()); - - if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { - DEBUG_PRINTF("not mergeable\n"); - continue; // next cand_2 - } - - DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), - cand_1.castle()->repeats.size()); - DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), - cand_2.castle()->repeats.size()); - - map<u32, u32> top_map; - if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { - DEBUG_PRINTF("couldn't merge\n"); - continue; // next cand_2 - } - - // Update castle2's roses to point to castle1 now. - shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; - for (auto v : verts_2) { - assert(g[v].left.castle.get() == cand_2.castle()); - g[v].left.castle = winner; - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_map.at(g[e].rose_top); - } - } - - insert(&verts_1, verts_1.end(), verts_2); - verts_2.clear(); - } - } -} - -/** - * Merges castles with the same reach together regardless of where in the rose - * graph they are. Note: there is no requirement for the castles to have common - * parent or target vertices. - * - * There are no heuristics for reducing block mode merges as castle speed - * mainly depends on the reach being scanned. 
- */ -void mergeCastleLeftfixes(RoseBuildImpl &build) { +static +void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, + insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { + /* caller must have already ensured that candidates have the same reach */ + RoseGraph &g = build.g; + DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); + + for (auto it = cands.begin(); it != cands.end(); ++it) { + left_id &cand_1 = *it; + vector<RoseVertex> &verts_1 = eng_verts[cand_1]; + if (verts_1.empty()) { + continue; + } + + for (auto jt = next(it); jt != cands.end(); ++jt) { + const left_id &cand_2 = *jt; + vector<RoseVertex> &verts_2 = eng_verts[cand_2]; + if (verts_2.empty()) { + continue; + } + + assert(cand_1.castle()->reach() == cand_2.castle()->reach()); + + if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { + DEBUG_PRINTF("not mergeable\n"); + continue; // next cand_2 + } + + DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), + cand_1.castle()->repeats.size()); + DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), + cand_2.castle()->repeats.size()); + + map<u32, u32> top_map; + if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { + DEBUG_PRINTF("couldn't merge\n"); + continue; // next cand_2 + } + + // Update castle2's roses to point to castle1 now. + shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; + for (auto v : verts_2) { + assert(g[v].left.castle.get() == cand_2.castle()); + g[v].left.castle = winner; + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + + insert(&verts_1, verts_1.end(), verts_2); + verts_2.clear(); + } + } +} + +/** + * Merges castles with the same reach together regardless of where in the rose + * graph they are. Note: there is no requirement for the castles to have common + * parent or target vertices. + * + * There are no heuristics for reducing block mode merges as castle speed + * mainly depends on the reach being scanned. + */ +void mergeCastleLeftfixes(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses - || !build.cc.grey.allowCastle) { + if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses + || !build.cc.grey.allowCastle) { return; } - RoseGraph &g = build.g; + RoseGraph &g = build.g; - insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; for (auto v : vertices_range(g)) { - if (!g[v].left.castle) { + if (!g[v].left.castle) { continue; } - // Handle infixes only. - if (build.isRootSuccessor(v)) { + // Handle infixes only. 
+ if (build.isRootSuccessor(v)) { continue; } - eng_verts[g[v].left].push_back(v); - } + eng_verts[g[v].left].push_back(v); + } - map<CharReach, vector<left_id>> by_reach; - for (const auto &left : eng_verts | map_keys) { - by_reach[left.castle()->reach()].push_back(left); - } + map<CharReach, vector<left_id>> by_reach; + for (const auto &left : eng_verts | map_keys) { + by_reach[left.castle()->reach()].push_back(left); + } - vector<vector<left_id>> chunks; - for (auto &raw_group : by_reach | map_values) { - chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); + vector<vector<left_id>> chunks; + for (auto &raw_group : by_reach | map_values) { + chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); } - by_reach.clear(); + by_reach.clear(); - DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); + DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); - for (auto &chunk : chunks) { - mergeCastleChunk(build, chunk, eng_verts); + for (auto &chunk : chunks) { + mergeCastleChunk(build, chunk, eng_verts); } } @@ -2081,7 +2081,7 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, // If this isn't an acyclic case, we track the number of accelerable states // for each graph in a map and only recompute them when the graph is // modified. - unordered_map<suffix_id, u32> accel_count; + unordered_map<suffix_id, u32> accel_count; if (!acyclic) { for (const auto &suffix : suffixes) { assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); @@ -2093,11 +2093,11 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, suffix_id s1 = *it; const deque<RoseVertex> &verts1 = suffixes.vertices(s1); assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); - - // Caller should ensure that we don't propose merges of graphs that are - // already too big. - assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); - + + // Caller should ensure that we don't propose merges of graphs that are + // already too big. 
+ assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); + deque<suffix_id> merged; for (auto jt = next(it); jt != suffixes.end(); ++jt) { suffix_id s2 = *jt; @@ -2210,11 +2210,11 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { assert(!g[v].suffix.haig); - if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { + if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { continue; } - if (!isAcyclic(*h)) { + if (!isAcyclic(*h)) { continue; } @@ -2327,8 +2327,8 @@ map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, batch.push_back(*it); assert((*it)->kind == NFA_OUTFIX); if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { - auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); - insert(&merged, batch_merged); + auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); + insert(&merged, batch_merged); batch.clear(); } } @@ -2347,9 +2347,9 @@ void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { map<NGHolder *, size_t> nfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - auto *holder = outfixes[i].holder(); - if (holder) { - nfa_mapping[holder] = i; + auto *holder = outfixes[i].holder(); + if (holder) { + nfa_mapping[holder] = i; } } @@ -2413,7 +2413,7 @@ private: template<class RawDfa, class MergeFunctor> static void pairwiseDfaMerge(vector<RawDfa *> &dfas, - unordered_map<RawDfa *, size_t> &dfa_mapping, + unordered_map<RawDfa *, size_t> &dfa_mapping, vector<OutfixInfo> &outfixes, MergeFunctor merge_func) { DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); @@ -2441,7 +2441,7 @@ void pairwiseDfaMerge(vector<RawDfa *> &dfas, RawDfa *dfa_ptr = rdfa.get(); dfa_mapping[dfa_ptr] = dfa_mapping[*it]; dfa_mapping.erase(*it); - winner.proto = move(rdfa); + winner.proto = move(rdfa); mergeOutfixInfo(winner, victim); @@ -2455,7 +2455,7 @@ void pairwiseDfaMerge(vector<RawDfa *> &dfas, template<class RawDfa, class MergeFunctor> static void chunkedDfaMerge(vector<RawDfa *> &dfas, - unordered_map<RawDfa *, size_t> &dfa_mapping, + unordered_map<RawDfa *, size_t> &dfa_mapping, vector<OutfixInfo> &outfixes, MergeFunctor merge_func) { DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); @@ -2489,11 +2489,11 @@ void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) { /* key is index into outfix array as iterators, etc may be invalidated by * element addition. */ - unordered_map<raw_dfa *, size_t> dfa_mapping; + unordered_map<raw_dfa *, size_t> dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - auto *rdfa = outfixes[i].rdfa(); - if (rdfa) { - dfa_mapping[rdfa] = i; + auto *rdfa = outfixes[i].rdfa(); + if (rdfa) { + dfa_mapping[rdfa] = i; } } @@ -2514,10 +2514,10 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, bool seen_dfa = false; u32 nfa_count = 0; for (const auto &outfix : tbi.outfixes) { - if (outfix.holder()) { + if (outfix.holder()) { DEBUG_PRINTF("nfa\n"); nfa_count++; - } else if (outfix.rdfa()) { + } else if (outfix.rdfa()) { DEBUG_PRINTF("dfa\n"); seen_dfa = true; } @@ -2533,32 +2533,32 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, /* key is index into outfix array as iterators, etc may be invalidated by * element addition. 
*/ size_t new_dfas = 0; - unordered_map<raw_dfa *, size_t> dfa_mapping; + unordered_map<raw_dfa *, size_t> dfa_mapping; vector<raw_dfa *> dfas; for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { - auto &outfix = *it; - assert(!outfix.is_dead()); - - if (outfix.rdfa()) { - auto *rdfa = outfix.rdfa(); - dfas.push_back(rdfa); - dfa_mapping[rdfa] = it - tbi.outfixes.begin(); + auto &outfix = *it; + assert(!outfix.is_dead()); + + if (outfix.rdfa()) { + auto *rdfa = outfix.rdfa(); + dfas.push_back(rdfa); + dfa_mapping[rdfa] = it - tbi.outfixes.begin(); continue; } - if (!outfix.holder()) { + if (!outfix.holder()) { continue; } - NGHolder *h = outfix.holder(); + NGHolder *h = outfix.holder(); assert(h->kind == NFA_OUTFIX); auto rdfa = buildMcClellan(*h, &rm, grey); if (rdfa) { // Transform this outfix into a DFA and add it to the merge set. dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); dfas.push_back(rdfa.get()); - outfix.proto = move(rdfa); + outfix.proto = move(rdfa); new_dfas++; } } @@ -2584,11 +2584,11 @@ void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas, vector<OutfixInfo> &outfixes = tbi.outfixes; - unordered_map<raw_som_dfa *, size_t> dfa_mapping; + unordered_map<raw_som_dfa *, size_t> dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - auto *haig = outfixes[i].haig(); - if (haig) { - dfa_mapping[haig] = i; + auto *haig = outfixes[i].haig(); + if (haig) { + dfa_mapping[haig] = i; } } @@ -2613,13 +2613,13 @@ void mergeOutfixes(RoseBuildImpl &tbi) { vector<raw_dfa *> dfas; vector<raw_som_dfa *> som_dfas; - for (auto &outfix : tbi.outfixes) { - if (outfix.rdfa()) { - dfas.push_back(outfix.rdfa()); - } else if (outfix.holder()) { - nfas.push_back(outfix.holder()); - } else if (outfix.haig()) { - som_dfas.push_back(outfix.haig()); + for (auto &outfix : tbi.outfixes) { + if (outfix.rdfa()) { + dfas.push_back(outfix.rdfa()); + } else if (outfix.holder()) { + nfas.push_back(outfix.holder()); + } else if (outfix.haig()) { + som_dfas.push_back(outfix.haig()); } } @@ -2644,7 +2644,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, /* TODO: inspect further back in the pattern */ for (u32 lit_id : g[tv].literals) { - const rose_literal_id &lit = tbi.literals.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); if (lit.delay) { return 0; /* TODO: better */ } @@ -2724,7 +2724,7 @@ void mergePuffixes(RoseBuildImpl &tbi) { u32 squashDistance = allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); - Report ir = makeMpvTrigger(event, squashDistance); + Report ir = makeMpvTrigger(event, squashDistance); ReportID id = tbi.rm.getInternalId(ir); DEBUG_PRINTF("puffette event q%u t%u\n", queue, event); @@ -2736,8 +2736,8 @@ void mergePuffixes(RoseBuildImpl &tbi) { static void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, u32 top, const vector<RoseVertex> &verts) { - DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top, - m.get(), verts.size()); + DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top, + m.get(), verts.size()); for (auto v : verts) { assert(g[v].suffix.castle); @@ -2747,56 +2747,56 @@ void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, } static -void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, - const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) { +void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, + const unordered_map<CastleProto *, vector<RoseVertex>> 
&eng_verts) { if (castles.size() <= 1) { return; } - DEBUG_PRINTF("merging reach %s, %zu elements\n", - describeClass(castles[0]->reach()).c_str(), castles.size()); + DEBUG_PRINTF("merging reach %s, %zu elements\n", + describeClass(castles[0]->reach()).c_str(), castles.size()); - CastleProto *m = nullptr; + CastleProto *m = nullptr; - for (CastleProto *c : castles) { + for (CastleProto *c : castles) { assert(c->repeats.size() == 1); // Not yet merged. - assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); - if (!m) { - m = c; + assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); + if (!m) { + m = c; continue; } - u32 top = m->merge(c->repeats[0]); - if (top == CastleProto::max_occupancy) { + u32 top = m->merge(c->repeats[0]); + if (top == CastleProto::max_occupancy) { // No room left to merge into 'm'. This one becomes the new 'm'. DEBUG_PRINTF("next mergee\n"); m = c; - continue; + continue; } - updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, - eng_verts.at(c)); - DEBUG_PRINTF("added to %p, top %u\n", m, top); + updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, + eng_verts.at(c)); + DEBUG_PRINTF("added to %p, top %u\n", m, top); } } -void mergeCastleSuffixes(RoseBuildImpl &build) { +void mergeCastleSuffixes(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { + if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { return; } - unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; - map<CharReach, vector<CastleProto *>> by_reach; + unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; + map<CharReach, vector<CastleProto *>> by_reach; - RoseGraph &g = build.g; + RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix.castle) { continue; } - CastleProto *c = g[v].suffix.castle.get(); + CastleProto *c = g[v].suffix.castle.get(); if (c->repeats.size() != 1) { // This code assumes it's the only place merging is being done. @@ -2804,14 +2804,14 @@ void mergeCastleSuffixes(RoseBuildImpl &build) { continue; } - if (!contains(eng_verts, c)) { + if (!contains(eng_verts, c)) { by_reach[c->reach()].push_back(c); } - eng_verts[c].push_back(v); + eng_verts[c].push_back(v); } - for (auto &chunk : by_reach | map_values) { - mergeCastleSuffixChunk(g, chunk, eng_verts); + for (auto &chunk : by_reach | map_values) { + mergeCastleSuffixChunk(g, chunk, eng_verts); } } |
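The leftfix, castle and outfix merge passes shown in the diff above all follow one shape: group candidate engines by a compatibility key (reach, structural hash, predecessor set), split oversized groups into bounded chunks, then attempt pairwise merges inside each chunk, emptying the losing engine's vertex list so later iterations skip it. The sketch below is an editor's illustration of that shape only, written against the standard library; Engine, tryMerge, chunk and pairwiseMergeChunk are hypothetical stand-ins, not Hyperscan APIs, and the size cap merely plays the role of limits such as MERGE_GROUP_SIZE_MAX or DFA_MERGE_MAX_STATES.

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <vector>

// Hypothetical stand-in for an engine (NGHolder, CastleProto, raw_dfa, ...).
struct Engine {
    std::string desc;
    bool dead = false; // a merged-away victim is marked dead, not erased mid-loop
};

// Hypothetical merge attempt: succeed only while the combined size stays under
// a cap, in the spirit of the state/vertex limits used by the real passes.
static bool tryMerge(Engine &winner, Engine &victim, std::size_t max_size) {
    if (winner.desc.size() + victim.desc.size() > max_size) {
        return false;
    }
    winner.desc += victim.desc;
    return true;
}

// Split one candidate group into chunks of at most chunk_size elements,
// as the real passes do before any pairwise work.
static std::vector<std::vector<Engine *>> chunk(const std::vector<Engine *> &group,
                                                std::size_t chunk_size) {
    std::vector<std::vector<Engine *>> out;
    for (std::size_t i = 0; i < group.size(); i += chunk_size) {
        std::size_t last = std::min(group.size(), i + chunk_size);
        out.emplace_back(group.begin() + i, group.begin() + last);
    }
    return out;
}

// Try every pair inside a chunk; a successful merge kills the victim so that
// later iterations skip it.
static void pairwiseMergeChunk(std::vector<Engine *> &cands, std::size_t max_size) {
    for (auto it = cands.begin(); it != cands.end(); ++it) {
        if ((*it)->dead) {
            continue;
        }
        for (auto jt = std::next(it); jt != cands.end(); ++jt) {
            if (!(*jt)->dead && tryMerge(**it, **jt, max_size)) {
                (*jt)->dead = true;
            }
        }
    }
}

Marking victims dead (or clearing their vertex lists, as verts_2.clear() and is_dead() do in the code above) rather than erasing them keeps iterators stable while the quadratic pairwise scan is still running, which is why the same idiom appears in mergeCastleChunk and pairwiseDfaMerge.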