diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch) | |
tree | d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | |
parent | 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff) | |
download | ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_merge.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_merge.cpp | 3616 |
1 files changed, 1808 insertions, 1808 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp index 5066dbd578..2b92d83fb4 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp @@ -1,490 +1,490 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose Build: functions for reducing the size of the Rose graph - * through merging. - */ -#include "rose_build_merge.h" - -#include "grey.h" -#include "rose_build.h" -#include "rose_build_impl.h" -#include "rose_build_util.h" -#include "ue2common.h" -#include "nfa/castlecompile.h" -#include "nfa/goughcompile.h" -#include "nfa/limex_limits.h" -#include "nfa/mcclellancompile.h" -#include "nfa/nfa_build_util.h" -#include "nfa/rdfa_merge.h" -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_haig.h" -#include "nfagraph/ng_is_equal.h" -#include "nfagraph/ng_lbr.h" -#include "nfagraph/ng_limex.h" -#include "nfagraph/ng_mcclellan.h" -#include "nfagraph/ng_puff.h" -#include "nfagraph/ng_redundancy.h" -#include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_reports.h" -#include "nfagraph/ng_stop.h" -#include "nfagraph/ng_uncalc_components.h" -#include "nfagraph/ng_util.h" -#include "nfagraph/ng_width.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose Build: functions for reducing the size of the Rose graph + * through merging. + */ +#include "rose_build_merge.h" + +#include "grey.h" +#include "rose_build.h" +#include "rose_build_impl.h" +#include "rose_build_util.h" +#include "ue2common.h" +#include "nfa/castlecompile.h" +#include "nfa/goughcompile.h" +#include "nfa/limex_limits.h" +#include "nfa/mcclellancompile.h" +#include "nfa/nfa_build_util.h" +#include "nfa/rdfa_merge.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_haig.h" +#include "nfagraph/ng_is_equal.h" +#include "nfagraph/ng_lbr.h" +#include "nfagraph/ng_limex.h" +#include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_puff.h" +#include "nfagraph/ng_redundancy.h" +#include "nfagraph/ng_repeat.h" +#include "nfagraph/ng_reports.h" +#include "nfagraph/ng_stop.h" +#include "nfagraph/ng_uncalc_components.h" +#include "nfagraph/ng_util.h" +#include "nfagraph/ng_width.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" #include "util/hash.h" #include "util/insertion_ordered.h" -#include "util/order_check.h" -#include "util/report_manager.h" -#include "util/ue2string.h" +#include "util/order_check.h" +#include "util/report_manager.h" +#include "util/ue2string.h" #include "util/unordered.h" - -#include <algorithm> -#include <functional> -#include <list> -#include <map> -#include <queue> -#include <set> -#include <string> -#include <vector> -#include <utility> - -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_values; + +#include <algorithm> +#include <functional> +#include <list> +#include <map> +#include <queue> +#include <set> +#include <string> +#include <vector> +#include <utility> + +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_values; using boost::adaptors::map_keys; - -namespace ue2 { - -static const size_t NARROW_START_MAX = 10; -static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128; -static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; -static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; -static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; -static const size_t MERGE_GROUP_SIZE_MAX = 200; + +namespace ue2 { + +static const size_t NARROW_START_MAX = 10; +static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128; +static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; +static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; +static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; +static const size_t MERGE_GROUP_SIZE_MAX = 200; static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; - -/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ -static const size_t DFA_CHUNK_SIZE_MAX = 200; - -/** \brief Max DFA states in a merged DFA. */ -static const size_t DFA_MERGE_MAX_STATES = 8000; - + +/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ +static const size_t DFA_CHUNK_SIZE_MAX = 200; + +/** \brief Max DFA states in a merged DFA. */ +static const size_t DFA_MERGE_MAX_STATES = 8000; + /** \brief In block mode, merge two prefixes even if they don't have identical * literal sets if they have fewer than this many states and the merged graph * is also small. */ static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; - -static -size_t small_merge_max_vertices(const CompileContext &cc) { - return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM - : SMALL_MERGE_MAX_VERTICES_BLOCK; -} - -static -size_t small_rose_threshold(const CompileContext &cc) { - return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM - : SMALL_ROSE_THRESHOLD_BLOCK; -} - -/** - * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that - * reports should not contribute to the hash. - */ -static + +static +size_t small_merge_max_vertices(const CompileContext &cc) { + return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM + : SMALL_MERGE_MAX_VERTICES_BLOCK; +} + +static +size_t small_rose_threshold(const CompileContext &cc) { + return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM + : SMALL_ROSE_THRESHOLD_BLOCK; +} + +/** + * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that + * reports should not contribute to the hash. + */ +static size_t hashLeftfix(const left_id &left) { - size_t val = 0; - + size_t val = 0; + if (left.castle()) { hash_combine(val, left.castle()->reach()); for (const auto &pr : left.castle()->repeats) { - hash_combine(val, pr.first); // top - hash_combine(val, pr.second.bounds); - } + hash_combine(val, pr.first); // top + hash_combine(val, pr.second.bounds); + } } else if (left.graph()) { hash_combine(val, hash_holder(*left.graph())); - } - - return val; -} - -namespace { - -/** Key used to group sets of leftfixes by the dedupeLeftfixes path. */ -struct RoseGroup { - RoseGroup(const RoseBuildImpl &build, RoseVertex v) - : left_hash(hashLeftfix(build.g[v].left)), - lag(build.g[v].left.lag), eod_table(build.isInETable(v)) { - const RoseGraph &g = build.g; - assert(in_degree(v, g) == 1); - RoseVertex u = *inv_adjacent_vertices(v, g).first; + } + + return val; +} + +namespace { + +/** Key used to group sets of leftfixes by the dedupeLeftfixes path. */ +struct RoseGroup { + RoseGroup(const RoseBuildImpl &build, RoseVertex v) + : left_hash(hashLeftfix(build.g[v].left)), + lag(build.g[v].left.lag), eod_table(build.isInETable(v)) { + const RoseGraph &g = build.g; + assert(in_degree(v, g) == 1); + RoseVertex u = *inv_adjacent_vertices(v, g).first; parent = g[u].index; - } - - bool operator<(const RoseGroup &b) const { - const RoseGroup &a = *this; - ORDER_CHECK(parent); - ORDER_CHECK(left_hash); - ORDER_CHECK(lag); - ORDER_CHECK(eod_table); - return false; - } - -private: - /** Parent vertex index. We must use the index, rather than the descriptor, - * for compile determinism. */ - size_t parent; - - /** Quick hash of the leftfix itself. Must be identical for a given pair of - * graphs if is_equal would return true. */ - size_t left_hash; - - /** Leftfix lag value. */ - u32 lag; - - /** True if associated vertex (successor) is in the EOD table. We don't - * allow sharing of leftfix engines between "normal" and EOD operation. */ - bool eod_table; -}; - -/** + } + + bool operator<(const RoseGroup &b) const { + const RoseGroup &a = *this; + ORDER_CHECK(parent); + ORDER_CHECK(left_hash); + ORDER_CHECK(lag); + ORDER_CHECK(eod_table); + return false; + } + +private: + /** Parent vertex index. We must use the index, rather than the descriptor, + * for compile determinism. */ + size_t parent; + + /** Quick hash of the leftfix itself. Must be identical for a given pair of + * graphs if is_equal would return true. */ + size_t left_hash; + + /** Leftfix lag value. */ + u32 lag; + + /** True if associated vertex (successor) is in the EOD table. We don't + * allow sharing of leftfix engines between "normal" and EOD operation. */ + bool eod_table; +}; + +/** * Intended to find graphs that are identical except for their report * IDs. Relies on vertex and edge indices to pick up graphs that have been * messily put together in different orderings. Only implemented for castles and * holders. - */ + */ static bool is_equal(const left_id &u_left, ReportID u_report, const left_id &v_left, ReportID v_report) { if (u_left.castle() && v_left.castle()) { return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); } - + if (!u_left.graph() || !v_left.graph()) { return false; - } - + } + return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); } - -} // namespace - -/** - * This pass performs work similar to \ref dedupeSuffixes - it removes - * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and - * share the same trigger vertex and lag. Leftfixes are first grouped by - * parent role and lag to reduce the number of candidates to be inspected - * for each leftfix. The graphs in each cluster are then compared with each - * other and the graph is updated to only refer to a canonical version of each - * graph. - * - * Note: only roles with a single predecessor vertex are considered for this - * transform - it should probably be generalised to work for roles which share + +} // namespace + +/** + * This pass performs work similar to \ref dedupeSuffixes - it removes + * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and + * share the same trigger vertex and lag. Leftfixes are first grouped by + * parent role and lag to reduce the number of candidates to be inspected + * for each leftfix. The graphs in each cluster are then compared with each + * other and the graph is updated to only refer to a canonical version of each + * graph. + * + * Note: only roles with a single predecessor vertex are considered for this + * transform - it should probably be generalised to work for roles which share * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or * it should be retired entirely. - */ -bool dedupeLeftfixes(RoseBuildImpl &tbi) { - DEBUG_PRINTF("deduping leftfixes\n"); - map<RoseGroup, deque<RoseVertex>> roses; - bool work_done = false; - - /* Note: a leftfix's transientness will not be altered by deduping */ - - // Collect leftfixes into groups. - RoseGraph &g = tbi.g; - for (auto v : vertices_range(g)) { - if (!g[v].left) { - continue; - } - const left_id left(g[v].left); - - if (left.haig()) { - /* TODO: allow merging of identical haigs */ - continue; - } - - if (in_degree(v, g) != 1) { - continue; - } - - roses[RoseGroup(tbi, v)].push_back(v); - } - - DEBUG_PRINTF("collected %zu rose groups\n", roses.size()); - - // Walk groups and dedupe the roses therein. - for (deque<RoseVertex> &verts : roses | map_values) { - DEBUG_PRINTF("group has %zu vertices\n", verts.size()); - + */ +bool dedupeLeftfixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("deduping leftfixes\n"); + map<RoseGroup, deque<RoseVertex>> roses; + bool work_done = false; + + /* Note: a leftfix's transientness will not be altered by deduping */ + + // Collect leftfixes into groups. + RoseGraph &g = tbi.g; + for (auto v : vertices_range(g)) { + if (!g[v].left) { + continue; + } + const left_id left(g[v].left); + + if (left.haig()) { + /* TODO: allow merging of identical haigs */ + continue; + } + + if (in_degree(v, g) != 1) { + continue; + } + + roses[RoseGroup(tbi, v)].push_back(v); + } + + DEBUG_PRINTF("collected %zu rose groups\n", roses.size()); + + // Walk groups and dedupe the roses therein. + for (deque<RoseVertex> &verts : roses | map_values) { + DEBUG_PRINTF("group has %zu vertices\n", verts.size()); + unordered_set<left_id> seen; - - for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { - RoseVertex v = *jt; - left_id left(g[v].left); - - // Skip cases we've already handled, and mark as seen otherwise. - if (!seen.insert(left).second) { - continue; - } - - // Scan the rest of the list for dupes. + + for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { + RoseVertex v = *jt; + left_id left(g[v].left); + + // Skip cases we've already handled, and mark as seen otherwise. + if (!seen.insert(left).second) { + continue; + } + + // Scan the rest of the list for dupes. for (auto kt = std::next(jt); kt != jte; ++kt) { if (g[v].left == g[*kt].left || !is_equal(g[v].left, g[v].left.leftfix_report, g[*kt].left, g[*kt].left.leftfix_report)) { - continue; - } - - // Dupe found. - DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n", + continue; + } + + // Dupe found. + DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n", g[*kt].index, g[v].index); - assert(g[v].left.lag == g[*kt].left.lag); - g[*kt].left = g[v].left; - work_done = true; - } - } - } - - return work_done; -} - -/** - * \brief Returns a numeric key that can be used to group this suffix with - * others that may be its duplicate. - */ -static -size_t suffix_size_key(const suffix_id &s) { - if (s.graph()) { - return num_vertices(*s.graph()); - } - if (s.castle()) { - return s.castle()->repeats.size(); - } - return 0; -} - -static -bool is_equal(const suffix_id &s1, const suffix_id &s2) { - if (s1.graph() && s2.graph()) { - return is_equal(*s1.graph(), *s2.graph()); - } else if (s1.castle() && s2.castle()) { - return is_equal(*s1.castle(), *s2.castle()); - } - return false; -} - -/** - * This function simply looks for suffix NGHolder graphs which are identical - * and updates the roles in the RoseGraph to refer to only a single copy. This - * obviously has benefits in terms of both performance (as we don't run - * multiple engines doing the same work) and stream state. This function first - * groups all suffixes by number of vertices and report set to restrict the set - * of possible candidates. Each group is then walked to find duplicates using - * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it - * goes. - * - * Note: does not dedupe suffixes of vertices in the EOD table. - */ -void dedupeSuffixes(RoseBuildImpl &tbi) { - DEBUG_PRINTF("deduping suffixes\n"); - + assert(g[v].left.lag == g[*kt].left.lag); + g[*kt].left = g[v].left; + work_done = true; + } + } + } + + return work_done; +} + +/** + * \brief Returns a numeric key that can be used to group this suffix with + * others that may be its duplicate. + */ +static +size_t suffix_size_key(const suffix_id &s) { + if (s.graph()) { + return num_vertices(*s.graph()); + } + if (s.castle()) { + return s.castle()->repeats.size(); + } + return 0; +} + +static +bool is_equal(const suffix_id &s1, const suffix_id &s2) { + if (s1.graph() && s2.graph()) { + return is_equal(*s1.graph(), *s2.graph()); + } else if (s1.castle() && s2.castle()) { + return is_equal(*s1.castle(), *s2.castle()); + } + return false; +} + +/** + * This function simply looks for suffix NGHolder graphs which are identical + * and updates the roles in the RoseGraph to refer to only a single copy. This + * obviously has benefits in terms of both performance (as we don't run + * multiple engines doing the same work) and stream state. This function first + * groups all suffixes by number of vertices and report set to restrict the set + * of possible candidates. Each group is then walked to find duplicates using + * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it + * goes. + * + * Note: does not dedupe suffixes of vertices in the EOD table. + */ +void dedupeSuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("deduping suffixes\n"); + unordered_map<suffix_id, set<RoseVertex>> suffix_map; - map<pair<size_t, set<ReportID>>, vector<suffix_id>> part; - - // Collect suffixes into groups. - RoseGraph &g = tbi.g; - for (auto v : vertices_range(g)) { - if (!g[v].suffix || tbi.isInETable(v)) { - continue; - } - - const suffix_id s(g[v].suffix); - - if (!(s.graph() || s.castle())) { - continue; // e.g. Haig - } - - set<RoseVertex> &verts = suffix_map[s]; - if (verts.empty()) { - part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s); - } - verts.insert(v); - } - - DEBUG_PRINTF("collected %zu groups\n", part.size()); - - for (const auto &cand : part | map_values) { - if (cand.size() <= 1) { - continue; - } - DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size()); - - for (auto jt = cand.begin(); jt != cand.end(); ++jt) { - if (suffix_map[*jt].empty()) { - continue; - } - for (auto kt = next(jt); kt != cand.end(); ++kt) { - if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) { - continue; - } - DEBUG_PRINTF("found dupe\n"); - for (auto v : suffix_map[*kt]) { - RoseVertex dupe = *suffix_map[*jt].begin(); - assert(dupe != v); - g[v].suffix.graph = g[dupe].suffix.graph; - g[v].suffix.castle = g[dupe].suffix.castle; - assert(suffix_id(g[v].suffix) == - suffix_id(g[dupe].suffix)); - suffix_map[*jt].insert(v); - } - suffix_map[*kt].clear(); - } - } - } -} - -namespace { - -/** - * This class stores a mapping from an engine reference (left_id, suffix_id, - * etc) to a list of vertices, and also allows us to iterate over the set of - * engine references in insertion order -- we add to the mapping in vertex - * iteration order, so this allows us to provide a consistent ordering. - */ -template<class EngineRef> -class Bouquet { -private: - list<EngineRef> ordering; // Unique list in insert order. + map<pair<size_t, set<ReportID>>, vector<suffix_id>> part; + + // Collect suffixes into groups. + RoseGraph &g = tbi.g; + for (auto v : vertices_range(g)) { + if (!g[v].suffix || tbi.isInETable(v)) { + continue; + } + + const suffix_id s(g[v].suffix); + + if (!(s.graph() || s.castle())) { + continue; // e.g. Haig + } + + set<RoseVertex> &verts = suffix_map[s]; + if (verts.empty()) { + part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s); + } + verts.insert(v); + } + + DEBUG_PRINTF("collected %zu groups\n", part.size()); + + for (const auto &cand : part | map_values) { + if (cand.size() <= 1) { + continue; + } + DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size()); + + for (auto jt = cand.begin(); jt != cand.end(); ++jt) { + if (suffix_map[*jt].empty()) { + continue; + } + for (auto kt = next(jt); kt != cand.end(); ++kt) { + if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) { + continue; + } + DEBUG_PRINTF("found dupe\n"); + for (auto v : suffix_map[*kt]) { + RoseVertex dupe = *suffix_map[*jt].begin(); + assert(dupe != v); + g[v].suffix.graph = g[dupe].suffix.graph; + g[v].suffix.castle = g[dupe].suffix.castle; + assert(suffix_id(g[v].suffix) == + suffix_id(g[dupe].suffix)); + suffix_map[*jt].insert(v); + } + suffix_map[*kt].clear(); + } + } + } +} + +namespace { + +/** + * This class stores a mapping from an engine reference (left_id, suffix_id, + * etc) to a list of vertices, and also allows us to iterate over the set of + * engine references in insertion order -- we add to the mapping in vertex + * iteration order, so this allows us to provide a consistent ordering. + */ +template<class EngineRef> +class Bouquet { +private: + list<EngineRef> ordering; // Unique list in insert order. using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; - BouquetMap bouquet; -public: - void insert(const EngineRef &h, RoseVertex v) { - typename BouquetMap::iterator f = bouquet.find(h); - if (f == bouquet.end()) { - ordering.push_back(h); - bouquet[h].push_back(v); - } else { - f->second.push_back(v); - } - } - - void insert(const EngineRef &h, const deque<RoseVertex> &verts) { - typename BouquetMap::iterator f = bouquet.find(h); - if (f == bouquet.end()) { - ordering.push_back(h); - bouquet.insert(make_pair(h, verts)); - } else { - f->second.insert(f->second.end(), verts.begin(), verts.end()); - } - } - - const deque<RoseVertex> &vertices(const EngineRef &h) const { - typename BouquetMap::const_iterator it = bouquet.find(h); - assert(it != bouquet.end()); // must be present - return it->second; - } - - void erase(const EngineRef &h) { - assert(bouquet.find(h) != bouquet.end()); - bouquet.erase(h); - ordering.remove(h); - } - - /** Remove all the elements in the given iterator range. */ - template <class Iter> - void erase_all(Iter erase_begin, Iter erase_end) { - for (Iter it = erase_begin; it != erase_end; ++it) { - bouquet.erase(*it); - } - - // Use a quick-lookup container so that we only have to traverse the - // 'ordering' list once. - const set<EngineRef> dead(erase_begin, erase_end); - for (iterator it = begin(); it != end(); /* incremented inside */) { - if (contains(dead, *it)) { - ordering.erase(it++); - } else { - ++it; - } - } - } - - void clear() { - ordering.clear(); - bouquet.clear(); - } - - size_t size() const { return bouquet.size(); } - - // iterate over holders in insert order - typedef typename list<EngineRef>::iterator iterator; - iterator begin() { return ordering.begin(); } - iterator end() { return ordering.end(); } - - // const iterate over holders in insert order - typedef typename list<EngineRef>::const_iterator const_iterator; - const_iterator begin() const { return ordering.begin(); } - const_iterator end() const { return ordering.end(); } -}; - + BouquetMap bouquet; +public: + void insert(const EngineRef &h, RoseVertex v) { + typename BouquetMap::iterator f = bouquet.find(h); + if (f == bouquet.end()) { + ordering.push_back(h); + bouquet[h].push_back(v); + } else { + f->second.push_back(v); + } + } + + void insert(const EngineRef &h, const deque<RoseVertex> &verts) { + typename BouquetMap::iterator f = bouquet.find(h); + if (f == bouquet.end()) { + ordering.push_back(h); + bouquet.insert(make_pair(h, verts)); + } else { + f->second.insert(f->second.end(), verts.begin(), verts.end()); + } + } + + const deque<RoseVertex> &vertices(const EngineRef &h) const { + typename BouquetMap::const_iterator it = bouquet.find(h); + assert(it != bouquet.end()); // must be present + return it->second; + } + + void erase(const EngineRef &h) { + assert(bouquet.find(h) != bouquet.end()); + bouquet.erase(h); + ordering.remove(h); + } + + /** Remove all the elements in the given iterator range. */ + template <class Iter> + void erase_all(Iter erase_begin, Iter erase_end) { + for (Iter it = erase_begin; it != erase_end; ++it) { + bouquet.erase(*it); + } + + // Use a quick-lookup container so that we only have to traverse the + // 'ordering' list once. + const set<EngineRef> dead(erase_begin, erase_end); + for (iterator it = begin(); it != end(); /* incremented inside */) { + if (contains(dead, *it)) { + ordering.erase(it++); + } else { + ++it; + } + } + } + + void clear() { + ordering.clear(); + bouquet.clear(); + } + + size_t size() const { return bouquet.size(); } + + // iterate over holders in insert order + typedef typename list<EngineRef>::iterator iterator; + iterator begin() { return ordering.begin(); } + iterator end() { return ordering.end(); } + + // const iterate over holders in insert order + typedef typename list<EngineRef>::const_iterator const_iterator; + const_iterator begin() const { return ordering.begin(); } + const_iterator end() const { return ordering.end(); } +}; + typedef Bouquet<left_id> LeftfixBouquet; -typedef Bouquet<suffix_id> SuffixBouquet; - -} // namespace - -/** - * Split a \ref Bouquet of some type into several smaller ones. - */ -template <class EngineRef> -static void chunkBouquets(const Bouquet<EngineRef> &in, - deque<Bouquet<EngineRef>> &out, - const size_t chunk_size) { - if (in.size() <= chunk_size) { - out.push_back(in); - return; - } - - out.push_back(Bouquet<EngineRef>()); - for (const auto &engine : in) { - if (out.back().size() >= chunk_size) { - out.push_back(Bouquet<EngineRef>()); - } - out.back().insert(engine, in.vertices(engine)); - } -} - +typedef Bouquet<suffix_id> SuffixBouquet; + +} // namespace + +/** + * Split a \ref Bouquet of some type into several smaller ones. + */ +template <class EngineRef> +static void chunkBouquets(const Bouquet<EngineRef> &in, + deque<Bouquet<EngineRef>> &out, + const size_t chunk_size) { + if (in.size() <= chunk_size) { + out.push_back(in); + return; + } + + out.push_back(Bouquet<EngineRef>()); + for (const auto &engine : in) { + if (out.back().size() >= chunk_size) { + out.push_back(Bouquet<EngineRef>()); + } + out.back().insert(engine, in.vertices(engine)); + } +} + static bool stringsCanFinishAtSameSpot(const ue2_literal &u, ue2_literal::const_iterator v_b, @@ -504,31 +504,31 @@ bool stringsCanFinishAtSameSpot(const ue2_literal &u, return true; } -/** +/** * Check that if after u has been seen, that it is impossible for the arrival of * v to require the inspection of an engine earlier than u did. - * + * * Let delta be the earliest that v can be seen after u (may be zero) * * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) * ==> - ulag <= delta - vlag * ==> vlag - ulag <= delta - */ -static -bool checkPrefix(const rose_literal_id &ul, const u32 ulag, - const rose_literal_id &vl, const u32 vlag) { + */ +static +bool checkPrefix(const rose_literal_id &ul, const u32 ulag, + const rose_literal_id &vl, const u32 vlag) { DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, escapeString(vl.s).c_str(), vlag); if (vl.delay || ul.delay) { /* engine related literals should not be delayed anyway */ - return false; - } - + return false; + } + if (ulag >= vlag) { assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); return true; - } + } size_t min_allowed_delta = vlag - ulag; DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); @@ -542,20 +542,20 @@ bool checkPrefix(const rose_literal_id &ul, const u32 ulag, DEBUG_PRINTF("OK\n"); return true; -} - +} + static bool hasSameEngineType(const RoseVertexProps &u_prop, const RoseVertexProps &v_prop) { const left_id u_left = u_prop.left; const left_id v_left = v_prop.left; - + return !u_left.haig() == !v_left.haig() && !u_left.dfa() == !v_left.dfa() && !u_left.castle() == !v_left.castle() && !u_left.graph() == !v_left.graph(); } - + /** * Verifies that merging the leftfix of vertices does not cause conflicts due * to the literals on the right. @@ -577,25 +577,25 @@ bool compatibleLiteralsForMerge( // We cannot merge engines that prefix literals in different tables. if (ulits[0].first->table != vlits[0].first->table) { - DEBUG_PRINTF("literals in different tables\n"); - return false; - } - + DEBUG_PRINTF("literals in different tables\n"); + return false; + } + // We don't handle delayed cases yet. for (const auto &ue : ulits) { const rose_literal_id &ul = *ue.first; if (ul.delay) { - return false; - } - } - + return false; + } + } + for (const auto &ve : vlits) { const rose_literal_id &vl = *ve.first; if (vl.delay) { - return false; - } - } - + return false; + } + } + /* An engine requires that all accesses to it are ordered by offsets. (ie, we can not check an engine's state at offset Y, if we have already checked its status at offset X and X > Y). If we can not establish that @@ -614,9 +614,9 @@ bool compatibleLiteralsForMerge( DEBUG_PRINTF("prefix check failed\n"); return false; } - } - } - + } + } + return true; } @@ -647,8 +647,8 @@ bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, // mergeableRoseVertices). if (!build.isRootSuccessor(u)) { return true; - } - + } + const RoseGraph &g = build.g; // Merge prefixes with identical literal sets (as we'd have to run them @@ -725,43 +725,43 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, return false; } - /* We cannot merge prefixes/vertices if they are successors of different - * root vertices */ - if (tbi.isRootSuccessor(u)) { - assert(tbi.isRootSuccessor(v)); - set<RoseVertex> u_preds; - set<RoseVertex> v_preds; - insert(&u_preds, inv_adjacent_vertices(u, tbi.g)); - insert(&v_preds, inv_adjacent_vertices(v, tbi.g)); - - if (u_preds != v_preds) { - return false; - } - } - + /* We cannot merge prefixes/vertices if they are successors of different + * root vertices */ + if (tbi.isRootSuccessor(u)) { + assert(tbi.isRootSuccessor(v)); + set<RoseVertex> u_preds; + set<RoseVertex> v_preds; + insert(&u_preds, inv_adjacent_vertices(u, tbi.g)); + insert(&v_preds, inv_adjacent_vertices(v, tbi.g)); + + if (u_preds != v_preds) { + return false; + } + } + u32 ulag = tbi.g[u].left.lag; vector<pair<const rose_literal_id *, u32>> ulits; ulits.reserve(tbi.g[u].literals.size()); for (u32 id : tbi.g[u].literals) { ulits.emplace_back(&tbi.literals.at(id), ulag); } - + u32 vlag = tbi.g[v].left.lag; vector<pair<const rose_literal_id *, u32>> vlits; vlits.reserve(tbi.g[v].literals.size()); for (u32 id : tbi.g[v].literals) { vlits.emplace_back(&tbi.literals.at(id), vlag); } - + if (!compatibleLiteralsForMerge(ulits, vlits)) { return false; - } - + } + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, tbi.g[v].index); - return true; -} - + return true; +} + /* We cannot merge an engine, if a trigger literal and a post literal overlap * in such a way that engine status needs to be check at a point before the * engine's current location. @@ -773,32 +773,32 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, * ==> delta >= v_lag * */ -static +static bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, u32 vlag) { DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), escapeString(vl.s).c_str(), vlag); - + for (size_t i = 0; i < vlag; i++) { if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); return false; - } - } + } + } DEBUG_PRINTF("OK\n"); - return true; -} - + return true; +} + template<typename VertexCont> static never_inline bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, const VertexCont &v2) { flat_set<RoseVertex> preds; - for (auto v : v1) { + for (auto v : v1) { insert(&preds, inv_adjacent_vertices(v, build.g)); - } - + } + flat_set<u32> pred_lits; /* No need to examine delays of a common pred - as it must already have @@ -811,7 +811,7 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); } - for (auto u : preds) { + for (auto u : preds) { if (!contains(known_good_preds, u)) { insert(&pred_lits, build.g[u].literals); } @@ -838,17 +838,17 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, if (!checkPredDelay(*ul, vl, vlag)) { return false; } - } - } - } - - return true; -} - -static -bool mergeableRoseVertices(const RoseBuildImpl &tbi, - const deque<RoseVertex> &verts1, - const deque<RoseVertex> &verts2) { + } + } + } + + return true; +} + +static +bool mergeableRoseVertices(const RoseBuildImpl &tbi, + const deque<RoseVertex> &verts1, + const deque<RoseVertex> &verts2) { assert(!verts1.empty()); assert(!verts2.empty()); @@ -874,9 +874,9 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, if (u_preds != v_preds) { return false; - } - } - + } + } + vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ for (auto a : verts1) { if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { @@ -905,90 +905,90 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, return false; } - // Check preds are compatible as well. + // Check preds are compatible as well. if (!checkPredDelays(tbi, verts1, verts2) || !checkPredDelays(tbi, verts2, verts1)) { - return false; - } - - DEBUG_PRINTF("vertex sets are mergeable\n"); - return true; -} - -bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1, - const set<RoseVertex> &v2) { - const deque<RoseVertex> vv1(v1.begin(), v1.end()); - const deque<RoseVertex> vv2(v2.begin(), v2.end()); - return mergeableRoseVertices(tbi, vv1, vv2); -} - -/** \brief Priority queue element for Rose merges. */ -namespace { -struct RoseMergeCandidate { - RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in, - u32 tb) - : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0), - tie_breaker(tb) { - if (r1.graph() && r2.graph()) { - const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph(); - /* som_none as haigs don't merge and just a guiding heuristic */ - CharReach stop1 = findStopAlphabet(h1, SOM_NONE); - CharReach stop2 = findStopAlphabet(h2, SOM_NONE); - stopxor = (stop1 ^ stop2).count(); - - // We use the number of vertices as an approximation of the state - // count here, as this is just feeding a comparison. - u32 vertex_count = num_vertices(h1) + num_vertices(h2); - states = vertex_count - min(vertex_count, cpl); - } else if (r1.castle() && r2.castle()) { - // FIXME - } - } - - bool operator<(const RoseMergeCandidate &a) const { - if (stopxor != a.stopxor) { - return stopxor > a.stopxor; - } - if (cpl != a.cpl) { - return cpl < a.cpl; - } - if (states != a.states) { - return states > a.states; - } - return tie_breaker < a.tie_breaker; - } - - left_id r1; - left_id r2; - u32 stopxor; - u32 cpl; //!< common prefix length - u32 states; - u32 tie_breaker; //!< determinism -}; -} - -static + return false; + } + + DEBUG_PRINTF("vertex sets are mergeable\n"); + return true; +} + +bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1, + const set<RoseVertex> &v2) { + const deque<RoseVertex> vv1(v1.begin(), v1.end()); + const deque<RoseVertex> vv2(v2.begin(), v2.end()); + return mergeableRoseVertices(tbi, vv1, vv2); +} + +/** \brief Priority queue element for Rose merges. */ +namespace { +struct RoseMergeCandidate { + RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in, + u32 tb) + : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0), + tie_breaker(tb) { + if (r1.graph() && r2.graph()) { + const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph(); + /* som_none as haigs don't merge and just a guiding heuristic */ + CharReach stop1 = findStopAlphabet(h1, SOM_NONE); + CharReach stop2 = findStopAlphabet(h2, SOM_NONE); + stopxor = (stop1 ^ stop2).count(); + + // We use the number of vertices as an approximation of the state + // count here, as this is just feeding a comparison. + u32 vertex_count = num_vertices(h1) + num_vertices(h2); + states = vertex_count - min(vertex_count, cpl); + } else if (r1.castle() && r2.castle()) { + // FIXME + } + } + + bool operator<(const RoseMergeCandidate &a) const { + if (stopxor != a.stopxor) { + return stopxor > a.stopxor; + } + if (cpl != a.cpl) { + return cpl < a.cpl; + } + if (states != a.states) { + return states > a.states; + } + return tie_breaker < a.tie_breaker; + } + + left_id r1; + left_id r2; + u32 stopxor; + u32 cpl; //!< common prefix length + u32 states; + u32 tie_breaker; //!< determinism +}; +} + +static bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, const vector<RoseVertex> &verts1, const vector<RoseVertex> &verts2) { - assert(!verts1.empty() && !verts2.empty()); - + assert(!verts1.empty() && !verts2.empty()); + DEBUG_PRINTF("merging pair of leftfixes:\n"); DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), as_string_list(all_tops(r1)).c_str()); DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), as_string_list(all_tops(r2)).c_str()); - + RoseGraph &g = build.g; - if (r1.graph()) { - assert(r2.graph()); - assert(r1.graph()->kind == r2.graph()->kind); + if (r1.graph()) { + assert(r2.graph()); + assert(r1.graph()->kind == r2.graph()->kind); if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { - DEBUG_PRINTF("nfa merge failed\n"); - return false; - } - + DEBUG_PRINTF("nfa merge failed\n"); + return false; + } + /* The graph in r1 has been merged into the graph in r2. Update r1's * vertices with the new graph ptr. mergeNfaPair() does not alter the * tops from the input graph so no need to update top values. @@ -997,38 +997,38 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, * distinct when they have different trigger conditions. * [Note: mergeLeftfixesVariableLag() should have a common parent set] */ - shared_ptr<NGHolder> &h = g[verts2.front()].left.graph; - for (RoseVertex v : verts1) { - g[v].left.graph = h; - } - - return true; - } else if (r1.castle()) { - assert(r2.castle()); + shared_ptr<NGHolder> &h = g[verts2.front()].left.graph; + for (RoseVertex v : verts1) { + g[v].left.graph = h; + } + + return true; + } else if (r1.castle()) { + assert(r2.castle()); assert(build.cc.grey.allowCastle); - - map<u32, u32> top_map; - if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { - DEBUG_PRINTF("castle merge failed\n"); - return false; - } - - // The castle in r1 has been merged into the castle in r2, with tops - // remapped as per top_map. - const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle; - for (RoseVertex v : verts1) { - g[v].left.castle = c; - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_map.at(g[e].rose_top); - } - } - return true; - } - - assert(0); - return false; -} - + + map<u32, u32> top_map; + if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { + DEBUG_PRINTF("castle merge failed\n"); + return false; + } + + // The castle in r1 has been merged into the castle in r2, with tops + // remapped as per top_map. + const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle; + for (RoseVertex v : verts1) { + g[v].left.castle = c; + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + return true; + } + + assert(0); + return false; +} + /** * Checks that there is no problem due to the involved vertices if we merge two * leftfix engines. @@ -1039,13 +1039,13 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, * - check that engines themselves can be merged * - use heuristics to find out if merging the engines is wise. */ -static +static bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, const vector<RoseVertex> &targets_1, const vector<RoseVertex> &targets_2) { assert(!targets_1.empty()); assert(!targets_2.empty()); - + vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ for (auto a : targets_1) { u32 ulag = build.g[a].left.lag; @@ -1053,7 +1053,7 @@ bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, ulits.emplace_back(&build.literals.at(id), ulag); } } - + vector<pair<const rose_literal_id *, u32>> vlits; for (auto a : targets_2) { u32 vlag = build.g[a].left.lag; @@ -1061,21 +1061,21 @@ bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, vlits.emplace_back(&build.literals.at(id), vlag); } } - + if (!compatibleLiteralsForMerge(ulits, vlits)) { return false; } - + // Check preds are compatible as well. if (!checkPredDelays(build, targets_1, targets_2) || !checkPredDelays(build, targets_2, targets_1)) { return false; } - + DEBUG_PRINTF("vertex sets are mergeable\n"); return true; } - + /** * In block mode, we want to be a little more selective -- we will only merge * prefix engines when the literal sets are the same or if the merged graph @@ -1087,13 +1087,13 @@ bool goodBlockModeMerge(const RoseBuildImpl &build, const vector<RoseVertex> &v_verts, const left_id &v_eng) { assert(!build.cc.streaming); - + // Always merge infixes if we can (subject to the other criteria in // mergeableRoseVertices). if (!build.isRootSuccessor(u_verts.front())) { return true; } - + const RoseGraph &g = build.g; flat_set<u32> u_lits; @@ -1197,20 +1197,20 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, && (stop1.count() > 10 || stop2.count() > 10)) { DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); return false; - } + } size_t maxstop = max(stop1.count(), stop2.count()); if (maxstop > 200 && stopboth.count() < 200) { DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); return false; } } - + /* Rechecking that the targets are compatible, as we may have already * merged new states into r1 or r2 and we need to verify that this * candidate is still ok. */ if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { return false; - } + } if (!build.cc.streaming && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { @@ -1218,62 +1218,62 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, } return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); -} - -static -bool nfaHasNarrowStart(const NGHolder &g) { +} + +static +bool nfaHasNarrowStart(const NGHolder &g) { if (out_degree(g.startDs, g) > 1) { - return false; // unanchored - } - - CharReach cr; - - for (auto v : adjacent_vertices_range(g.start, g)) { - if (v == g.startDs) { - continue; - } - cr |= g[v].char_reach; - } - return cr.count() <= NARROW_START_MAX; -} - -static -bool nfaHasFiniteMaxWidth(const NGHolder &g) { - return findMaxWidth(g).is_finite(); -} - -static -bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { - if (!proper_out_degree(h.startDs, h)) { - return false; - } - - assert(!is_triggered(h)); - - NGHolder h_temp; - cloneHolder(h_temp, h); - - vector<BoundedRepeatData> repeats; - bool suitable_for_sds_reforming = false; - const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */ - const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */ - const bool simple_model_selection = true; // FIRST is considered simple - analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true, - simple_model_selection, grey, &suitable_for_sds_reforming); - - return suitable_for_sds_reforming; -} - -static -u32 commonPrefixLength(left_id &r1, left_id &r2) { - if (r1.graph() && r2.graph()) { + return false; // unanchored + } + + CharReach cr; + + for (auto v : adjacent_vertices_range(g.start, g)) { + if (v == g.startDs) { + continue; + } + cr |= g[v].char_reach; + } + return cr.count() <= NARROW_START_MAX; +} + +static +bool nfaHasFiniteMaxWidth(const NGHolder &g) { + return findMaxWidth(g).is_finite(); +} + +static +bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { + if (!proper_out_degree(h.startDs, h)) { + return false; + } + + assert(!is_triggered(h)); + + NGHolder h_temp; + cloneHolder(h_temp, h); + + vector<BoundedRepeatData> repeats; + bool suitable_for_sds_reforming = false; + const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */ + const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */ + const bool simple_model_selection = true; // FIRST is considered simple + analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true, + simple_model_selection, grey, &suitable_for_sds_reforming); + + return suitable_for_sds_reforming; +} + +static +u32 commonPrefixLength(left_id &r1, left_id &r2) { + if (r1.graph() && r2.graph()) { return commonPrefixLength(*r1.graph(), *r2.graph()); - } else if (r1.castle() && r2.castle()) { - return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); - } - return 0; -} - + } else if (r1.castle() && r2.castle()) { + return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); + } + return 0; +} + namespace { struct MergeKey { MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : @@ -1352,89 +1352,89 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { return eng_verts; } -/** - * This pass attempts to merge prefix/infix engines which share a common set of - * parent vertices. - * - * Engines are greedily merged pairwise by this process based on a priority - * queue keyed off the common prefix length. - * - * Engines are not merged if the lags are not compatible or if it would damage - * the stop alphabet. - * - * Infixes: +/** + * This pass attempts to merge prefix/infix engines which share a common set of + * parent vertices. + * + * Engines are greedily merged pairwise by this process based on a priority + * queue keyed off the common prefix length. + * + * Engines are not merged if the lags are not compatible or if it would damage + * the stop alphabet. + * + * Infixes: * - It is expected that when this is run all infixes are still at the single * top stage as we have not yet merged unrelated infixes together. After * execution, castles may have multiple (but equivalent) tops. - * - * Prefixes: - * - transient prefixes are not considered. - * - with a max width or a narrow start are kept segregated by - * this phase and can only be merged with similar infixes. - * - in block mode, merges are only performed if literal sets are the same. - * - merges are not considered in cases where dot star start state will be - * reformed to optimise a leading repeat. - */ + * + * Prefixes: + * - transient prefixes are not considered. + * - with a max width or a narrow start are kept segregated by + * this phase and can only be merged with similar infixes. + * - in block mode, merges are only performed if literal sets are the same. + * - merges are not considered in cases where dot star start state will be + * reformed to optimise a leading repeat. + */ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { if (!build.cc.grey.mergeRose) { - return; - } + return; + } assert(!hasOrphanedTops(build)); - + RoseGraph &g = build.g; - - DEBUG_PRINTF("-----\n"); - DEBUG_PRINTF("entry\n"); - DEBUG_PRINTF("-----\n"); - + + DEBUG_PRINTF("-----\n"); + DEBUG_PRINTF("entry\n"); + DEBUG_PRINTF("-----\n"); + auto eng_verts = get_eng_verts(g); - + map<MergeKey, vector<left_id>> engine_groups; for (const auto &e : eng_verts) { const left_id &left = e.first; const auto &verts = e.second; - // Only non-transient for the moment. + // Only non-transient for the moment. if (contains(build.transient, left)) { - continue; - } - - // No forced McClellan or Haig infix merges. + continue; + } + + // No forced McClellan or Haig infix merges. if (left.dfa() || left.haig()) { - continue; - } + continue; + } assert(left.graph() || left.castle()); - + if (left.graph()) { const NGHolder &h = *left.graph(); /* we should not have merged yet */ assert(!is_triggered(h) || onlyOneTop(h)); - + if (hasReformedStartDotStar(h, build.cc.grey)) { - continue; // preserve the optimisation of the leading repeat - } + continue; // preserve the optimisation of the leading repeat + } } else { assert(left.castle()); - + if (!build.cc.grey.allowCastle) { DEBUG_PRINTF("castle merging disallowed by greybox\n"); - continue; - } - } - - // We collapse the anchored root into the root vertex when calculating - // parents, so that we can merge differently-anchored prefix roses - // together. (Prompted by UE-2100) - + continue; + } + } + + // We collapse the anchored root into the root vertex when calculating + // parents, so that we can merge differently-anchored prefix roses + // together. (Prompted by UE-2100) + flat_set<RoseVertex> parents; for (RoseVertex v : verts) { insert(&parents, inv_adjacent_vertices_range(v, g)); - } - + } + if (contains(parents, build.anchored_root)) { parents.erase(build.anchored_root); parents.insert(build.root); - } - + } + assert(!parents.empty()); #ifndef _WIN32 @@ -1450,8 +1450,8 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { MergeKey *mk = new MergeKey(left, parents); engine_groups[*mk].push_back(left); #endif - } - + } + vector<vector<left_id>> chunks; for (auto &raw_group : engine_groups | map_values) { chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); @@ -1462,37 +1462,37 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { for (auto &roses : chunks) { if (roses.size() < 2) { - continue; - } + continue; + } // All pairs on the prio queue. u32 tie_breaker = 0; priority_queue<RoseMergeCandidate> pq; for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { left_id r1 = *it; const vector<RoseVertex> &targets_1 = eng_verts[r1]; - + for (auto jt = next(it); jt != ite; ++jt) { left_id r2 = *jt; - + /* we should have already split on engine types and reach */ assert(!r1.castle() == !r2.castle()); assert(!r1.graph() == !r2.graph()); assert(!r1.castle() || r1.castle()->reach() == r2.castle()->reach()); - + const vector<RoseVertex> &targets_2 = eng_verts[r2]; if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { continue; // No point queueing unmergeable cases. } - + u32 cpl = commonPrefixLength(r1, r2); pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); } } - + DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); - + while (!pq.empty()) { left_id r1 = pq.top().r1; left_id r2 = pq.top().r2; @@ -1505,47 +1505,47 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { targets_2)) { insert(&targets_2, targets_2.end(), targets_1); targets_1.clear(); - } - } - } - - DEBUG_PRINTF("-----\n"); - DEBUG_PRINTF("exit\n"); - DEBUG_PRINTF("-----\n"); + } + } + } + + DEBUG_PRINTF("-----\n"); + DEBUG_PRINTF("exit\n"); + DEBUG_PRINTF("-----\n"); assert(!hasOrphanedTops(build)); -} - -namespace { - -/** - * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. - */ -struct DedupeLeftKey { +} + +namespace { + +/** + * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. + */ +struct DedupeLeftKey { DedupeLeftKey(const RoseBuildImpl &build, flat_set<pair<size_t, u32>> preds_in, const left_id &left) : left_hash(hashLeftfix(left)), preds(move(preds_in)), transient(contains(build.transient, left)) { - } - - bool operator<(const DedupeLeftKey &b) const { + } + + bool operator<(const DedupeLeftKey &b) const { return tie(left_hash, preds, transient) < tie(b.left_hash, b.preds, b.transient); - } - -private: - /** Quick hash of the leftfix itself. Must be identical for a given pair of - * graphs if is_equal would return true. */ - size_t left_hash; - - /** For each in-edge, the pair of (parent index, edge top). */ + } + +private: + /** Quick hash of the leftfix itself. Must be identical for a given pair of + * graphs if is_equal would return true. */ + size_t left_hash; + + /** For each in-edge, the pair of (parent index, edge top). */ flat_set<pair<size_t, u32>> preds; /** We don't want to combine transient with non-transient. */ bool transient; -}; - -} // namespace - +}; + +} // namespace + static flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { flat_set<pair<size_t, u32>> preds; @@ -1555,50 +1555,50 @@ flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { return preds; } -/** - * This is a generalisation of \ref dedupeLeftfixes which relaxes two - * restrictions: multiple predecessor roles are allowed and the delay used by - * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes, - * the leftfixes' successor vertices are first grouped to reduce the number of - * potential candidates - the grouping in this case is by the set of - * predecessor roles with their associated top events. For the dedupe to be - * possible, it is required that: - * - * 1. the nfa graphs with respect to the relevant reports are identical - * 2. the nfa graphs are triggered by the same roles with same events (ensured - * by the initial grouping pass) - * 3. all the successor roles of either graph can inspect the combined leftfix - * without advancing the state of the leftfix past the point that another - * successor may want to inspect it; the overlap relationships between the - * involved literals are examined to ensure that this property holds. - * +/** + * This is a generalisation of \ref dedupeLeftfixes which relaxes two + * restrictions: multiple predecessor roles are allowed and the delay used by + * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes, + * the leftfixes' successor vertices are first grouped to reduce the number of + * potential candidates - the grouping in this case is by the set of + * predecessor roles with their associated top events. For the dedupe to be + * possible, it is required that: + * + * 1. the nfa graphs with respect to the relevant reports are identical + * 2. the nfa graphs are triggered by the same roles with same events (ensured + * by the initial grouping pass) + * 3. all the successor roles of either graph can inspect the combined leftfix + * without advancing the state of the leftfix past the point that another + * successor may want to inspect it; the overlap relationships between the + * involved literals are examined to ensure that this property holds. + * * Note: this is unable to dedupe when delayed literals are involved unlike * dedupeLeftfixes. - */ + */ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { - DEBUG_PRINTF("entry\n"); - + DEBUG_PRINTF("entry\n"); + RoseGraph &g = build.g; auto eng_verts = get_eng_verts(g); - + map<DedupeLeftKey, vector<left_id>> engine_groups; for (const auto &e : eng_verts) { const left_id &left = e.first; const auto &verts = e.second; - + /* There should only be one report on an engine as no merges have * happened yet. (aside from eod prefixes) */ if (all_reports(left).size() != 1) { assert(any_of_in(adjacent_vertices_range(verts.front(), g), [&](RoseVertex w) { return g[w].eod_accept; })); - continue; - } - + continue; + } + if (left.haig()) { /* TODO: allow deduping of identical haigs */ - continue; - } - + continue; + } + if (left.graph()) { /* we should not have merged yet */ assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); @@ -1612,52 +1612,52 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { } } engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left); - } - + } + /* We don't bother chunking as we expect deduping to be successful if the * hashes match */ - + for (auto &group : engine_groups | map_values) { DEBUG_PRINTF("group of %zu roses\n", group.size()); if (group.size() < 2) { - continue; - } - + continue; + } + for (auto it = group.begin(); it != group.end(); ++it) { - left_id r1 = *it; + left_id r1 = *it; vector<RoseVertex> &verts1 = eng_verts[r1]; assert(!verts1.empty()); /* cleared engines should be behind us */ - + assert(all_reports(r1).size() == 1); ReportID r1_report = *all_reports(r1).begin(); for (auto jt = next(it); jt != group.end(); ++jt) { - left_id r2 = *jt; + left_id r2 = *jt; vector<RoseVertex> &verts2 = eng_verts[r2]; assert(!verts2.empty()); assert(all_reports(r2).size() == 1); ReportID r2_report = *all_reports(r2).begin(); - + if (!is_equal(r1, r1_report, r2, r2_report)) { - continue; - } - + continue; + } + if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { - continue; - } - - DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph()); - + continue; + } + + DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph()); + // Replace r1 with r2. - - for (auto v : verts1) { - DEBUG_PRINTF("replacing report %u with %u on %zu\n", + + for (auto v : verts1) { + DEBUG_PRINTF("replacing report %u with %u on %zu\n", r2_report, r1_report, g[v].index); - u32 orig_lag = g[v].left.lag; + u32 orig_lag = g[v].left.lag; g[v].left = g[verts2.front()].left; - g[v].left.lag = orig_lag; - } + g[v].left.lag = orig_lag; + } insert(&verts2, verts2.end(), verts1); verts1.clear(); @@ -1665,306 +1665,306 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { /* remove stale entry from transient set, if present */ build.transient.erase(r1); - break; - } - } - } -} - -static + break; + } + } + } +} + +static u32 findUnusedTop(const flat_set<u32> &tops) { - u32 i = 0; - while (contains(tops, i)) { - i++; - } - return i; -} - -// Replace top 't' on edges with new top 'u'. -static -void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { - for (const auto &e : out_edges_range(h.start, h)) { - NFAVertex v = target(e, h); - if (v == h.startDs) { - continue; - } + u32 i = 0; + while (contains(tops, i)) { + i++; + } + return i; +} + +// Replace top 't' on edges with new top 'u'. +static +void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { + for (const auto &e : out_edges_range(h.start, h)) { + NFAVertex v = target(e, h); + if (v == h.startDs) { + continue; + } flat_set<u32> new_tops; for (u32 t : h[e].tops) { DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); new_tops.insert(top_mapping.at(t)); } h[e].tops = std::move(new_tops); - } -} - -static -bool setDistinctTops(NGHolder &h1, const NGHolder &h2, - map<u32, u32> &top_mapping) { + } +} + +static +bool setDistinctTops(NGHolder &h1, const NGHolder &h2, + map<u32, u32> &top_mapping) { flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); - - DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), - tops2.size()); - - // If our tops don't intersect, we're OK to merge with no changes. - if (!has_intersection(tops1, tops2)) { - DEBUG_PRINTF("tops don't intersect\n"); - return true; - } - - // Otherwise, we have to renumber the tops in h1 so that they don't overlap - // with the tops in h2. - top_mapping.clear(); - for (u32 t : tops1) { - u32 u = findUnusedTop(tops2); - DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u); - top_mapping.insert(make_pair(t, u)); - assert(!contains(tops2, u)); - tops2.insert(u); - } - - replaceTops(h1, top_mapping); - return true; -} - -bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, - const deque<RoseVertex> &verts1) { - map<u32, u32> top_mapping; - if (!setDistinctTops(h1, h2, top_mapping)) { - return false; - } - - if (top_mapping.empty()) { - return true; // No remapping necessary. - } - - for (auto v : verts1) { + + DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), + tops2.size()); + + // If our tops don't intersect, we're OK to merge with no changes. + if (!has_intersection(tops1, tops2)) { + DEBUG_PRINTF("tops don't intersect\n"); + return true; + } + + // Otherwise, we have to renumber the tops in h1 so that they don't overlap + // with the tops in h2. + top_mapping.clear(); + for (u32 t : tops1) { + u32 u = findUnusedTop(tops2); + DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u); + top_mapping.insert(make_pair(t, u)); + assert(!contains(tops2, u)); + tops2.insert(u); + } + + replaceTops(h1, top_mapping); + return true; +} + +bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, + const deque<RoseVertex> &verts1) { + map<u32, u32> top_mapping; + if (!setDistinctTops(h1, h2, top_mapping)) { + return false; + } + + if (top_mapping.empty()) { + return true; // No remapping necessary. + } + + for (auto v : verts1) { DEBUG_PRINTF("vertex %zu\n", g[v].index); - assert(!g[v].left.haig); - assert(!g[v].left.dfa); - for (const auto &e : in_edges_range(v, g)) { - u32 t = g[e].rose_top; - DEBUG_PRINTF("t=%u\n", t); - assert(contains(top_mapping, t)); - g[e].rose_top = top_mapping[t]; - DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n", + assert(!g[v].left.haig); + assert(!g[v].left.dfa); + for (const auto &e : in_edges_range(v, g)) { + u32 t = g[e].rose_top; + DEBUG_PRINTF("t=%u\n", t); + assert(contains(top_mapping, t)); + g[e].rose_top = top_mapping[t]; + DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n", g[source(e, g)].index, g[target(e, g)].index, t, - top_mapping[t]); - } - } - - return true; -} - -static -bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, - const deque<RoseVertex> &verts1) { - map<u32, u32> top_mapping; - if (!setDistinctTops(h1, h2, top_mapping)) { - return false; - } - - if (top_mapping.empty()) { - return true; // No remapping necessary. - } - - for (auto v : verts1) { + top_mapping[t]); + } + } + + return true; +} + +static +bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, + const deque<RoseVertex> &verts1) { + map<u32, u32> top_mapping; + if (!setDistinctTops(h1, h2, top_mapping)) { + return false; + } + + if (top_mapping.empty()) { + return true; // No remapping necessary. + } + + for (auto v : verts1) { DEBUG_PRINTF("vertex %zu\n", g[v].index); - u32 t = g[v].suffix.top; - assert(contains(top_mapping, t)); - g[v].suffix.top = top_mapping[t]; - } - - return true; -} - -/** \brief Estimate the number of accel states in the given graph when built as - * an NFA. - * - * (The easiest way to estimate something like this is to actually build it: - * the criteria for NFA acceleration are quite complicated and buried in - * limex_compile.) - */ -static -u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) { - return countAccelStates(h, &tbi.rm, tbi.cc); -} - -static + u32 t = g[v].suffix.top; + assert(contains(top_mapping, t)); + g[v].suffix.top = top_mapping[t]; + } + + return true; +} + +/** \brief Estimate the number of accel states in the given graph when built as + * an NFA. + * + * (The easiest way to estimate something like this is to actually build it: + * the criteria for NFA acceleration are quite complicated and buried in + * limex_compile.) + */ +static +u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) { + return countAccelStates(h, &tbi.rm, tbi.cc); +} + +static void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { - RoseGraph &g = tbi.g; - DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size()); - - // We track the number of accelerable states for each graph in a map and - // only recompute them when the graph is modified. + RoseGraph &g = tbi.g; + DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size()); + + // We track the number of accelerable states for each graph in a map and + // only recompute them when the graph is modified. unordered_map<left_id, u32> accel_count; - for (const auto &rose : roses) { - assert(rose.graph()->kind == NFA_INFIX); - accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); - } - - for (auto it = roses.begin(); it != roses.end(); ++it) { - left_id r1 = *it; - const deque<RoseVertex> &verts1 = roses.vertices(r1); - - deque<left_id> merged; - for (auto jt = next(it); jt != roses.end(); ++jt) { - left_id r2 = *jt; - const deque<RoseVertex> &verts2 = roses.vertices(r2); - - DEBUG_PRINTF("consider merging rose %p (%zu verts) " - "with %p (%zu verts)\n", - r1.graph(), verts1.size(), r2.graph(), verts2.size()); - - u32 accel1 = accel_count[r1]; - if (accel1 >= NFA_MAX_ACCEL_STATES) { - DEBUG_PRINTF("h1 has hit max accel\n"); - break; // next h1 - } - - u32 accel2 = accel_count[r2]; - if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { - DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " - "accel2=%u)\n", - accel1, accel2); - continue; // next h2 - } - - if (!mergeableRoseVertices(tbi, verts1, verts2)) { - DEBUG_PRINTF("not mergeable\n"); - continue; // next h2 - } - - // Attempt to merge h2 into h1. - - NGHolder victim; - cloneHolder(victim, *r2.graph()); - - // Store a copy of the in-edge properties in case we have to roll - // back. - map<RoseEdge, RoseEdgeProps> edge_props; - for (auto v : verts2) { - for (const auto &e : in_edges_range(v, g)) { - edge_props[e] = g[e]; - } - } - - if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) { - DEBUG_PRINTF("can't set distinct tops\n"); - continue; // next h2 - } - - assert(victim.kind == r1.graph()->kind); - assert(!generates_callbacks(*r1.graph())); - if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) { - DEBUG_PRINTF("merge failed\n"); - // Roll back in-edge properties. - for (const auto &m : edge_props) { - g[m.first] = m.second; - } - continue; // next h2 - } - - // Update h2's roses to point to h1 now - shared_ptr<NGHolder> winner = g[verts1.front()].left.graph; - for (auto v : verts2) { - g[v].left.graph = winner; - } - roses.insert(r1, verts2); - - merged.push_back(r2); - - if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) { - DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n", - num_vertices(*winner)); - break; // next h1 - } - - // Update h1's accel count estimate. - accel_count[r1] = estimatedAccelStates(tbi, *winner); - } - - DEBUG_PRINTF("%zu roses merged\n", merged.size()); - roses.erase_all(merged.begin(), merged.end()); - } -} - -/** - * This pass attempts to merge prefix/infix engines with a small number of - * vertices together into larger engines. The engines must not be have a - * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines - * that have compatible lag are greedily grouped such that they remain - * accelerable and only have a small number of states. Note: if a role has an - * infix with multiple trigger vertices, the role will be left unchanged by this - * pass and will remain using an unmerged graph. - */ -void mergeSmallLeftfixes(RoseBuildImpl &tbi) { - DEBUG_PRINTF("entry\n"); - - if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) { - return; - } - - RoseGraph &g = tbi.g; - + for (const auto &rose : roses) { + assert(rose.graph()->kind == NFA_INFIX); + accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); + } + + for (auto it = roses.begin(); it != roses.end(); ++it) { + left_id r1 = *it; + const deque<RoseVertex> &verts1 = roses.vertices(r1); + + deque<left_id> merged; + for (auto jt = next(it); jt != roses.end(); ++jt) { + left_id r2 = *jt; + const deque<RoseVertex> &verts2 = roses.vertices(r2); + + DEBUG_PRINTF("consider merging rose %p (%zu verts) " + "with %p (%zu verts)\n", + r1.graph(), verts1.size(), r2.graph(), verts2.size()); + + u32 accel1 = accel_count[r1]; + if (accel1 >= NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("h1 has hit max accel\n"); + break; // next h1 + } + + u32 accel2 = accel_count[r2]; + if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " + "accel2=%u)\n", + accel1, accel2); + continue; // next h2 + } + + if (!mergeableRoseVertices(tbi, verts1, verts2)) { + DEBUG_PRINTF("not mergeable\n"); + continue; // next h2 + } + + // Attempt to merge h2 into h1. + + NGHolder victim; + cloneHolder(victim, *r2.graph()); + + // Store a copy of the in-edge properties in case we have to roll + // back. + map<RoseEdge, RoseEdgeProps> edge_props; + for (auto v : verts2) { + for (const auto &e : in_edges_range(v, g)) { + edge_props[e] = g[e]; + } + } + + if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) { + DEBUG_PRINTF("can't set distinct tops\n"); + continue; // next h2 + } + + assert(victim.kind == r1.graph()->kind); + assert(!generates_callbacks(*r1.graph())); + if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) { + DEBUG_PRINTF("merge failed\n"); + // Roll back in-edge properties. + for (const auto &m : edge_props) { + g[m.first] = m.second; + } + continue; // next h2 + } + + // Update h2's roses to point to h1 now + shared_ptr<NGHolder> winner = g[verts1.front()].left.graph; + for (auto v : verts2) { + g[v].left.graph = winner; + } + roses.insert(r1, verts2); + + merged.push_back(r2); + + if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) { + DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n", + num_vertices(*winner)); + break; // next h1 + } + + // Update h1's accel count estimate. + accel_count[r1] = estimatedAccelStates(tbi, *winner); + } + + DEBUG_PRINTF("%zu roses merged\n", merged.size()); + roses.erase_all(merged.begin(), merged.end()); + } +} + +/** + * This pass attempts to merge prefix/infix engines with a small number of + * vertices together into larger engines. The engines must not be have a + * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines + * that have compatible lag are greedily grouped such that they remain + * accelerable and only have a small number of states. Note: if a role has an + * infix with multiple trigger vertices, the role will be left unchanged by this + * pass and will remain using an unmerged graph. + */ +void mergeSmallLeftfixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) { + return; + } + + RoseGraph &g = tbi.g; + LeftfixBouquet nfa_leftfixes; - - for (auto v : vertices_range(g)) { - if (!g[v].left) { - continue; - } - - // Handle single-parent infixes only. - if (tbi.isRootSuccessor(v)) { - continue; - } - - left_id left(g[v].left); - - // Only non-transient for the moment. - if (contains(tbi.transient, left)) { - continue; - } - - // No DFAs or Haigs right now. - if (left.dfa() || left.haig()) { - continue; - } - - // Castles are handled by a different pass. - if (left.castle()) { - continue; - } - - assert(left.graph()); - NGHolder &h = *left.graph(); - - /* Ensure that kind on the graph is correct */ - assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); - - if (hasReformedStartDotStar(h, tbi.cc.grey)) { - /* We would lose optimisations of the leading repeat by merging. */ - continue; - } - - // Small roses only. - if (num_vertices(h) > small_rose_threshold(tbi.cc)) { - continue; - } - + + for (auto v : vertices_range(g)) { + if (!g[v].left) { + continue; + } + + // Handle single-parent infixes only. + if (tbi.isRootSuccessor(v)) { + continue; + } + + left_id left(g[v].left); + + // Only non-transient for the moment. + if (contains(tbi.transient, left)) { + continue; + } + + // No DFAs or Haigs right now. + if (left.dfa() || left.haig()) { + continue; + } + + // Castles are handled by a different pass. + if (left.castle()) { + continue; + } + + assert(left.graph()); + NGHolder &h = *left.graph(); + + /* Ensure that kind on the graph is correct */ + assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); + + if (hasReformedStartDotStar(h, tbi.cc.grey)) { + /* We would lose optimisations of the leading repeat by merging. */ + continue; + } + + // Small roses only. + if (num_vertices(h) > small_rose_threshold(tbi.cc)) { + continue; + } + nfa_leftfixes.insert(left, v); - } - + } + deque<LeftfixBouquet> leftfix_groups; chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX); nfa_leftfixes.clear(); DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n", leftfix_groups.size()); - + for (auto &group : leftfix_groups) { - mergeNfaLeftfixes(tbi, group); - } -} - + mergeNfaLeftfixes(tbi, group); + } +} + static void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { @@ -2029,514 +2029,514 @@ void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, * mainly depends on the reach being scanned. */ void mergeCastleLeftfixes(RoseBuildImpl &build) { - DEBUG_PRINTF("entry\n"); - + DEBUG_PRINTF("entry\n"); + if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses || !build.cc.grey.allowCastle) { - return; - } - + return; + } + RoseGraph &g = build.g; - + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; - - for (auto v : vertices_range(g)) { + + for (auto v : vertices_range(g)) { if (!g[v].left.castle) { - continue; - } - + continue; + } + // Handle infixes only. if (build.isRootSuccessor(v)) { - continue; - } - + continue; + } + eng_verts[g[v].left].push_back(v); } - + map<CharReach, vector<left_id>> by_reach; for (const auto &left : eng_verts | map_keys) { by_reach[left.castle()->reach()].push_back(left); } - + vector<vector<left_id>> chunks; for (auto &raw_group : by_reach | map_values) { chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); - } + } by_reach.clear(); - + DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); - + for (auto &chunk : chunks) { mergeCastleChunk(build, chunk, eng_verts); - } -} - -static -void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, - const bool acyclic) { - RoseGraph &g = tbi.g; - - DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size()); - - // If this isn't an acyclic case, we track the number of accelerable states - // for each graph in a map and only recompute them when the graph is - // modified. + } +} + +static +void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, + const bool acyclic) { + RoseGraph &g = tbi.g; + + DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size()); + + // If this isn't an acyclic case, we track the number of accelerable states + // for each graph in a map and only recompute them when the graph is + // modified. unordered_map<suffix_id, u32> accel_count; - if (!acyclic) { - for (const auto &suffix : suffixes) { - assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); - accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph()); - } - } - - for (auto it = suffixes.begin(); it != suffixes.end(); ++it) { - suffix_id s1 = *it; - const deque<RoseVertex> &verts1 = suffixes.vertices(s1); - assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); + if (!acyclic) { + for (const auto &suffix : suffixes) { + assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); + accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph()); + } + } + + for (auto it = suffixes.begin(); it != suffixes.end(); ++it) { + suffix_id s1 = *it; + const deque<RoseVertex> &verts1 = suffixes.vertices(s1); + assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); // Caller should ensure that we don't propose merges of graphs that are // already too big. assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); - deque<suffix_id> merged; - for (auto jt = next(it); jt != suffixes.end(); ++jt) { - suffix_id s2 = *jt; - const deque<RoseVertex> &verts2 = suffixes.vertices(s2); - assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); - - if (!acyclic) { - u32 accel1 = accel_count[s1]; - if (accel1 >= NFA_MAX_ACCEL_STATES) { - DEBUG_PRINTF("h1 has hit max accel\n"); - break; // next h1 - } - - u32 accel2 = accel_count[s2]; - if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { - DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " - "accel2=%u)\n", - accel1, accel2); - continue; // next h2 - } - } - - // Attempt to merge h2 into h1. - - NGHolder victim; - cloneHolder(victim, *s2.graph()); - - // Store a copy of the suffix tops in case we have to roll back. - map<RoseVertex, u32> old_tops; - for (auto v : verts2) { - old_tops[v] = g[v].suffix.top; - } - - if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { - DEBUG_PRINTF("can't set distinct tops\n"); - continue; // next h2 - } - - if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { - DEBUG_PRINTF("merge failed\n"); - // Roll back in-edge properties. - for (const auto &m : old_tops) { - g[m.first].suffix.top = m.second; - } - continue; // next h2 - } - - // Update h2's roses to point to h1 now - shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph; - for (auto v : verts2) { - g[v].suffix.graph = winner; - } - suffixes.insert(s1, verts2); - merged.push_back(s2); - - if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) { - DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n", - num_vertices(*s1.graph())); - break; // next h1 - } - - if (!acyclic) { - // Update h1's accel count estimate. - accel_count[s1] = estimatedAccelStates(tbi, *s1.graph()); - } - } - - DEBUG_PRINTF("%zu suffixes merged\n", merged.size()); - suffixes.erase_all(merged.begin(), merged.end()); - } -} - -/** - * This merge pass combines suffixes from unrelated roles into a single - * suffix with multiple top events in order to distinguish the triggers - * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes - * while mergeSmallSuffixes only considers small suffixes. The merges will - * group roles with suffixes in the graph into clusters of at most - * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the - * suffixes and attempting to pairwise merge it with another member. Merges - * will fail if the result is not implementable, requires too many distinct top - * events, or if it losses the ability to be accelerated. The merge will modify - * the existing suffix graph of the one member (g1), the other member updates - * it graph to refer to g1 instead of its previous graph (g2) and use the new - * tops created. Other roles may have been sharing g1 - these are unaffected by - * the change as the existing top events are left untouched. Other roles using - * g2 are also unaffected as g2 will continue to exist until while it has any - * roles triggering it. - * - * Note: suffixes destined for the LBR are not considered for these merges as - * the LBR can only handle a single repeat and this type of repeat is ideally - * handled outside of an NFA or DFA. - */ -void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { - DEBUG_PRINTF("entry\n"); - - if (!tbi.cc.grey.mergeSuffixes) { - return; - } - - SuffixBouquet suffixes; - - RoseGraph &g = tbi.g; - - for (auto v : vertices_range(g)) { - shared_ptr<NGHolder> h = g[v].suffix.graph; - if (!h || tbi.isInETable(v)) { - continue; - } - - assert(!g[v].suffix.haig); - + deque<suffix_id> merged; + for (auto jt = next(it); jt != suffixes.end(); ++jt) { + suffix_id s2 = *jt; + const deque<RoseVertex> &verts2 = suffixes.vertices(s2); + assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); + + if (!acyclic) { + u32 accel1 = accel_count[s1]; + if (accel1 >= NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("h1 has hit max accel\n"); + break; // next h1 + } + + u32 accel2 = accel_count[s2]; + if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { + DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " + "accel2=%u)\n", + accel1, accel2); + continue; // next h2 + } + } + + // Attempt to merge h2 into h1. + + NGHolder victim; + cloneHolder(victim, *s2.graph()); + + // Store a copy of the suffix tops in case we have to roll back. + map<RoseVertex, u32> old_tops; + for (auto v : verts2) { + old_tops[v] = g[v].suffix.top; + } + + if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { + DEBUG_PRINTF("can't set distinct tops\n"); + continue; // next h2 + } + + if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { + DEBUG_PRINTF("merge failed\n"); + // Roll back in-edge properties. + for (const auto &m : old_tops) { + g[m.first].suffix.top = m.second; + } + continue; // next h2 + } + + // Update h2's roses to point to h1 now + shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph; + for (auto v : verts2) { + g[v].suffix.graph = winner; + } + suffixes.insert(s1, verts2); + merged.push_back(s2); + + if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) { + DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n", + num_vertices(*s1.graph())); + break; // next h1 + } + + if (!acyclic) { + // Update h1's accel count estimate. + accel_count[s1] = estimatedAccelStates(tbi, *s1.graph()); + } + } + + DEBUG_PRINTF("%zu suffixes merged\n", merged.size()); + suffixes.erase_all(merged.begin(), merged.end()); + } +} + +/** + * This merge pass combines suffixes from unrelated roles into a single + * suffix with multiple top events in order to distinguish the triggers + * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes + * while mergeSmallSuffixes only considers small suffixes. The merges will + * group roles with suffixes in the graph into clusters of at most + * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the + * suffixes and attempting to pairwise merge it with another member. Merges + * will fail if the result is not implementable, requires too many distinct top + * events, or if it losses the ability to be accelerated. The merge will modify + * the existing suffix graph of the one member (g1), the other member updates + * it graph to refer to g1 instead of its previous graph (g2) and use the new + * tops created. Other roles may have been sharing g1 - these are unaffected by + * the change as the existing top events are left untouched. Other roles using + * g2 are also unaffected as g2 will continue to exist until while it has any + * roles triggering it. + * + * Note: suffixes destined for the LBR are not considered for these merges as + * the LBR can only handle a single repeat and this type of repeat is ideally + * handled outside of an NFA or DFA. + */ +void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + SuffixBouquet suffixes; + + RoseGraph &g = tbi.g; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h || tbi.isInETable(v)) { + continue; + } + + assert(!g[v].suffix.haig); + if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { - continue; - } - + continue; + } + if (!isAcyclic(*h)) { - continue; - } - - suffixes.insert(g[v].suffix, v); - } - - deque<SuffixBouquet> suff_groups; - chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); - DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(), - suff_groups.size()); - suffixes.clear(); - - for (auto &group : suff_groups) { - mergeSuffixes(tbi, group, true); - } -} - -/** - * This merge pass combines suffixes from unrelated roles into a single - * suffix with multiple top events in order to distinguish the triggers - * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes - * while mergeSmallSuffixes only considers small suffixes. The merges will - * group roles with suffixes in the graph into clusters of at most - * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the - * suffixes and attempting to pairwise merge it with another member. Merges - * will fail if the result is not implementable, requires too many distinct top - * events, or if it losses the ability to be accelerated. The merge will modify - * the existing suffix graph of the one member (g1), the other member updates - * it graph to refer to g1 instead of its previous graph (g2) and use the new - * tops created. Other roles may have been sharing g1 - these are unaffected by - * the change as the existing top events are left untouched. Other roles using - * g2 are also unaffected as g2 will continue to exist until while it has any - * roles triggering it. - * - * Note: suffixes destined for the LBR are not considered for these merges as - * the LBR can only handle a single repeat and this type of repeat is ideally - * handled outside of an NFA or DFA. - */ -void mergeSmallSuffixes(RoseBuildImpl &tbi) { - DEBUG_PRINTF("entry\n"); - - if (!tbi.cc.grey.mergeSuffixes) { - return; - } - - RoseGraph &g = tbi.g; - SuffixBouquet suffixes; - - for (auto v : vertices_range(g)) { - shared_ptr<NGHolder> h = g[v].suffix.graph; - if (!h || tbi.isInETable(v)) { - continue; - } - assert(!g[v].suffix.haig); - - // Leave acyclics out for the moment. - if (isAcyclic(*h)) { - continue; - } - - // Small-ish suffixes only. - if (num_vertices(*h) > 32) { - continue; - } - - suffixes.insert(g[v].suffix, v); - } - - deque<SuffixBouquet> suff_groups; - chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); - DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(), - suff_groups.size()); - suffixes.clear(); - - for (auto &group : suff_groups) { - mergeSuffixes(tbi, group, false); - } -} - -static -void removeDeadOutfixes(vector<OutfixInfo> &outfixes) { - auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); }; - outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead), - end(outfixes)); -} - -static -void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { - assert(!winner.is_dead()); - - winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth); - winner.minWidth = min(winner.minWidth, victim.minWidth); - winner.maxWidth = max(winner.maxWidth, victim.maxWidth); - winner.maxOffset = max(winner.maxOffset, victim.maxOffset); - mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info); - - // This outfix can be ignored in small block mode if both were. The dedupe - // layer at runtime will protect us from extra matches if only one was in - // the small block matcher. - winner.in_sbmatcher &= victim.in_sbmatcher; -} - -static -map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, - const vector<NGHolder *> &nfas) { - map<NGHolder *, NGHolder *> merged; - - vector<NGHolder *> batch; - for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) { - batch.push_back(*it); - assert((*it)->kind == NFA_OUTFIX); - if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { + continue; + } + + suffixes.insert(g[v].suffix, v); + } + + deque<SuffixBouquet> suff_groups; + chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); + DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(), + suff_groups.size()); + suffixes.clear(); + + for (auto &group : suff_groups) { + mergeSuffixes(tbi, group, true); + } +} + +/** + * This merge pass combines suffixes from unrelated roles into a single + * suffix with multiple top events in order to distinguish the triggers + * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes + * while mergeSmallSuffixes only considers small suffixes. The merges will + * group roles with suffixes in the graph into clusters of at most + * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the + * suffixes and attempting to pairwise merge it with another member. Merges + * will fail if the result is not implementable, requires too many distinct top + * events, or if it losses the ability to be accelerated. The merge will modify + * the existing suffix graph of the one member (g1), the other member updates + * it graph to refer to g1 instead of its previous graph (g2) and use the new + * tops created. Other roles may have been sharing g1 - these are unaffected by + * the change as the existing top events are left untouched. Other roles using + * g2 are also unaffected as g2 will continue to exist until while it has any + * roles triggering it. + * + * Note: suffixes destined for the LBR are not considered for these merges as + * the LBR can only handle a single repeat and this type of repeat is ideally + * handled outside of an NFA or DFA. + */ +void mergeSmallSuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + RoseGraph &g = tbi.g; + SuffixBouquet suffixes; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h || tbi.isInETable(v)) { + continue; + } + assert(!g[v].suffix.haig); + + // Leave acyclics out for the moment. + if (isAcyclic(*h)) { + continue; + } + + // Small-ish suffixes only. + if (num_vertices(*h) > 32) { + continue; + } + + suffixes.insert(g[v].suffix, v); + } + + deque<SuffixBouquet> suff_groups; + chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); + DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(), + suff_groups.size()); + suffixes.clear(); + + for (auto &group : suff_groups) { + mergeSuffixes(tbi, group, false); + } +} + +static +void removeDeadOutfixes(vector<OutfixInfo> &outfixes) { + auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); }; + outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead), + end(outfixes)); +} + +static +void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { + assert(!winner.is_dead()); + + winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth); + winner.minWidth = min(winner.minWidth, victim.minWidth); + winner.maxWidth = max(winner.maxWidth, victim.maxWidth); + winner.maxOffset = max(winner.maxOffset, victim.maxOffset); + mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info); + + // This outfix can be ignored in small block mode if both were. The dedupe + // layer at runtime will protect us from extra matches if only one was in + // the small block matcher. + winner.in_sbmatcher &= victim.in_sbmatcher; +} + +static +map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, + const vector<NGHolder *> &nfas) { + map<NGHolder *, NGHolder *> merged; + + vector<NGHolder *> batch; + for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) { + batch.push_back(*it); + assert((*it)->kind == NFA_OUTFIX); + if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); insert(&merged, batch_merged); - batch.clear(); - } - } - - return merged; -} - -static -void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { - DEBUG_PRINTF("merging %zu nfas\n", nfas.size()); - if (nfas.size() < 2) { - return; - } - - vector<OutfixInfo> &outfixes = tbi.outfixes; - - map<NGHolder *, size_t> nfa_mapping; - for (size_t i = 0; i < outfixes.size(); i++) { + batch.clear(); + } + } + + return merged; +} + +static +void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { + DEBUG_PRINTF("merging %zu nfas\n", nfas.size()); + if (nfas.size() < 2) { + return; + } + + vector<OutfixInfo> &outfixes = tbi.outfixes; + + map<NGHolder *, size_t> nfa_mapping; + for (size_t i = 0; i < outfixes.size(); i++) { auto *holder = outfixes[i].holder(); if (holder) { nfa_mapping[holder] = i; - } - } - - map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas); - if (merged.empty()) { - return; - } - - DEBUG_PRINTF("%zu nfas merged\n", merged.size()); - - // Update the outfix info for merged holders. - for (const auto &m : merged) { - OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]); - OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]); - mergeOutfixInfo(winner, victim); - victim.clear(); - } - - removeDeadOutfixes(outfixes); -} - -namespace { -struct MergeMcClellan { - MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in) - : rm(rm_in), grey(grey_in) {} - - unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const { - assert(d1 && d2); - return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); - } - -private: - const ReportManager &rm; - const Grey &grey; -}; - -struct MergeHaig { - explicit MergeHaig(u32 limit_in) : limit(limit_in) {} - - unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1, - const raw_som_dfa *d2) const { - assert(d1 && d2); - return attemptToMergeHaig({d1, d2}, limit); - } - -private: - const u32 limit; //!< state limit for merged result. -}; -} - -/** - * Generic pairwise merge algorithm that can be used for either McClellan - * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge - * operation to a merge functor, which allows the caller to set some policy - * (state limits, etc). - * - * This is currently astonishingly simple and just considers every pair of - * DFAs, slow and steady. We may wish to actually apply a merge ordering - * strategy in the future. - */ -template<class RawDfa, class MergeFunctor> -static -void pairwiseDfaMerge(vector<RawDfa *> &dfas, + } + } + + map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas); + if (merged.empty()) { + return; + } + + DEBUG_PRINTF("%zu nfas merged\n", merged.size()); + + // Update the outfix info for merged holders. + for (const auto &m : merged) { + OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]); + OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]); + mergeOutfixInfo(winner, victim); + victim.clear(); + } + + removeDeadOutfixes(outfixes); +} + +namespace { +struct MergeMcClellan { + MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in) + : rm(rm_in), grey(grey_in) {} + + unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const { + assert(d1 && d2); + return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); + } + +private: + const ReportManager &rm; + const Grey &grey; +}; + +struct MergeHaig { + explicit MergeHaig(u32 limit_in) : limit(limit_in) {} + + unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1, + const raw_som_dfa *d2) const { + assert(d1 && d2); + return attemptToMergeHaig({d1, d2}, limit); + } + +private: + const u32 limit; //!< state limit for merged result. +}; +} + +/** + * Generic pairwise merge algorithm that can be used for either McClellan + * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge + * operation to a merge functor, which allows the caller to set some policy + * (state limits, etc). + * + * This is currently astonishingly simple and just considers every pair of + * DFAs, slow and steady. We may wish to actually apply a merge ordering + * strategy in the future. + */ +template<class RawDfa, class MergeFunctor> +static +void pairwiseDfaMerge(vector<RawDfa *> &dfas, unordered_map<RawDfa *, size_t> &dfa_mapping, - vector<OutfixInfo> &outfixes, - MergeFunctor merge_func) { - DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); - - for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) { - if (!*it) { - continue; - } - for (auto jt = next(it); jt != ite; ++jt) { - if (!*jt) { - continue; - } - - DEBUG_PRINTF("try merge %p and %p\n", *it, *jt); - unique_ptr<RawDfa> rdfa = merge_func(*it, *jt); - if (!rdfa) { - continue; // Merge failed. - } - - DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get()); - OutfixInfo &winner = outfixes.at(dfa_mapping[*it]); - OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]); - assert(!winner.is_dead() && !victim.is_dead()); - - RawDfa *dfa_ptr = rdfa.get(); - dfa_mapping[dfa_ptr] = dfa_mapping[*it]; - dfa_mapping.erase(*it); + vector<OutfixInfo> &outfixes, + MergeFunctor merge_func) { + DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); + + for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) { + if (!*it) { + continue; + } + for (auto jt = next(it); jt != ite; ++jt) { + if (!*jt) { + continue; + } + + DEBUG_PRINTF("try merge %p and %p\n", *it, *jt); + unique_ptr<RawDfa> rdfa = merge_func(*it, *jt); + if (!rdfa) { + continue; // Merge failed. + } + + DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get()); + OutfixInfo &winner = outfixes.at(dfa_mapping[*it]); + OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]); + assert(!winner.is_dead() && !victim.is_dead()); + + RawDfa *dfa_ptr = rdfa.get(); + dfa_mapping[dfa_ptr] = dfa_mapping[*it]; + dfa_mapping.erase(*it); winner.proto = move(rdfa); - - mergeOutfixInfo(winner, victim); - - victim.clear(); - *jt = nullptr; // to be deleted. - *it = dfa_ptr; - } - } -} - -template<class RawDfa, class MergeFunctor> -static -void chunkedDfaMerge(vector<RawDfa *> &dfas, + + mergeOutfixInfo(winner, victim); + + victim.clear(); + *jt = nullptr; // to be deleted. + *it = dfa_ptr; + } + } +} + +template<class RawDfa, class MergeFunctor> +static +void chunkedDfaMerge(vector<RawDfa *> &dfas, unordered_map<RawDfa *, size_t> &dfa_mapping, - vector<OutfixInfo> &outfixes, - MergeFunctor merge_func) { - DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); - - vector<RawDfa *> out_dfas; - vector<RawDfa *> chunk; - for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { - chunk.push_back(*it); - if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { - pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); - out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); - chunk.clear(); - } - } - - // Remove null (merged) DFAs and update vector for subsequent use. - out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr), - out_dfas.end()); - dfas.swap(out_dfas); - DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size()); -} - -static -void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) { - DEBUG_PRINTF("merging %zu nfas\n", dfas.size()); - if (dfas.size() < 2) { - return; - } - - vector<OutfixInfo> &outfixes = tbi.outfixes; - - /* key is index into outfix array as iterators, etc may be invalidated by - * element addition. */ + vector<OutfixInfo> &outfixes, + MergeFunctor merge_func) { + DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); + + vector<RawDfa *> out_dfas; + vector<RawDfa *> chunk; + for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { + chunk.push_back(*it); + if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { + pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); + out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); + chunk.clear(); + } + } + + // Remove null (merged) DFAs and update vector for subsequent use. + out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr), + out_dfas.end()); + dfas.swap(out_dfas); + DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size()); +} + +static +void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) { + DEBUG_PRINTF("merging %zu nfas\n", dfas.size()); + if (dfas.size() < 2) { + return; + } + + vector<OutfixInfo> &outfixes = tbi.outfixes; + + /* key is index into outfix array as iterators, etc may be invalidated by + * element addition. */ unordered_map<raw_dfa *, size_t> dfa_mapping; - for (size_t i = 0; i < outfixes.size(); i++) { + for (size_t i = 0; i < outfixes.size(); i++) { auto *rdfa = outfixes[i].rdfa(); if (rdfa) { dfa_mapping[rdfa] = i; - } - } - - chunkedDfaMerge(dfas, dfa_mapping, outfixes, - MergeMcClellan(tbi.rm, tbi.cc.grey)); - removeDeadOutfixes(outfixes); -} - -static -void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, - const Grey &grey) { - if (!grey.roseMcClellanOutfix) { - return; - } - - DEBUG_PRINTF("merge combo\n"); - - bool seen_dfa = false; - u32 nfa_count = 0; - for (const auto &outfix : tbi.outfixes) { + } + } + + chunkedDfaMerge(dfas, dfa_mapping, outfixes, + MergeMcClellan(tbi.rm, tbi.cc.grey)); + removeDeadOutfixes(outfixes); +} + +static +void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, + const Grey &grey) { + if (!grey.roseMcClellanOutfix) { + return; + } + + DEBUG_PRINTF("merge combo\n"); + + bool seen_dfa = false; + u32 nfa_count = 0; + for (const auto &outfix : tbi.outfixes) { if (outfix.holder()) { - DEBUG_PRINTF("nfa\n"); - nfa_count++; + DEBUG_PRINTF("nfa\n"); + nfa_count++; } else if (outfix.rdfa()) { - DEBUG_PRINTF("dfa\n"); - seen_dfa = true; - } - } - - DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count, - (int)seen_dfa); - if (!nfa_count || (nfa_count == 1 && !seen_dfa)) { - DEBUG_PRINTF("no combo merges possible\n"); - return; - } - - /* key is index into outfix array as iterators, etc may be invalidated by - * element addition. */ - size_t new_dfas = 0; + DEBUG_PRINTF("dfa\n"); + seen_dfa = true; + } + } + + DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count, + (int)seen_dfa); + if (!nfa_count || (nfa_count == 1 && !seen_dfa)) { + DEBUG_PRINTF("no combo merges possible\n"); + return; + } + + /* key is index into outfix array as iterators, etc may be invalidated by + * element addition. */ + size_t new_dfas = 0; unordered_map<raw_dfa *, size_t> dfa_mapping; - vector<raw_dfa *> dfas; - - for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { + vector<raw_dfa *> dfas; + + for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { auto &outfix = *it; assert(!outfix.is_dead()); @@ -2544,75 +2544,75 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, auto *rdfa = outfix.rdfa(); dfas.push_back(rdfa); dfa_mapping[rdfa] = it - tbi.outfixes.begin(); - continue; - } - + continue; + } + if (!outfix.holder()) { - continue; - } - + continue; + } + NGHolder *h = outfix.holder(); - assert(h->kind == NFA_OUTFIX); - auto rdfa = buildMcClellan(*h, &rm, grey); - if (rdfa) { - // Transform this outfix into a DFA and add it to the merge set. - dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); - dfas.push_back(rdfa.get()); + assert(h->kind == NFA_OUTFIX); + auto rdfa = buildMcClellan(*h, &rm, grey); + if (rdfa) { + // Transform this outfix into a DFA and add it to the merge set. + dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); + dfas.push_back(rdfa.get()); outfix.proto = move(rdfa); - new_dfas++; - } - } - - DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas); - - if (!new_dfas) { - /* assumes normal dfas have already been fully merged */ - return; - } - - chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes, - MergeMcClellan(tbi.rm, tbi.cc.grey)); - removeDeadOutfixes(tbi.outfixes); -} - -static -void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas, - u32 limit) { - if (dfas.size() < 2) { - return; - } - - vector<OutfixInfo> &outfixes = tbi.outfixes; - + new_dfas++; + } + } + + DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas); + + if (!new_dfas) { + /* assumes normal dfas have already been fully merged */ + return; + } + + chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes, + MergeMcClellan(tbi.rm, tbi.cc.grey)); + removeDeadOutfixes(tbi.outfixes); +} + +static +void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas, + u32 limit) { + if (dfas.size() < 2) { + return; + } + + vector<OutfixInfo> &outfixes = tbi.outfixes; + unordered_map<raw_som_dfa *, size_t> dfa_mapping; - for (size_t i = 0; i < outfixes.size(); i++) { + for (size_t i = 0; i < outfixes.size(); i++) { auto *haig = outfixes[i].haig(); if (haig) { dfa_mapping[haig] = i; - } - } - - chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit)); - removeDeadOutfixes(outfixes); -} - -/** - * This pass attempts to merge outfix engines together. At this point in time, - * the engine type (NFA, DFA, Haig) has already been decided for each outfix - * and outfixes can only merged with others of their same type. NFAs are merged - * in a priority order based on common prefix length. The other types are - * merged blindly. Engines are merged to the extent that they can still be - * implemented efficiently. - */ -void mergeOutfixes(RoseBuildImpl &tbi) { - if (!tbi.cc.grey.mergeOutfixes) { - return; - } - - vector<NGHolder *> nfas; - vector<raw_dfa *> dfas; - vector<raw_som_dfa *> som_dfas; - + } + } + + chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit)); + removeDeadOutfixes(outfixes); +} + +/** + * This pass attempts to merge outfix engines together. At this point in time, + * the engine type (NFA, DFA, Haig) has already been decided for each outfix + * and outfixes can only merged with others of their same type. NFAs are merged + * in a priority order based on common prefix length. The other types are + * merged blindly. Engines are merged to the extent that they can still be + * implemented efficiently. + */ +void mergeOutfixes(RoseBuildImpl &tbi) { + if (!tbi.cc.grey.mergeOutfixes) { + return; + } + + vector<NGHolder *> nfas; + vector<raw_dfa *> dfas; + vector<raw_som_dfa *> som_dfas; + for (auto &outfix : tbi.outfixes) { if (outfix.rdfa()) { dfas.push_back(outfix.rdfa()); @@ -2620,199 +2620,199 @@ void mergeOutfixes(RoseBuildImpl &tbi) { nfas.push_back(outfix.holder()); } else if (outfix.haig()) { som_dfas.push_back(outfix.haig()); - } - } - - DEBUG_PRINTF("merging %zu dfas, %zu nfas\n", - dfas.size(), nfas.size()); - - mergeOutfixNfas(tbi, nfas); - mergeOutfixDfas(tbi, dfas); - mergeOutfixHaigs(tbi, som_dfas, 255); - mergeOutfixHaigs(tbi, som_dfas, 8192); - mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey); -} - -static -u32 allowedSquashDistance(const CharReach &cr, u32 min_width, - const RoseBuildImpl &tbi, - RoseVertex tv) { - CharReach accept_cr; - DEBUG_PRINTF("hello |cr|=%zu\n", cr.count()); - - const RoseGraph &g = tbi.g; - - /* TODO: inspect further back in the pattern */ - for (u32 lit_id : g[tv].literals) { + } + } + + DEBUG_PRINTF("merging %zu dfas, %zu nfas\n", + dfas.size(), nfas.size()); + + mergeOutfixNfas(tbi, nfas); + mergeOutfixDfas(tbi, dfas); + mergeOutfixHaigs(tbi, som_dfas, 255); + mergeOutfixHaigs(tbi, som_dfas, 8192); + mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey); +} + +static +u32 allowedSquashDistance(const CharReach &cr, u32 min_width, + const RoseBuildImpl &tbi, + RoseVertex tv) { + CharReach accept_cr; + DEBUG_PRINTF("hello |cr|=%zu\n", cr.count()); + + const RoseGraph &g = tbi.g; + + /* TODO: inspect further back in the pattern */ + for (u32 lit_id : g[tv].literals) { const rose_literal_id &lit = tbi.literals.at(lit_id); - if (lit.delay) { - return 0; /* TODO: better */ - } - if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) { - return 0; - } - assert(!lit.s.empty()); - accept_cr |= *lit.s.rbegin(); - } - - DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count()); - - if ((accept_cr & cr).any()) { - DEBUG_PRINTF("no squash\n"); - return 0; /* the accept byte doesn't always kill the puffette. TODO: - * maybe if we look further back we could find something that - * would kill the puffette... */ - } - - DEBUG_PRINTF("allowed to squash %u\n", min_width); - return min_width; -} - -void mergePuffixes(RoseBuildImpl &tbi) { - DEBUG_PRINTF("entry\n"); - - if (!tbi.cc.grey.mergeSuffixes) { - return; - } - - RoseGraph &g = tbi.g; - - for (auto v : vertices_range(g)) { - shared_ptr<NGHolder> h = g[v].suffix.graph; - if (!h) { - continue; - } - assert(!g[v].suffix.haig); - assert(!g[v].eod_accept); - - assert(onlyOneTop(*h)); /* we should not have merged yet */ - bool fixed_depth = g[v].min_offset == g[v].max_offset; - - if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) { - continue; - } - - PureRepeat repeat; - if (!isPureRepeat(*h, repeat)) { - assert(0); - continue; - } - - if (repeat.bounds.min == depth(0)) { - assert(0); // No vacuous puffs allowed. - continue; - } - - assert(repeat.bounds.min.is_finite() && - repeat.bounds.max.is_reachable()); - assert(repeat.bounds.max == repeat.bounds.min || - repeat.bounds.max.is_infinite()); - - const bool unbounded = repeat.bounds.max.is_infinite(); - const set<ReportID> reports = all_reports(*h); - assert(reports.size() == 1); - ReportID report = *reports.begin(); - - DEBUG_PRINTF("got puffette candidate %u:%s\n", report, - repeat.bounds.str().c_str()); - - raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach); - - u32 queue; - u32 event; - tbi.addChainTail(rp, &queue, &event); - u32 squashDistance = - allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); - + if (lit.delay) { + return 0; /* TODO: better */ + } + if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) { + return 0; + } + assert(!lit.s.empty()); + accept_cr |= *lit.s.rbegin(); + } + + DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count()); + + if ((accept_cr & cr).any()) { + DEBUG_PRINTF("no squash\n"); + return 0; /* the accept byte doesn't always kill the puffette. TODO: + * maybe if we look further back we could find something that + * would kill the puffette... */ + } + + DEBUG_PRINTF("allowed to squash %u\n", min_width); + return min_width; +} + +void mergePuffixes(RoseBuildImpl &tbi) { + DEBUG_PRINTF("entry\n"); + + if (!tbi.cc.grey.mergeSuffixes) { + return; + } + + RoseGraph &g = tbi.g; + + for (auto v : vertices_range(g)) { + shared_ptr<NGHolder> h = g[v].suffix.graph; + if (!h) { + continue; + } + assert(!g[v].suffix.haig); + assert(!g[v].eod_accept); + + assert(onlyOneTop(*h)); /* we should not have merged yet */ + bool fixed_depth = g[v].min_offset == g[v].max_offset; + + if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) { + continue; + } + + PureRepeat repeat; + if (!isPureRepeat(*h, repeat)) { + assert(0); + continue; + } + + if (repeat.bounds.min == depth(0)) { + assert(0); // No vacuous puffs allowed. + continue; + } + + assert(repeat.bounds.min.is_finite() && + repeat.bounds.max.is_reachable()); + assert(repeat.bounds.max == repeat.bounds.min || + repeat.bounds.max.is_infinite()); + + const bool unbounded = repeat.bounds.max.is_infinite(); + const set<ReportID> reports = all_reports(*h); + assert(reports.size() == 1); + ReportID report = *reports.begin(); + + DEBUG_PRINTF("got puffette candidate %u:%s\n", report, + repeat.bounds.str().c_str()); + + raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach); + + u32 queue; + u32 event; + tbi.addChainTail(rp, &queue, &event); + u32 squashDistance = + allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); + Report ir = makeMpvTrigger(event, squashDistance); - ReportID id = tbi.rm.getInternalId(ir); - - DEBUG_PRINTF("puffette event q%u t%u\n", queue, event); - g[v].suffix.reset(); - g[v].reports.insert(id); - } -} - -static -void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, - u32 top, const vector<RoseVertex> &verts) { + ReportID id = tbi.rm.getInternalId(ir); + + DEBUG_PRINTF("puffette event q%u t%u\n", queue, event); + g[v].suffix.reset(); + g[v].reports.insert(id); + } +} + +static +void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, + u32 top, const vector<RoseVertex> &verts) { DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top, m.get(), verts.size()); - - for (auto v : verts) { - assert(g[v].suffix.castle); - g[v].suffix.castle = m; - g[v].suffix.top = top; - } -} - -static + + for (auto v : verts) { + assert(g[v].suffix.castle); + g[v].suffix.castle = m; + g[v].suffix.top = top; + } +} + +static void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) { - if (castles.size() <= 1) { - return; - } - + if (castles.size() <= 1) { + return; + } + DEBUG_PRINTF("merging reach %s, %zu elements\n", describeClass(castles[0]->reach()).c_str(), castles.size()); - + CastleProto *m = nullptr; - + for (CastleProto *c : castles) { - assert(c->repeats.size() == 1); // Not yet merged. + assert(c->repeats.size() == 1); // Not yet merged. assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); if (!m) { m = c; - continue; - } - + continue; + } + u32 top = m->merge(c->repeats[0]); if (top == CastleProto::max_occupancy) { - // No room left to merge into 'm'. This one becomes the new 'm'. - DEBUG_PRINTF("next mergee\n"); - m = c; + // No room left to merge into 'm'. This one becomes the new 'm'. + DEBUG_PRINTF("next mergee\n"); + m = c; continue; - } + } updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, eng_verts.at(c)); DEBUG_PRINTF("added to %p, top %u\n", m, top); - } -} - + } +} + void mergeCastleSuffixes(RoseBuildImpl &build) { - DEBUG_PRINTF("entry\n"); - + DEBUG_PRINTF("entry\n"); + if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { - return; - } - + return; + } + unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; map<CharReach, vector<CastleProto *>> by_reach; - + RoseGraph &g = build.g; - - for (auto v : vertices_range(g)) { - if (!g[v].suffix.castle) { - continue; - } - + + for (auto v : vertices_range(g)) { + if (!g[v].suffix.castle) { + continue; + } + CastleProto *c = g[v].suffix.castle.get(); - - if (c->repeats.size() != 1) { - // This code assumes it's the only place merging is being done. - assert(0); - continue; - } - + + if (c->repeats.size() != 1) { + // This code assumes it's the only place merging is being done. + assert(0); + continue; + } + if (!contains(eng_verts, c)) { - by_reach[c->reach()].push_back(c); - } + by_reach[c->reach()].push_back(c); + } eng_verts[c].push_back(v); - } - + } + for (auto &chunk : by_reach | map_values) { mergeCastleSuffixChunk(g, chunk, eng_verts); - } -} - -} // namespace ue2 + } +} + +} // namespace ue2 |