Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.

author: Ivan Blinkov <ivan@blinkov.ru> 2022-02-10 16:47:10 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:10 +0300
commit: 1aeb9a455974457866f78722ad98114bafc84e8a (patch)
tree: e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp
parent: bd5ef432f5cfb1e18851381329d94665a4c22470 (diff)
download: ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz
1 files changed, 3047 insertions, 3047 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp
index 685d452150..4eb4196da1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp
@@ -1,3068 +1,3068 @@
-/*
+/* 
  * Copyright (c) 2016-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *  * Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of Intel Corporation nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "ng_violet.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_dominators.h"
-#include "ng_dump.h"
-#include "ng_equivalence.h"
-#include "ng_holder.h"
-#include "ng_is_equal.h"
-#include "ng_literal_analysis.h"
-#include "ng_limex.h"
-#include "ng_mcclellan.h"
-#include "ng_netflow.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_reports.h"
-#include "ng_split.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "nfa/rdfa.h"
-#include "rose/rose_build.h"
-#include "rose/rose_build_util.h"
-#include "rose/rose_in_dump.h"
-#include "rose/rose_in_graph.h"
-#include "rose/rose_in_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+ * 
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met: 
+ * 
+ *  * Redistributions of source code must retain the above copyright notice, 
+ *    this list of conditions and the following disclaimer. 
+ *  * Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ *  * Neither the name of Intel Corporation nor the names of its contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+ 
+#include "config.h" 
+ 
+#include "ng_violet.h" 
+ 
+#include "grey.h" 
+#include "ng_depth.h" 
+#include "ng_dominators.h" 
+#include "ng_dump.h" 
+#include "ng_equivalence.h" 
+#include "ng_holder.h" 
+#include "ng_is_equal.h" 
+#include "ng_literal_analysis.h" 
+#include "ng_limex.h" 
+#include "ng_mcclellan.h" 
+#include "ng_netflow.h" 
+#include "ng_prune.h" 
+#include "ng_redundancy.h" 
+#include "ng_region.h" 
+#include "ng_reports.h" 
+#include "ng_split.h" 
+#include "ng_util.h" 
+#include "ng_width.h" 
+#include "nfa/rdfa.h" 
+#include "rose/rose_build.h" 
+#include "rose/rose_build_util.h" 
+#include "rose/rose_in_dump.h" 
+#include "rose/rose_in_graph.h" 
+#include "rose/rose_in_util.h" 
+#include "util/compare.h" 
+#include "util/compile_context.h" 
+#include "util/container.h" 
+#include "util/flat_containers.h" 
+#include "util/graph.h" 
+#include "util/graph_range.h" 
 #include "util/graph_small_color_map.h"
-#include "util/insertion_ordered.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/target_info.h"
-#include "util/ue2string.h"
-
-#include <set>
-#include <utility>
-#include <vector>
-#include <boost/dynamic_bitset.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-#define STAGE_DEBUG_PRINTF DEBUG_PRINTF
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-/* createsAnchoredLHS() is conservative as the depths take into account
- * back edges that come from beyond the split point and would be missing after
- * the graph is split. */
-static
-bool createsAnchoredLHS(const NGHolder &g, const vector<NFAVertex> &vv,
-                        const vector<NFAVertexDepth> &depths,
-                        const Grey &grey, depth max_depth = depth::infinity()) {
-    max_depth = min(max_depth, depth(grey.maxAnchoredRegion));
-
-    for (auto v : vv) {
-        /* avoid issues of self loops blowing out depths:
-         *     look at preds, add 1 */
-        for (auto u : inv_adjacent_vertices_range(v, g)) {
-            if (u == v) {
-                continue;
-            }
-
-            u32 idx = g[u].index;
-            assert(idx < depths.size());
-            if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-/* createsTransientLHS() is conservative as the depths take into account
- * back edges that come from beyond the split point and would be missing after
- * the graph is split. */
-static
-bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv,
-                         const vector<NFAVertexDepth> &depths,
-                         const Grey &grey) {
-    const depth max_depth(grey.maxHistoryAvailable);
-
-    for (auto v : vv) {
-        /* avoid issues of self loops blowing out depths:
-         *     look at preds, add 1 */
-        for (auto u : inv_adjacent_vertices_range(v, g)) {
-            if (u == v) {
-                continue;
-            }
-
-            u32 idx = g[u].index;
-            assert(idx < depths.size());
-            if (maxDistFromInit(depths.at(idx)) >= max_depth) {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
+#include "util/insertion_ordered.h" 
+#include "util/make_unique.h" 
+#include "util/order_check.h" 
+#include "util/target_info.h" 
+#include "util/ue2string.h" 
+ 
+#include <set> 
+#include <utility> 
+#include <vector> 
+#include <boost/dynamic_bitset.hpp> 
+#include <boost/range/adaptor/map.hpp> 
+ 
+#define STAGE_DEBUG_PRINTF DEBUG_PRINTF 
+ 
+using namespace std; 
+using boost::adaptors::map_values; 
+ 
+namespace ue2 { 
+ 
+/* createsAnchoredLHS() is conservative as the depths take into account 
+ * back edges that come from beyond the split point and would be missing after 
+ * the graph is split. */ 
+static 
+bool createsAnchoredLHS(const NGHolder &g, const vector<NFAVertex> &vv, 
+                        const vector<NFAVertexDepth> &depths, 
+                        const Grey &grey, depth max_depth = depth::infinity()) { 
+    max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); 
+ 
+    for (auto v : vv) { 
+        /* avoid issues of self loops blowing out depths: 
+         *     look at preds, add 1 */ 
+        for (auto u : inv_adjacent_vertices_range(v, g)) { 
+            if (u == v) { 
+                continue; 
+            } 
+ 
+            u32 idx = g[u].index; 
+            assert(idx < depths.size()); 
+            if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { 
+                return false; 
+            } 
+        } 
+    } 
+    return true; 
+} 
+ 
+/* createsTransientLHS() is conservative as the depths take into account 
+ * back edges that come from beyond the split point and would be missing after 
+ * the graph is split. */ 
+static 
+bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv, 
+                         const vector<NFAVertexDepth> &depths, 
+                         const Grey &grey) { 
+    const depth max_depth(grey.maxHistoryAvailable); 
+ 
+    for (auto v : vv) { 
+        /* avoid issues of self loops blowing out depths: 
+         *     look at preds, add 1 */ 
+        for (auto u : inv_adjacent_vertices_range(v, g)) { 
+            if (u == v) { 
+                continue; 
+            } 
+ 
+            u32 idx = g[u].index; 
+            assert(idx < depths.size()); 
+            if (maxDistFromInit(depths.at(idx)) >= max_depth) { 
+                return false; 
+            } 
+        } 
+    } 
+    return true; 
+} 
+ 
 /**
  * Counts the number of vertices that are reachable from the set of sources
  * given.
  */
-static
+static 
 size_t count_reachable(const NGHolder &g, const vector<NFAVertex> &sources,
                 small_color_map<decltype(get(vertex_index, g))> &color_map) {
     auto null_visitor = boost::make_dfs_visitor(boost::null_visitor());
     color_map.fill(small_color::white);
-
+ 
     for (auto v : sources) {
         boost::depth_first_visit(g, v, null_visitor, color_map);
     }
 
     return color_map.count(small_color::black);
-}
-
-static
-size_t shorter_than(const set<ue2_literal> &s, size_t limit) {
-    return count_if(s.begin(), s.end(),
-                    [&](const ue2_literal &a) { return a.length() < limit; });
-}
-
-static
-u32 min_len(const set<ue2_literal> &s) {
-    u32 rv = ~0U;
-
-    for (const auto &lit : s) {
-        rv = min(rv, (u32)lit.length());
-    }
-
-    return rv;
-}
-
-static
-u32 min_period(const set<ue2_literal> &s) {
-    u32 rv = ~0U;
-
-    for (const auto &lit : s) {
-        rv = min(rv, (u32)minStringPeriod(lit));
-    }
-    DEBUG_PRINTF("min period %u\n", rv);
-    return rv;
-}
-
-namespace {
-/**
- * Information on a cut: vertices and literals.
- */
-struct VertLitInfo {
-    VertLitInfo() {}
-    VertLitInfo(NFAVertex v, const set<ue2_literal> &litlit, bool c_anch,
-                bool c_tran = false)
-        : vv(vector<NFAVertex>(1, v)), lit(litlit), creates_anchored(c_anch),
-          creates_transient(c_tran) {}
-    VertLitInfo(const vector<NFAVertex> &vv_in, const set<ue2_literal> &lit_in,
-                bool c_anch)
-        : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {}
-    vector<NFAVertex> vv;
-    set<ue2_literal> lit;
-
-    bool creates_anchored = false;
-    bool creates_transient = false;
-    double split_ratio = 0;
-};
-
-#define LAST_CHANCE_STRONG_LEN 1
-
-/**
- * \brief Comparator class for comparing different literal cuts.
- */
-class LitComparator {
-public:
-    LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc)
-        : g(g_in), seeking_anchored(sa), seeking_transient(st),
-          last_chance(lc) {}
-    bool operator()(const unique_ptr<VertLitInfo> &a,
-                    const unique_ptr<VertLitInfo> &b) const {
-        assert(a && b);
-
-        if (seeking_anchored) {
-            if (a->creates_anchored != b->creates_anchored) {
-                return a->creates_anchored < b->creates_anchored;
-            }
-        }
-
-        if (seeking_transient) {
-            if (a->creates_transient != b->creates_transient) {
-                return a->creates_transient < b->creates_transient;
-            }
-        }
-
-        if (last_chance
-            && min_len(a->lit) > LAST_CHANCE_STRONG_LEN
-            && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) {
-            DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio,
-                          b->split_ratio);
-            return a->split_ratio < b->split_ratio;
-        }
-
-        u64a score_a = scoreSet(a->lit);
-        u64a score_b = scoreSet(b->lit);
-
-        if (score_a != score_b) {
-            return score_a > score_b;
-        }
-
-        /* vertices should only be in one candidate cut */
-        assert(a->vv == b->vv || a->vv.front() != b->vv.front());
-        return g[a->vv.front()].index > g[b->vv.front()].index;
-    }
-
-private:
-    const NGHolder &g; /**< graph on which cuts are found */
-
-    bool seeking_anchored;
-    bool seeking_transient;
-    bool last_chance;
-};
-}
-
-#define MIN_ANCHORED_LEN 2
-#define MIN_ANCHORED_DESPERATE_LEN 1
-
-/* anchored here means that the cut creates a 'usefully' anchored LHS */
-static
-bool validateRoseLiteralSetQuality(const set<ue2_literal> &s, u64a score,
-                                   bool anchored, u32 min_allowed_floating_len,
-                                   bool desperation, bool last_chance) {
-    u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN
-                                   : min_allowed_floating_len;
-    if (anchored && last_chance) {
-        min_allowed_len = MIN_ANCHORED_DESPERATE_LEN;
-    }
-    if (last_chance) {
-        desperation = true;
-    }
-
-    DEBUG_PRINTF("validating%s set, min allowed len %u\n",
-                 anchored ? " anchored" : "", min_allowed_len);
-
-    assert(none_of(begin(s), end(s), bad_mixed_sensitivity));
-
-    if (score >= NO_LITERAL_AT_EDGE_SCORE) {
-        DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size());
-        return false;
-    }
-
-    assert(!s.empty());
-    if (s.empty()) {
-        DEBUG_PRINTF("candidate is too bad/something went wrong\n");
-        return false;
-    }
-
-    u32 s_min_len = min_len(s);
-    u32 s_min_period = min_period(s);
-    size_t short_count = shorter_than(s, 5);
-
-    DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u"
-                 " short_count=%zu desp=%d\n",
-                 dumpString(*s.begin()).c_str(), score, s.size(), s_min_len,
-                 s_min_period, short_count, (int)desperation);
-
-    bool ok = true;
-
-    if (s.size() > 10 /* magic number is magic */
-        || s_min_len < min_allowed_len
-        || (s_min_period <= 1 && min_allowed_len != 1)) {
-        DEBUG_PRINTF("candidate may be bad\n");
-        ok = false;
-    }
-
-    if (!ok && desperation
-        && s.size() <= 20 /* more magic numbers are magical */
-        && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10))
-        && s_min_period > 1) {
-        DEBUG_PRINTF("candidate is ok\n");
-        ok = true;
-    }
-
-    if (!ok && desperation
-        && s.size() <= 50 /* more magic numbers are magical */
-        && s_min_len > 10
-        && s_min_period > 1) {
-        DEBUG_PRINTF("candidate is ok\n");
-        ok = true;
-    }
-
-    if (!ok) {
-        DEBUG_PRINTF("candidate is too shitty\n");
-        return false;
-    }
-
-    return true;
-}
-
-static UNUSED
-void dumpRoseLiteralSet(const set<ue2_literal> &s) {
-    for (UNUSED const auto &lit : s) {
-        DEBUG_PRINTF("    lit: %s\n", dumpString(lit).c_str());
-    }
-}
-
-static
-void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored,
-                           const vector<NFAVertexDepth> *depths,
-                           const set<NFAVertex> &a_dom,
-                           vector<unique_ptr<VertLitInfo>> *lits,
-                           u32 min_allowed_len, bool desperation,
-                           bool last_chance, const CompileContext &cc) {
-    assert(depths || !seeking_anchored);
-
-    map<NFAVertex, u64a> scores;
-    map<NFAVertex, unique_ptr<VertLitInfo>> lit_info;
-    set<ue2_literal> s;
-
-    for (auto v : a_dom) {
-        s = getLiteralSet(g, v, true); /* RHS will take responsibility for any
-                                          revisits to the target vertex */
-
-        if (s.empty()) {
-            DEBUG_PRINTF("candidate is too shitty\n");
-            continue;
-        }
-
-        DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size());
-        dumpRoseLiteralSet(s);
-        u64a score = sanitizeAndCompressAndScore(s);
-
-        bool anchored = false;
-        if (seeking_anchored) {
-            anchored = createsAnchoredLHS(g, {v}, *depths, cc.grey);
-        }
-
-        if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len,
-                                           desperation, last_chance)) {
-            continue;
-        }
-
-        DEBUG_PRINTF("candidate is a candidate\n");
-        scores[v] = score;
-        lit_info[v] = std::make_unique<VertLitInfo>(v, s, anchored);
-    }
-
-    /* try to filter out cases where appending some characters produces worse
-     * literals. Only bother to look back one byte, TODO make better */
-    for (auto u : a_dom) {
-        if (out_degree(u, g) != 1 || !scores[u]) {
-            continue;
-        }
-        NFAVertex v = *adjacent_vertices(u, g).first;
-        if (contains(scores, v) && scores[v] >= scores[u]) {
-            DEBUG_PRINTF("killing off v as score %llu >= %llu\n",
-                         scores[v], scores[u]);
-            lit_info.erase(v);
-        }
-    }
-
-    lits->reserve(lit_info.size());
-    for (auto &m : lit_info) {
-        lits->push_back(move(m.second));
-    }
-    DEBUG_PRINTF("%zu candidate literal sets\n", lits->size());
-}
-
-static
-void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored,
-                           const vector<NFAVertexDepth> *depths,
-                           const set<NFAVertex> &bad,
-                           const set<NFAVertex> *allowed,
-                           vector<unique_ptr<VertLitInfo>> *lits,
-                           u32 min_allowed_len, bool desperation,
-                           bool last_chance, const CompileContext &cc) {
-    /* This allows us to get more places to split the graph as we are not
-       limited to points where there is a single vertex to split at. */
-
-    assert(depths || !seeking_anchored);
-
-    /* TODO: operate over 'proto-regions' which ignore back edges */
-    auto regions = assignRegions(g);
-
-    set<u32> mand, optional;
-    map<u32, vector<NFAVertex> > exits;
-
-    for (auto v : vertices_range(g)) {
-        u32 region = regions[v];
-        if (is_any_start(v, g) || region == 0) {
-            continue;
-        }
-
-        if (is_any_accept(v, g)) {
-            continue;
-        }
-
-        if (!generates_callbacks(g) && is_match_vertex(v, g)) {
-            /* we cannot leave a completely vacuous infix */
-            continue;
-        }
-
-        if (isRegionExit(g, v, regions)) {
-            exits[region].push_back(v);
-        }
-
-        if (isRegionEntry(g, v, regions)) {
-            // Determine whether this region is mandatory or optional. We only
-            // need to do this check for the first entry vertex we encounter
-            // for this region.
-            if (!contains(mand, region) && !contains(optional, region)) {
-                if (isOptionalRegion(g, v, regions)) {
-                    optional.insert(region);
-                } else {
-                    mand.insert(region);
-                }
-            }
-        }
-    }
-
-    for (const auto &m : exits) {
-        if (false) {
-        next_cand:
-            continue;
-        }
-
-        const u32 region = m.first;
-        const vector<NFAVertex> &vv = m.second;
-        assert(!vv.empty());
-
-        if (!contains(mand, region)) {
-            continue;
-        }
-
-        for (auto v : vv) {
-             /* if an exit is in bad, the region is already handled well
-              * by getSimpleRoseLiterals or is otherwise bad */
-            if (contains(bad, v)) {
-                goto next_cand;
-            }
-            /* if we are only allowed to consider some vertices, v must be in
-               the list; */
-            if (allowed && !contains(*allowed, v)) {
-                goto next_cand;
-            }
-        }
-
-        /* the final region may not have a neat exit. validate that all exits
-         * have an edge to each accept or none do */
-        bool edge_to_a = edge(vv[0], g.accept, g).second;
-        bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second;
-        const auto &reports = g[vv[0]].reports;
-        for (auto v : vv) {
-            if (edge_to_a != edge(v, g.accept, g).second) {
-                goto next_cand;
-            }
-
-            if (edge_to_aeod != edge(v, g.acceptEod, g).second) {
-                goto next_cand;
-            }
-
-            if (g[v].reports != reports) {
-                goto next_cand;
-            }
-        }
-
-        DEBUG_PRINTF("inspecting region %u\n", region);
-        set<ue2_literal> s;
-        for (auto v : vv) {
-            DEBUG_PRINTF("   exit vertex: %zu\n", g[v].index);
-            /* Note: RHS can not be depended on to take all subsequent revisits
-             * to this vertex */
-            set<ue2_literal> ss = getLiteralSet(g, v, false);
-            if (ss.empty()) {
-                DEBUG_PRINTF("candidate is too shitty\n");
-                goto next_cand;
-            }
-            insert(&s, ss);
-        }
-
-        assert(!s.empty());
-
-        DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size());
-        dumpRoseLiteralSet(s);
-        u64a score = sanitizeAndCompressAndScore(s);
-
-        DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size());
-        dumpRoseLiteralSet(s);
-
-        bool anchored = false;
-        if (seeking_anchored) {
-            anchored = createsAnchoredLHS(g, vv, *depths, cc.grey);
-        }
-
-        if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len,
-                                           desperation, last_chance)) {
-            goto next_cand;
-        }
-
-        DEBUG_PRINTF("candidate is a candidate\n");
-        lits->push_back(std::make_unique<VertLitInfo>(vv, s, anchored));
-    }
-}
-
-static
-void filterCandPivots(const NGHolder &g, const set<NFAVertex> &cand_raw,
-                      set<NFAVertex> *out) {
-    for (auto u : cand_raw) {
-        const CharReach &u_cr = g[u].char_reach;
-        if (u_cr.count() > 40) {
-            continue; /* too wide to be plausible */
-        }
-
-        if (u_cr.count() > 2) {
-            /* include u as a candidate as successor may have backed away from
-             * expanding through it */
-            out->insert(u);
-            continue;
-        }
-
-        NFAVertex v = getSoleDestVertex(g, u);
-        if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) {
-            const CharReach &v_cr = g[v].char_reach;
-            if (v_cr.count() == 1 || v_cr.isCaselessChar()) {
-                continue; /* v will always generate better literals */
-            }
-        }
-
-        out->insert(u);
-    }
-}
-
-/* cand_raw is the candidate set before filtering points which are clearly
- * a bad idea. */
-static
-void getCandidatePivots(const NGHolder &g, set<NFAVertex> *cand,
-                        set<NFAVertex> *cand_raw) {
-    auto dominators = findDominators(g);
-
-    set<NFAVertex> accepts;
-
-    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
-        if (is_special(v, g)) {
-            continue;
-        }
-        accepts.insert(v);
-    }
-    for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
-        if (is_special(v, g)) {
-            continue;
-        }
-        accepts.insert(v);
-    }
-
-    assert(!accepts.empty());
-
-    vector<NFAVertex> dom_trace;
-    auto ait = accepts.begin();
-    assert(ait != accepts.end());
-    NFAVertex curr = *ait;
-    while (curr && !is_special(curr, g)) {
-        dom_trace.push_back(curr);
-        curr = dominators[curr];
-    }
-    reverse(dom_trace.begin(), dom_trace.end());
-    for (++ait; ait != accepts.end(); ++ait) {
-        curr = *ait;
-        vector<NFAVertex> dom_trace2;
-        while (curr && !is_special(curr, g)) {
-            dom_trace2.push_back(curr);
-            curr = dominators[curr];
-        }
-        reverse(dom_trace2.begin(), dom_trace2.end());
-        auto dti = dom_trace.begin(), dtie = dom_trace.end();
-        auto dtj = dom_trace2.begin(), dtje = dom_trace2.end();
-        while (dti != dtie && dtj != dtje && *dti == *dtj) {
-            ++dti;
-            ++dtj;
-        }
-        dom_trace.erase(dti, dtie);
-    }
-
-    cand_raw->insert(dom_trace.begin(), dom_trace.end());
-
-    filterCandPivots(g, *cand_raw, cand);
-}
-
-static
-unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
-                                      const vector<NFAVertexDepth> *depths,
-                                      bool for_prefix, u32 min_len,
-                                      const set<NFAVertex> *allowed_cand,
-                                      const set<NFAVertex> *disallowed_cand,
-                                      bool last_chance,
-                                      const CompileContext &cc) {
-    assert(!for_prefix || depths);
-
-    /* look for a single simple split point */
-    set<NFAVertex> cand;
-    set<NFAVertex> cand_raw;
-
-    getCandidatePivots(g, &cand, &cand_raw);
-
-    if (allowed_cand) {
-        set<NFAVertex> cand2;
-        set<NFAVertex> cand2_raw;
-        set_intersection(allowed_cand->begin(), allowed_cand->end(),
-                         cand.begin(), cand.end(),
-                         inserter(cand2, cand2.begin()));
-
-        set_intersection(allowed_cand->begin(), allowed_cand->end(),
-                         cand_raw.begin(), cand_raw.end(),
-                         inserter(cand2_raw, cand2_raw.begin()));
-
-        cand = std::move(cand2);
-        cand_raw = std::move(cand2_raw);
-    }
-    if (disallowed_cand) {
-        DEBUG_PRINTF("%zu disallowed candidates\n", disallowed_cand->size());
-        DEBUG_PRINTF("|old cand| = %zu\n", cand.size());
-        erase_all(&cand, *disallowed_cand);
-        insert(&cand_raw, *disallowed_cand);
-    }
-
-    if (!generates_callbacks(g)) {
-        /* not output exposed so must leave some RHS */
-        for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) {
-            cand.erase(v);
-            cand_raw.erase(v);
-        }
-
-        for (NFAVertex v : inv_adjacent_vertices_range(g.acceptEod, g)) {
-            cand.erase(v);
-            cand_raw.erase(v);
-        }
-    }
-
-    DEBUG_PRINTF("|cand| = %zu\n", cand.size());
-
-    bool seeking_anchored = for_prefix;
-    bool seeking_transient = for_prefix;
-
-    bool desperation = for_prefix && cc.streaming;
-
-    vector<unique_ptr<VertLitInfo>> lits; /**< sorted list of potential cuts */
-
-    getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len,
-                          desperation, last_chance, cc);
-    getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand,
-                          &lits, min_len, desperation, last_chance, cc);
-
-    if (lits.empty()) {
-        DEBUG_PRINTF("no literals found\n");
-        return nullptr;
-    }
-
-    if (seeking_transient) {
-        for (auto &a : lits) {
-            a->creates_transient
-                = createsTransientLHS(g, a->vv, *depths, cc.grey);
-        }
-    }
-
-    if (last_chance) {
+} 
+ 
+static 
+size_t shorter_than(const set<ue2_literal> &s, size_t limit) { 
+    return count_if(s.begin(), s.end(), 
+                    [&](const ue2_literal &a) { return a.length() < limit; }); 
+} 
+ 
+static 
+u32 min_len(const set<ue2_literal> &s) { 
+    u32 rv = ~0U; 
+ 
+    for (const auto &lit : s) { 
+        rv = min(rv, (u32)lit.length()); 
+    } 
+ 
+    return rv; 
+} 
+ 
+static 
+u32 min_period(const set<ue2_literal> &s) { 
+    u32 rv = ~0U; 
+ 
+    for (const auto &lit : s) { 
+        rv = min(rv, (u32)minStringPeriod(lit)); 
+    } 
+    DEBUG_PRINTF("min period %u\n", rv); 
+    return rv; 
+} 
+ 
+namespace { 
+/** 
+ * Information on a cut: vertices and literals. 
+ */ 
+struct VertLitInfo { 
+    VertLitInfo() {} 
+    VertLitInfo(NFAVertex v, const set<ue2_literal> &litlit, bool c_anch, 
+                bool c_tran = false) 
+        : vv(vector<NFAVertex>(1, v)), lit(litlit), creates_anchored(c_anch), 
+          creates_transient(c_tran) {} 
+    VertLitInfo(const vector<NFAVertex> &vv_in, const set<ue2_literal> &lit_in, 
+                bool c_anch) 
+        : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {} 
+    vector<NFAVertex> vv; 
+    set<ue2_literal> lit; 
+ 
+    bool creates_anchored = false; 
+    bool creates_transient = false; 
+    double split_ratio = 0; 
+}; 
+ 
+#define LAST_CHANCE_STRONG_LEN 1 
+ 
+/** 
+ * \brief Comparator class for comparing different literal cuts. 
+ */ 
+class LitComparator { 
+public: 
+    LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc) 
+        : g(g_in), seeking_anchored(sa), seeking_transient(st), 
+          last_chance(lc) {} 
+    bool operator()(const unique_ptr<VertLitInfo> &a, 
+                    const unique_ptr<VertLitInfo> &b) const { 
+        assert(a && b); 
+ 
+        if (seeking_anchored) { 
+            if (a->creates_anchored != b->creates_anchored) { 
+                return a->creates_anchored < b->creates_anchored; 
+            } 
+        } 
+ 
+        if (seeking_transient) { 
+            if (a->creates_transient != b->creates_transient) { 
+                return a->creates_transient < b->creates_transient; 
+            } 
+        } 
+ 
+        if (last_chance 
+            && min_len(a->lit) > LAST_CHANCE_STRONG_LEN 
+            && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) { 
+            DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio, 
+                          b->split_ratio); 
+            return a->split_ratio < b->split_ratio; 
+        } 
+ 
+        u64a score_a = scoreSet(a->lit); 
+        u64a score_b = scoreSet(b->lit); 
+ 
+        if (score_a != score_b) { 
+            return score_a > score_b; 
+        } 
+ 
+        /* vertices should only be in one candidate cut */ 
+        assert(a->vv == b->vv || a->vv.front() != b->vv.front()); 
+        return g[a->vv.front()].index > g[b->vv.front()].index; 
+    } 
+ 
+private: 
+    const NGHolder &g; /**< graph on which cuts are found */ 
+ 
+    bool seeking_anchored; 
+    bool seeking_transient; 
+    bool last_chance; 
+}; 
+} 
+ 
+#define MIN_ANCHORED_LEN 2 
+#define MIN_ANCHORED_DESPERATE_LEN 1 
+ 
+/* anchored here means that the cut creates a 'usefully' anchored LHS */ 
+static 
+bool validateRoseLiteralSetQuality(const set<ue2_literal> &s, u64a score, 
+                                   bool anchored, u32 min_allowed_floating_len, 
+                                   bool desperation, bool last_chance) { 
+    u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN 
+                                   : min_allowed_floating_len; 
+    if (anchored && last_chance) { 
+        min_allowed_len = MIN_ANCHORED_DESPERATE_LEN; 
+    } 
+    if (last_chance) { 
+        desperation = true; 
+    } 
+ 
+    DEBUG_PRINTF("validating%s set, min allowed len %u\n", 
+                 anchored ? " anchored" : "", min_allowed_len); 
+ 
+    assert(none_of(begin(s), end(s), bad_mixed_sensitivity)); 
+ 
+    if (score >= NO_LITERAL_AT_EDGE_SCORE) { 
+        DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); 
+        return false; 
+    } 
+ 
+    assert(!s.empty()); 
+    if (s.empty()) { 
+        DEBUG_PRINTF("candidate is too bad/something went wrong\n"); 
+        return false; 
+    } 
+ 
+    u32 s_min_len = min_len(s); 
+    u32 s_min_period = min_period(s); 
+    size_t short_count = shorter_than(s, 5); 
+ 
+    DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" 
+                 " short_count=%zu desp=%d\n", 
+                 dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, 
+                 s_min_period, short_count, (int)desperation); 
+ 
+    bool ok = true; 
+ 
+    if (s.size() > 10 /* magic number is magic */ 
+        || s_min_len < min_allowed_len 
+        || (s_min_period <= 1 && min_allowed_len != 1)) { 
+        DEBUG_PRINTF("candidate may be bad\n"); 
+        ok = false; 
+    } 
+ 
+    if (!ok && desperation 
+        && s.size() <= 20 /* more magic numbers are magical */ 
+        && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) 
+        && s_min_period > 1) { 
+        DEBUG_PRINTF("candidate is ok\n"); 
+        ok = true; 
+    } 
+ 
+    if (!ok && desperation 
+        && s.size() <= 50 /* more magic numbers are magical */ 
+        && s_min_len > 10 
+        && s_min_period > 1) { 
+        DEBUG_PRINTF("candidate is ok\n"); 
+        ok = true; 
+    } 
+ 
+    if (!ok) { 
+        DEBUG_PRINTF("candidate is too shitty\n"); 
+        return false; 
+    } 
+ 
+    return true; 
+} 
+ 
+static UNUSED 
+void dumpRoseLiteralSet(const set<ue2_literal> &s) { 
+    for (UNUSED const auto &lit : s) { 
+        DEBUG_PRINTF("    lit: %s\n", dumpString(lit).c_str()); 
+    } 
+} 
+ 
+static 
+void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, 
+                           const vector<NFAVertexDepth> *depths, 
+                           const set<NFAVertex> &a_dom, 
+                           vector<unique_ptr<VertLitInfo>> *lits, 
+                           u32 min_allowed_len, bool desperation, 
+                           bool last_chance, const CompileContext &cc) { 
+    assert(depths || !seeking_anchored); 
+ 
+    map<NFAVertex, u64a> scores; 
+    map<NFAVertex, unique_ptr<VertLitInfo>> lit_info; 
+    set<ue2_literal> s; 
+ 
+    for (auto v : a_dom) { 
+        s = getLiteralSet(g, v, true); /* RHS will take responsibility for any 
+                                          revisits to the target vertex */ 
+ 
+        if (s.empty()) { 
+            DEBUG_PRINTF("candidate is too shitty\n"); 
+            continue; 
+        } 
+ 
+        DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); 
+        dumpRoseLiteralSet(s); 
+        u64a score = sanitizeAndCompressAndScore(s); 
+ 
+        bool anchored = false; 
+        if (seeking_anchored) { 
+            anchored = createsAnchoredLHS(g, {v}, *depths, cc.grey); 
+        } 
+ 
+        if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, 
+                                           desperation, last_chance)) { 
+            continue; 
+        } 
+ 
+        DEBUG_PRINTF("candidate is a candidate\n"); 
+        scores[v] = score; 
+        lit_info[v] = std::make_unique<VertLitInfo>(v, s, anchored); 
+    } 
+ 
+    /* try to filter out cases where appending some characters produces worse 
+     * literals. Only bother to look back one byte, TODO make better */ 
+    for (auto u : a_dom) { 
+        if (out_degree(u, g) != 1 || !scores[u]) { 
+            continue; 
+        } 
+        NFAVertex v = *adjacent_vertices(u, g).first; 
+        if (contains(scores, v) && scores[v] >= scores[u]) { 
+            DEBUG_PRINTF("killing off v as score %llu >= %llu\n", 
+                         scores[v], scores[u]); 
+            lit_info.erase(v); 
+        } 
+    } 
+ 
+    lits->reserve(lit_info.size()); 
+    for (auto &m : lit_info) { 
+        lits->push_back(move(m.second)); 
+    } 
+    DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); 
+} 
+ 
+static 
+void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, 
+                           const vector<NFAVertexDepth> *depths, 
+                           const set<NFAVertex> &bad, 
+                           const set<NFAVertex> *allowed, 
+                           vector<unique_ptr<VertLitInfo>> *lits, 
+                           u32 min_allowed_len, bool desperation, 
+                           bool last_chance, const CompileContext &cc) { 
+    /* This allows us to get more places to split the graph as we are not 
+       limited to points where there is a single vertex to split at. */ 
+ 
+    assert(depths || !seeking_anchored); 
+ 
+    /* TODO: operate over 'proto-regions' which ignore back edges */ 
+    auto regions = assignRegions(g); 
+ 
+    set<u32> mand, optional; 
+    map<u32, vector<NFAVertex> > exits; 
+ 
+    for (auto v : vertices_range(g)) { 
+        u32 region = regions[v]; 
+        if (is_any_start(v, g) || region == 0) { 
+            continue; 
+        } 
+ 
+        if (is_any_accept(v, g)) { 
+            continue; 
+        } 
+ 
+        if (!generates_callbacks(g) && is_match_vertex(v, g)) { 
+            /* we cannot leave a completely vacuous infix */ 
+            continue; 
+        } 
+ 
+        if (isRegionExit(g, v, regions)) { 
+            exits[region].push_back(v); 
+        } 
+ 
+        if (isRegionEntry(g, v, regions)) { 
+            // Determine whether this region is mandatory or optional. We only 
+            // need to do this check for the first entry vertex we encounter 
+            // for this region. 
+            if (!contains(mand, region) && !contains(optional, region)) { 
+                if (isOptionalRegion(g, v, regions)) { 
+                    optional.insert(region); 
+                } else { 
+                    mand.insert(region); 
+                } 
+            } 
+        } 
+    } 
+ 
+    for (const auto &m : exits) { 
+        if (false) { 
+        next_cand: 
+            continue; 
+        } 
+ 
+        const u32 region = m.first; 
+        const vector<NFAVertex> &vv = m.second; 
+        assert(!vv.empty()); 
+ 
+        if (!contains(mand, region)) { 
+            continue; 
+        } 
+ 
+        for (auto v : vv) { 
+             /* if an exit is in bad, the region is already handled well 
+              * by getSimpleRoseLiterals or is otherwise bad */ 
+            if (contains(bad, v)) { 
+                goto next_cand; 
+            } 
+            /* if we are only allowed to consider some vertices, v must be in 
+               the list; */ 
+            if (allowed && !contains(*allowed, v)) { 
+                goto next_cand; 
+            } 
+        } 
+ 
+        /* the final region may not have a neat exit. validate that all exits 
+         * have an edge to each accept or none do */ 
+        bool edge_to_a = edge(vv[0], g.accept, g).second; 
+        bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; 
+        const auto &reports = g[vv[0]].reports; 
+        for (auto v : vv) { 
+            if (edge_to_a != edge(v, g.accept, g).second) { 
+                goto next_cand; 
+            } 
+ 
+            if (edge_to_aeod != edge(v, g.acceptEod, g).second) { 
+                goto next_cand; 
+            } 
+ 
+            if (g[v].reports != reports) { 
+                goto next_cand; 
+            } 
+        } 
+ 
+        DEBUG_PRINTF("inspecting region %u\n", region); 
+        set<ue2_literal> s; 
+        for (auto v : vv) { 
+            DEBUG_PRINTF("   exit vertex: %zu\n", g[v].index); 
+            /* Note: RHS can not be depended on to take all subsequent revisits 
+             * to this vertex */ 
+            set<ue2_literal> ss = getLiteralSet(g, v, false); 
+            if (ss.empty()) { 
+                DEBUG_PRINTF("candidate is too shitty\n"); 
+                goto next_cand; 
+            } 
+            insert(&s, ss); 
+        } 
+ 
+        assert(!s.empty()); 
+ 
+        DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); 
+        dumpRoseLiteralSet(s); 
+        u64a score = sanitizeAndCompressAndScore(s); 
+ 
+        DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); 
+        dumpRoseLiteralSet(s); 
+ 
+        bool anchored = false; 
+        if (seeking_anchored) { 
+            anchored = createsAnchoredLHS(g, vv, *depths, cc.grey); 
+        } 
+ 
+        if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, 
+                                           desperation, last_chance)) { 
+            goto next_cand; 
+        } 
+ 
+        DEBUG_PRINTF("candidate is a candidate\n"); 
+        lits->push_back(std::make_unique<VertLitInfo>(vv, s, anchored)); 
+    } 
+} 
+ 
+static 
+void filterCandPivots(const NGHolder &g, const set<NFAVertex> &cand_raw, 
+                      set<NFAVertex> *out) { 
+    for (auto u : cand_raw) { 
+        const CharReach &u_cr = g[u].char_reach; 
+        if (u_cr.count() > 40) { 
+            continue; /* too wide to be plausible */ 
+        } 
+ 
+        if (u_cr.count() > 2) { 
+            /* include u as a candidate as successor may have backed away from 
+             * expanding through it */ 
+            out->insert(u); 
+            continue; 
+        } 
+ 
+        NFAVertex v = getSoleDestVertex(g, u); 
+        if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { 
+            const CharReach &v_cr = g[v].char_reach; 
+            if (v_cr.count() == 1 || v_cr.isCaselessChar()) { 
+                continue; /* v will always generate better literals */ 
+            } 
+        } 
+ 
+        out->insert(u); 
+    } 
+} 
+ 
+/* cand_raw is the candidate set before filtering points which are clearly 
+ * a bad idea. */ 
+static 
+void getCandidatePivots(const NGHolder &g, set<NFAVertex> *cand, 
+                        set<NFAVertex> *cand_raw) { 
+    auto dominators = findDominators(g); 
+ 
+    set<NFAVertex> accepts; 
+ 
+    for (auto v : inv_adjacent_vertices_range(g.accept, g)) { 
+        if (is_special(v, g)) { 
+            continue; 
+        } 
+        accepts.insert(v); 
+    } 
+    for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { 
+        if (is_special(v, g)) { 
+            continue; 
+        } 
+        accepts.insert(v); 
+    } 
+ 
+    assert(!accepts.empty()); 
+ 
+    vector<NFAVertex> dom_trace; 
+    auto ait = accepts.begin(); 
+    assert(ait != accepts.end()); 
+    NFAVertex curr = *ait; 
+    while (curr && !is_special(curr, g)) { 
+        dom_trace.push_back(curr); 
+        curr = dominators[curr]; 
+    } 
+    reverse(dom_trace.begin(), dom_trace.end()); 
+    for (++ait; ait != accepts.end(); ++ait) { 
+        curr = *ait; 
+        vector<NFAVertex> dom_trace2; 
+        while (curr && !is_special(curr, g)) { 
+            dom_trace2.push_back(curr); 
+            curr = dominators[curr]; 
+        } 
+        reverse(dom_trace2.begin(), dom_trace2.end()); 
+        auto dti = dom_trace.begin(), dtie = dom_trace.end(); 
+        auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); 
+        while (dti != dtie && dtj != dtje && *dti == *dtj) { 
+            ++dti; 
+            ++dtj; 
+        } 
+        dom_trace.erase(dti, dtie); 
+    } 
+ 
+    cand_raw->insert(dom_trace.begin(), dom_trace.end()); 
+ 
+    filterCandPivots(g, *cand_raw, cand); 
+} 
+ 
+static 
+unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g, 
+                                      const vector<NFAVertexDepth> *depths, 
+                                      bool for_prefix, u32 min_len, 
+                                      const set<NFAVertex> *allowed_cand, 
+                                      const set<NFAVertex> *disallowed_cand, 
+                                      bool last_chance, 
+                                      const CompileContext &cc) { 
+    assert(!for_prefix || depths); 
+ 
+    /* look for a single simple split point */ 
+    set<NFAVertex> cand; 
+    set<NFAVertex> cand_raw; 
+ 
+    getCandidatePivots(g, &cand, &cand_raw); 
+ 
+    if (allowed_cand) { 
+        set<NFAVertex> cand2; 
+        set<NFAVertex> cand2_raw; 
+        set_intersection(allowed_cand->begin(), allowed_cand->end(), 
+                         cand.begin(), cand.end(), 
+                         inserter(cand2, cand2.begin())); 
+ 
+        set_intersection(allowed_cand->begin(), allowed_cand->end(), 
+                         cand_raw.begin(), cand_raw.end(), 
+                         inserter(cand2_raw, cand2_raw.begin())); 
+ 
+        cand = std::move(cand2); 
+        cand_raw = std::move(cand2_raw); 
+    } 
+    if (disallowed_cand) { 
+        DEBUG_PRINTF("%zu disallowed candidates\n", disallowed_cand->size()); 
+        DEBUG_PRINTF("|old cand| = %zu\n", cand.size()); 
+        erase_all(&cand, *disallowed_cand); 
+        insert(&cand_raw, *disallowed_cand); 
+    } 
+ 
+    if (!generates_callbacks(g)) { 
+        /* not output exposed so must leave some RHS */ 
+        for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) { 
+            cand.erase(v); 
+            cand_raw.erase(v); 
+        } 
+ 
+        for (NFAVertex v : inv_adjacent_vertices_range(g.acceptEod, g)) { 
+            cand.erase(v); 
+            cand_raw.erase(v); 
+        } 
+    } 
+ 
+    DEBUG_PRINTF("|cand| = %zu\n", cand.size()); 
+ 
+    bool seeking_anchored = for_prefix; 
+    bool seeking_transient = for_prefix; 
+ 
+    bool desperation = for_prefix && cc.streaming; 
+ 
+    vector<unique_ptr<VertLitInfo>> lits; /**< sorted list of potential cuts */ 
+ 
+    getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len, 
+                          desperation, last_chance, cc); 
+    getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand, 
+                          &lits, min_len, desperation, last_chance, cc); 
+ 
+    if (lits.empty()) { 
+        DEBUG_PRINTF("no literals found\n"); 
+        return nullptr; 
+    } 
+ 
+    if (seeking_transient) { 
+        for (auto &a : lits) { 
+            a->creates_transient 
+                = createsTransientLHS(g, a->vv, *depths, cc.grey); 
+        } 
+    } 
+ 
+    if (last_chance) { 
         const size_t num_verts = num_vertices(g);
         auto color_map = make_small_color_map(g);
-        for (auto &a : lits) {
+        for (auto &a : lits) { 
             size_t num_reachable = count_reachable(g, a->vv, color_map);
             double ratio = (double)num_reachable / (double)num_verts;
             a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio;
-        }
-    }
-
-    auto cmp = LitComparator(g, seeking_anchored, seeking_transient,
-                             last_chance);
-
-    unique_ptr<VertLitInfo> best = move(lits.back());
-    lits.pop_back();
-    while (!lits.empty()) {
-        if (cmp(best, lits.back())) {
-            best = move(lits.back());
-        }
-        lits.pop_back();
-    }
-
-    DEBUG_PRINTF("best is '%s' %zu a%d t%d\n",
-        dumpString(*best->lit.begin()).c_str(),
-        g[best->vv.front()].index,
-        depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0,
-        depths ? (int)createsTransientLHS(g, best->vv, *depths, cc.grey) : 0);
-
-    return best;
-}
-
-static
-void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ,
-                         bool overhang_ok, flat_set<NFAEdge> &bad) {
-    DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n",
-                 num_vertices(h), succ.length());
-
-    using EdgeSet = boost::dynamic_bitset<>;
-
-    const size_t edge_count = num_edges(h);
-    EdgeSet bad_edges(edge_count);
-
-    unordered_map<NFAVertex, EdgeSet> curr;
-    for (const auto &e : in_edges_range(h.accept, h)) {
-        auto &path_set = curr[source(e, h)];
-        if (path_set.empty()) {
-            path_set.resize(edge_count);
-        }
-        path_set.set(h[e].index);
-    }
-
-    unordered_map<NFAVertex, EdgeSet> next;
-    for (auto it = succ.rbegin(); it != succ.rend(); ++it) {
-        for (const auto &path : curr) {
-            NFAVertex u = path.first;
-            const auto &path_set = path.second;
-            if (u == h.start && overhang_ok) {
-                DEBUG_PRINTF("poisoning early %zu [overhang]\n",
-                             path_set.count());
-                bad_edges |= path_set;
-                continue;
-            }
-            if (overlaps(h[u].char_reach, *it)) {
-                for (const auto &e : in_edges_range(u, h)) {
-                    auto &new_path_set = next[source(e, h)];
-                    if (new_path_set.empty()) {
-                        new_path_set.resize(edge_count);
-                    }
-                    new_path_set |= path_set;
-                    new_path_set.set(h[e].index);
-                }
-            }
-        }
-        DEBUG_PRINTF("succ char matches at %zu paths\n", next.size());
-        assert(overhang_ok || !curr.empty());
-        swap(curr, next);
-        next.clear();
-    }
-
-    assert(overhang_ok || !curr.empty());
-    for (const auto &path : curr) {
-        bad_edges |= path.second;
-        DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count());
-    }
-
-    for (const auto &e : edges_range(h)) {
-        if (bad_edges.test(h[e].index)) {
-            bad.insert(e);
-        }
-    }
-}
-
-static
-void poisonForGoodPrefix(const NGHolder &h,
-                         const vector<NFAVertexDepth> &depths,
-                         flat_set<NFAEdge> &bad, const Grey &grey) {
-    for (const auto &v : vertices_range(h)) {
-        if (!createsAnchoredLHS(h, {v}, depths, grey)
-            && !createsTransientLHS(h, {v}, depths, grey)) {
-            insert(&bad, in_edges_range(v, h));
-        }
-    }
-}
-
-static UNUSED
-bool is_any_accept_type(RoseInVertexType t) {
-    return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD;
-}
-
-static
-flat_set<NFAEdge> poisonEdges(const NGHolder &h,
-                         const vector<NFAVertexDepth> *depths,
-                         const RoseInGraph &vg, const vector<RoseInEdge> &ee,
-                         bool for_prefix, const Grey &grey) {
-    DEBUG_PRINTF("poisoning edges %zu successor edges\n", ee.size());
-
-    /* poison edges covered by successor literal */
-
-    set<pair<ue2_literal, bool> > succs;
-    for (const RoseInEdge &ve : ee) {
-        if (vg[target(ve, vg)].type != RIV_LITERAL) {
-            /* nothing to poison in suffixes/outfixes */
-            assert(generates_callbacks(h));
-            assert(is_any_accept_type(vg[target(ve, vg)].type));
-            continue;
-        }
-        succs.insert({vg[target(ve, vg)].s,
-                    vg[source(ve, vg)].type == RIV_LITERAL});
-
-    }
-
-    DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size());
-
-    flat_set<NFAEdge> bad;
-    for (const auto &p : succs) {
-        poisonFromSuccessor(h, p.first, p.second, bad);
-    }
-
-    /* poison edges which don't significantly improve a prefix */
-
-    if (for_prefix) {
-        poisonForGoodPrefix(h, *depths, bad, grey);
-    }
-
-    return bad;
-}
-
-static
-set<NFAVertex> poisonVertices(const NGHolder &h, const RoseInGraph &vg,
-                              const vector<RoseInEdge> &ee, const Grey &grey) {
-    flat_set<NFAEdge> bad_edges = poisonEdges(h, nullptr, vg, ee, false, grey);
-    set<NFAVertex> bad_vertices;
-    for (const NFAEdge &e : bad_edges) {
-        bad_vertices.insert(target(e, h));
-        DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index,
-                     h[target(e, h)].index);
-    }
-
-    return bad_vertices;
-}
-
-static
-unique_ptr<VertLitInfo> findBestNormalSplit(const NGHolder &g,
-                                            const RoseInGraph &vg,
-                                            const vector<RoseInEdge> &ee,
-                                            const CompileContext &cc) {
-    assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX);
-    set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
-
-    return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength,
-                         nullptr, &bad_vertices, false, cc);
-}
-
-static
-unique_ptr<VertLitInfo> findBestLastChanceSplit(const NGHolder &g,
-                                                const RoseInGraph &vg,
-                                                const vector<RoseInEdge> &ee,
-                                                const CompileContext &cc) {
-    assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX);
-    set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
-
-    return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength,
-                         nullptr, &bad_vertices, true, cc);
-}
-
-static
-unique_ptr<VertLitInfo> findSimplePrefixSplit(const NGHolder &g,
-                                              const CompileContext &cc) {
-    DEBUG_PRINTF("looking for simple prefix split\n");
-    bool anchored = !proper_out_degree(g.startDs, g);
-    NFAVertex u = anchored ? g.start : g.startDs;
-
-    if (out_degree(u, g) != 2) { /* startDs + succ */
-        return nullptr;
-    }
-
-    NFAVertex v = NGHolder::null_vertex();
-    for (NFAVertex t : adjacent_vertices_range(u, g)) {
-        if (t != g.startDs) {
-            assert(!v);
-            v = t;
-        }
-    }
-    assert(v);
-
-    if (!anchored) {
-        if (out_degree(g.start, g) > 2) {
-            return nullptr;
-        }
-        if (out_degree(g.start, g) == 2 && !edge(g.start, v, g).second) {
-            return nullptr;
-        }
-    }
-
-    NFAVertex best_v = NGHolder::null_vertex();
-    ue2_literal best_lit;
-
-    u32 limit = cc.grey.maxHistoryAvailable;
-    if (anchored) {
-        LIMIT_TO_AT_MOST(&limit, cc.grey.maxAnchoredRegion);
-    }
-
-    ue2_literal curr_lit;
-    for (u32 i = 0; i < limit; i++) {
-        const auto &v_cr = g[v].char_reach;
-        if (v_cr.count() == 1 || v_cr.isCaselessChar()) {
-            curr_lit.push_back(v_cr.find_first(), v_cr.isCaselessChar());
-        } else {
-            curr_lit.clear();
-        }
-
-        if (curr_lit.length() > best_lit.length()) {
-            best_lit = curr_lit;
-            best_v = v;
-        }
-
-        if (out_degree(v, g) != 1) {
-            break;
-        }
-        v = *adjacent_vertices(v, g).first;
-    }
-
-    if (best_lit.length() < cc.grey.minRoseLiteralLength) {
-        return nullptr;
-    }
-
-    set<ue2_literal> best_lit_set({best_lit});
-    if (bad_mixed_sensitivity(best_lit)) {
-        sanitizeAndCompressAndScore(best_lit_set);
-    }
-
-    return ue2::make_unique<VertLitInfo>(best_v, best_lit_set, anchored, true);
-}
-
-static
-unique_ptr<VertLitInfo> findBestPrefixSplit(const NGHolder &g,
-                                        const vector<NFAVertexDepth> &depths,
-                                        const RoseInGraph &vg,
-                                        const vector<RoseInEdge> &ee,
-                                        bool last_chance,
-                                        const CompileContext &cc) {
-    assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX);
-    set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
-    auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength,
-                            nullptr, &bad_vertices, last_chance, cc);
-
-    /* large back edges may prevent us identifying anchored or transient cases
-     * properly - use a simple walk instead */
-    if (!rv || !(rv->creates_transient || rv->creates_anchored)) {
-        auto rv2 = findSimplePrefixSplit(g, cc);
-        if (rv2) {
-            return rv2;
-        }
-    }
-
-    return rv;
-}
-
-static
-unique_ptr<VertLitInfo> findBestCleanSplit(const NGHolder &g,
-                                           const CompileContext &cc) {
-    assert(g.kind != NFA_PREFIX);
-    set<NFAVertex> cleanSplits;
-    for (NFAVertex v : vertices_range(g)) {
-        if (!g[v].char_reach.all() || !edge(v, v, g).second) {
-            continue;
-        }
-        insert(&cleanSplits, inv_adjacent_vertices(v, g));
-        cleanSplits.erase(v);
-    }
-    cleanSplits.erase(g.start);
-    if (cleanSplits.empty()) {
-        return nullptr;
-    }
-    return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen,
-                         &cleanSplits, nullptr, false, cc);
-}
-
-static
-bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) {
-    set<NFAVertex> curr, next;
-    curr.insert(g.accept);
-
-    for (auto it = lit.rbegin(); it != lit.rend(); ++it) {
-        next.clear();
-
-        for (auto v : curr) {
-            for (auto u : inv_adjacent_vertices_range(v, g)) {
-                if (u == g.start) {
-                    if (overhang_ok) {
-                        DEBUG_PRINTF("bail\n");
-                        return true;
-                    } else {
-                        continue; /* it is not possible for a lhs literal to
-                                   * overhang the start */
-                    }
-                }
-
-                const CharReach &cr = g[u].char_reach;
-                if (!overlaps(*it, cr)) {
-                     continue;
-                }
-
-                next.insert(u);
-            }
-        }
-
-        curr.swap(next);
-    }
-
-    return !curr.empty();
-}
-
-static
-bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
-                   const vector<RoseInEdge> &ee, const VertLitInfo &split) {
-    const vector<NFAVertex> &splitters = split.vv;
-    assert(!splitters.empty());
-
-    shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
-    shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
-
-    unordered_map<NFAVertex, NFAVertex> lhs_map;
-    unordered_map<NFAVertex, NFAVertex> rhs_map;
-
-    splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map);
-    DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n",
-                 to_string(base_graph.kind).c_str(), num_vertices(base_graph),
-                 to_string(lhs->kind).c_str(), num_vertices(*lhs),
-                 to_string(rhs->kind).c_str(), num_vertices(*rhs));
-
-    bool suffix = generates_callbacks(base_graph);
-
-    if (is_triggered(base_graph)) {
-        /* if we are already guarded, check if the split reduces the size of
-         * the problem before continuing with the split */
-        if (num_vertices(*lhs) >= num_vertices(base_graph)
-            && !(suffix && isVacuous(*rhs))) {
-            DEBUG_PRINTF("split's lhs is no smaller\n");
-            return false;
-        }
-
-        if (num_vertices(*rhs) >= num_vertices(base_graph)) {
-            DEBUG_PRINTF("split's rhs is no smaller\n");
-            return false;
-        }
-    }
-
-    bool do_accept = false;
-    bool do_accept_eod = false;
-    assert(rhs);
-    if (isVacuous(*rhs) && suffix) {
-        if (edge(rhs->start, rhs->accept, *rhs).second) {
-            DEBUG_PRINTF("rhs has a cliche\n");
-            do_accept = true;
-            remove_edge(rhs->start, rhs->accept, *rhs);
-        }
-
-        if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
-            DEBUG_PRINTF("rhs has an eod cliche\n");
-            do_accept_eod = true;
-            remove_edge(rhs->start, rhs->acceptEod, *rhs);
-        }
-
-        renumber_edges(*rhs);
-    }
-
-    /* check if we still have a useful graph left over */
-    bool do_norm = out_degree(rhs->start, *rhs) != 1;
-
-    set<ReportID> splitter_reports;
-    for (auto v : splitters) {
-        insert(&splitter_reports, base_graph[v].reports);
-    }
-
-    /* find the targets of each source vertex; insertion_ordered_map used to
-     * preserve deterministic ordering */
-    insertion_ordered_map<RoseInVertex, vector<RoseInVertex>> images;
-    for (const RoseInEdge &e : ee) {
-        RoseInVertex src = source(e, vg);
-        RoseInVertex dest = target(e, vg);
-        images[src].push_back(dest);
-        remove_edge(e, vg);
-    }
-
-    map<vector<RoseInVertex>, vector<RoseInVertex>> verts_by_image;
-
-    for (const auto &m : images) {
-        const auto &u = m.first;
-        const auto &image = m.second;
-
-        if (contains(verts_by_image, image)) {
-            for (RoseInVertex v : verts_by_image[image]) {
-                add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg);
-            }
-            continue;
-        }
-
-        for (const auto &lit : split.lit) {
-            assert(!bad_mixed_sensitivity(lit));
-
-            /* don't allow overhang in can_match() as literals should
-             * correspond to the edge graph being split; overhanging the graph
-             * would indicate a false path.*/
-            if (!can_match(*lhs, lit, false)) {
-                DEBUG_PRINTF("'%s' did not match lhs\n",
-                             escapeString(lit).c_str());
-                continue;
-            }
-
-            DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str());
-            auto v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
-            add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg);
-
-            /* work out delay later */
-            if (do_accept) {
-                DEBUG_PRINTF("rhs has a cliche\n");
-                auto tt = add_vertex(RoseInVertexProps::makeAccept(
-                                                         splitter_reports), vg);
-                add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
-            }
-
-            if (do_accept_eod) {
-                DEBUG_PRINTF("rhs has an eod cliche\n");
-                auto tt = add_vertex(RoseInVertexProps::makeAcceptEod(
-                                                         splitter_reports), vg);
-                add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
-            }
-
-            if (do_norm) {
-                assert(out_degree(rhs->start, *rhs) > 1);
-                for (RoseInVertex dest : image) {
-                    add_edge(v, dest, RoseInEdgeProps(rhs, 0U), vg);
-                }
-            }
-            verts_by_image[image].push_back(v);
-        }
-    }
-
-    assert(hasCorrectlyNumberedVertices(*rhs));
-    assert(hasCorrectlyNumberedEdges(*rhs));
-    assert(isCorrectlyTopped(*rhs));
-    assert(hasCorrectlyNumberedVertices(*lhs));
-    assert(hasCorrectlyNumberedEdges(*lhs));
-    assert(isCorrectlyTopped(*lhs));
-
-    return true;
-}
-
-#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */
-#define MAX_LEN_2_LITERALS_PER_CUT 3
-
-static
-bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores,
-                           const map<NFAEdge, set<ue2_literal>> &cut_lits,
-                           u32 min_allowed_length) {
-    DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(),
-                 min_allowed_length);
-    if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) {
-        return false;
-    }
-
-    u32 len_2_count = 0;
-
-    for (const auto &cut : cut_lits) {
-        if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) {
-            DEBUG_PRINTF("cut uses a forbidden edge\n");
-            return false;
-        }
-
-        if (min_len(cut.second) < min_allowed_length) {
-            DEBUG_PRINTF("cut uses a bad literal\n");
-            return false;
-        }
-
-        for (const auto &lit : cut.second) {
-            if (lit.length() == 2) {
-                len_2_count++;
-            }
-        }
-    }
-
-    if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) {
-        return false;
-    }
-
-    return true;
-}
-
-static
-void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
-                     const vector<RoseInEdge> &to_cut,
-                     const vector<NFAEdge> &cut,
-                     const map<NFAEdge, set<ue2_literal>> &cut_lits) {
-    DEBUG_PRINTF("splitting %s (%zu vertices)\n", to_string(h.kind).c_str(),
-                 num_vertices(h));
-
-    /* create literal vertices and connect preds */
-    unordered_set<RoseInVertex> done_sources;
-    map<RoseInVertex, vector<pair<RoseInVertex, NFAVertex>>> verts_by_source;
-    for (const RoseInEdge &ve : to_cut) {
-        assert(&h == &*vg[ve].graph);
-        RoseInVertex src = source(ve, vg);
-        if (!done_sources.insert(src).second) {
-            continue; /* already processed */
-        }
-
-        /* iterate over cut for determinism */
-        for (const auto &e : cut) {
-            NFAVertex prev_v = source(e, h);
-            NFAVertex pivot = target(e, h);
-
-            DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
-            unordered_map<NFAVertex, NFAVertex> temp_map;
-            shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
-            splitLHS(h, pivot, new_lhs.get(), &temp_map);
-
-            /* want to cut off paths to pivot from things other than the pivot -
-             * makes a more svelte graphy */
-            clear_in_edges(temp_map[pivot], *new_lhs);
-            NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot],
-                                          *new_lhs);
-            if (is_triggered(h) && prev_v == h.start) {
-                (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP);
-            }
-
-            pruneUseless(*new_lhs, false);
-            renumber_vertices(*new_lhs);
-            renumber_edges(*new_lhs);
-
-            DEBUG_PRINTF("    into lhs %s (%zu vertices)\n",
-                         to_string(new_lhs->kind).c_str(),
-                         num_vertices(*new_lhs));
-
-            assert(hasCorrectlyNumberedVertices(*new_lhs));
-            assert(hasCorrectlyNumberedEdges(*new_lhs));
-            assert(isCorrectlyTopped(*new_lhs));
-
-            const set<ue2_literal> &lits = cut_lits.at(e);
-            for (const auto &lit : lits) {
-                if (!can_match(*new_lhs, lit, is_triggered(h))) {
-                    continue;
-                }
-
-                RoseInVertex v
-                    = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
-
-                /* if this is a prefix/infix an edge directly to accept should
-                 * represent a false path as we have poisoned vertices covered
-                 * by the literals. */
-                if (generates_callbacks(h)) {
-                    if (edge(pivot, h.accept, h).second) {
-                        DEBUG_PRINTF("adding acceptEod\n");
-                        /* literal has a direct connection to accept */
-                        const flat_set<ReportID> &reports = h[pivot].reports;
-                        auto tt = add_vertex(
-                                    RoseInVertexProps::makeAccept(reports), vg);
-                        add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
-                    }
-
-                    if (edge(pivot, h.acceptEod, h).second) {
-                        assert(generates_callbacks(h));
-                        DEBUG_PRINTF("adding acceptEod\n");
-                        /* literal has a direct connection to accept */
-                        const flat_set<ReportID> &reports = h[pivot].reports;
-                        auto tt = add_vertex(
-                                 RoseInVertexProps::makeAcceptEod(reports), vg);
-                        add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
-                    }
-                }
-
-                add_edge(src, v, RoseInEdgeProps(new_lhs, 0), vg);
-                verts_by_source[src].push_back({v, pivot});
-            }
-        }
-    }
-
-    /* wire the literal vertices up to successors */
-    map<vector<NFAVertex>, shared_ptr<NGHolder> > done_rhs;
-    for (const RoseInEdge &ve : to_cut) {
-        RoseInVertex src = source(ve, vg);
-        RoseInVertex dest = target(ve, vg);
-
-        /* iterate over cut for determinism */
-        for (const auto &elem : verts_by_source[src]) {
-            NFAVertex pivot = elem.second;
-            RoseInVertex v = elem.first;
-
-            vector<NFAVertex> adj;
-            insert(&adj, adj.end(), adjacent_vertices(pivot, h));
-            /* we can ignore presence of accept, accepteod in adj as it is best
-               effort */
-
-            if (!contains(done_rhs, adj)) {
-                unordered_map<NFAVertex, NFAVertex> temp_map;
-                shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>();
-                splitRHS(h, adj, new_rhs.get(), &temp_map);
-                remove_edge(new_rhs->start, new_rhs->accept, *new_rhs);
-                remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs);
-                renumber_edges(*new_rhs);
-                DEBUG_PRINTF("    into rhs %s (%zu vertices)\n",
-                             to_string(new_rhs->kind).c_str(),
-                             num_vertices(*new_rhs));
-                done_rhs.emplace(adj, new_rhs);
-                assert(isCorrectlyTopped(*new_rhs));
-            }
-
-            assert(done_rhs[adj].get());
-            shared_ptr<NGHolder> new_rhs = done_rhs[adj];
-
-            assert(hasCorrectlyNumberedVertices(*new_rhs));
-            assert(hasCorrectlyNumberedEdges(*new_rhs));
-            assert(isCorrectlyTopped(*new_rhs));
-
-            if (vg[dest].type == RIV_LITERAL
-                && !can_match(*new_rhs, vg[dest].s, true)) {
-                continue;
-            }
-
-            if (out_degree(new_rhs->start, *new_rhs) != 1) {
-                add_edge(v, dest, RoseInEdgeProps(new_rhs, 0), vg);
-            }
-        }
-
-        remove_edge(ve, vg);
-    }
-}
-
-static
-bool doNetflowCut(NGHolder &h,
-                  const vector<NFAVertexDepth> *depths,
-                  RoseInGraph &vg,
-                  const vector<RoseInEdge> &ee, bool for_prefix,
-                  const Grey &grey, u32 min_allowed_length = 0U) {
-    ENSURE_AT_LEAST(&min_allowed_length, grey.minRoseNetflowLiteralLength);
-
-    DEBUG_PRINTF("doing netflow cut\n");
-    /* TODO: we should really get literals/scores from the full graph as this
-     * allows us to overlap with previous cuts. */
-    assert(!ee.empty());
-    assert(&h == &*vg[ee.front()].graph);
-    assert(!for_prefix || depths);
-
-    if (num_edges(h) > grey.maxRoseNetflowEdges) {
-        /* We have a limit on this because scoring edges and running netflow
-         * gets very slow for big graphs. */
-        DEBUG_PRINTF("too many edges, skipping netflow cut\n");
-        return false;
-    }
-
-    assert(hasCorrectlyNumberedVertices(h));
-    assert(hasCorrectlyNumberedEdges(h));
-
-    auto known_bad = poisonEdges(h, depths, vg, ee, for_prefix, grey);
-
-    /* Step 1: Get scores for all edges */
-    vector<u64a> scores = scoreEdges(h, known_bad); /* scores by edge_index */
-
-    /* Step 2: Find cutset based on scores */
-    vector<NFAEdge> cut = findMinCut(h, scores);
-
-    /* Step 3: Get literals corresponding to cut edges */
-    map<NFAEdge, set<ue2_literal>> cut_lits;
-    for (const auto &e : cut) {
-        set<ue2_literal> lits = getLiteralSet(h, e);
-        sanitizeAndCompressAndScore(lits);
-
-        cut_lits[e] = lits;
-    }
-
-    /* if literals are underlength bail or if it involves a forbidden edge*/
-    if (!checkValidNetflowLits(h, scores, cut_lits, min_allowed_length)) {
-        return false;
-    }
-    DEBUG_PRINTF("splitting\n");
-
-    /* Step 4: Split graph based on cuts */
-    splitEdgesByCut(h, vg, ee, cut, cut_lits);
-
-    return true;
-}
-
-static
-bool deanchorIfNeeded(NGHolder &g) {
-    DEBUG_PRINTF("hi\n");
-    if (proper_out_degree(g.startDs, g)) {
-        return false;
-    }
-
-    /* look for a non-special dot with a loop following start */
-    set<NFAVertex> succ_g;
-    insert(&succ_g, adjacent_vertices(g.start, g));
-    succ_g.erase(g.startDs);
-
-    for (auto v : adjacent_vertices_range(g.start, g)) {
-        DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index,
-                     g[v].char_reach.count());
-
-        if (v == g.startDs || !g[v].char_reach.all()) {
-            continue;
-        }
-
-        set<NFAVertex> succ_v;
-        insert(&succ_v, adjacent_vertices(v, g));
-
-        if (succ_v == succ_g) {
-            DEBUG_PRINTF("found ^.*\n");
-            for (auto succ : adjacent_vertices_range(g.start, g)) {
-                if (succ == g.startDs) {
-                    continue;
-                }
-                add_edge(g.startDs, succ, g);
-            }
-            clear_vertex(v, g);
-            remove_vertex(v, g);
-            renumber_vertices(g);
-            return true;
-        }
-
-        if (succ_g.size() == 1 && hasSelfLoop(v, g)) {
-            DEBUG_PRINTF("found ^.+\n");
-            add_edge(g.startDs, v, g);
-            remove_edge(v, v, g);
-            return true;
-        }
-    }
-
-    return false;
-}
-
-static
-RoseInGraph populateTrivialGraph(const NGHolder &h) {
-    RoseInGraph g;
-    shared_ptr<NGHolder> root_g = cloneHolder(h);
-    bool orig_anch = isAnchored(*root_g);
-    orig_anch |= deanchorIfNeeded(*root_g);
-
-    DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch);
-
-    auto start = add_vertex(RoseInVertexProps::makeStart(orig_anch), g);
-    auto accept = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
-
-    add_edge(start, accept, RoseInEdgeProps(root_g, 0), g);
-
-    return g;
-}
-
-static
-void avoidOutfixes(RoseInGraph &vg, bool last_chance,
-                   const CompileContext &cc) {
-    STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n");
-    assert(num_vertices(vg) == 2);
-    assert(num_edges(vg) == 1);
-
-    RoseInEdge e = *edges(vg).first;
-
-    NGHolder &h = *vg[e].graph;
-    assert(isCorrectlyTopped(h));
-
-    renumber_vertices(h);
-    renumber_edges(h);
-
-    unique_ptr<VertLitInfo>  split = findBestNormalSplit(h, vg, {e}, cc);
-
-    if (split && splitRoseEdge(h, vg, {e}, *split)) {
-        DEBUG_PRINTF("split on simple literal\n");
-        return;
-    }
-
-    if (last_chance) {
-        /* look for a prefix split as it allows us to accept very weak anchored
-         * literals. */
-        auto depths = calcDepths(h);
-
-        split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc);
-
-        if (split && splitRoseEdge(h, vg, {e}, *split)) {
-            DEBUG_PRINTF("split on simple literal\n");
-            return;
-        }
-    }
-
-    doNetflowCut(h, nullptr, vg, {e}, false, cc.grey);
-}
-
-static
-void removeRedundantPrefixes(RoseInGraph &g) {
-    STAGE_DEBUG_PRINTF("REMOVING REDUNDANT PREFIXES\n");
-
-    for (const RoseInEdge &e : edges_range(g)) {
-        RoseInVertex s = source(e, g);
-        RoseInVertex t = target(e, g);
-
-        if (g[s].type != RIV_START || g[t].type != RIV_LITERAL) {
-            continue;
-        }
-
-        if (!g[e].graph) {
-            continue;
-        }
-
-        assert(!g[t].delay);
-        const ue2_literal &lit = g[t].s;
-
-        if (!literalIsWholeGraph(*g[e].graph, lit)) {
-            DEBUG_PRINTF("not whole graph\n");
-            continue;
-        }
-
-        if (!isFloating(*g[e].graph)) {
-            DEBUG_PRINTF("not floating\n");
-            continue;
-        }
-        g[e].graph.reset();
-    }
-}
-
-static
-u32 maxDelay(const CompileContext &cc) {
-    if (!cc.streaming) {
-        return MO_INVALID_IDX;
-    }
-    return cc.grey.maxHistoryAvailable;
-}
-
-static
-void removeRedundantLiteralsFromPrefixes(RoseInGraph &g,
-                                         const CompileContext &cc) {
-    STAGE_DEBUG_PRINTF("REMOVING LITERALS FROM PREFIXES\n");
-
-    vector<RoseInEdge> to_anchor;
-    for (const RoseInEdge &e : edges_range(g)) {
-        RoseInVertex s = source(e, g);
-        RoseInVertex t = target(e, g);
-
-        if (g[s].type != RIV_START && g[s].type != RIV_ANCHORED_START) {
-            continue;
-        }
-
-        if (g[t].type != RIV_LITERAL) {
-            continue;
-        }
-
-        if (!g[e].graph) {
-            continue;
-        }
-
-        if (g[e].graph_lag) {
-            /* already removed redundant parts of literals */
-            continue;
-        }
-
-        if (g[e].dfa) {
-            /* if we removed any more states, we would need to rebuild the
-             * the dfa which can be time consuming. */
-            continue;
-        }
-
-        assert(!g[t].delay);
-        const ue2_literal &lit = g[t].s;
-
-        DEBUG_PRINTF("removing states for literal: %s\n",
-                     dumpString(lit).c_str());
-
-        unique_ptr<NGHolder> h = cloneHolder(*g[e].graph);
-        const u32 max_delay = maxDelay(cc);
-
-        u32 delay = removeTrailingLiteralStates(*h, lit, max_delay,
-                                              false /* can't overhang start */);
-
-        DEBUG_PRINTF("got delay %u (max allowed %u)\n", delay, max_delay);
-
-        if (edge(h->startDs, h->accept, *h).second) {
-            /* we should have delay == lit.length(), but in really complex
-             * cases we may fail to identify that we can remove the whole
-             * graph. Regardless, the fact that sds is wired to accept means the
-             * graph serves no purpose. */
-            DEBUG_PRINTF("whole graph\n");
-            g[e].graph.reset();
-            continue;
-        }
-
-        if (delay == lit.length() && edge(h->start, h->accept, *h).second
-            && num_vertices(*h) == N_SPECIALS) {
-            to_anchor.push_back(e);
-            continue;
-        }
-
-        /* if we got here we should still have an interesting graph */
-        assert(delay == max_delay || num_vertices(*h) > N_SPECIALS);
-
-        if (delay && delay != MO_INVALID_IDX) {
-            DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get());
-
-            g[e].graph = move(h);
-            g[e].graph_lag = delay;
-        }
-    }
-
-    if (!to_anchor.empty()) {
-        RoseInVertex anch = add_vertex(RoseInVertexProps::makeStart(true), g);
-
-        for (RoseInEdge e : to_anchor) {
-            DEBUG_PRINTF("rehoming to anchor\n");
-            RoseInVertex v = target(e, g);
-            add_edge(anch, v, g);
-            remove_edge(e, g);
-        }
-    }
-}
-
-static
-bool isStarCliche(const NGHolder &g) {
-    DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g));
-
-    bool nonspecials_seen = false;
-
-    for (auto v : vertices_range(g)) {
-        if (is_special(v, g)) {
-            continue;
-        }
-
-        if (nonspecials_seen) {
-            return false;
-        }
-        nonspecials_seen = true;
-
-        if (!g[v].char_reach.all()) {
-            return false;
-        }
-
-        if (!hasSelfLoop(v, g)) {
-            return false;
-        }
-        if (!edge(v, g.accept, g).second) {
-            return false;
-        }
-    }
-
-    if (!nonspecials_seen) {
-        return false;
-    }
-
-    if (!edge(g.start, g.accept, g).second) {
-        return false;
-    }
-
-    return true;
-}
-
-static
-void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig,
-                                      const vector<RoseInEdge> &ee,
-                                      const CompileContext &cc) {
-    /* TODO: This could be better by not creating a separate graph for each
-     * successor literal. This would require using distinct report ids and also
-     * taking into account overlap of successor literals. */
-
-    set<ue2_literal> preds;
-    set<ue2_literal> succs;
-    for (const RoseInEdge &e : ee) {
-        RoseInVertex u = source(e, ig);
-        assert(ig[u].type == RIV_LITERAL);
-        assert(!ig[u].delay);
-        preds.insert(ig[u].s);
-
-        RoseInVertex v = target(e, ig);
-        assert(ig[v].type == RIV_LITERAL);
-        assert(!ig[v].delay);
-        succs.insert(ig[v].s);
-
-        if (ig[e].graph_lag) {
-            /* already removed redundant parts of literals */
-            return;
-        }
-
-        assert(!ig[e].dfa);
-    }
-
-    map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */
-
-    for (const ue2_literal &right : succs) {
-        size_t max_overlap = 0;
-        for (const ue2_literal &left : preds) {
-            size_t overlap = maxOverlap(left, right, 0);
-            ENSURE_AT_LEAST(&max_overlap, overlap);
-        }
-
-        u32 max_allowed_delay = right.length() - max_overlap;
-
-        if (cc.streaming) {
-            LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable);
-        }
-
-        if (!max_allowed_delay) {
-            continue;
-        }
-
-        shared_ptr<NGHolder> h_new = cloneHolder(h);
-
-        u32 delay = removeTrailingLiteralStates(*h_new, right,
-                                                max_allowed_delay);
-
-        if (delay == MO_INVALID_IDX) {
-            /* successor literal could not match infix -> ignore false path */
-            assert(0);
-            continue;
-        }
-
-        if (!delay) {
-            /* unable to trim graph --> no point swapping to new holder */
-            continue;
-        }
-
-        assert(isCorrectlyTopped(*h_new));
-        graphs[right] = make_pair(h_new, delay);
-    }
-
-    for (const RoseInEdge &e : ee) {
-        RoseInVertex v = target(e, ig);
-        const ue2_literal &succ = ig[v].s;
-        if (!contains(graphs, succ)) {
-            continue;
-        }
-
-        ig[e].graph = graphs[succ].first;
-        ig[e].graph_lag = graphs[succ].second;
-
-        if (isStarCliche(*ig[e].graph)) {
-            DEBUG_PRINTF("is a X star!\n");
-            ig[e].graph.reset();
-            ig[e].graph_lag = 0;
-        }
-    }
-}
-
-static
-void removeRedundantLiteralsFromInfixes(RoseInGraph &g,
-                                        const CompileContext &cc) {
-    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> infixes;
-
-    for (const RoseInEdge &e : edges_range(g)) {
-        RoseInVertex s = source(e, g);
-        RoseInVertex t = target(e, g);
-
-        if (g[s].type != RIV_LITERAL || g[t].type != RIV_LITERAL) {
-            continue;
-        }
-
-        if (!g[e].graph) {
-            continue;
-        }
-
-        assert(!g[t].delay);
-        if (g[e].dfa) {
-            /* if we removed any more states, we would need to rebuild the
-             * the dfa which can be time consuming. */
-            continue;
-        }
-
-        NGHolder *h = g[e].graph.get();
-        infixes[h].push_back(e);
-    }
-
-    for (const auto &m : infixes) {
-        NGHolder *h = m.first;
-        const auto &edges = m.second;
-        removeRedundantLiteralsFromInfix(*h, g, edges, cc);
-    }
-}
-
-static
-void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) {
-    removeRedundantLiteralsFromPrefixes(g, cc);
-    removeRedundantLiteralsFromInfixes(g, cc);
-}
-
-static
-RoseInVertex getStart(RoseInGraph &vg) {
-    for (RoseInVertex v : vertices_range(vg)) {
-        if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) {
-            return v;
-        }
-    }
-    assert(0);
-    return RoseInGraph::null_vertex();
-}
-
-/**
- * Finds the initial accept vertex created to which suffix/outfixes are
- * attached.
- */
-static
-RoseInVertex getPrimaryAccept(RoseInGraph &vg) {
-    for (RoseInVertex v : vertices_range(vg)) {
-        if (vg[v].type == RIV_ACCEPT && vg[v].reports.empty()) {
-            return v;
-        }
-    }
-    assert(0);
-    return RoseInGraph::null_vertex();
-}
-
-static
-bool willBeTransient(const depth &max_depth, const CompileContext &cc) {
-    if (!cc.streaming) {
-        return max_depth <= depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH);
-    } else {
-        return max_depth <= depth(cc.grey.maxHistoryAvailable + 1);
-    }
-}
-
-static
-bool willBeAnchoredTable(const depth &max_depth, const Grey &grey) {
-    return max_depth <= depth(grey.maxAnchoredRegion);
-}
-
-static
-unique_ptr<NGHolder> make_chain(u32 count) {
-    assert(count);
-
-    auto rv = std::make_unique<NGHolder>(NFA_INFIX);
-
-    NGHolder &h = *rv;
-
-    NFAVertex u = h.start;
-    for (u32 i = 0; i < count; i++) {
-        NFAVertex v = add_vertex(h);
-        h[v].char_reach = CharReach::dot();
-        add_edge(u, v, h);
-        u = v;
-    }
-    h[u].reports.insert(0);
-    add_edge(u, h.accept, h);
-
-    setTops(h);
-
-    return rv;
-}
-
-#define SHORT_TRIGGER_LEN 16
-
-static
-bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg,
-                                  const vector<RoseInEdge> &ee,
-                                  const CompileContext &cc) {
-    /* check max width and literal lengths to see if possible */
-    size_t min_lit = (size_t)~0ULL;
-    for (const RoseInEdge &e : ee) {
-        RoseInVertex v = target(e, vg);
-        LIMIT_TO_AT_MOST(&min_lit, vg[v].s.length());
-    }
-
-    if (min_lit <= SHORT_TRIGGER_LEN || min_lit >= UINT_MAX) {
-        return false;
-    }
-
-    depth max_width = findMaxWidth(h);
-
-    u32 delta = min_lit - SHORT_TRIGGER_LEN;
-
-    if (!willBeTransient(max_width - depth(delta), cc)
-        && !willBeAnchoredTable(max_width - depth(delta), cc.grey)) {
-        return false;
-    }
-
-    DEBUG_PRINTF("candidate for splitting long literal (len %zu)\n", min_lit);
-    DEBUG_PRINTF("delta = %u\n", delta);
-
-    /* try split */
-    map<RoseInVertex, shared_ptr<NGHolder> > graphs;
-    for (const RoseInEdge &e : ee) {
-        RoseInVertex v = target(e, vg);
-
-        shared_ptr<NGHolder> h_new = cloneHolder(h);
-
-        u32 delay = removeTrailingLiteralStates(*h_new, vg[v].s, delta);
-
-        DEBUG_PRINTF("delay %u\n", delay);
-
-        if (delay != delta) {
-            DEBUG_PRINTF("unable to trim literal\n");
-            return false;
-        }
-
-        if (in_degree(v, vg) != 1) {
-            DEBUG_PRINTF("complicated\n");
-            return false;
-        }
-
-        DEBUG_PRINTF("new mw = %u\n", (u32)findMaxWidth(*h_new));
-        assert(willBeTransient(findMaxWidth(*h_new), cc)
-               || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey));
-
-        assert(isCorrectlyTopped(*h_new));
-        graphs[v] = h_new;
-    }
-
-    /* add .{repeats} from prefixes to long literals */
-    for (const RoseInEdge &e : ee) {
-        RoseInVertex s = source(e, vg);
-        RoseInVertex t = target(e, vg);
-
-        remove_edge(e, vg);
-        const ue2_literal &orig_lit = vg[t].s;
-
-        ue2_literal lit(orig_lit.begin(), orig_lit.end() - delta);
-
-        ue2_literal lit2(orig_lit.end() - delta, orig_lit.end());
-
-        assert(lit.length() + delta == orig_lit.length());
-
-        vg[t].s = lit2;
-
-        RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
-        add_edge(s, v, RoseInEdgeProps(graphs[t], 0), vg);
-        add_edge(v, t, RoseInEdgeProps(make_chain(delta), 0), vg);
-    }
-
-    DEBUG_PRINTF("success\n");
-    /* TODO: alter split point to avoid pathological splits */
-    return true;
-}
-
-static
-void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
-                                  u32 delay, const vector<NFAVertex> &preds) {
-    assert(delay <= lit.length());
-    assert(isCorrectlyTopped(g));
-    DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay);
-
-    NFAVertex prev = g.accept;
-    auto it = lit.rbegin();
-    while (delay--) {
-        NFAVertex curr = add_vertex(g);
-        assert(it != lit.rend());
-        g[curr].char_reach = *it;
-        add_edge(curr, prev, g);
-        ++it;
-        prev = curr;
-    }
-
-    for (auto v : preds) {
-        NFAEdge e = add_edge_if_not_present(v, prev, g);
-        if (v == g.start && is_triggered(g)) {
-            g[e].tops.insert(DEFAULT_TOP);
-        }
-    }
-
-    // Every predecessor of accept must have a report.
-    set_report(g, 0);
-
-    renumber_vertices(g);
-    renumber_edges(g);
-    assert(allMatchStatesHaveReports(g));
-    assert(isCorrectlyTopped(g));
-}
-
-static
-void restoreTrailingLiteralStates(NGHolder &g,
-                                  const vector<pair<ue2_literal, u32>> &lits) {
-    vector<NFAVertex> preds;
-    insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g));
-    clear_in_edges(g.accept, g);
-
-    for (auto v : preds) {
-        g[v].reports.clear(); /* clear report from old accepts */
-    }
-
-    for (const auto &p : lits) {
-        const ue2_literal &lit = p.first;
-        u32 delay = p.second;
-
-        restoreTrailingLiteralStates(g, lit, delay, preds);
-    }
-}
-
-static
-bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee,
-                   const CompileContext &cc) {
-    DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h,
-                  num_vertices(h));
-    assert(isCorrectlyTopped(h));
-
-    renumber_vertices(h);
-    renumber_edges(h);
-
-    auto depths = calcDepths(h);
-
-    /* If the reason the prefix is not transient is due to a very long literal
-     * following, we can make it transient by restricting ourselves to using
-     * just the head of the literal. */
-    if (makeTransientFromLongLiteral(h, vg, ee, cc)) {
-        return true;
-    }
-
-    auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc);
-
-    if (split && (split->creates_transient || split->creates_anchored)
-        && splitRoseEdge(h, vg, ee, *split)) {
-        DEBUG_PRINTF("split on simple literal\n");
-        return true;
-    }
-
-    /* large back edges may prevent us identifing anchored or transient cases
-     * properly - use a simple walk instead */
-
-    if (doNetflowCut(h, &depths, vg, ee, true, cc.grey)) {
-        return true;
-    }
-
-    if (split && splitRoseEdge(h, vg, ee, *split)) {
-        /* use the simple split even though it doesn't create a transient
-         * prefix */
-        DEBUG_PRINTF("split on simple literal\n");
-        return true;
-    }
-
-    /* look for netflow cuts which don't produce good prefixes */
-    if (doNetflowCut(h, &depths, vg, ee, false, cc.grey)) {
-        return true;
-    }
-
-    if (ee.size() > 1) {
-        DEBUG_PRINTF("split the prefix apart based on succ literals\n");
-        unordered_map<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> >,
+        } 
+    } 
+ 
+    auto cmp = LitComparator(g, seeking_anchored, seeking_transient, 
+                             last_chance); 
+ 
+    unique_ptr<VertLitInfo> best = move(lits.back()); 
+    lits.pop_back(); 
+    while (!lits.empty()) { 
+        if (cmp(best, lits.back())) { 
+            best = move(lits.back()); 
+        } 
+        lits.pop_back(); 
+    } 
+ 
+    DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", 
+        dumpString(*best->lit.begin()).c_str(), 
+        g[best->vv.front()].index, 
+        depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0, 
+        depths ? (int)createsTransientLHS(g, best->vv, *depths, cc.grey) : 0); 
+ 
+    return best; 
+} 
+ 
+static 
+void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, 
+                         bool overhang_ok, flat_set<NFAEdge> &bad) { 
+    DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n", 
+                 num_vertices(h), succ.length()); 
+ 
+    using EdgeSet = boost::dynamic_bitset<>; 
+ 
+    const size_t edge_count = num_edges(h); 
+    EdgeSet bad_edges(edge_count); 
+ 
+    unordered_map<NFAVertex, EdgeSet> curr; 
+    for (const auto &e : in_edges_range(h.accept, h)) { 
+        auto &path_set = curr[source(e, h)]; 
+        if (path_set.empty()) { 
+            path_set.resize(edge_count); 
+        } 
+        path_set.set(h[e].index); 
+    } 
+ 
+    unordered_map<NFAVertex, EdgeSet> next; 
+    for (auto it = succ.rbegin(); it != succ.rend(); ++it) { 
+        for (const auto &path : curr) { 
+            NFAVertex u = path.first; 
+            const auto &path_set = path.second; 
+            if (u == h.start && overhang_ok) { 
+                DEBUG_PRINTF("poisoning early %zu [overhang]\n", 
+                             path_set.count()); 
+                bad_edges |= path_set; 
+                continue; 
+            } 
+            if (overlaps(h[u].char_reach, *it)) { 
+                for (const auto &e : in_edges_range(u, h)) { 
+                    auto &new_path_set = next[source(e, h)]; 
+                    if (new_path_set.empty()) { 
+                        new_path_set.resize(edge_count); 
+                    } 
+                    new_path_set |= path_set; 
+                    new_path_set.set(h[e].index); 
+                } 
+            } 
+        } 
+        DEBUG_PRINTF("succ char matches at %zu paths\n", next.size()); 
+        assert(overhang_ok || !curr.empty()); 
+        swap(curr, next); 
+        next.clear(); 
+    } 
+ 
+    assert(overhang_ok || !curr.empty()); 
+    for (const auto &path : curr) { 
+        bad_edges |= path.second; 
+        DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count()); 
+    } 
+ 
+    for (const auto &e : edges_range(h)) { 
+        if (bad_edges.test(h[e].index)) { 
+            bad.insert(e); 
+        } 
+    } 
+} 
+ 
+static 
+void poisonForGoodPrefix(const NGHolder &h, 
+                         const vector<NFAVertexDepth> &depths, 
+                         flat_set<NFAEdge> &bad, const Grey &grey) { 
+    for (const auto &v : vertices_range(h)) { 
+        if (!createsAnchoredLHS(h, {v}, depths, grey) 
+            && !createsTransientLHS(h, {v}, depths, grey)) { 
+            insert(&bad, in_edges_range(v, h)); 
+        } 
+    } 
+} 
+ 
+static UNUSED 
+bool is_any_accept_type(RoseInVertexType t) { 
+    return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD; 
+} 
+ 
+static 
+flat_set<NFAEdge> poisonEdges(const NGHolder &h, 
+                         const vector<NFAVertexDepth> *depths, 
+                         const RoseInGraph &vg, const vector<RoseInEdge> &ee, 
+                         bool for_prefix, const Grey &grey) { 
+    DEBUG_PRINTF("poisoning edges %zu successor edges\n", ee.size()); 
+ 
+    /* poison edges covered by successor literal */ 
+ 
+    set<pair<ue2_literal, bool> > succs; 
+    for (const RoseInEdge &ve : ee) { 
+        if (vg[target(ve, vg)].type != RIV_LITERAL) { 
+            /* nothing to poison in suffixes/outfixes */ 
+            assert(generates_callbacks(h)); 
+            assert(is_any_accept_type(vg[target(ve, vg)].type)); 
+            continue; 
+        } 
+        succs.insert({vg[target(ve, vg)].s, 
+                    vg[source(ve, vg)].type == RIV_LITERAL}); 
+ 
+    } 
+ 
+    DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size()); 
+ 
+    flat_set<NFAEdge> bad; 
+    for (const auto &p : succs) { 
+        poisonFromSuccessor(h, p.first, p.second, bad); 
+    } 
+ 
+    /* poison edges which don't significantly improve a prefix */ 
+ 
+    if (for_prefix) { 
+        poisonForGoodPrefix(h, *depths, bad, grey); 
+    } 
+ 
+    return bad; 
+} 
+ 
+static 
+set<NFAVertex> poisonVertices(const NGHolder &h, const RoseInGraph &vg, 
+                              const vector<RoseInEdge> &ee, const Grey &grey) { 
+    flat_set<NFAEdge> bad_edges = poisonEdges(h, nullptr, vg, ee, false, grey); 
+    set<NFAVertex> bad_vertices; 
+    for (const NFAEdge &e : bad_edges) { 
+        bad_vertices.insert(target(e, h)); 
+        DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index, 
+                     h[target(e, h)].index); 
+    } 
+ 
+    return bad_vertices; 
+} 
+ 
+static 
+unique_ptr<VertLitInfo> findBestNormalSplit(const NGHolder &g, 
+                                            const RoseInGraph &vg, 
+                                            const vector<RoseInEdge> &ee, 
+                                            const CompileContext &cc) { 
+    assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); 
+    set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); 
+ 
+    return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, 
+                         nullptr, &bad_vertices, false, cc); 
+} 
+ 
+static 
+unique_ptr<VertLitInfo> findBestLastChanceSplit(const NGHolder &g, 
+                                                const RoseInGraph &vg, 
+                                                const vector<RoseInEdge> &ee, 
+                                                const CompileContext &cc) { 
+    assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); 
+    set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); 
+ 
+    return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, 
+                         nullptr, &bad_vertices, true, cc); 
+} 
+ 
+static 
+unique_ptr<VertLitInfo> findSimplePrefixSplit(const NGHolder &g, 
+                                              const CompileContext &cc) { 
+    DEBUG_PRINTF("looking for simple prefix split\n"); 
+    bool anchored = !proper_out_degree(g.startDs, g); 
+    NFAVertex u = anchored ? g.start : g.startDs; 
+ 
+    if (out_degree(u, g) != 2) { /* startDs + succ */ 
+        return nullptr; 
+    } 
+ 
+    NFAVertex v = NGHolder::null_vertex(); 
+    for (NFAVertex t : adjacent_vertices_range(u, g)) { 
+        if (t != g.startDs) { 
+            assert(!v); 
+            v = t; 
+        } 
+    } 
+    assert(v); 
+ 
+    if (!anchored) { 
+        if (out_degree(g.start, g) > 2) { 
+            return nullptr; 
+        } 
+        if (out_degree(g.start, g) == 2 && !edge(g.start, v, g).second) { 
+            return nullptr; 
+        } 
+    } 
+ 
+    NFAVertex best_v = NGHolder::null_vertex(); 
+    ue2_literal best_lit; 
+ 
+    u32 limit = cc.grey.maxHistoryAvailable; 
+    if (anchored) { 
+        LIMIT_TO_AT_MOST(&limit, cc.grey.maxAnchoredRegion); 
+    } 
+ 
+    ue2_literal curr_lit; 
+    for (u32 i = 0; i < limit; i++) { 
+        const auto &v_cr = g[v].char_reach; 
+        if (v_cr.count() == 1 || v_cr.isCaselessChar()) { 
+            curr_lit.push_back(v_cr.find_first(), v_cr.isCaselessChar()); 
+        } else { 
+            curr_lit.clear(); 
+        } 
+ 
+        if (curr_lit.length() > best_lit.length()) { 
+            best_lit = curr_lit; 
+            best_v = v; 
+        } 
+ 
+        if (out_degree(v, g) != 1) { 
+            break; 
+        } 
+        v = *adjacent_vertices(v, g).first; 
+    } 
+ 
+    if (best_lit.length() < cc.grey.minRoseLiteralLength) { 
+        return nullptr; 
+    } 
+ 
+    set<ue2_literal> best_lit_set({best_lit}); 
+    if (bad_mixed_sensitivity(best_lit)) { 
+        sanitizeAndCompressAndScore(best_lit_set); 
+    } 
+ 
+    return ue2::make_unique<VertLitInfo>(best_v, best_lit_set, anchored, true); 
+} 
+ 
+static 
+unique_ptr<VertLitInfo> findBestPrefixSplit(const NGHolder &g, 
+                                        const vector<NFAVertexDepth> &depths, 
+                                        const RoseInGraph &vg, 
+                                        const vector<RoseInEdge> &ee, 
+                                        bool last_chance, 
+                                        const CompileContext &cc) { 
+    assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX); 
+    set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey); 
+    auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength, 
+                            nullptr, &bad_vertices, last_chance, cc); 
+ 
+    /* large back edges may prevent us identifying anchored or transient cases 
+     * properly - use a simple walk instead */ 
+    if (!rv || !(rv->creates_transient || rv->creates_anchored)) { 
+        auto rv2 = findSimplePrefixSplit(g, cc); 
+        if (rv2) { 
+            return rv2; 
+        } 
+    } 
+ 
+    return rv; 
+} 
+ 
+static 
+unique_ptr<VertLitInfo> findBestCleanSplit(const NGHolder &g, 
+                                           const CompileContext &cc) { 
+    assert(g.kind != NFA_PREFIX); 
+    set<NFAVertex> cleanSplits; 
+    for (NFAVertex v : vertices_range(g)) { 
+        if (!g[v].char_reach.all() || !edge(v, v, g).second) { 
+            continue; 
+        } 
+        insert(&cleanSplits, inv_adjacent_vertices(v, g)); 
+        cleanSplits.erase(v); 
+    } 
+    cleanSplits.erase(g.start); 
+    if (cleanSplits.empty()) { 
+        return nullptr; 
+    } 
+    return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen, 
+                         &cleanSplits, nullptr, false, cc); 
+} 
+ 
+static 
+bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { 
+    set<NFAVertex> curr, next; 
+    curr.insert(g.accept); 
+ 
+    for (auto it = lit.rbegin(); it != lit.rend(); ++it) { 
+        next.clear(); 
+ 
+        for (auto v : curr) { 
+            for (auto u : inv_adjacent_vertices_range(v, g)) { 
+                if (u == g.start) { 
+                    if (overhang_ok) { 
+                        DEBUG_PRINTF("bail\n"); 
+                        return true; 
+                    } else { 
+                        continue; /* it is not possible for a lhs literal to 
+                                   * overhang the start */ 
+                    } 
+                } 
+ 
+                const CharReach &cr = g[u].char_reach; 
+                if (!overlaps(*it, cr)) { 
+                     continue; 
+                } 
+ 
+                next.insert(u); 
+            } 
+        } 
+ 
+        curr.swap(next); 
+    } 
+ 
+    return !curr.empty(); 
+} 
+ 
+static 
+bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, 
+                   const vector<RoseInEdge> &ee, const VertLitInfo &split) { 
+    const vector<NFAVertex> &splitters = split.vv; 
+    assert(!splitters.empty()); 
+ 
+    shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); 
+    shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); 
+ 
+    unordered_map<NFAVertex, NFAVertex> lhs_map; 
+    unordered_map<NFAVertex, NFAVertex> rhs_map; 
+ 
+    splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map); 
+    DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n", 
+                 to_string(base_graph.kind).c_str(), num_vertices(base_graph), 
+                 to_string(lhs->kind).c_str(), num_vertices(*lhs), 
+                 to_string(rhs->kind).c_str(), num_vertices(*rhs)); 
+ 
+    bool suffix = generates_callbacks(base_graph); 
+ 
+    if (is_triggered(base_graph)) { 
+        /* if we are already guarded, check if the split reduces the size of 
+         * the problem before continuing with the split */ 
+        if (num_vertices(*lhs) >= num_vertices(base_graph) 
+            && !(suffix && isVacuous(*rhs))) { 
+            DEBUG_PRINTF("split's lhs is no smaller\n"); 
+            return false; 
+        } 
+ 
+        if (num_vertices(*rhs) >= num_vertices(base_graph)) { 
+            DEBUG_PRINTF("split's rhs is no smaller\n"); 
+            return false; 
+        } 
+    } 
+ 
+    bool do_accept = false; 
+    bool do_accept_eod = false; 
+    assert(rhs); 
+    if (isVacuous(*rhs) && suffix) { 
+        if (edge(rhs->start, rhs->accept, *rhs).second) { 
+            DEBUG_PRINTF("rhs has a cliche\n"); 
+            do_accept = true; 
+            remove_edge(rhs->start, rhs->accept, *rhs); 
+        } 
+ 
+        if (edge(rhs->start, rhs->acceptEod, *rhs).second) { 
+            DEBUG_PRINTF("rhs has an eod cliche\n"); 
+            do_accept_eod = true; 
+            remove_edge(rhs->start, rhs->acceptEod, *rhs); 
+        } 
+ 
+        renumber_edges(*rhs); 
+    } 
+ 
+    /* check if we still have a useful graph left over */ 
+    bool do_norm = out_degree(rhs->start, *rhs) != 1; 
+ 
+    set<ReportID> splitter_reports; 
+    for (auto v : splitters) { 
+        insert(&splitter_reports, base_graph[v].reports); 
+    } 
+ 
+    /* find the targets of each source vertex; insertion_ordered_map used to 
+     * preserve deterministic ordering */ 
+    insertion_ordered_map<RoseInVertex, vector<RoseInVertex>> images; 
+    for (const RoseInEdge &e : ee) { 
+        RoseInVertex src = source(e, vg); 
+        RoseInVertex dest = target(e, vg); 
+        images[src].push_back(dest); 
+        remove_edge(e, vg); 
+    } 
+ 
+    map<vector<RoseInVertex>, vector<RoseInVertex>> verts_by_image; 
+ 
+    for (const auto &m : images) { 
+        const auto &u = m.first; 
+        const auto &image = m.second; 
+ 
+        if (contains(verts_by_image, image)) { 
+            for (RoseInVertex v : verts_by_image[image]) { 
+                add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); 
+            } 
+            continue; 
+        } 
+ 
+        for (const auto &lit : split.lit) { 
+            assert(!bad_mixed_sensitivity(lit)); 
+ 
+            /* don't allow overhang in can_match() as literals should 
+             * correspond to the edge graph being split; overhanging the graph 
+             * would indicate a false path.*/ 
+            if (!can_match(*lhs, lit, false)) { 
+                DEBUG_PRINTF("'%s' did not match lhs\n", 
+                             escapeString(lit).c_str()); 
+                continue; 
+            } 
+ 
+            DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); 
+            auto v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); 
+            add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); 
+ 
+            /* work out delay later */ 
+            if (do_accept) { 
+                DEBUG_PRINTF("rhs has a cliche\n"); 
+                auto tt = add_vertex(RoseInVertexProps::makeAccept( 
+                                                         splitter_reports), vg); 
+                add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); 
+            } 
+ 
+            if (do_accept_eod) { 
+                DEBUG_PRINTF("rhs has an eod cliche\n"); 
+                auto tt = add_vertex(RoseInVertexProps::makeAcceptEod( 
+                                                         splitter_reports), vg); 
+                add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); 
+            } 
+ 
+            if (do_norm) { 
+                assert(out_degree(rhs->start, *rhs) > 1); 
+                for (RoseInVertex dest : image) { 
+                    add_edge(v, dest, RoseInEdgeProps(rhs, 0U), vg); 
+                } 
+            } 
+            verts_by_image[image].push_back(v); 
+        } 
+    } 
+ 
+    assert(hasCorrectlyNumberedVertices(*rhs)); 
+    assert(hasCorrectlyNumberedEdges(*rhs)); 
+    assert(isCorrectlyTopped(*rhs)); 
+    assert(hasCorrectlyNumberedVertices(*lhs)); 
+    assert(hasCorrectlyNumberedEdges(*lhs)); 
+    assert(isCorrectlyTopped(*lhs)); 
+ 
+    return true; 
+} 
+ 
+#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */ 
+#define MAX_LEN_2_LITERALS_PER_CUT 3 
+ 
+static 
+bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores, 
+                           const map<NFAEdge, set<ue2_literal>> &cut_lits, 
+                           u32 min_allowed_length) { 
+    DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(), 
+                 min_allowed_length); 
+    if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { 
+        return false; 
+    } 
+ 
+    u32 len_2_count = 0; 
+ 
+    for (const auto &cut : cut_lits) { 
+        if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { 
+            DEBUG_PRINTF("cut uses a forbidden edge\n"); 
+            return false; 
+        } 
+ 
+        if (min_len(cut.second) < min_allowed_length) { 
+            DEBUG_PRINTF("cut uses a bad literal\n"); 
+            return false; 
+        } 
+ 
+        for (const auto &lit : cut.second) { 
+            if (lit.length() == 2) { 
+                len_2_count++; 
+            } 
+        } 
+    } 
+ 
+    if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { 
+        return false; 
+    } 
+ 
+    return true; 
+} 
+ 
+static 
+void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, 
+                     const vector<RoseInEdge> &to_cut, 
+                     const vector<NFAEdge> &cut, 
+                     const map<NFAEdge, set<ue2_literal>> &cut_lits) { 
+    DEBUG_PRINTF("splitting %s (%zu vertices)\n", to_string(h.kind).c_str(), 
+                 num_vertices(h)); 
+ 
+    /* create literal vertices and connect preds */ 
+    unordered_set<RoseInVertex> done_sources; 
+    map<RoseInVertex, vector<pair<RoseInVertex, NFAVertex>>> verts_by_source; 
+    for (const RoseInEdge &ve : to_cut) { 
+        assert(&h == &*vg[ve].graph); 
+        RoseInVertex src = source(ve, vg); 
+        if (!done_sources.insert(src).second) { 
+            continue; /* already processed */ 
+        } 
+ 
+        /* iterate over cut for determinism */ 
+        for (const auto &e : cut) { 
+            NFAVertex prev_v = source(e, h); 
+            NFAVertex pivot = target(e, h); 
+ 
+            DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); 
+            unordered_map<NFAVertex, NFAVertex> temp_map; 
+            shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>(); 
+            splitLHS(h, pivot, new_lhs.get(), &temp_map); 
+ 
+            /* want to cut off paths to pivot from things other than the pivot - 
+             * makes a more svelte graphy */ 
+            clear_in_edges(temp_map[pivot], *new_lhs); 
+            NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], 
+                                          *new_lhs); 
+            if (is_triggered(h) && prev_v == h.start) { 
+                (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); 
+            } 
+ 
+            pruneUseless(*new_lhs, false); 
+            renumber_vertices(*new_lhs); 
+            renumber_edges(*new_lhs); 
+ 
+            DEBUG_PRINTF("    into lhs %s (%zu vertices)\n", 
+                         to_string(new_lhs->kind).c_str(), 
+                         num_vertices(*new_lhs)); 
+ 
+            assert(hasCorrectlyNumberedVertices(*new_lhs)); 
+            assert(hasCorrectlyNumberedEdges(*new_lhs)); 
+            assert(isCorrectlyTopped(*new_lhs)); 
+ 
+            const set<ue2_literal> &lits = cut_lits.at(e); 
+            for (const auto &lit : lits) { 
+                if (!can_match(*new_lhs, lit, is_triggered(h))) { 
+                    continue; 
+                } 
+ 
+                RoseInVertex v 
+                    = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); 
+ 
+                /* if this is a prefix/infix an edge directly to accept should 
+                 * represent a false path as we have poisoned vertices covered 
+                 * by the literals. */ 
+                if (generates_callbacks(h)) { 
+                    if (edge(pivot, h.accept, h).second) { 
+                        DEBUG_PRINTF("adding acceptEod\n"); 
+                        /* literal has a direct connection to accept */ 
+                        const flat_set<ReportID> &reports = h[pivot].reports; 
+                        auto tt = add_vertex( 
+                                    RoseInVertexProps::makeAccept(reports), vg); 
+                        add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); 
+                    } 
+ 
+                    if (edge(pivot, h.acceptEod, h).second) { 
+                        assert(generates_callbacks(h)); 
+                        DEBUG_PRINTF("adding acceptEod\n"); 
+                        /* literal has a direct connection to accept */ 
+                        const flat_set<ReportID> &reports = h[pivot].reports; 
+                        auto tt = add_vertex( 
+                                 RoseInVertexProps::makeAcceptEod(reports), vg); 
+                        add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); 
+                    } 
+                } 
+ 
+                add_edge(src, v, RoseInEdgeProps(new_lhs, 0), vg); 
+                verts_by_source[src].push_back({v, pivot}); 
+            } 
+        } 
+    } 
+ 
+    /* wire the literal vertices up to successors */ 
+    map<vector<NFAVertex>, shared_ptr<NGHolder> > done_rhs; 
+    for (const RoseInEdge &ve : to_cut) { 
+        RoseInVertex src = source(ve, vg); 
+        RoseInVertex dest = target(ve, vg); 
+ 
+        /* iterate over cut for determinism */ 
+        for (const auto &elem : verts_by_source[src]) { 
+            NFAVertex pivot = elem.second; 
+            RoseInVertex v = elem.first; 
+ 
+            vector<NFAVertex> adj; 
+            insert(&adj, adj.end(), adjacent_vertices(pivot, h)); 
+            /* we can ignore presence of accept, accepteod in adj as it is best 
+               effort */ 
+ 
+            if (!contains(done_rhs, adj)) { 
+                unordered_map<NFAVertex, NFAVertex> temp_map; 
+                shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>(); 
+                splitRHS(h, adj, new_rhs.get(), &temp_map); 
+                remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); 
+                remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); 
+                renumber_edges(*new_rhs); 
+                DEBUG_PRINTF("    into rhs %s (%zu vertices)\n", 
+                             to_string(new_rhs->kind).c_str(), 
+                             num_vertices(*new_rhs)); 
+                done_rhs.emplace(adj, new_rhs); 
+                assert(isCorrectlyTopped(*new_rhs)); 
+            } 
+ 
+            assert(done_rhs[adj].get()); 
+            shared_ptr<NGHolder> new_rhs = done_rhs[adj]; 
+ 
+            assert(hasCorrectlyNumberedVertices(*new_rhs)); 
+            assert(hasCorrectlyNumberedEdges(*new_rhs)); 
+            assert(isCorrectlyTopped(*new_rhs)); 
+ 
+            if (vg[dest].type == RIV_LITERAL 
+                && !can_match(*new_rhs, vg[dest].s, true)) { 
+                continue; 
+            } 
+ 
+            if (out_degree(new_rhs->start, *new_rhs) != 1) { 
+                add_edge(v, dest, RoseInEdgeProps(new_rhs, 0), vg); 
+            } 
+        } 
+ 
+        remove_edge(ve, vg); 
+    } 
+} 
+ 
+static 
+bool doNetflowCut(NGHolder &h, 
+                  const vector<NFAVertexDepth> *depths, 
+                  RoseInGraph &vg, 
+                  const vector<RoseInEdge> &ee, bool for_prefix, 
+                  const Grey &grey, u32 min_allowed_length = 0U) { 
+    ENSURE_AT_LEAST(&min_allowed_length, grey.minRoseNetflowLiteralLength); 
+ 
+    DEBUG_PRINTF("doing netflow cut\n"); 
+    /* TODO: we should really get literals/scores from the full graph as this 
+     * allows us to overlap with previous cuts. */ 
+    assert(!ee.empty()); 
+    assert(&h == &*vg[ee.front()].graph); 
+    assert(!for_prefix || depths); 
+ 
+    if (num_edges(h) > grey.maxRoseNetflowEdges) { 
+        /* We have a limit on this because scoring edges and running netflow 
+         * gets very slow for big graphs. */ 
+        DEBUG_PRINTF("too many edges, skipping netflow cut\n"); 
+        return false; 
+    } 
+ 
+    assert(hasCorrectlyNumberedVertices(h)); 
+    assert(hasCorrectlyNumberedEdges(h)); 
+ 
+    auto known_bad = poisonEdges(h, depths, vg, ee, for_prefix, grey); 
+ 
+    /* Step 1: Get scores for all edges */ 
+    vector<u64a> scores = scoreEdges(h, known_bad); /* scores by edge_index */ 
+ 
+    /* Step 2: Find cutset based on scores */ 
+    vector<NFAEdge> cut = findMinCut(h, scores); 
+ 
+    /* Step 3: Get literals corresponding to cut edges */ 
+    map<NFAEdge, set<ue2_literal>> cut_lits; 
+    for (const auto &e : cut) { 
+        set<ue2_literal> lits = getLiteralSet(h, e); 
+        sanitizeAndCompressAndScore(lits); 
+ 
+        cut_lits[e] = lits; 
+    } 
+ 
+    /* if literals are underlength bail or if it involves a forbidden edge*/ 
+    if (!checkValidNetflowLits(h, scores, cut_lits, min_allowed_length)) { 
+        return false; 
+    } 
+    DEBUG_PRINTF("splitting\n"); 
+ 
+    /* Step 4: Split graph based on cuts */ 
+    splitEdgesByCut(h, vg, ee, cut, cut_lits); 
+ 
+    return true; 
+} 
+ 
+static 
+bool deanchorIfNeeded(NGHolder &g) { 
+    DEBUG_PRINTF("hi\n"); 
+    if (proper_out_degree(g.startDs, g)) { 
+        return false; 
+    } 
+ 
+    /* look for a non-special dot with a loop following start */ 
+    set<NFAVertex> succ_g; 
+    insert(&succ_g, adjacent_vertices(g.start, g)); 
+    succ_g.erase(g.startDs); 
+ 
+    for (auto v : adjacent_vertices_range(g.start, g)) { 
+        DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index, 
+                     g[v].char_reach.count()); 
+ 
+        if (v == g.startDs || !g[v].char_reach.all()) { 
+            continue; 
+        } 
+ 
+        set<NFAVertex> succ_v; 
+        insert(&succ_v, adjacent_vertices(v, g)); 
+ 
+        if (succ_v == succ_g) { 
+            DEBUG_PRINTF("found ^.*\n"); 
+            for (auto succ : adjacent_vertices_range(g.start, g)) { 
+                if (succ == g.startDs) { 
+                    continue; 
+                } 
+                add_edge(g.startDs, succ, g); 
+            } 
+            clear_vertex(v, g); 
+            remove_vertex(v, g); 
+            renumber_vertices(g); 
+            return true; 
+        } 
+ 
+        if (succ_g.size() == 1 && hasSelfLoop(v, g)) { 
+            DEBUG_PRINTF("found ^.+\n"); 
+            add_edge(g.startDs, v, g); 
+            remove_edge(v, v, g); 
+            return true; 
+        } 
+    } 
+ 
+    return false; 
+} 
+ 
+static 
+RoseInGraph populateTrivialGraph(const NGHolder &h) { 
+    RoseInGraph g; 
+    shared_ptr<NGHolder> root_g = cloneHolder(h); 
+    bool orig_anch = isAnchored(*root_g); 
+    orig_anch |= deanchorIfNeeded(*root_g); 
+ 
+    DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); 
+ 
+    auto start = add_vertex(RoseInVertexProps::makeStart(orig_anch), g); 
+    auto accept = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g); 
+ 
+    add_edge(start, accept, RoseInEdgeProps(root_g, 0), g); 
+ 
+    return g; 
+} 
+ 
+static 
+void avoidOutfixes(RoseInGraph &vg, bool last_chance, 
+                   const CompileContext &cc) { 
+    STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n"); 
+    assert(num_vertices(vg) == 2); 
+    assert(num_edges(vg) == 1); 
+ 
+    RoseInEdge e = *edges(vg).first; 
+ 
+    NGHolder &h = *vg[e].graph; 
+    assert(isCorrectlyTopped(h)); 
+ 
+    renumber_vertices(h); 
+    renumber_edges(h); 
+ 
+    unique_ptr<VertLitInfo>  split = findBestNormalSplit(h, vg, {e}, cc); 
+ 
+    if (split && splitRoseEdge(h, vg, {e}, *split)) { 
+        DEBUG_PRINTF("split on simple literal\n"); 
+        return; 
+    } 
+ 
+    if (last_chance) { 
+        /* look for a prefix split as it allows us to accept very weak anchored 
+         * literals. */ 
+        auto depths = calcDepths(h); 
+ 
+        split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc); 
+ 
+        if (split && splitRoseEdge(h, vg, {e}, *split)) { 
+            DEBUG_PRINTF("split on simple literal\n"); 
+            return; 
+        } 
+    } 
+ 
+    doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); 
+} 
+ 
+static 
+void removeRedundantPrefixes(RoseInGraph &g) { 
+    STAGE_DEBUG_PRINTF("REMOVING REDUNDANT PREFIXES\n"); 
+ 
+    for (const RoseInEdge &e : edges_range(g)) { 
+        RoseInVertex s = source(e, g); 
+        RoseInVertex t = target(e, g); 
+ 
+        if (g[s].type != RIV_START || g[t].type != RIV_LITERAL) { 
+            continue; 
+        } 
+ 
+        if (!g[e].graph) { 
+            continue; 
+        } 
+ 
+        assert(!g[t].delay); 
+        const ue2_literal &lit = g[t].s; 
+ 
+        if (!literalIsWholeGraph(*g[e].graph, lit)) { 
+            DEBUG_PRINTF("not whole graph\n"); 
+            continue; 
+        } 
+ 
+        if (!isFloating(*g[e].graph)) { 
+            DEBUG_PRINTF("not floating\n"); 
+            continue; 
+        } 
+        g[e].graph.reset(); 
+    } 
+} 
+ 
+static 
+u32 maxDelay(const CompileContext &cc) { 
+    if (!cc.streaming) { 
+        return MO_INVALID_IDX; 
+    } 
+    return cc.grey.maxHistoryAvailable; 
+} 
+ 
+static 
+void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, 
+                                         const CompileContext &cc) { 
+    STAGE_DEBUG_PRINTF("REMOVING LITERALS FROM PREFIXES\n"); 
+ 
+    vector<RoseInEdge> to_anchor; 
+    for (const RoseInEdge &e : edges_range(g)) { 
+        RoseInVertex s = source(e, g); 
+        RoseInVertex t = target(e, g); 
+ 
+        if (g[s].type != RIV_START && g[s].type != RIV_ANCHORED_START) { 
+            continue; 
+        } 
+ 
+        if (g[t].type != RIV_LITERAL) { 
+            continue; 
+        } 
+ 
+        if (!g[e].graph) { 
+            continue; 
+        } 
+ 
+        if (g[e].graph_lag) { 
+            /* already removed redundant parts of literals */ 
+            continue; 
+        } 
+ 
+        if (g[e].dfa) { 
+            /* if we removed any more states, we would need to rebuild the 
+             * the dfa which can be time consuming. */ 
+            continue; 
+        } 
+ 
+        assert(!g[t].delay); 
+        const ue2_literal &lit = g[t].s; 
+ 
+        DEBUG_PRINTF("removing states for literal: %s\n", 
+                     dumpString(lit).c_str()); 
+ 
+        unique_ptr<NGHolder> h = cloneHolder(*g[e].graph); 
+        const u32 max_delay = maxDelay(cc); 
+ 
+        u32 delay = removeTrailingLiteralStates(*h, lit, max_delay, 
+                                              false /* can't overhang start */); 
+ 
+        DEBUG_PRINTF("got delay %u (max allowed %u)\n", delay, max_delay); 
+ 
+        if (edge(h->startDs, h->accept, *h).second) { 
+            /* we should have delay == lit.length(), but in really complex 
+             * cases we may fail to identify that we can remove the whole 
+             * graph. Regardless, the fact that sds is wired to accept means the 
+             * graph serves no purpose. */ 
+            DEBUG_PRINTF("whole graph\n"); 
+            g[e].graph.reset(); 
+            continue; 
+        } 
+ 
+        if (delay == lit.length() && edge(h->start, h->accept, *h).second 
+            && num_vertices(*h) == N_SPECIALS) { 
+            to_anchor.push_back(e); 
+            continue; 
+        } 
+ 
+        /* if we got here we should still have an interesting graph */ 
+        assert(delay == max_delay || num_vertices(*h) > N_SPECIALS); 
+ 
+        if (delay && delay != MO_INVALID_IDX) { 
+            DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); 
+ 
+            g[e].graph = move(h); 
+            g[e].graph_lag = delay; 
+        } 
+    } 
+ 
+    if (!to_anchor.empty()) { 
+        RoseInVertex anch = add_vertex(RoseInVertexProps::makeStart(true), g); 
+ 
+        for (RoseInEdge e : to_anchor) { 
+            DEBUG_PRINTF("rehoming to anchor\n"); 
+            RoseInVertex v = target(e, g); 
+            add_edge(anch, v, g); 
+            remove_edge(e, g); 
+        } 
+    } 
+} 
+ 
+static 
+bool isStarCliche(const NGHolder &g) { 
+    DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); 
+ 
+    bool nonspecials_seen = false; 
+ 
+    for (auto v : vertices_range(g)) { 
+        if (is_special(v, g)) { 
+            continue; 
+        } 
+ 
+        if (nonspecials_seen) { 
+            return false; 
+        } 
+        nonspecials_seen = true; 
+ 
+        if (!g[v].char_reach.all()) { 
+            return false; 
+        } 
+ 
+        if (!hasSelfLoop(v, g)) { 
+            return false; 
+        } 
+        if (!edge(v, g.accept, g).second) { 
+            return false; 
+        } 
+    } 
+ 
+    if (!nonspecials_seen) { 
+        return false; 
+    } 
+ 
+    if (!edge(g.start, g.accept, g).second) { 
+        return false; 
+    } 
+ 
+    return true; 
+} 
+ 
+static 
+void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, 
+                                      const vector<RoseInEdge> &ee, 
+                                      const CompileContext &cc) { 
+    /* TODO: This could be better by not creating a separate graph for each 
+     * successor literal. This would require using distinct report ids and also 
+     * taking into account overlap of successor literals. */ 
+ 
+    set<ue2_literal> preds; 
+    set<ue2_literal> succs; 
+    for (const RoseInEdge &e : ee) { 
+        RoseInVertex u = source(e, ig); 
+        assert(ig[u].type == RIV_LITERAL); 
+        assert(!ig[u].delay); 
+        preds.insert(ig[u].s); 
+ 
+        RoseInVertex v = target(e, ig); 
+        assert(ig[v].type == RIV_LITERAL); 
+        assert(!ig[v].delay); 
+        succs.insert(ig[v].s); 
+ 
+        if (ig[e].graph_lag) { 
+            /* already removed redundant parts of literals */ 
+            return; 
+        } 
+ 
+        assert(!ig[e].dfa); 
+    } 
+ 
+    map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */ 
+ 
+    for (const ue2_literal &right : succs) { 
+        size_t max_overlap = 0; 
+        for (const ue2_literal &left : preds) { 
+            size_t overlap = maxOverlap(left, right, 0); 
+            ENSURE_AT_LEAST(&max_overlap, overlap); 
+        } 
+ 
+        u32 max_allowed_delay = right.length() - max_overlap; 
+ 
+        if (cc.streaming) { 
+            LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); 
+        } 
+ 
+        if (!max_allowed_delay) { 
+            continue; 
+        } 
+ 
+        shared_ptr<NGHolder> h_new = cloneHolder(h); 
+ 
+        u32 delay = removeTrailingLiteralStates(*h_new, right, 
+                                                max_allowed_delay); 
+ 
+        if (delay == MO_INVALID_IDX) { 
+            /* successor literal could not match infix -> ignore false path */ 
+            assert(0); 
+            continue; 
+        } 
+ 
+        if (!delay) { 
+            /* unable to trim graph --> no point swapping to new holder */ 
+            continue; 
+        } 
+ 
+        assert(isCorrectlyTopped(*h_new)); 
+        graphs[right] = make_pair(h_new, delay); 
+    } 
+ 
+    for (const RoseInEdge &e : ee) { 
+        RoseInVertex v = target(e, ig); 
+        const ue2_literal &succ = ig[v].s; 
+        if (!contains(graphs, succ)) { 
+            continue; 
+        } 
+ 
+        ig[e].graph = graphs[succ].first; 
+        ig[e].graph_lag = graphs[succ].second; 
+ 
+        if (isStarCliche(*ig[e].graph)) { 
+            DEBUG_PRINTF("is a X star!\n"); 
+            ig[e].graph.reset(); 
+            ig[e].graph_lag = 0; 
+        } 
+    } 
+} 
+ 
+static 
+void removeRedundantLiteralsFromInfixes(RoseInGraph &g, 
+                                        const CompileContext &cc) { 
+    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> infixes; 
+ 
+    for (const RoseInEdge &e : edges_range(g)) { 
+        RoseInVertex s = source(e, g); 
+        RoseInVertex t = target(e, g); 
+ 
+        if (g[s].type != RIV_LITERAL || g[t].type != RIV_LITERAL) { 
+            continue; 
+        } 
+ 
+        if (!g[e].graph) { 
+            continue; 
+        } 
+ 
+        assert(!g[t].delay); 
+        if (g[e].dfa) { 
+            /* if we removed any more states, we would need to rebuild the 
+             * the dfa which can be time consuming. */ 
+            continue; 
+        } 
+ 
+        NGHolder *h = g[e].graph.get(); 
+        infixes[h].push_back(e); 
+    } 
+ 
+    for (const auto &m : infixes) { 
+        NGHolder *h = m.first; 
+        const auto &edges = m.second; 
+        removeRedundantLiteralsFromInfix(*h, g, edges, cc); 
+    } 
+} 
+ 
+static 
+void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) { 
+    removeRedundantLiteralsFromPrefixes(g, cc); 
+    removeRedundantLiteralsFromInfixes(g, cc); 
+} 
+ 
+static 
+RoseInVertex getStart(RoseInGraph &vg) { 
+    for (RoseInVertex v : vertices_range(vg)) { 
+        if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) { 
+            return v; 
+        } 
+    } 
+    assert(0); 
+    return RoseInGraph::null_vertex(); 
+} 
+ 
+/** 
+ * Finds the initial accept vertex created to which suffix/outfixes are 
+ * attached. 
+ */ 
+static 
+RoseInVertex getPrimaryAccept(RoseInGraph &vg) { 
+    for (RoseInVertex v : vertices_range(vg)) { 
+        if (vg[v].type == RIV_ACCEPT && vg[v].reports.empty()) { 
+            return v; 
+        } 
+    } 
+    assert(0); 
+    return RoseInGraph::null_vertex(); 
+} 
+ 
+static 
+bool willBeTransient(const depth &max_depth, const CompileContext &cc) { 
+    if (!cc.streaming) { 
+        return max_depth <= depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH); 
+    } else { 
+        return max_depth <= depth(cc.grey.maxHistoryAvailable + 1); 
+    } 
+} 
+ 
+static 
+bool willBeAnchoredTable(const depth &max_depth, const Grey &grey) { 
+    return max_depth <= depth(grey.maxAnchoredRegion); 
+} 
+ 
+static 
+unique_ptr<NGHolder> make_chain(u32 count) { 
+    assert(count); 
+ 
+    auto rv = std::make_unique<NGHolder>(NFA_INFIX); 
+ 
+    NGHolder &h = *rv; 
+ 
+    NFAVertex u = h.start; 
+    for (u32 i = 0; i < count; i++) { 
+        NFAVertex v = add_vertex(h); 
+        h[v].char_reach = CharReach::dot(); 
+        add_edge(u, v, h); 
+        u = v; 
+    } 
+    h[u].reports.insert(0); 
+    add_edge(u, h.accept, h); 
+ 
+    setTops(h); 
+ 
+    return rv; 
+} 
+ 
+#define SHORT_TRIGGER_LEN 16 
+ 
+static 
+bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, 
+                                  const vector<RoseInEdge> &ee, 
+                                  const CompileContext &cc) { 
+    /* check max width and literal lengths to see if possible */ 
+    size_t min_lit = (size_t)~0ULL; 
+    for (const RoseInEdge &e : ee) { 
+        RoseInVertex v = target(e, vg); 
+        LIMIT_TO_AT_MOST(&min_lit, vg[v].s.length()); 
+    } 
+ 
+    if (min_lit <= SHORT_TRIGGER_LEN || min_lit >= UINT_MAX) { 
+        return false; 
+    } 
+ 
+    depth max_width = findMaxWidth(h); 
+ 
+    u32 delta = min_lit - SHORT_TRIGGER_LEN; 
+ 
+    if (!willBeTransient(max_width - depth(delta), cc) 
+        && !willBeAnchoredTable(max_width - depth(delta), cc.grey)) { 
+        return false; 
+    } 
+ 
+    DEBUG_PRINTF("candidate for splitting long literal (len %zu)\n", min_lit); 
+    DEBUG_PRINTF("delta = %u\n", delta); 
+ 
+    /* try split */ 
+    map<RoseInVertex, shared_ptr<NGHolder> > graphs; 
+    for (const RoseInEdge &e : ee) { 
+        RoseInVertex v = target(e, vg); 
+ 
+        shared_ptr<NGHolder> h_new = cloneHolder(h); 
+ 
+        u32 delay = removeTrailingLiteralStates(*h_new, vg[v].s, delta); 
+ 
+        DEBUG_PRINTF("delay %u\n", delay); 
+ 
+        if (delay != delta) { 
+            DEBUG_PRINTF("unable to trim literal\n"); 
+            return false; 
+        } 
+ 
+        if (in_degree(v, vg) != 1) { 
+            DEBUG_PRINTF("complicated\n"); 
+            return false; 
+        } 
+ 
+        DEBUG_PRINTF("new mw = %u\n", (u32)findMaxWidth(*h_new)); 
+        assert(willBeTransient(findMaxWidth(*h_new), cc) 
+               || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey)); 
+ 
+        assert(isCorrectlyTopped(*h_new)); 
+        graphs[v] = h_new; 
+    } 
+ 
+    /* add .{repeats} from prefixes to long literals */ 
+    for (const RoseInEdge &e : ee) { 
+        RoseInVertex s = source(e, vg); 
+        RoseInVertex t = target(e, vg); 
+ 
+        remove_edge(e, vg); 
+        const ue2_literal &orig_lit = vg[t].s; 
+ 
+        ue2_literal lit(orig_lit.begin(), orig_lit.end() - delta); 
+ 
+        ue2_literal lit2(orig_lit.end() - delta, orig_lit.end()); 
+ 
+        assert(lit.length() + delta == orig_lit.length()); 
+ 
+        vg[t].s = lit2; 
+ 
+        RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); 
+        add_edge(s, v, RoseInEdgeProps(graphs[t], 0), vg); 
+        add_edge(v, t, RoseInEdgeProps(make_chain(delta), 0), vg); 
+    } 
+ 
+    DEBUG_PRINTF("success\n"); 
+    /* TODO: alter split point to avoid pathological splits */ 
+    return true; 
+} 
+ 
+static 
+void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, 
+                                  u32 delay, const vector<NFAVertex> &preds) { 
+    assert(delay <= lit.length()); 
+    assert(isCorrectlyTopped(g)); 
+    DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); 
+ 
+    NFAVertex prev = g.accept; 
+    auto it = lit.rbegin(); 
+    while (delay--) { 
+        NFAVertex curr = add_vertex(g); 
+        assert(it != lit.rend()); 
+        g[curr].char_reach = *it; 
+        add_edge(curr, prev, g); 
+        ++it; 
+        prev = curr; 
+    } 
+ 
+    for (auto v : preds) { 
+        NFAEdge e = add_edge_if_not_present(v, prev, g); 
+        if (v == g.start && is_triggered(g)) { 
+            g[e].tops.insert(DEFAULT_TOP); 
+        } 
+    } 
+ 
+    // Every predecessor of accept must have a report. 
+    set_report(g, 0); 
+ 
+    renumber_vertices(g); 
+    renumber_edges(g); 
+    assert(allMatchStatesHaveReports(g)); 
+    assert(isCorrectlyTopped(g)); 
+} 
+ 
+static 
+void restoreTrailingLiteralStates(NGHolder &g, 
+                                  const vector<pair<ue2_literal, u32>> &lits) { 
+    vector<NFAVertex> preds; 
+    insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); 
+    clear_in_edges(g.accept, g); 
+ 
+    for (auto v : preds) { 
+        g[v].reports.clear(); /* clear report from old accepts */ 
+    } 
+ 
+    for (const auto &p : lits) { 
+        const ue2_literal &lit = p.first; 
+        u32 delay = p.second; 
+ 
+        restoreTrailingLiteralStates(g, lit, delay, preds); 
+    } 
+} 
+ 
+static 
+bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee, 
+                   const CompileContext &cc) { 
+    DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h, 
+                  num_vertices(h)); 
+    assert(isCorrectlyTopped(h)); 
+ 
+    renumber_vertices(h); 
+    renumber_edges(h); 
+ 
+    auto depths = calcDepths(h); 
+ 
+    /* If the reason the prefix is not transient is due to a very long literal 
+     * following, we can make it transient by restricting ourselves to using 
+     * just the head of the literal. */ 
+    if (makeTransientFromLongLiteral(h, vg, ee, cc)) { 
+        return true; 
+    } 
+ 
+    auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc); 
+ 
+    if (split && (split->creates_transient || split->creates_anchored) 
+        && splitRoseEdge(h, vg, ee, *split)) { 
+        DEBUG_PRINTF("split on simple literal\n"); 
+        return true; 
+    } 
+ 
+    /* large back edges may prevent us identifing anchored or transient cases 
+     * properly - use a simple walk instead */ 
+ 
+    if (doNetflowCut(h, &depths, vg, ee, true, cc.grey)) { 
+        return true; 
+    } 
+ 
+    if (split && splitRoseEdge(h, vg, ee, *split)) { 
+        /* use the simple split even though it doesn't create a transient 
+         * prefix */ 
+        DEBUG_PRINTF("split on simple literal\n"); 
+        return true; 
+    } 
+ 
+    /* look for netflow cuts which don't produce good prefixes */ 
+    if (doNetflowCut(h, &depths, vg, ee, false, cc.grey)) { 
+        return true; 
+    } 
+ 
+    if (ee.size() > 1) { 
+        DEBUG_PRINTF("split the prefix apart based on succ literals\n"); 
+        unordered_map<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> >, 
                       NGHolderHasher, NGHolderEqual> trimmed;
-
-        for (const auto &e : ee) {
-            shared_ptr<NGHolder> hh = cloneHolder(h);
-            auto succ_lit = vg[target(e, vg)].s;
-            assert(isCorrectlyTopped(*hh));
-            u32 delay = removeTrailingLiteralStates(*hh, succ_lit,
-                                                    succ_lit.length(),
-                                              false /* can't overhang start */);
-            if (!delay) {
-                DEBUG_PRINTF("could not remove any literal, skip over\n");
-                continue;
-            }
-
-            assert(isCorrectlyTopped(*hh));
-            trimmed[hh].emplace_back(e, delay);
-        }
-
-        if (trimmed.size() == 1) {
-            return false;
-        }
-
-        /* shift the contents to a vector so we can modify the graphs without
-         * violating the map's invariants. */
-        vector<pair<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> > > >
-            trimmed_vec(trimmed.begin(), trimmed.end());
-        trimmed.clear();
-        for (auto &elem : trimmed_vec) {
-            shared_ptr<NGHolder> &hp = elem.first;
-            vector<pair<ue2_literal, u32>> succ_lits;
-
-            for (const auto &edge_delay : elem.second) {
-                const RoseInEdge &e = edge_delay.first;
-                u32 delay = edge_delay.second;
-                auto lit = vg[target(e, vg)].s;
-
-                vg[e].graph = hp;
-                assert(delay <= lit.length());
-                succ_lits.emplace_back(lit, delay);
-            }
-            restoreTrailingLiteralStates(*hp, succ_lits);
-        }
-        return true;
-    }
-
-    return false;
-}
-
-#define MAX_FIND_BETTER_PREFIX_GEN   4
-#define MAX_FIND_BETTER_PREFIX_COUNT 100
-
-static
-void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) {
-    STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n");
-    RoseInVertex start = getStart(vg);
-
-    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> prefixes;
-    bool changed;
-    u32 gen = 0;
-    do {
-        DEBUG_PRINTF("gen %u\n", gen);
-        changed = false;
-        prefixes.clear();
-
-        /* find prefixes */
-        for (const RoseInEdge &e : out_edges_range(start, vg)) {
-            /* outfixes shouldn't have made it this far */
-            assert(vg[target(e, vg)].type == RIV_LITERAL);
-            if (vg[e].graph) {
-                NGHolder *h = vg[e].graph.get();
-                prefixes[h].push_back(e);
-            }
-        }
-
-        if (prefixes.size() > MAX_FIND_BETTER_PREFIX_COUNT) {
-            break;
-        }
-
-        /* look for bad prefixes and try to split */
-        for (const auto &m : prefixes) {
-            NGHolder *h = m.first;
-            const auto &edges = m.second;
-            depth max_width = findMaxWidth(*h);
-            if (willBeTransient(max_width, cc)
-                || willBeAnchoredTable(max_width, cc.grey)) {
-                continue;
-            }
-
-            changed = improvePrefix(*h, vg, edges, cc);
-        }
-    } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN);
-}
-
-#define STRONG_LITERAL_LENGTH 20
-#define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10
-
-static
-bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg,
-                          const vector<RoseInEdge> &ee,
-                          const CompileContext &cc) {
-    DEBUG_PRINTF("looking for string literal\n");
-    unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc);
-
-    if (split && min_len(split->lit) >= STRONG_LITERAL_LENGTH) {
-        DEBUG_PRINTF("splitting simple literal\n");
-        return splitRoseEdge(h, vg, ee, *split);
-    }
-
-    return false;
-}
-
-static
-void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) {
-    if (!cc.grey.violetExtractStrongLiterals) {
-        return;
-    }
-
-    STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n");
-
-    unordered_set<NGHolder *> stuck;
-    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> edges_by_graph;
-    bool changed;
-
-    do {
-        changed = false;
-
-        edges_by_graph.clear();
-        for (const RoseInEdge &ve : edges_range(vg)) {
-            if (vg[source(ve, vg)].type != RIV_LITERAL) {
-                continue;
-            }
-
-            if (vg[ve].graph) {
-                NGHolder *h = vg[ve].graph.get();
-                edges_by_graph[h].push_back(ve);
-            }
-        }
-
-        if (edges_by_graph.size() > MAX_EXTRACT_STRONG_LITERAL_GRAPHS) {
-            DEBUG_PRINTF("too many graphs, stopping\n");
-            return;
-        }
-
-        for (const auto &m : edges_by_graph) {
-            NGHolder *g = m.first;
-            const auto &edges = m.second;
-            if (contains(stuck, g)) {
-                DEBUG_PRINTF("already known to be bad\n");
-                continue;
-            }
-            bool rv = extractStrongLiteral(*g, vg, edges, cc);
-            if (rv) {
-                changed = true;
-            } else {
-                stuck.insert(g);
-            }
-        }
-    } while (changed);
-}
-
-#define INFIX_STRONG_GUARD_LEN 8
-#define INFIX_MIN_SPLIT_LITERAL_LEN 12
-
-static
-bool improveInfix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee,
-                  const CompileContext &cc) {
-    unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc);
-
-    if (split && min_len(split->lit) >= INFIX_MIN_SPLIT_LITERAL_LEN
-        && splitRoseEdge(h, vg, ee, *split)) {
-        DEBUG_PRINTF("splitting simple literal\n");
-        return true;
-    }
-
-    DEBUG_PRINTF("trying for a netflow cut\n");
-    /* look for netflow cuts which don't produce good prefixes */
-    bool rv = doNetflowCut(h, nullptr, vg, ee, false, cc.grey, 8);
-
-    DEBUG_PRINTF("did netfow cut? = %d\n", (int)rv);
-
-    return rv;
-}
-
-/**
- * Infixes which are weakly guarded can, in effect, act like prefixes as they
- * will often be live. We should try to split these infixes further if they
- * contain strong literals so that we are at least running smaller weak infixes
- * which can hopeful be accelerated/miracled.
- */
-static
-void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) {
-    if (!cc.grey.violetAvoidWeakInfixes) {
-        return;
-    }
-    STAGE_DEBUG_PRINTF("IMPROVE WEAK INFIXES\n");
-
-    RoseInVertex start = getStart(vg);
-
-    unordered_set<NGHolder *> weak;
-
-    for (RoseInVertex vv : adjacent_vertices_range(start, vg)) {
-        /* outfixes shouldn't have made it this far */
-        assert(vg[vv].type == RIV_LITERAL);
-        if (vg[vv].s.length() >= INFIX_STRONG_GUARD_LEN) {
-            continue;
-        }
-
-        for (const RoseInEdge &e : out_edges_range(vv, vg)) {
-            if (vg[target(e, vg)].type != RIV_LITERAL || !vg[e].graph) {
-                continue;
-            }
-
-            NGHolder *h = vg[e].graph.get();
-            DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h);
-            weak.insert(h);
-        }
-    }
-
-    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> weak_edges;
-    for (const RoseInEdge &ve : edges_range(vg)) {
-        NGHolder *h = vg[ve].graph.get();
-        if (contains(weak, h)) {
-            weak_edges[h].push_back(ve);
-        }
-    }
-
-    for (const auto &m : weak_edges) {
-        NGHolder *h = m.first;
-        const auto &edges = m.second;
-        improveInfix(*h, vg, edges, cc);
-    }
-}
-
-static
-void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg,
-                         const vector<RoseInEdge> &ee, const VertLitInfo &split,
-                         bool eod, const flat_set<ReportID> &reports) {
-    const vector<NFAVertex> &splitters = split.vv;
-    assert(!splitters.empty());
-
-    shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
-    unordered_map<NFAVertex, NFAVertex> v_map;
-    cloneHolder(*lhs, base_graph, &v_map);
-    lhs->kind = NFA_INFIX;
-    clear_in_edges(lhs->accept, *lhs);
-    clear_in_edges(lhs->acceptEod, *lhs);
-    add_edge(lhs->accept, lhs->acceptEod, *lhs);
-    clearReports(*lhs);
-    for (NFAVertex v : splitters) {
-        NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs);
-        if (v == base_graph.start) {
-            (*lhs)[e].tops.insert(DEFAULT_TOP);
-        }
-        (*lhs)[v_map[v]].reports.insert(0);
-
-    }
-    pruneUseless(*lhs);
-    assert(isCorrectlyTopped(*lhs));
-
-    /* create literal vertices and connect preds */
-    for (const auto &lit : split.lit) {
-        if (!can_match(*lhs, lit, is_triggered(*lhs))) {
-            continue;
-        }
-
-        DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str());
-        RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
-
-        RoseInVertex tt;
-        if (eod) {
-            DEBUG_PRINTF("doing eod\n");
-            tt = add_vertex(RoseInVertexProps::makeAcceptEod(reports), vg);
-        } else {
-            DEBUG_PRINTF("doing non-eod\n");
-            tt = add_vertex(RoseInVertexProps::makeAccept(reports), vg);
-        }
-        add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
-
-        for (const RoseInEdge &e : ee) {
-            RoseInVertex u = source(e, vg);
-            assert(!edge(u, v, vg).second);
-            add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg);
-        }
-    }
-}
-
-#define MIN_SUFFIX_LEN 6
-
-static
-bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg,
-                            const vector<RoseInEdge> &suffix_edges,
-                            const CompileContext &cc) {
-    DEBUG_PRINTF("inspecting suffix : %p on %zu edges\n", &h,
-                 suffix_edges.size());
-    /*
-     * We would, in general, rather not have output exposed engines because
-     * once they are triggered, they must be run while infixes only have to run
-     * if the successor literal is seen. Matches from output exposed engines
-     * also have to be placed in a priority queue and interleaved with matches
-     * from other sources.
-     *
-     * Note:
-     * - if the LHS is extremely unlikely we may be better off leaving
-     *   a suffix unguarded.
-     *
-     * - limited width suffixes may be less bad as they won't be continuously
-     *   active, we may want to have (a) stronger controls on if we want to pick
-     *   a trailing literal in these cases and/or (b) look also for literals
-     *   near accept as well as right on accept
-     *
-     * TODO: improve heuristics, splitting logic.
-     */
-
-    /* we may do multiple splits corresponding to different report behaviour */
-    set<NFAVertex> seen;
-    map<pair<bool, flat_set<ReportID> >, VertLitInfo> by_reports; /* eod, rep */
-
-    for (NFAVertex v : inv_adjacent_vertices_range(h.accept, h)) {
-        set<ue2_literal> ss = getLiteralSet(h, v, false);
-        if (ss.empty()) {
-            DEBUG_PRINTF("candidate is too shitty\n");
-            return false;
-        }
-
-        VertLitInfo &vli = by_reports[make_pair(false, h[v].reports)];
-        insert(&vli.lit, ss);
-        vli.vv.push_back(v);
-        seen.insert(v);
-    }
-
-    seen.insert(h.accept);
-    for (NFAVertex v : inv_adjacent_vertices_range(h.acceptEod, h)) {
-        if (contains(seen, v)) {
-            continue;
-        }
-
-        set<ue2_literal> ss = getLiteralSet(h, v, false);
-        if (ss.empty()) {
-            DEBUG_PRINTF("candidate is too shitty\n");
-            return false;
-        }
-
-        VertLitInfo &vli = by_reports[make_pair(true, h[v].reports)];
-        insert(&vli.lit, ss);
-        vli.vv.push_back(v);
-    }
-
-    assert(!by_reports.empty());
-
-    /* TODO: how strong a min len do we want here ? */
-    u32 min_len = cc.grey.minRoseLiteralLength;
-    ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN);
-
-    for (auto &vli : by_reports | map_values) {
-        u64a score = sanitizeAndCompressAndScore(vli.lit);
-
-        if (vli.lit.empty()
-            || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len,
-                                              false, false)) {
-            return false;
-        }
-    }
-
-    for (const auto &info : by_reports) {
-        DEBUG_PRINTF("splitting on simple literals\n");
-        splitEdgesForSuffix(h, vg, suffix_edges, info.second,
-                            info.first.first /* eod */,
-                            info.first.second /* reports */);
-    }
-
-    for (const RoseInEdge &e : suffix_edges) {
-        remove_edge(e, vg);
-    }
-    return true;
-}
-
-static
-void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) {
-    if (!cc.grey.violetAvoidSuffixes) {
-        return;
-    }
-
-    STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n");
-
-    RoseInVertex accept = getPrimaryAccept(vg);
-
-    insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> suffixes;
-
-    /* find suffixes */
-    for (const RoseInEdge &e : in_edges_range(accept, vg)) {
-        /* outfixes shouldn't have made it this far */
-        assert(vg[source(e, vg)].type == RIV_LITERAL);
-        assert(vg[e].graph); /* non suffix paths should be wired to other
-                                accepts */
-        const NGHolder *h = vg[e].graph.get();
-        suffixes[h].push_back(e);
-    }
-
-    /* look at suffixes and try to split */
-    for (const auto &m : suffixes) {
-        const NGHolder *h = m.first;
-        const auto &edges = m.second;
-        replaceSuffixWithInfix(*h, vg, edges, cc);
-    }
-}
-
-static
-bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) {
-    if (out_degree(h.start, h) != 3) {
-        return false;
-    }
-
-    NFAVertex v = NGHolder::null_vertex();
-    NFAVertex ds = NGHolder::null_vertex();
-
-    for (NFAVertex a : adjacent_vertices_range(h.start, h)) {
-        if (a == h.startDs) {
-            continue;
-        }
-        if (h[a].char_reach.all()) {
-            ds = a;
-            if (out_degree(ds, h) != 2 || !edge(ds, ds, h).second) {
-                return false;
-            }
-        } else {
-            v = a;
-        }
-    }
-
-    if (!v || !ds || !edge(ds, v, h).second) {
-        return false;
-    }
-
-    if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) {
-        return false;
-    }
-
-    ue2_literal lit;
-    lit.push_back(h[v].char_reach.find_first(),
-                  h[v].char_reach.isCaselessChar());
-    while (out_degree(v, h) == 1) {
-        NFAVertex vv = *adjacent_vertices(v, h).first;
-        if (h[vv].char_reach.count() != 1
-            && !h[vv].char_reach.isCaselessChar()) {
-            break;
-        }
-
-        v = vv;
-
-        lit.push_back(h[v].char_reach.find_first(),
-                      h[v].char_reach.isCaselessChar());
-    }
-
-    if (is_match_vertex(v, h) && h.kind != NFA_SUFFIX) {
-        /* we have rediscovered the post-infix literal */
-        return false;
-    }
-
-    if (bad_mixed_sensitivity(lit)) {
-        make_nocase(&lit);
-    }
-
-    DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str());
-    out->vv = {v};
-    out->lit = {lit};
-    return true;
-}
-
-static
-bool lookForDoubleCut(const NGHolder &h, const vector<RoseInEdge> &ee,
-                      RoseInGraph &vg, const Grey &grey) {
-    VertLitInfo info;
-    if (!leadingDotStartLiteral(h, &info)
-        || min_len(info.lit) < grey.violetDoubleCutLiteralLen) {
-        return false;
-    }
-    DEBUG_PRINTF("performing split\n");
-    return splitRoseEdge(h, vg, ee, {info});
-}
-
-static
-void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) {
-    if (!cc.grey.violetDoubleCut) {
-        return;
-    }
-
-    insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges;
-    for (const RoseInEdge &ve : edges_range(vg)) {
-        if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) {
-            const NGHolder *h = vg[ve].graph.get();
-            right_edges[h].push_back(ve);
-        }
-    }
-
-    for (const auto &m : right_edges) {
-        const NGHolder *h = m.first;
-        const auto &edges = m.second;
-        lookForDoubleCut(*h, edges, vg, cc.grey);
-    }
-}
-
-static
-pair<NFAVertex, ue2_literal> findLiteralBefore(const NGHolder &h, NFAVertex v) {
-    ue2_literal lit;
-    if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) {
-        return {v, std::move(lit) };
-    }
-    lit.push_back(h[v].char_reach.find_first(),
-                  h[v].char_reach.isCaselessChar());
-
-    while (in_degree(v, h) == 1) {
-        NFAVertex vv = *inv_adjacent_vertices(v, h).first;
-        if (h[vv].char_reach.count() != 1
-            && !h[vv].char_reach.isCaselessChar()) {
-            break;
-        }
-
-        lit.push_back(h[vv].char_reach.find_first(),
-                      h[vv].char_reach.isCaselessChar());
-        v = vv;
-    }
-
-    return {v, std::move(lit) };
-}
-
-static
-bool lookForDotStarPred(NFAVertex v, const NGHolder &h,
-                        NFAVertex *u, NFAVertex *ds) {
-    *u = NGHolder::null_vertex();
-    *ds = NGHolder::null_vertex();
-    for (NFAVertex a : inv_adjacent_vertices_range(v, h)) {
-        if (h[a].char_reach.all()) {
-            if (!edge(a, a, h).second) {
-                return false;
-            }
-
-            if (*ds) {
-                return false;
-            }
-
-            *ds = a;
-        } else {
-            if (*u) {
-                return false;
-            }
-            *u = a;
-        }
-    }
-
-    if (!*u || !*ds) {
-        return false;
-    }
-
-    return true;
-}
-
-static
-bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) {
-    /* Note: there is no delay yet - so the final literal is the already
-     * discovered successor literal - we are in fact interested in the literal
-     * before it. */
-
-    if (in_degree(h.accept, h) != 1) {
-        return false;
-    }
-
-    if (in_degree(h.acceptEod, h) != 1) {
-        assert(0);
-        return false;
-    }
-
-    NFAVertex v
-        = findLiteralBefore(h, *inv_adjacent_vertices(h.accept, h).first).first;
-
-    NFAVertex u;
-    NFAVertex ds;
-
-    if (!lookForDotStarPred(v, h, &u, &ds)) {
-        return false;
-    }
-
-    v = u;
-    auto rv = findLiteralBefore(h, v);
-
-    if (!lookForDotStarPred(v, h, &u, &ds)) {
-        return false;
-    }
-
-    ue2_literal lit = reverse_literal(rv.second);
-    DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str());
-
-    if (bad_mixed_sensitivity(lit)) {
-        make_nocase(&lit);
-    }
-
-    out->vv = {v};
-    out->lit = {lit};
-    return true;
-}
-
-static
-bool lookForTrailingLiteralDotStar(const NGHolder &h,
-                                   const vector<RoseInEdge> &ee,
-                                   RoseInGraph &vg, const Grey &grey) {
-    VertLitInfo info;
-    if (!trailingDotStarLiteral(h, &info)
-        || min_len(info.lit) < grey.violetDoubleCutLiteralLen) {
-        return false;
-    }
-    DEBUG_PRINTF("performing split\n");
-    return splitRoseEdge(h, vg, ee, info);
-}
-
-/* In streaming mode, active engines have to be caught up at stream boundaries
- * and have to be stored in stream state, so we prefer to decompose patterns
- * in to literals with no state between them if possible. */
-static
-void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) {
-    if (!cc.grey.violetLiteralChains) {
-        return;
-    }
-
-    insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges;
-    bool changed;
-    do {
-        changed = false;
-
-        right_edges.clear();
-        for (const RoseInEdge &ve : edges_range(vg)) {
-            if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) {
-                const NGHolder *h = vg[ve].graph.get();
-                right_edges[h].push_back(ve);
-            }
-        }
-
-        for (const auto &m : right_edges) {
-            const NGHolder *h = m.first;
-            const vector<RoseInEdge> &ee = m.second;
-            bool rv = lookForDoubleCut(*h, ee, vg, cc.grey);
-            if (!rv && h->kind != NFA_SUFFIX) {
-                rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey);
-            }
-            changed |= rv;
-        }
-    } while (changed);
-}
-
-static
-bool lookForCleanSplit(const NGHolder &h, const vector<RoseInEdge> &ee,
-                       RoseInGraph &vg, const CompileContext &cc) {
-    unique_ptr<VertLitInfo> split = findBestCleanSplit(h, cc);
-
-    if (split) {
-        return splitRoseEdge(h, vg, {ee}, *split);
-    }
-
-    return false;
-}
-
-#define MAX_DESIRED_CLEAN_SPLIT_DEPTH 4
-
-static
-void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) {
-    u32 gen = 0;
-
-    insertion_ordered_set<RoseInVertex> prev({getStart(vg)});
-    insertion_ordered_set<RoseInVertex> curr;
-
-    while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) {
-        curr.clear();
-        for (RoseInVertex u : prev) {
-            for (auto v : adjacent_vertices_range(u, vg)) {
-                curr.insert(v);
-            }
-        }
-
-        insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> rightfixes;
-        for (RoseInVertex v : curr) {
-            for (const RoseInEdge &e : out_edges_range(v, vg)) {
-                if (vg[e].graph) {
-                    NGHolder *h = vg[e].graph.get();
-                    rightfixes[h].push_back(e);
-                }
-            }
-        }
-
-        for (const auto &m : rightfixes) {
-            const NGHolder *h = m.first;
-            const auto &edges = m.second;
-            lookForCleanSplit(*h, edges, vg, cc);
-        }
-
-        prev = std::move(curr);
-        gen++;
-    }
-}
-
-static
-void rehomeEodSuffixes(RoseInGraph &vg) {
-    // Find edges to accept with EOD-anchored graphs that we can move over to
-    // acceptEod.
-    vector<RoseInEdge> acc_edges;
-    for (const auto &e : edges_range(vg)) {
-        if (vg[target(e, vg)].type != RIV_ACCEPT) {
-            continue;
-        }
-        if (vg[e].haig || !vg[e].graph) {
-            continue;
-        }
-
-        const NGHolder &h = *vg[e].graph;
-
-        if (in_degree(h.accept, h)) {
-            DEBUG_PRINTF("graph isn't eod anchored\n");
-            continue;
-        }
-
-        acc_edges.push_back(e);
-    }
-
-    for (const RoseInEdge &e : acc_edges) {
-        // Move this edge from accept to acceptEod
-        RoseInVertex w = add_vertex(RoseInVertexProps::makeAcceptEod(), vg);
-        add_edge(source(e, vg), w, vg[e], vg);
-        remove_edge(e, vg);
-    }
-
-    /* old accept vertices will be tidied up by final pruneUseless() call */
-}
-
-static
-bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) {
-    switch (h.kind) {
-    case NFA_OUTFIX: /* 'prefix' of eod */
-    case NFA_PREFIX:
-        return cc.grey.earlyMcClellanPrefix;
-    case NFA_INFIX:
-        return cc.grey.earlyMcClellanInfix;
-    case NFA_SUFFIX:
-        return cc.grey.earlyMcClellanSuffix;
-    default:
-        DEBUG_PRINTF("kind %u\n", (u32)h.kind);
-        assert(0);
-        return false;
-    }
-}
-
-static
-vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg,
-                                         const vector<RoseInEdge> &edges,
-                                         bool *single_trigger) {
-    vector<vector<CharReach>> triggers;
-    u32 min_offset = ~0U;
-    u32 max_offset = 0;
-    for (const auto &e : edges) {
-        RoseInVertex s = source(e, vg);
-        if (vg[s].type == RIV_LITERAL) {
-            triggers.push_back(as_cr_seq(vg[s].s));
-        }
-        ENSURE_AT_LEAST(&max_offset, vg[s].max_offset);
-        LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset);
-    }
-
-    *single_trigger = min_offset == max_offset;
-    DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset);
-
-    return triggers;
-}
-
-static
-bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h,
-                const vector<RoseInEdge> &edges, bool final_chance,
-                const ReportManager &rm, const CompileContext &cc) {
-    DEBUG_PRINTF("trying for dfa\n");
-
-    bool single_trigger;
-    for (const auto &e : edges) {
-        if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) {
-            /* TODO: support eod prefixes */
-            return false;
-        }
-    }
-
-    auto triggers = getDfaTriggers(vg, edges, &single_trigger);
-
-    /* TODO: literal delay things */
-    if (!generates_callbacks(h)) {
-        set_report(h, rose.getNewNfaReport());
-    }
-
-    shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers,
-                                             cc.grey, final_chance);
-
-    if (!dfa) {
-        return false;
-    }
-
-    DEBUG_PRINTF("dfa ok\n");
-    for (const auto &e : edges) {
-        vg[e].dfa = dfa;
-    }
-
-    return true;
-}
-
-#define MAX_EDGES_FOR_IMPLEMENTABILITY 50
-
-static
-bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
-                              const vector<RoseInEdge> &edges,
-                              const CompileContext &cc) {
-    vector<pair<ue2_literal, u32>> succ_lits;
-    DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n",
-                  to_string(h.kind).c_str(), num_vertices(h), edges.size());
-
-    if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) {
-        return false;
-    }
-
-    if (!generates_callbacks(h)) {
-        for (const auto &e : edges) {
-            const auto &lit = vg[target(e, vg)].s;
-            u32 delay = vg[e].graph_lag;
-            vg[e].graph_lag = 0;
-
-            assert(delay <= lit.length());
-            succ_lits.emplace_back(lit, delay);
-        }
-        restoreTrailingLiteralStates(h, succ_lits);
-    }
-
-    unique_ptr<VertLitInfo> split;
-    bool last_chance = true;
-    if (h.kind == NFA_PREFIX) {
-        auto depths = calcDepths(h);
-
-        split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc);
-    } else {
-        split = findBestLastChanceSplit(h, vg, edges, cc);
-    }
-
-    if (split && splitRoseEdge(h, vg, edges, *split)) {
-        DEBUG_PRINTF("split on simple literal\n");
-        return true;
-    }
-
-    DEBUG_PRINTF("trying to netflow\n");
-    bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey);
-    DEBUG_PRINTF("done\n");
-
-    return rv;
-}
-
-#define MAX_IMPLEMENTABLE_SPLITS 50
-
-bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes,
-                         bool final_chance, const ReportManager &rm,
-                         const CompileContext &cc) {
-    DEBUG_PRINTF("checking for impl %d\n", final_chance);
-    bool changed = false;
-    bool need_to_recalc = false;
-    u32 added_count = 0;
-    unordered_set<shared_ptr<NGHolder>> good; /* known to be implementable */
-    do {
-        changed = false;
-        DEBUG_PRINTF("added %u\n", added_count);
-        insertion_ordered_map<shared_ptr<NGHolder>,
-                              vector<RoseInEdge>> edges_by_graph;
-        for (const RoseInEdge &ve : edges_range(vg)) {
-            if (vg[ve].graph && !vg[ve].dfa) {
-                auto &h = vg[ve].graph;
-                edges_by_graph[h].push_back(ve);
-            }
-        }
-        for (auto &m : edges_by_graph) {
-            auto &h = m.first;
-            if (contains(good, h)) {
-                continue;
-            }
-            reduceGraphEquivalences(*h, cc);
-            if (isImplementableNFA(*h, &rm, cc)) {
-                good.insert(h);
-                continue;
-            }
-
-            const auto &edges = m.second;
-
-            if (tryForEarlyDfa(*h, cc) &&
-                doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) {
-                continue;
-            }
-
-            DEBUG_PRINTF("eek\n");
-            if (!allow_changes) {
-                return false;
-            }
-
-            if (splitForImplementability(vg, *h, edges, cc)) {
-                added_count++;
-                if (added_count > MAX_IMPLEMENTABLE_SPLITS) {
-                    DEBUG_PRINTF("added_count hit limit\n");
-                    return false;
-                }
-                changed = true;
-                continue;
-            }
-
-            return false;
-        }
-
-        assert(added_count <= MAX_IMPLEMENTABLE_SPLITS);
-
-        if (changed) {
-            removeRedundantLiterals(vg, cc);
-            pruneUseless(vg);
-            need_to_recalc = true;
-        }
-    } while (changed);
-
-    if (need_to_recalc) {
-        renumber_vertices(vg);
-        calcVertexOffsets(vg);
-    }
-
-    DEBUG_PRINTF("ok!\n");
-    return true;
-}
-
-static
-RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance,
-                                     const CompileContext &cc) {
-    assert(!can_never_match(h));
-
-    RoseInGraph vg = populateTrivialGraph(h);
-
-    if (!cc.grey.allowViolet) {
-        return vg;
-    }
-
-    /* Avoid running the Violet analysis at all on graphs with no vertices with
-     * small reach, since we will not be able to extract any literals. */
-    if (!hasNarrowReachVertex(h)) {
-        DEBUG_PRINTF("fail, no vertices with small reach\n");
-        return vg;
-    }
-
-    DEBUG_PRINTF("hello world\n");
-
-    /* Step 1: avoid outfixes as we always have to run them. */
-    avoidOutfixes(vg, last_chance, cc);
-
-    if (num_vertices(vg) <= 2) {
-        return vg; /* unable to transform pattern */
-    }
-
-    removeRedundantPrefixes(vg);
-    dumpPreRoseGraph(vg, cc.grey, "pre_prefix_rose.dot");
-
-    /* Step 2: avoid non-transient prefixes (esp in streaming mode) */
-    findBetterPrefixes(vg, cc);
-
-    dumpPreRoseGraph(vg, cc.grey, "post_prefix_rose.dot");
-
-    extractStrongLiterals(vg, cc);
-    dumpPreRoseGraph(vg, cc.grey, "post_extract_rose.dot");
-    improveWeakInfixes(vg, cc);
-    dumpPreRoseGraph(vg, cc.grey, "post_infix_rose.dot");
-
-    /* Step 3: avoid output exposed engines if there is a strong trailing
-       literal) */
-    avoidSuffixes(vg, cc);
-
-    /* Step 4: look for infixes/suffixes with leading .*literals
-     * This can reduce the amount of work a heavily picked literal has to do and
-     * reduce the amount of state used as .* is handled internally to rose. */
-    lookForDoubleCut(vg, cc);
-
-    if (cc.streaming) {
-        lookForCleanEarlySplits(vg, cc);
-        decomposeLiteralChains(vg, cc);
-    }
-
-    rehomeEodSuffixes(vg);
-    removeRedundantLiterals(vg, cc);
-
-    pruneUseless(vg);
-    dumpPreRoseGraph(vg, cc.grey);
-    renumber_vertices(vg);
-    calcVertexOffsets(vg);
-
-    return vg;
-}
-
-bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
-              bool last_chance, const ReportManager &rm,
-              const CompileContext &cc) {
-    auto vg = doInitialVioletTransform(h, last_chance, cc);
-    if (num_vertices(vg) <= 2) {
-        return false;
-    }
-
-    /* Step 5: avoid unimplementable, or overly large engines if possible */
-    if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) {
-        return false;
-    }
-    dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot");
-
-    /* Step 6: send to rose */
-    bool rv = rose.addRose(vg, prefilter);
-    DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail");
-    return rv;
-}
-
-bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter,
-                 const CompileContext &cc) {
-    auto vg = doInitialVioletTransform(h, true, cc);
-    if (num_vertices(vg) <= 2) {
-        return false;
-    }
-
-    bool rv = roseCheckRose(vg, prefilter, rm, cc);
-    DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail");
-    return rv;
-}
-
-}
+ 
+        for (const auto &e : ee) { 
+            shared_ptr<NGHolder> hh = cloneHolder(h); 
+            auto succ_lit = vg[target(e, vg)].s; 
+            assert(isCorrectlyTopped(*hh)); 
+            u32 delay = removeTrailingLiteralStates(*hh, succ_lit, 
+                                                    succ_lit.length(), 
+                                              false /* can't overhang start */); 
+            if (!delay) { 
+                DEBUG_PRINTF("could not remove any literal, skip over\n"); 
+                continue; 
+            } 
+ 
+            assert(isCorrectlyTopped(*hh)); 
+            trimmed[hh].emplace_back(e, delay); 
+        } 
+ 
+        if (trimmed.size() == 1) { 
+            return false; 
+        } 
+ 
+        /* shift the contents to a vector so we can modify the graphs without 
+         * violating the map's invariants. */ 
+        vector<pair<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> > > > 
+            trimmed_vec(trimmed.begin(), trimmed.end()); 
+        trimmed.clear(); 
+        for (auto &elem : trimmed_vec) { 
+            shared_ptr<NGHolder> &hp = elem.first; 
+            vector<pair<ue2_literal, u32>> succ_lits; 
+ 
+            for (const auto &edge_delay : elem.second) { 
+                const RoseInEdge &e = edge_delay.first; 
+                u32 delay = edge_delay.second; 
+                auto lit = vg[target(e, vg)].s; 
+ 
+                vg[e].graph = hp; 
+                assert(delay <= lit.length()); 
+                succ_lits.emplace_back(lit, delay); 
+            } 
+            restoreTrailingLiteralStates(*hp, succ_lits); 
+        } 
+        return true; 
+    } 
+ 
+    return false; 
+} 
+ 
+#define MAX_FIND_BETTER_PREFIX_GEN   4 
+#define MAX_FIND_BETTER_PREFIX_COUNT 100 
+ 
+static 
+void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { 
+    STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n"); 
+    RoseInVertex start = getStart(vg); 
+ 
+    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> prefixes; 
+    bool changed; 
+    u32 gen = 0; 
+    do { 
+        DEBUG_PRINTF("gen %u\n", gen); 
+        changed = false; 
+        prefixes.clear(); 
+ 
+        /* find prefixes */ 
+        for (const RoseInEdge &e : out_edges_range(start, vg)) { 
+            /* outfixes shouldn't have made it this far */ 
+            assert(vg[target(e, vg)].type == RIV_LITERAL); 
+            if (vg[e].graph) { 
+                NGHolder *h = vg[e].graph.get(); 
+                prefixes[h].push_back(e); 
+            } 
+        } 
+ 
+        if (prefixes.size() > MAX_FIND_BETTER_PREFIX_COUNT) { 
+            break; 
+        } 
+ 
+        /* look for bad prefixes and try to split */ 
+        for (const auto &m : prefixes) { 
+            NGHolder *h = m.first; 
+            const auto &edges = m.second; 
+            depth max_width = findMaxWidth(*h); 
+            if (willBeTransient(max_width, cc) 
+                || willBeAnchoredTable(max_width, cc.grey)) { 
+                continue; 
+            } 
+ 
+            changed = improvePrefix(*h, vg, edges, cc); 
+        } 
+    } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); 
+} 
+ 
+#define STRONG_LITERAL_LENGTH 20 
+#define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10 
+ 
+static 
+bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg, 
+                          const vector<RoseInEdge> &ee, 
+                          const CompileContext &cc) { 
+    DEBUG_PRINTF("looking for string literal\n"); 
+    unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc); 
+ 
+    if (split && min_len(split->lit) >= STRONG_LITERAL_LENGTH) { 
+        DEBUG_PRINTF("splitting simple literal\n"); 
+        return splitRoseEdge(h, vg, ee, *split); 
+    } 
+ 
+    return false; 
+} 
+ 
+static 
+void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { 
+    if (!cc.grey.violetExtractStrongLiterals) { 
+        return; 
+    } 
+ 
+    STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n"); 
+ 
+    unordered_set<NGHolder *> stuck; 
+    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> edges_by_graph; 
+    bool changed; 
+ 
+    do { 
+        changed = false; 
+ 
+        edges_by_graph.clear(); 
+        for (const RoseInEdge &ve : edges_range(vg)) { 
+            if (vg[source(ve, vg)].type != RIV_LITERAL) { 
+                continue; 
+            } 
+ 
+            if (vg[ve].graph) { 
+                NGHolder *h = vg[ve].graph.get(); 
+                edges_by_graph[h].push_back(ve); 
+            } 
+        } 
+ 
+        if (edges_by_graph.size() > MAX_EXTRACT_STRONG_LITERAL_GRAPHS) { 
+            DEBUG_PRINTF("too many graphs, stopping\n"); 
+            return; 
+        } 
+ 
+        for (const auto &m : edges_by_graph) { 
+            NGHolder *g = m.first; 
+            const auto &edges = m.second; 
+            if (contains(stuck, g)) { 
+                DEBUG_PRINTF("already known to be bad\n"); 
+                continue; 
+            } 
+            bool rv = extractStrongLiteral(*g, vg, edges, cc); 
+            if (rv) { 
+                changed = true; 
+            } else { 
+                stuck.insert(g); 
+            } 
+        } 
+    } while (changed); 
+} 
+ 
+#define INFIX_STRONG_GUARD_LEN 8 
+#define INFIX_MIN_SPLIT_LITERAL_LEN 12 
+ 
+static 
+bool improveInfix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee, 
+                  const CompileContext &cc) { 
+    unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc); 
+ 
+    if (split && min_len(split->lit) >= INFIX_MIN_SPLIT_LITERAL_LEN 
+        && splitRoseEdge(h, vg, ee, *split)) { 
+        DEBUG_PRINTF("splitting simple literal\n"); 
+        return true; 
+    } 
+ 
+    DEBUG_PRINTF("trying for a netflow cut\n"); 
+    /* look for netflow cuts which don't produce good prefixes */ 
+    bool rv = doNetflowCut(h, nullptr, vg, ee, false, cc.grey, 8); 
+ 
+    DEBUG_PRINTF("did netfow cut? = %d\n", (int)rv); 
+ 
+    return rv; 
+} 
+ 
+/** 
+ * Infixes which are weakly guarded can, in effect, act like prefixes as they 
+ * will often be live. We should try to split these infixes further if they 
+ * contain strong literals so that we are at least running smaller weak infixes 
+ * which can hopeful be accelerated/miracled. 
+ */ 
+static 
+void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { 
+    if (!cc.grey.violetAvoidWeakInfixes) { 
+        return; 
+    } 
+    STAGE_DEBUG_PRINTF("IMPROVE WEAK INFIXES\n"); 
+ 
+    RoseInVertex start = getStart(vg); 
+ 
+    unordered_set<NGHolder *> weak; 
+ 
+    for (RoseInVertex vv : adjacent_vertices_range(start, vg)) { 
+        /* outfixes shouldn't have made it this far */ 
+        assert(vg[vv].type == RIV_LITERAL); 
+        if (vg[vv].s.length() >= INFIX_STRONG_GUARD_LEN) { 
+            continue; 
+        } 
+ 
+        for (const RoseInEdge &e : out_edges_range(vv, vg)) { 
+            if (vg[target(e, vg)].type != RIV_LITERAL || !vg[e].graph) { 
+                continue; 
+            } 
+ 
+            NGHolder *h = vg[e].graph.get(); 
+            DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h); 
+            weak.insert(h); 
+        } 
+    } 
+ 
+    insertion_ordered_map<NGHolder *, vector<RoseInEdge>> weak_edges; 
+    for (const RoseInEdge &ve : edges_range(vg)) { 
+        NGHolder *h = vg[ve].graph.get(); 
+        if (contains(weak, h)) { 
+            weak_edges[h].push_back(ve); 
+        } 
+    } 
+ 
+    for (const auto &m : weak_edges) { 
+        NGHolder *h = m.first; 
+        const auto &edges = m.second; 
+        improveInfix(*h, vg, edges, cc); 
+    } 
+} 
+ 
+static 
+void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, 
+                         const vector<RoseInEdge> &ee, const VertLitInfo &split, 
+                         bool eod, const flat_set<ReportID> &reports) { 
+    const vector<NFAVertex> &splitters = split.vv; 
+    assert(!splitters.empty()); 
+ 
+    shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); 
+    unordered_map<NFAVertex, NFAVertex> v_map; 
+    cloneHolder(*lhs, base_graph, &v_map); 
+    lhs->kind = NFA_INFIX; 
+    clear_in_edges(lhs->accept, *lhs); 
+    clear_in_edges(lhs->acceptEod, *lhs); 
+    add_edge(lhs->accept, lhs->acceptEod, *lhs); 
+    clearReports(*lhs); 
+    for (NFAVertex v : splitters) { 
+        NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); 
+        if (v == base_graph.start) { 
+            (*lhs)[e].tops.insert(DEFAULT_TOP); 
+        } 
+        (*lhs)[v_map[v]].reports.insert(0); 
+ 
+    } 
+    pruneUseless(*lhs); 
+    assert(isCorrectlyTopped(*lhs)); 
+ 
+    /* create literal vertices and connect preds */ 
+    for (const auto &lit : split.lit) { 
+        if (!can_match(*lhs, lit, is_triggered(*lhs))) { 
+            continue; 
+        } 
+ 
+        DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); 
+        RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); 
+ 
+        RoseInVertex tt; 
+        if (eod) { 
+            DEBUG_PRINTF("doing eod\n"); 
+            tt = add_vertex(RoseInVertexProps::makeAcceptEod(reports), vg); 
+        } else { 
+            DEBUG_PRINTF("doing non-eod\n"); 
+            tt = add_vertex(RoseInVertexProps::makeAccept(reports), vg); 
+        } 
+        add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); 
+ 
+        for (const RoseInEdge &e : ee) { 
+            RoseInVertex u = source(e, vg); 
+            assert(!edge(u, v, vg).second); 
+            add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); 
+        } 
+    } 
+} 
+ 
+#define MIN_SUFFIX_LEN 6 
+ 
+static 
+bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, 
+                            const vector<RoseInEdge> &suffix_edges, 
+                            const CompileContext &cc) { 
+    DEBUG_PRINTF("inspecting suffix : %p on %zu edges\n", &h, 
+                 suffix_edges.size()); 
+    /* 
+     * We would, in general, rather not have output exposed engines because 
+     * once they are triggered, they must be run while infixes only have to run 
+     * if the successor literal is seen. Matches from output exposed engines 
+     * also have to be placed in a priority queue and interleaved with matches 
+     * from other sources. 
+     * 
+     * Note: 
+     * - if the LHS is extremely unlikely we may be better off leaving 
+     *   a suffix unguarded. 
+     * 
+     * - limited width suffixes may be less bad as they won't be continuously 
+     *   active, we may want to have (a) stronger controls on if we want to pick 
+     *   a trailing literal in these cases and/or (b) look also for literals 
+     *   near accept as well as right on accept 
+     * 
+     * TODO: improve heuristics, splitting logic. 
+     */ 
+ 
+    /* we may do multiple splits corresponding to different report behaviour */ 
+    set<NFAVertex> seen; 
+    map<pair<bool, flat_set<ReportID> >, VertLitInfo> by_reports; /* eod, rep */ 
+ 
+    for (NFAVertex v : inv_adjacent_vertices_range(h.accept, h)) { 
+        set<ue2_literal> ss = getLiteralSet(h, v, false); 
+        if (ss.empty()) { 
+            DEBUG_PRINTF("candidate is too shitty\n"); 
+            return false; 
+        } 
+ 
+        VertLitInfo &vli = by_reports[make_pair(false, h[v].reports)]; 
+        insert(&vli.lit, ss); 
+        vli.vv.push_back(v); 
+        seen.insert(v); 
+    } 
+ 
+    seen.insert(h.accept); 
+    for (NFAVertex v : inv_adjacent_vertices_range(h.acceptEod, h)) { 
+        if (contains(seen, v)) { 
+            continue; 
+        } 
+ 
+        set<ue2_literal> ss = getLiteralSet(h, v, false); 
+        if (ss.empty()) { 
+            DEBUG_PRINTF("candidate is too shitty\n"); 
+            return false; 
+        } 
+ 
+        VertLitInfo &vli = by_reports[make_pair(true, h[v].reports)]; 
+        insert(&vli.lit, ss); 
+        vli.vv.push_back(v); 
+    } 
+ 
+    assert(!by_reports.empty()); 
+ 
+    /* TODO: how strong a min len do we want here ? */ 
+    u32 min_len = cc.grey.minRoseLiteralLength; 
+    ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN); 
+ 
+    for (auto &vli : by_reports | map_values) { 
+        u64a score = sanitizeAndCompressAndScore(vli.lit); 
+ 
+        if (vli.lit.empty() 
+            || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, 
+                                              false, false)) { 
+            return false; 
+        } 
+    } 
+ 
+    for (const auto &info : by_reports) { 
+        DEBUG_PRINTF("splitting on simple literals\n"); 
+        splitEdgesForSuffix(h, vg, suffix_edges, info.second, 
+                            info.first.first /* eod */, 
+                            info.first.second /* reports */); 
+    } 
+ 
+    for (const RoseInEdge &e : suffix_edges) { 
+        remove_edge(e, vg); 
+    } 
+    return true; 
+} 
+ 
+static 
+void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { 
+    if (!cc.grey.violetAvoidSuffixes) { 
+        return; 
+    } 
+ 
+    STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n"); 
+ 
+    RoseInVertex accept = getPrimaryAccept(vg); 
+ 
+    insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> suffixes; 
+ 
+    /* find suffixes */ 
+    for (const RoseInEdge &e : in_edges_range(accept, vg)) { 
+        /* outfixes shouldn't have made it this far */ 
+        assert(vg[source(e, vg)].type == RIV_LITERAL); 
+        assert(vg[e].graph); /* non suffix paths should be wired to other 
+                                accepts */ 
+        const NGHolder *h = vg[e].graph.get(); 
+        suffixes[h].push_back(e); 
+    } 
+ 
+    /* look at suffixes and try to split */ 
+    for (const auto &m : suffixes) { 
+        const NGHolder *h = m.first; 
+        const auto &edges = m.second; 
+        replaceSuffixWithInfix(*h, vg, edges, cc); 
+    } 
+} 
+ 
+static 
+bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) { 
+    if (out_degree(h.start, h) != 3) { 
+        return false; 
+    } 
+ 
+    NFAVertex v = NGHolder::null_vertex(); 
+    NFAVertex ds = NGHolder::null_vertex(); 
+ 
+    for (NFAVertex a : adjacent_vertices_range(h.start, h)) { 
+        if (a == h.startDs) { 
+            continue; 
+        } 
+        if (h[a].char_reach.all()) { 
+            ds = a; 
+            if (out_degree(ds, h) != 2 || !edge(ds, ds, h).second) { 
+                return false; 
+            } 
+        } else { 
+            v = a; 
+        } 
+    } 
+ 
+    if (!v || !ds || !edge(ds, v, h).second) { 
+        return false; 
+    } 
+ 
+    if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { 
+        return false; 
+    } 
+ 
+    ue2_literal lit; 
+    lit.push_back(h[v].char_reach.find_first(), 
+                  h[v].char_reach.isCaselessChar()); 
+    while (out_degree(v, h) == 1) { 
+        NFAVertex vv = *adjacent_vertices(v, h).first; 
+        if (h[vv].char_reach.count() != 1 
+            && !h[vv].char_reach.isCaselessChar()) { 
+            break; 
+        } 
+ 
+        v = vv; 
+ 
+        lit.push_back(h[v].char_reach.find_first(), 
+                      h[v].char_reach.isCaselessChar()); 
+    } 
+ 
+    if (is_match_vertex(v, h) && h.kind != NFA_SUFFIX) { 
+        /* we have rediscovered the post-infix literal */ 
+        return false; 
+    } 
+ 
+    if (bad_mixed_sensitivity(lit)) { 
+        make_nocase(&lit); 
+    } 
+ 
+    DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); 
+    out->vv = {v}; 
+    out->lit = {lit}; 
+    return true; 
+} 
+ 
+static 
+bool lookForDoubleCut(const NGHolder &h, const vector<RoseInEdge> &ee, 
+                      RoseInGraph &vg, const Grey &grey) { 
+    VertLitInfo info; 
+    if (!leadingDotStartLiteral(h, &info) 
+        || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { 
+        return false; 
+    } 
+    DEBUG_PRINTF("performing split\n"); 
+    return splitRoseEdge(h, vg, ee, {info}); 
+} 
+ 
+static 
+void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) { 
+    if (!cc.grey.violetDoubleCut) { 
+        return; 
+    } 
+ 
+    insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges; 
+    for (const RoseInEdge &ve : edges_range(vg)) { 
+        if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { 
+            const NGHolder *h = vg[ve].graph.get(); 
+            right_edges[h].push_back(ve); 
+        } 
+    } 
+ 
+    for (const auto &m : right_edges) { 
+        const NGHolder *h = m.first; 
+        const auto &edges = m.second; 
+        lookForDoubleCut(*h, edges, vg, cc.grey); 
+    } 
+} 
+ 
+static 
+pair<NFAVertex, ue2_literal> findLiteralBefore(const NGHolder &h, NFAVertex v) { 
+    ue2_literal lit; 
+    if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { 
+        return {v, std::move(lit) }; 
+    } 
+    lit.push_back(h[v].char_reach.find_first(), 
+                  h[v].char_reach.isCaselessChar()); 
+ 
+    while (in_degree(v, h) == 1) { 
+        NFAVertex vv = *inv_adjacent_vertices(v, h).first; 
+        if (h[vv].char_reach.count() != 1 
+            && !h[vv].char_reach.isCaselessChar()) { 
+            break; 
+        } 
+ 
+        lit.push_back(h[vv].char_reach.find_first(), 
+                      h[vv].char_reach.isCaselessChar()); 
+        v = vv; 
+    } 
+ 
+    return {v, std::move(lit) }; 
+} 
+ 
+static 
+bool lookForDotStarPred(NFAVertex v, const NGHolder &h, 
+                        NFAVertex *u, NFAVertex *ds) { 
+    *u = NGHolder::null_vertex(); 
+    *ds = NGHolder::null_vertex(); 
+    for (NFAVertex a : inv_adjacent_vertices_range(v, h)) { 
+        if (h[a].char_reach.all()) { 
+            if (!edge(a, a, h).second) { 
+                return false; 
+            } 
+ 
+            if (*ds) { 
+                return false; 
+            } 
+ 
+            *ds = a; 
+        } else { 
+            if (*u) { 
+                return false; 
+            } 
+            *u = a; 
+        } 
+    } 
+ 
+    if (!*u || !*ds) { 
+        return false; 
+    } 
+ 
+    return true; 
+} 
+ 
+static 
+bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) { 
+    /* Note: there is no delay yet - so the final literal is the already 
+     * discovered successor literal - we are in fact interested in the literal 
+     * before it. */ 
+ 
+    if (in_degree(h.accept, h) != 1) { 
+        return false; 
+    } 
+ 
+    if (in_degree(h.acceptEod, h) != 1) { 
+        assert(0); 
+        return false; 
+    } 
+ 
+    NFAVertex v 
+        = findLiteralBefore(h, *inv_adjacent_vertices(h.accept, h).first).first; 
+ 
+    NFAVertex u; 
+    NFAVertex ds; 
+ 
+    if (!lookForDotStarPred(v, h, &u, &ds)) { 
+        return false; 
+    } 
+ 
+    v = u; 
+    auto rv = findLiteralBefore(h, v); 
+ 
+    if (!lookForDotStarPred(v, h, &u, &ds)) { 
+        return false; 
+    } 
+ 
+    ue2_literal lit = reverse_literal(rv.second); 
+    DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); 
+ 
+    if (bad_mixed_sensitivity(lit)) { 
+        make_nocase(&lit); 
+    } 
+ 
+    out->vv = {v}; 
+    out->lit = {lit}; 
+    return true; 
+} 
+ 
+static 
+bool lookForTrailingLiteralDotStar(const NGHolder &h, 
+                                   const vector<RoseInEdge> &ee, 
+                                   RoseInGraph &vg, const Grey &grey) { 
+    VertLitInfo info; 
+    if (!trailingDotStarLiteral(h, &info) 
+        || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { 
+        return false; 
+    } 
+    DEBUG_PRINTF("performing split\n"); 
+    return splitRoseEdge(h, vg, ee, info); 
+} 
+ 
+/* In streaming mode, active engines have to be caught up at stream boundaries 
+ * and have to be stored in stream state, so we prefer to decompose patterns 
+ * in to literals with no state between them if possible. */ 
+static 
+void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) { 
+    if (!cc.grey.violetLiteralChains) { 
+        return; 
+    } 
+ 
+    insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges; 
+    bool changed; 
+    do { 
+        changed = false; 
+ 
+        right_edges.clear(); 
+        for (const RoseInEdge &ve : edges_range(vg)) { 
+            if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { 
+                const NGHolder *h = vg[ve].graph.get(); 
+                right_edges[h].push_back(ve); 
+            } 
+        } 
+ 
+        for (const auto &m : right_edges) { 
+            const NGHolder *h = m.first; 
+            const vector<RoseInEdge> &ee = m.second; 
+            bool rv = lookForDoubleCut(*h, ee, vg, cc.grey); 
+            if (!rv && h->kind != NFA_SUFFIX) { 
+                rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey); 
+            } 
+            changed |= rv; 
+        } 
+    } while (changed); 
+} 
+ 
+static 
+bool lookForCleanSplit(const NGHolder &h, const vector<RoseInEdge> &ee, 
+                       RoseInGraph &vg, const CompileContext &cc) { 
+    unique_ptr<VertLitInfo> split = findBestCleanSplit(h, cc); 
+ 
+    if (split) { 
+        return splitRoseEdge(h, vg, {ee}, *split); 
+    } 
+ 
+    return false; 
+} 
+ 
+#define MAX_DESIRED_CLEAN_SPLIT_DEPTH 4 
+ 
+static 
+void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) { 
+    u32 gen = 0; 
+ 
+    insertion_ordered_set<RoseInVertex> prev({getStart(vg)}); 
+    insertion_ordered_set<RoseInVertex> curr; 
+ 
+    while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) { 
+        curr.clear(); 
+        for (RoseInVertex u : prev) { 
+            for (auto v : adjacent_vertices_range(u, vg)) { 
+                curr.insert(v); 
+            } 
+        } 
+ 
+        insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> rightfixes; 
+        for (RoseInVertex v : curr) { 
+            for (const RoseInEdge &e : out_edges_range(v, vg)) { 
+                if (vg[e].graph) { 
+                    NGHolder *h = vg[e].graph.get(); 
+                    rightfixes[h].push_back(e); 
+                } 
+            } 
+        } 
+ 
+        for (const auto &m : rightfixes) { 
+            const NGHolder *h = m.first; 
+            const auto &edges = m.second; 
+            lookForCleanSplit(*h, edges, vg, cc); 
+        } 
+ 
+        prev = std::move(curr); 
+        gen++; 
+    } 
+} 
+ 
+static 
+void rehomeEodSuffixes(RoseInGraph &vg) { 
+    // Find edges to accept with EOD-anchored graphs that we can move over to 
+    // acceptEod. 
+    vector<RoseInEdge> acc_edges; 
+    for (const auto &e : edges_range(vg)) { 
+        if (vg[target(e, vg)].type != RIV_ACCEPT) { 
+            continue; 
+        } 
+        if (vg[e].haig || !vg[e].graph) { 
+            continue; 
+        } 
+ 
+        const NGHolder &h = *vg[e].graph; 
+ 
+        if (in_degree(h.accept, h)) { 
+            DEBUG_PRINTF("graph isn't eod anchored\n"); 
+            continue; 
+        } 
+ 
+        acc_edges.push_back(e); 
+    } 
+ 
+    for (const RoseInEdge &e : acc_edges) { 
+        // Move this edge from accept to acceptEod 
+        RoseInVertex w = add_vertex(RoseInVertexProps::makeAcceptEod(), vg); 
+        add_edge(source(e, vg), w, vg[e], vg); 
+        remove_edge(e, vg); 
+    } 
+ 
+    /* old accept vertices will be tidied up by final pruneUseless() call */ 
+} 
+ 
+static 
+bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) { 
+    switch (h.kind) { 
+    case NFA_OUTFIX: /* 'prefix' of eod */ 
+    case NFA_PREFIX: 
+        return cc.grey.earlyMcClellanPrefix; 
+    case NFA_INFIX: 
+        return cc.grey.earlyMcClellanInfix; 
+    case NFA_SUFFIX: 
+        return cc.grey.earlyMcClellanSuffix; 
+    default: 
+        DEBUG_PRINTF("kind %u\n", (u32)h.kind); 
+        assert(0); 
+        return false; 
+    } 
+} 
+ 
+static 
+vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg, 
+                                         const vector<RoseInEdge> &edges, 
+                                         bool *single_trigger) { 
+    vector<vector<CharReach>> triggers; 
+    u32 min_offset = ~0U; 
+    u32 max_offset = 0; 
+    for (const auto &e : edges) { 
+        RoseInVertex s = source(e, vg); 
+        if (vg[s].type == RIV_LITERAL) { 
+            triggers.push_back(as_cr_seq(vg[s].s)); 
+        } 
+        ENSURE_AT_LEAST(&max_offset, vg[s].max_offset); 
+        LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset); 
+    } 
+ 
+    *single_trigger = min_offset == max_offset; 
+    DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset); 
+ 
+    return triggers; 
+} 
+ 
+static 
+bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, 
+                const vector<RoseInEdge> &edges, bool final_chance, 
+                const ReportManager &rm, const CompileContext &cc) { 
+    DEBUG_PRINTF("trying for dfa\n"); 
+ 
+    bool single_trigger; 
+    for (const auto &e : edges) { 
+        if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) { 
+            /* TODO: support eod prefixes */ 
+            return false; 
+        } 
+    } 
+ 
+    auto triggers = getDfaTriggers(vg, edges, &single_trigger); 
+ 
+    /* TODO: literal delay things */ 
+    if (!generates_callbacks(h)) { 
+        set_report(h, rose.getNewNfaReport()); 
+    } 
+ 
+    shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers, 
+                                             cc.grey, final_chance); 
+ 
+    if (!dfa) { 
+        return false; 
+    } 
+ 
+    DEBUG_PRINTF("dfa ok\n"); 
+    for (const auto &e : edges) { 
+        vg[e].dfa = dfa; 
+    } 
+ 
+    return true; 
+} 
+ 
+#define MAX_EDGES_FOR_IMPLEMENTABILITY 50 
+ 
+static 
+bool splitForImplementability(RoseInGraph &vg, NGHolder &h, 
+                              const vector<RoseInEdge> &edges, 
+                              const CompileContext &cc) { 
+    vector<pair<ue2_literal, u32>> succ_lits; 
+    DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", 
+                  to_string(h.kind).c_str(), num_vertices(h), edges.size()); 
+ 
+    if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) { 
+        return false; 
+    } 
+ 
+    if (!generates_callbacks(h)) { 
+        for (const auto &e : edges) { 
+            const auto &lit = vg[target(e, vg)].s; 
+            u32 delay = vg[e].graph_lag; 
+            vg[e].graph_lag = 0; 
+ 
+            assert(delay <= lit.length()); 
+            succ_lits.emplace_back(lit, delay); 
+        } 
+        restoreTrailingLiteralStates(h, succ_lits); 
+    } 
+ 
+    unique_ptr<VertLitInfo> split; 
+    bool last_chance = true; 
+    if (h.kind == NFA_PREFIX) { 
+        auto depths = calcDepths(h); 
+ 
+        split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); 
+    } else { 
+        split = findBestLastChanceSplit(h, vg, edges, cc); 
+    } 
+ 
+    if (split && splitRoseEdge(h, vg, edges, *split)) { 
+        DEBUG_PRINTF("split on simple literal\n"); 
+        return true; 
+    } 
+ 
+    DEBUG_PRINTF("trying to netflow\n"); 
+    bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); 
+    DEBUG_PRINTF("done\n"); 
+ 
+    return rv; 
+} 
+ 
+#define MAX_IMPLEMENTABLE_SPLITS 50 
+ 
+bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, 
+                         bool final_chance, const ReportManager &rm, 
+                         const CompileContext &cc) { 
+    DEBUG_PRINTF("checking for impl %d\n", final_chance); 
+    bool changed = false; 
+    bool need_to_recalc = false; 
+    u32 added_count = 0; 
+    unordered_set<shared_ptr<NGHolder>> good; /* known to be implementable */ 
+    do { 
+        changed = false; 
+        DEBUG_PRINTF("added %u\n", added_count); 
+        insertion_ordered_map<shared_ptr<NGHolder>, 
+                              vector<RoseInEdge>> edges_by_graph; 
+        for (const RoseInEdge &ve : edges_range(vg)) { 
+            if (vg[ve].graph && !vg[ve].dfa) { 
+                auto &h = vg[ve].graph; 
+                edges_by_graph[h].push_back(ve); 
+            } 
+        } 
+        for (auto &m : edges_by_graph) { 
+            auto &h = m.first; 
+            if (contains(good, h)) { 
+                continue; 
+            } 
+            reduceGraphEquivalences(*h, cc); 
+            if (isImplementableNFA(*h, &rm, cc)) { 
+                good.insert(h); 
+                continue; 
+            } 
+ 
+            const auto &edges = m.second; 
+ 
+            if (tryForEarlyDfa(*h, cc) && 
+                doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) { 
+                continue; 
+            } 
+ 
+            DEBUG_PRINTF("eek\n"); 
+            if (!allow_changes) { 
+                return false; 
+            } 
+ 
+            if (splitForImplementability(vg, *h, edges, cc)) { 
+                added_count++; 
+                if (added_count > MAX_IMPLEMENTABLE_SPLITS) { 
+                    DEBUG_PRINTF("added_count hit limit\n"); 
+                    return false; 
+                } 
+                changed = true; 
+                continue; 
+            } 
+ 
+            return false; 
+        } 
+ 
+        assert(added_count <= MAX_IMPLEMENTABLE_SPLITS); 
+ 
+        if (changed) { 
+            removeRedundantLiterals(vg, cc); 
+            pruneUseless(vg); 
+            need_to_recalc = true; 
+        } 
+    } while (changed); 
+ 
+    if (need_to_recalc) { 
+        renumber_vertices(vg); 
+        calcVertexOffsets(vg); 
+    } 
+ 
+    DEBUG_PRINTF("ok!\n"); 
+    return true; 
+} 
+ 
+static 
+RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, 
+                                     const CompileContext &cc) { 
+    assert(!can_never_match(h)); 
+ 
+    RoseInGraph vg = populateTrivialGraph(h); 
+ 
+    if (!cc.grey.allowViolet) { 
+        return vg; 
+    } 
+ 
+    /* Avoid running the Violet analysis at all on graphs with no vertices with 
+     * small reach, since we will not be able to extract any literals. */ 
+    if (!hasNarrowReachVertex(h)) { 
+        DEBUG_PRINTF("fail, no vertices with small reach\n"); 
+        return vg; 
+    } 
+ 
+    DEBUG_PRINTF("hello world\n"); 
+ 
+    /* Step 1: avoid outfixes as we always have to run them. */ 
+    avoidOutfixes(vg, last_chance, cc); 
+ 
+    if (num_vertices(vg) <= 2) { 
+        return vg; /* unable to transform pattern */ 
+    } 
+ 
+    removeRedundantPrefixes(vg); 
+    dumpPreRoseGraph(vg, cc.grey, "pre_prefix_rose.dot"); 
+ 
+    /* Step 2: avoid non-transient prefixes (esp in streaming mode) */ 
+    findBetterPrefixes(vg, cc); 
+ 
+    dumpPreRoseGraph(vg, cc.grey, "post_prefix_rose.dot"); 
+ 
+    extractStrongLiterals(vg, cc); 
+    dumpPreRoseGraph(vg, cc.grey, "post_extract_rose.dot"); 
+    improveWeakInfixes(vg, cc); 
+    dumpPreRoseGraph(vg, cc.grey, "post_infix_rose.dot"); 
+ 
+    /* Step 3: avoid output exposed engines if there is a strong trailing 
+       literal) */ 
+    avoidSuffixes(vg, cc); 
+ 
+    /* Step 4: look for infixes/suffixes with leading .*literals 
+     * This can reduce the amount of work a heavily picked literal has to do and 
+     * reduce the amount of state used as .* is handled internally to rose. */ 
+    lookForDoubleCut(vg, cc); 
+ 
+    if (cc.streaming) { 
+        lookForCleanEarlySplits(vg, cc); 
+        decomposeLiteralChains(vg, cc); 
+    } 
+ 
+    rehomeEodSuffixes(vg); 
+    removeRedundantLiterals(vg, cc); 
+ 
+    pruneUseless(vg); 
+    dumpPreRoseGraph(vg, cc.grey); 
+    renumber_vertices(vg); 
+    calcVertexOffsets(vg); 
+ 
+    return vg; 
+} 
+ 
+bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, 
+              bool last_chance, const ReportManager &rm, 
+              const CompileContext &cc) { 
+    auto vg = doInitialVioletTransform(h, last_chance, cc); 
+    if (num_vertices(vg) <= 2) { 
+        return false; 
+    } 
+ 
+    /* Step 5: avoid unimplementable, or overly large engines if possible */ 
+    if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { 
+        return false; 
+    } 
+    dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot"); 
+ 
+    /* Step 6: send to rose */ 
+    bool rv = rose.addRose(vg, prefilter); 
+    DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); 
+    return rv; 
+} 
+ 
+bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, 
+                 const CompileContext &cc) { 
+    auto vg = doInitialVioletTransform(h, true, cc); 
+    if (num_vertices(vg) <= 2) { 
+        return false; 
+    } 
+ 
+    bool rv = roseCheckRose(vg, prefilter, rm, cc); 
+    DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); 
+    return rv; 
+} 
+ 
+}
author	Ivan Blinkov <ivan@blinkov.ru>	2022-02-10 16:47:10 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:47:10 +0300
commit	1aeb9a455974457866f78722ad98114bafc84e8a (patch)
tree	e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfagraph/ng_violet.cpp
parent	bd5ef432f5cfb1e18851381329d94665a4c22470 (diff)
download	ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz