Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.

author: Ivan Blinkov <ivan@blinkov.ru> 2022-02-10 16:47:11 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:11 +0300
commit: 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree: 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/smallwrite
parent: 1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download: ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
2 files changed, 725 insertions, 725 deletions
diff --git a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp
index 7dc4d6e883..d993137632 100644
--- a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp
+++ b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp
@@ -26,38 +26,38 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-/** 
- * \file 
- * \brief Small-write engine build code. 
- */ 
- 
+/**
+ * \file
+ * \brief Small-write engine build code.
+ */
+
 #include "smallwrite/smallwrite_build.h"
 
 #include "grey.h"
 #include "ue2common.h"
-#include "compiler/compiler.h" 
-#include "nfa/dfa_min.h" 
+#include "compiler/compiler.h"
+#include "nfa/dfa_min.h"
 #include "nfa/mcclellancompile.h"
 #include "nfa/mcclellancompile_util.h"
 #include "nfa/nfa_internal.h"
 #include "nfa/rdfa_merge.h"
-#include "nfa/shengcompile.h" 
+#include "nfa/shengcompile.h"
 #include "nfagraph/ng.h"
-#include "nfagraph/ng_depth.h" 
+#include "nfagraph/ng_depth.h"
 #include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_reports.h" 
-#include "nfagraph/ng_prune.h" 
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_prune.h"
 #include "nfagraph/ng_util.h"
 #include "smallwrite/smallwrite_internal.h"
 #include "util/alloc.h"
-#include "util/bytecode_ptr.h" 
+#include "util/bytecode_ptr.h"
 #include "util/charreach.h"
-#include "util/compare.h" 
+#include "util/compare.h"
 #include "util/compile_context.h"
 #include "util/container.h"
 #include "util/make_unique.h"
-#include "util/ue2_graph.h" 
+#include "util/ue2_graph.h"
 #include "util/ue2string.h"
 #include "util/verify_types.h"
 
@@ -66,236 +66,236 @@
 #include <vector>
 #include <utility>
 
-#include <boost/graph/breadth_first_search.hpp> 
- 
+#include <boost/graph/breadth_first_search.hpp>
+
 using namespace std;
 
 namespace ue2 {
 
 #define DFA_MERGE_MAX_STATES 8000
-#define MAX_TRIE_VERTICES 8000 
-
-struct LitTrieVertexProps { 
-    LitTrieVertexProps() = default; 
-    explicit LitTrieVertexProps(u8 c_in) : c(c_in) {} 
-    size_t index; // managed by ue2_graph 
-    u8 c = 0; //!< character reached on this vertex 
-    flat_set<ReportID> reports; //!< managed reports fired on this vertex 
-}; 
- 
-struct LitTrieEdgeProps { 
-    size_t index; // managed by ue2_graph 
-}; 
- 
-/** 
- * \brief BGL graph used to store a trie of literals (for later AC construction 
- * into a DFA). 
- */ 
-struct LitTrie 
-    : public ue2_graph<LitTrie, LitTrieVertexProps, LitTrieEdgeProps> { 
- 
-    LitTrie() : root(add_vertex(*this)) {} 
- 
-    const vertex_descriptor root; //!< Root vertex for the trie. 
-}; 
- 
-static 
-bool is_empty(const LitTrie &trie) { 
-    return num_vertices(trie) <= 1; 
-} 
- 
-static 
-std::set<ReportID> all_reports(const LitTrie &trie) { 
-    std::set<ReportID> reports; 
-    for (auto v : vertices_range(trie)) { 
-        insert(&reports, trie[v].reports); 
-    } 
-    return reports; 
-} 
- 
-using LitTrieVertex = LitTrie::vertex_descriptor; 
-using LitTrieEdge = LitTrie::edge_descriptor; 
- 
+#define MAX_TRIE_VERTICES 8000
+
+struct LitTrieVertexProps {
+    LitTrieVertexProps() = default;
+    explicit LitTrieVertexProps(u8 c_in) : c(c_in) {}
+    size_t index; // managed by ue2_graph
+    u8 c = 0; //!< character reached on this vertex
+    flat_set<ReportID> reports; //!< managed reports fired on this vertex
+};
+
+struct LitTrieEdgeProps {
+    size_t index; // managed by ue2_graph
+};
+
+/**
+ * \brief BGL graph used to store a trie of literals (for later AC construction
+ * into a DFA).
+ */
+struct LitTrie
+    : public ue2_graph<LitTrie, LitTrieVertexProps, LitTrieEdgeProps> {
+
+    LitTrie() : root(add_vertex(*this)) {}
+
+    const vertex_descriptor root; //!< Root vertex for the trie.
+};
+
+static
+bool is_empty(const LitTrie &trie) {
+    return num_vertices(trie) <= 1;
+}
+
+static
+std::set<ReportID> all_reports(const LitTrie &trie) {
+    std::set<ReportID> reports;
+    for (auto v : vertices_range(trie)) {
+        insert(&reports, trie[v].reports);
+    }
+    return reports;
+}
+
+using LitTrieVertex = LitTrie::vertex_descriptor;
+using LitTrieEdge = LitTrie::edge_descriptor;
+
 namespace { // unnamed
 
 // Concrete impl class
 class SmallWriteBuildImpl : public SmallWriteBuild {
 public:
-    SmallWriteBuildImpl(size_t num_patterns, const ReportManager &rm, 
-                        const CompileContext &cc); 
+    SmallWriteBuildImpl(size_t num_patterns, const ReportManager &rm,
+                        const CompileContext &cc);
 
     // Construct a runtime implementation.
-    bytecode_ptr<SmallWriteEngine> build(u32 roseQuality) override; 
+    bytecode_ptr<SmallWriteEngine> build(u32 roseQuality) override;
 
-    void add(const NGHolder &g, const ExpressionInfo &expr) override; 
+    void add(const NGHolder &g, const ExpressionInfo &expr) override;
     void add(const ue2_literal &literal, ReportID r) override;
 
-    set<ReportID> all_reports() const override; 
+    set<ReportID> all_reports() const override;
 
     const ReportManager &rm;
     const CompileContext &cc;
 
-    vector<unique_ptr<raw_dfa>> dfas; 
-    LitTrie lit_trie; 
-    LitTrie lit_trie_nocase; 
-    size_t num_literals = 0; 
+    vector<unique_ptr<raw_dfa>> dfas;
+    LitTrie lit_trie;
+    LitTrie lit_trie_nocase;
+    size_t num_literals = 0;
     bool poisoned;
 };
 
 } // namespace
 
-SmallWriteBuild::~SmallWriteBuild() = default; 
+SmallWriteBuild::~SmallWriteBuild() = default;
 
-SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns, 
-                                         const ReportManager &rm_in, 
+SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns,
+                                         const ReportManager &rm_in,
                                          const CompileContext &cc_in)
     : rm(rm_in), cc(cc_in),
       /* small write is block mode only */
-      poisoned(!cc.grey.allowSmallWrite 
-               || cc.streaming 
-               || num_patterns > cc.grey.smallWriteMaxPatterns) { 
+      poisoned(!cc.grey.allowSmallWrite
+               || cc.streaming
+               || num_patterns > cc.grey.smallWriteMaxPatterns) {
+}
+
+/**
+ * \brief Remove any reports from the given vertex that cannot match within
+ * max_depth due to their constraints.
+ */
+static
+bool pruneOverlongReports(NFAVertex v, NGHolder &g, const depth &max_depth,
+                          const ReportManager &rm) {
+    assert(!g[v].reports.empty());
+
+    vector<ReportID> bad_reports;
+
+    for (ReportID id : g[v].reports) {
+        const auto &report = rm.getReport(id);
+        if (report.minOffset > max_depth) {
+            bad_reports.push_back(id);
+        }
+    }
+
+    for (ReportID id : bad_reports) {
+        g[v].reports.erase(id);
+    }
+
+    if (g[v].reports.empty()) {
+        DEBUG_PRINTF("none of vertex %zu's reports can match, cut accepts\n",
+                     g[v].index);
+        remove_edge(v, g.accept, g);
+        remove_edge(v, g.acceptEod, g);
+    }
+
+    return !bad_reports.empty();
+}
+
+/**
+ * \brief Prune vertices and reports from the graph that cannot match within
+ * max_depth.
+ */
+static
+bool pruneOverlong(NGHolder &g, const depth &max_depth,
+                   const ReportManager &rm) {
+    bool modified = false;
+    auto depths = calcBidiDepths(g);
+
+    for (auto v : vertices_range(g)) {
+        if (is_special(v, g)) {
+            continue;
+        }
+        const auto &d = depths.at(g[v].index);
+        depth min_match_offset = min(d.fromStart.min, d.fromStartDotStar.min)
+                               + min(d.toAccept.min, d.toAcceptEod.min);
+        if (min_match_offset > max_depth) {
+            clear_vertex(v, g);
+            modified = true;
+            continue;
+        }
+
+        if (is_match_vertex(v, g)) {
+            modified |= pruneOverlongReports(v, g, max_depth, rm);
+        }
+    }
+
+    if (modified) {
+        pruneUseless(g);
+        DEBUG_PRINTF("pruned graph down to %zu vertices\n", num_vertices(g));
+    }
+
+    return modified;
+}
+
+/**
+ * \brief Attempt to merge the set of DFAs given down into a single raw_dfa.
+ * Returns false on failure.
+ */
+static
+bool mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, const ReportManager &rm,
+               const CompileContext &cc) {
+    assert(!dfas.empty());
+
+    if (dfas.size() == 1) {
+        return true;
+    }
+
+    DEBUG_PRINTF("attempting to merge %zu DFAs\n", dfas.size());
+
+    vector<const raw_dfa *> dfa_ptrs;
+    dfa_ptrs.reserve(dfas.size());
+    for (auto &d : dfas) {
+        dfa_ptrs.push_back(d.get());
+    }
+
+    auto merged = mergeAllDfas(dfa_ptrs, DFA_MERGE_MAX_STATES, &rm, cc.grey);
+    if (!merged) {
+        DEBUG_PRINTF("merge failed\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("merge succeeded, result has %zu states\n",
+                  merged->states.size());
+    dfas.clear();
+    dfas.push_back(std::move(merged));
+    return true;
 }
 
-/** 
- * \brief Remove any reports from the given vertex that cannot match within 
- * max_depth due to their constraints. 
- */ 
-static 
-bool pruneOverlongReports(NFAVertex v, NGHolder &g, const depth &max_depth, 
-                          const ReportManager &rm) { 
-    assert(!g[v].reports.empty()); 
- 
-    vector<ReportID> bad_reports; 
- 
-    for (ReportID id : g[v].reports) { 
-        const auto &report = rm.getReport(id); 
-        if (report.minOffset > max_depth) { 
-            bad_reports.push_back(id); 
-        } 
-    } 
- 
-    for (ReportID id : bad_reports) { 
-        g[v].reports.erase(id); 
-    } 
- 
-    if (g[v].reports.empty()) { 
-        DEBUG_PRINTF("none of vertex %zu's reports can match, cut accepts\n", 
-                     g[v].index); 
-        remove_edge(v, g.accept, g); 
-        remove_edge(v, g.acceptEod, g); 
-    } 
- 
-    return !bad_reports.empty(); 
-} 
- 
-/** 
- * \brief Prune vertices and reports from the graph that cannot match within 
- * max_depth. 
- */ 
-static 
-bool pruneOverlong(NGHolder &g, const depth &max_depth, 
-                   const ReportManager &rm) { 
-    bool modified = false; 
-    auto depths = calcBidiDepths(g); 
- 
-    for (auto v : vertices_range(g)) { 
-        if (is_special(v, g)) { 
-            continue; 
-        } 
-        const auto &d = depths.at(g[v].index); 
-        depth min_match_offset = min(d.fromStart.min, d.fromStartDotStar.min) 
-                               + min(d.toAccept.min, d.toAcceptEod.min); 
-        if (min_match_offset > max_depth) { 
-            clear_vertex(v, g); 
-            modified = true; 
-            continue; 
-        } 
- 
-        if (is_match_vertex(v, g)) { 
-            modified |= pruneOverlongReports(v, g, max_depth, rm); 
-        } 
-    } 
- 
-    if (modified) { 
-        pruneUseless(g); 
-        DEBUG_PRINTF("pruned graph down to %zu vertices\n", num_vertices(g)); 
-    } 
- 
-    return modified; 
-} 
- 
-/** 
- * \brief Attempt to merge the set of DFAs given down into a single raw_dfa. 
- * Returns false on failure. 
- */ 
-static 
-bool mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, const ReportManager &rm, 
-               const CompileContext &cc) { 
-    assert(!dfas.empty()); 
- 
-    if (dfas.size() == 1) { 
-        return true; 
-    } 
- 
-    DEBUG_PRINTF("attempting to merge %zu DFAs\n", dfas.size()); 
- 
-    vector<const raw_dfa *> dfa_ptrs; 
-    dfa_ptrs.reserve(dfas.size()); 
-    for (auto &d : dfas) { 
-        dfa_ptrs.push_back(d.get()); 
-    } 
- 
-    auto merged = mergeAllDfas(dfa_ptrs, DFA_MERGE_MAX_STATES, &rm, cc.grey); 
-    if (!merged) { 
-        DEBUG_PRINTF("merge failed\n"); 
-        return false; 
-    } 
- 
-    DEBUG_PRINTF("merge succeeded, result has %zu states\n", 
-                  merged->states.size()); 
-    dfas.clear(); 
-    dfas.push_back(std::move(merged)); 
-    return true; 
-} 
- 
-void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { 
+void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
     // If the graph is poisoned (i.e. we can't build a SmallWrite version),
     // we don't even try.
     if (poisoned) {
         return;
     }
 
-    if (expr.som) { 
-        DEBUG_PRINTF("no SOM support in small-write engine\n"); 
+    if (expr.som) {
+        DEBUG_PRINTF("no SOM support in small-write engine\n");
+        poisoned = true;
+        return;
+    }
+
+    if (isVacuous(g)) {
+        DEBUG_PRINTF("no vacuous graph support in small-write engine\n");
+        poisoned = true;
+        return;
+    }
+
+    if (any_of_in(::ue2::all_reports(g), [&](ReportID id) {
+            return rm.getReport(id).minLength > 0;
+        })) {
+        DEBUG_PRINTF("no min_length extparam support in small-write engine\n");
         poisoned = true;
         return;
     }
 
-    if (isVacuous(g)) { 
-        DEBUG_PRINTF("no vacuous graph support in small-write engine\n"); 
-        poisoned = true; 
-        return; 
-    } 
-
-    if (any_of_in(::ue2::all_reports(g), [&](ReportID id) { 
-            return rm.getReport(id).minLength > 0; 
-        })) { 
-        DEBUG_PRINTF("no min_length extparam support in small-write engine\n"); 
-        poisoned = true; 
-        return; 
-    } 
- 
-    DEBUG_PRINTF("g=%p\n", &g); 
- 
+    DEBUG_PRINTF("g=%p\n", &g);
+
     // make a copy of the graph so that we can modify it for our purposes
-    unique_ptr<NGHolder> h = cloneHolder(g); 
+    unique_ptr<NGHolder> h = cloneHolder(g);
 
-    pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm); 
+    pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
 
-    reduceGraph(*h, SOM_NONE, expr.utf8, cc); 
- 
-    if (can_never_match(*h)) { 
-        DEBUG_PRINTF("graph can never match in small block\n"); 
+    reduceGraph(*h, SOM_NONE, expr.utf8, cc);
+
+    if (can_never_match(*h)) {
+        DEBUG_PRINTF("graph can never match in small block\n");
         return;
     }
 
@@ -311,434 +311,434 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
         return;
     }
 
-    if (clear_deeper_reports(*r, cc.grey.smallWriteLargestBuffer)) { 
-        minimize_hopcroft(*r, cc.grey); 
-    } 
+    if (clear_deeper_reports(*r, cc.grey.smallWriteLargestBuffer)) {
+        minimize_hopcroft(*r, cc.grey);
+    }
+
+    dfas.push_back(std::move(r));
 
-    dfas.push_back(std::move(r)); 
- 
-    if (dfas.size() >= cc.grey.smallWriteMergeBatchSize) { 
-        if (!mergeDfas(dfas, rm, cc)) { 
-            dfas.clear(); 
+    if (dfas.size() >= cc.grey.smallWriteMergeBatchSize) {
+        if (!mergeDfas(dfas, rm, cc)) {
+            dfas.clear();
             poisoned = true;
             return;
         }
     }
 }
 
-static 
-bool add_to_trie(const ue2_literal &literal, ReportID report, LitTrie &trie) { 
-    auto u = trie.root; 
-    for (const auto &c : literal) { 
-        auto next = LitTrie::null_vertex(); 
-        for (auto v : adjacent_vertices_range(u, trie)) { 
-            if (trie[v].c == (u8)c.c) { 
-                next = v; 
-                break; 
-            } 
-        } 
-        if (!next) { 
-            next = add_vertex(LitTrieVertexProps((u8)c.c), trie); 
-            add_edge(u, next, trie); 
-        } 
-        u = next; 
-    } 
- 
-    trie[u].reports.insert(report); 
- 
-    DEBUG_PRINTF("added '%s' (report %u) to trie, now %zu vertices\n", 
-                  escapeString(literal).c_str(), report, num_vertices(trie)); 
-    return num_vertices(trie) <= MAX_TRIE_VERTICES; 
-} 
- 
+static
+bool add_to_trie(const ue2_literal &literal, ReportID report, LitTrie &trie) {
+    auto u = trie.root;
+    for (const auto &c : literal) {
+        auto next = LitTrie::null_vertex();
+        for (auto v : adjacent_vertices_range(u, trie)) {
+            if (trie[v].c == (u8)c.c) {
+                next = v;
+                break;
+            }
+        }
+        if (!next) {
+            next = add_vertex(LitTrieVertexProps((u8)c.c), trie);
+            add_edge(u, next, trie);
+        }
+        u = next;
+    }
+
+    trie[u].reports.insert(report);
+
+    DEBUG_PRINTF("added '%s' (report %u) to trie, now %zu vertices\n",
+                  escapeString(literal).c_str(), report, num_vertices(trie));
+    return num_vertices(trie) <= MAX_TRIE_VERTICES;
+}
+
 void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
     // If the graph is poisoned (i.e. we can't build a SmallWrite version),
     // we don't even try.
     if (poisoned) {
-        DEBUG_PRINTF("poisoned\n"); 
+        DEBUG_PRINTF("poisoned\n");
         return;
     }
 
     if (literal.length() > cc.grey.smallWriteLargestBuffer) {
-        DEBUG_PRINTF("exceeded length limit\n"); 
+        DEBUG_PRINTF("exceeded length limit\n");
         return; /* too long */
     }
 
-    if (++num_literals > cc.grey.smallWriteMaxLiterals) { 
-        DEBUG_PRINTF("exceeded literal limit\n"); 
-        poisoned = true; 
-        return; 
-    } 
- 
-    auto &trie = literal.any_nocase() ? lit_trie_nocase : lit_trie; 
-    if (!add_to_trie(literal, r, trie)) { 
-        DEBUG_PRINTF("trie add failed\n"); 
-        poisoned = true; 
-    } 
+    if (++num_literals > cc.grey.smallWriteMaxLiterals) {
+        DEBUG_PRINTF("exceeded literal limit\n");
+        poisoned = true;
+        return;
+    }
+
+    auto &trie = literal.any_nocase() ? lit_trie_nocase : lit_trie;
+    if (!add_to_trie(literal, r, trie)) {
+        DEBUG_PRINTF("trie add failed\n");
+        poisoned = true;
+    }
 }
 
-namespace { 
- 
-/** 
- * \brief BFS visitor for Aho-Corasick automaton construction. 
- * 
- * This is doing two things: 
- * 
- *   - Computing the failure edges (also called fall or supply edges) for each 
- *     vertex, giving the longest suffix of the path to that point that is also 
- *     a prefix in the trie reached on the same character. The BFS traversal 
- *     makes it possible to build these from earlier failure paths. 
- * 
- *   - Computing the output function for each vertex, which is done by 
- *     propagating the reports from failure paths as well. This ensures that 
- *     substrings of the current path also report correctly. 
- */ 
-struct ACVisitor : public boost::default_bfs_visitor { 
-    ACVisitor(LitTrie &trie_in, 
-              unordered_map<LitTrieVertex, LitTrieVertex> &failure_map_in, 
-              vector<LitTrieVertex> &ordering_in) 
-        : mutable_trie(trie_in), failure_map(failure_map_in), 
-          ordering(ordering_in) {} 
- 
-    LitTrieVertex find_failure_target(LitTrieVertex u, LitTrieVertex v, 
-                                      const LitTrie &trie) { 
-        assert(u == trie.root || contains(failure_map, u)); 
-        assert(!contains(failure_map, v)); 
- 
-        const auto &c = trie[v].c; 
- 
-        while (u != trie.root) { 
-            auto f = failure_map.at(u); 
-            for (auto w : adjacent_vertices_range(f, trie)) { 
-                if (trie[w].c == c) { 
-                    return w; 
-                } 
-            } 
-            u = f; 
-        } 
- 
-        DEBUG_PRINTF("no failure edge\n"); 
-        return LitTrie::null_vertex(); 
-    }
- 
-    void tree_edge(LitTrieEdge e, const LitTrie &trie) { 
-        auto u = source(e, trie); 
-        auto v = target(e, trie); 
-        DEBUG_PRINTF("bfs (%zu, %zu) on '%c'\n", trie[u].index, trie[v].index, 
-                     trie[v].c); 
-        ordering.push_back(v); 
- 
-        auto f = find_failure_target(u, v, trie); 
- 
-        if (f) { 
-            DEBUG_PRINTF("final failure vertex %zu\n", trie[f].index); 
-            failure_map.emplace(v, f); 
- 
-            // Propagate reports from failure path to ensure we correctly 
-            // report substrings. 
-            insert(&mutable_trie[v].reports, mutable_trie[f].reports); 
-        } else { 
-            DEBUG_PRINTF("final failure vertex root\n"); 
-            failure_map.emplace(v, trie.root); 
-        } 
-    } 
- 
-private: 
-    LitTrie &mutable_trie; //!< For setting reports property. 
-    unordered_map<LitTrieVertex, LitTrieVertex> &failure_map; 
-    vector<LitTrieVertex> &ordering; //!< BFS ordering for vertices. 
-}; 
+namespace {
+
+/**
+ * \brief BFS visitor for Aho-Corasick automaton construction.
+ *
+ * This is doing two things:
+ *
+ *   - Computing the failure edges (also called fall or supply edges) for each
+ *     vertex, giving the longest suffix of the path to that point that is also
+ *     a prefix in the trie reached on the same character. The BFS traversal
+ *     makes it possible to build these from earlier failure paths.
+ *
+ *   - Computing the output function for each vertex, which is done by
+ *     propagating the reports from failure paths as well. This ensures that
+ *     substrings of the current path also report correctly.
+ */
+struct ACVisitor : public boost::default_bfs_visitor {
+    ACVisitor(LitTrie &trie_in,
+              unordered_map<LitTrieVertex, LitTrieVertex> &failure_map_in,
+              vector<LitTrieVertex> &ordering_in)
+        : mutable_trie(trie_in), failure_map(failure_map_in),
+          ordering(ordering_in) {}
+
+    LitTrieVertex find_failure_target(LitTrieVertex u, LitTrieVertex v,
+                                      const LitTrie &trie) {
+        assert(u == trie.root || contains(failure_map, u));
+        assert(!contains(failure_map, v));
+
+        const auto &c = trie[v].c;
+
+        while (u != trie.root) {
+            auto f = failure_map.at(u);
+            for (auto w : adjacent_vertices_range(f, trie)) {
+                if (trie[w].c == c) {
+                    return w;
+                }
+            }
+            u = f;
+        }
+
+        DEBUG_PRINTF("no failure edge\n");
+        return LitTrie::null_vertex();
+    }
+
+    void tree_edge(LitTrieEdge e, const LitTrie &trie) {
+        auto u = source(e, trie);
+        auto v = target(e, trie);
+        DEBUG_PRINTF("bfs (%zu, %zu) on '%c'\n", trie[u].index, trie[v].index,
+                     trie[v].c);
+        ordering.push_back(v);
+
+        auto f = find_failure_target(u, v, trie);
+
+        if (f) {
+            DEBUG_PRINTF("final failure vertex %zu\n", trie[f].index);
+            failure_map.emplace(v, f);
+
+            // Propagate reports from failure path to ensure we correctly
+            // report substrings.
+            insert(&mutable_trie[v].reports, mutable_trie[f].reports);
+        } else {
+            DEBUG_PRINTF("final failure vertex root\n");
+            failure_map.emplace(v, trie.root);
+        }
+    }
+
+private:
+    LitTrie &mutable_trie; //!< For setting reports property.
+    unordered_map<LitTrieVertex, LitTrieVertex> &failure_map;
+    vector<LitTrieVertex> &ordering; //!< BFS ordering for vertices.
+};
 }
 
-static UNUSED 
-bool isSaneTrie(const LitTrie &trie) { 
-    CharReach seen; 
-    for (auto u : vertices_range(trie)) { 
-        seen.clear(); 
-        for (auto v : adjacent_vertices_range(u, trie)) { 
-            if (seen.test(trie[v].c)) { 
-                return false; 
-            } 
-            seen.set(trie[v].c); 
-        } 
-    } 
-    return true; 
-} 
-
-/** 
- * \brief Turn the given literal trie into an AC automaton by adding additional 
- * edges and reports. 
- */ 
-static 
-void buildAutomaton(LitTrie &trie, 
-                    unordered_map<LitTrieVertex, LitTrieVertex> &failure_map, 
-                    vector<LitTrieVertex> &ordering) { 
-    assert(isSaneTrie(trie)); 
- 
-    // Find our failure transitions and reports. 
-    failure_map.reserve(num_vertices(trie)); 
-    ordering.reserve(num_vertices(trie)); 
-    ACVisitor ac_vis(trie, failure_map, ordering); 
-    boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); 
- 
-    // Compute missing edges from failure map. 
-    for (auto v : ordering) { 
-        DEBUG_PRINTF("vertex %zu\n", trie[v].index); 
-        CharReach seen; 
-        for (auto w : adjacent_vertices_range(v, trie)) { 
-            DEBUG_PRINTF("edge to %zu with reach 0x%02x\n", trie[w].index, 
-                         trie[w].c); 
-            assert(!seen.test(trie[w].c)); 
-            seen.set(trie[w].c); 
-        } 
-        auto parent = failure_map.at(v); 
-        for (auto w : adjacent_vertices_range(parent, trie)) { 
-            if (!seen.test(trie[w].c)) { 
-                add_edge(v, w, trie); 
-            } 
-        } 
-    }
-} 
-
-static 
-vector<u32> findDistFromRoot(const LitTrie &trie) { 
-    vector<u32> dist(num_vertices(trie), UINT32_MAX); 
-    dist[trie[trie.root].index] = 0; 
-
-    // BFS to find dist from root. 
-    breadth_first_search( 
-        trie, trie.root, 
-        visitor(make_bfs_visitor(record_distances( 
-            make_iterator_property_map(dist.begin(), 
-                                       get(&LitTrieVertexProps::index, trie)), 
-            boost::on_tree_edge())))); 
-
-    return dist; 
-} 
- 
-static 
-vector<u32> findDistToAccept(const LitTrie &trie) { 
-    vector<u32> dist(num_vertices(trie), UINT32_MAX); 
- 
-    // Start with all reporting vertices. 
-    deque<LitTrieVertex> q; 
-    for (auto v : vertices_range(trie)) { 
-        if (!trie[v].reports.empty()) { 
-            q.push_back(v); 
-            dist[trie[v].index] = 0; 
+static UNUSED
+bool isSaneTrie(const LitTrie &trie) {
+    CharReach seen;
+    for (auto u : vertices_range(trie)) {
+        seen.clear();
+        for (auto v : adjacent_vertices_range(u, trie)) {
+            if (seen.test(trie[v].c)) {
+                return false;
+            }
+            seen.set(trie[v].c);
         }
     }
+    return true;
+}
+
+/**
+ * \brief Turn the given literal trie into an AC automaton by adding additional
+ * edges and reports.
+ */
+static
+void buildAutomaton(LitTrie &trie,
+                    unordered_map<LitTrieVertex, LitTrieVertex> &failure_map,
+                    vector<LitTrieVertex> &ordering) {
+    assert(isSaneTrie(trie));
+
+    // Find our failure transitions and reports.
+    failure_map.reserve(num_vertices(trie));
+    ordering.reserve(num_vertices(trie));
+    ACVisitor ac_vis(trie, failure_map, ordering);
+    boost::breadth_first_search(trie, trie.root, visitor(ac_vis));
+
+    // Compute missing edges from failure map.
+    for (auto v : ordering) {
+        DEBUG_PRINTF("vertex %zu\n", trie[v].index);
+        CharReach seen;
+        for (auto w : adjacent_vertices_range(v, trie)) {
+            DEBUG_PRINTF("edge to %zu with reach 0x%02x\n", trie[w].index,
+                         trie[w].c);
+            assert(!seen.test(trie[w].c));
+            seen.set(trie[w].c);
+        }
+        auto parent = failure_map.at(v);
+        for (auto w : adjacent_vertices_range(parent, trie)) {
+            if (!seen.test(trie[w].c)) {
+                add_edge(v, w, trie);
+            }
+        }
+    }
+}
+
+static
+vector<u32> findDistFromRoot(const LitTrie &trie) {
+    vector<u32> dist(num_vertices(trie), UINT32_MAX);
+    dist[trie[trie.root].index] = 0;
+
+    // BFS to find dist from root.
+    breadth_first_search(
+        trie, trie.root,
+        visitor(make_bfs_visitor(record_distances(
+            make_iterator_property_map(dist.begin(),
+                                       get(&LitTrieVertexProps::index, trie)),
+            boost::on_tree_edge()))));
+
+    return dist;
+}
 
-    // Custom BFS, since we have a pile of sources. 
-    while (!q.empty()) { 
-        auto v = q.front(); 
-        q.pop_front(); 
-        u32 d = dist[trie[v].index]; 
- 
-        for (auto u : inv_adjacent_vertices_range(v, trie)) { 
-            auto &u_dist = dist[trie[u].index]; 
-            if (u_dist == UINT32_MAX) { 
-                q.push_back(u); 
-                u_dist = d + 1; 
-            } 
-        } 
-    }
-
-    return dist; 
-} 
-
-/** 
- * \brief Prune all vertices from the trie that do not lie on a path from root 
- * to accept of length <= max_depth. 
- */ 
-static 
-void pruneTrie(LitTrie &trie, u32 max_depth) { 
-    DEBUG_PRINTF("pruning trie to %u\n", max_depth); 
-
-    auto dist_from_root = findDistFromRoot(trie); 
-    auto dist_to_accept = findDistToAccept(trie); 
- 
-    vector<LitTrieVertex> dead; 
-    for (auto v : vertices_range(trie)) { 
-        if (v == trie.root) { 
-            continue; 
-        } 
-        auto v_index = trie[v].index; 
-        DEBUG_PRINTF("vertex %zu: from_start=%u, to_accept=%u\n", trie[v].index, 
-                     dist_from_root[v_index], dist_to_accept[v_index]); 
-        assert(dist_from_root[v_index] != UINT32_MAX); 
-        assert(dist_to_accept[v_index] != UINT32_MAX); 
-        u32 min_path_len = dist_from_root[v_index] + dist_to_accept[v_index]; 
-        if (min_path_len > max_depth) { 
-            DEBUG_PRINTF("pruning vertex %zu (min path len %u)\n", 
-                         trie[v].index, min_path_len); 
-            clear_vertex(v, trie); 
-            dead.push_back(v); 
-        } 
-    }
-
-    if (dead.empty()) { 
-        return; 
-    }
-
-    for (auto v : dead) { 
-        remove_vertex(v, trie); 
-    } 
-
-    DEBUG_PRINTF("%zu vertices remain\n", num_vertices(trie)); 
-
-    renumber_edges(trie); 
-    renumber_vertices(trie); 
-} 
-
-static 
-vector<CharReach> getAlphabet(const LitTrie &trie, bool nocase) { 
-    vector<CharReach> esets = {CharReach::dot()}; 
-    for (auto v : vertices_range(trie)) { 
-        if (v == trie.root) { 
-            continue; 
+static
+vector<u32> findDistToAccept(const LitTrie &trie) {
+    vector<u32> dist(num_vertices(trie), UINT32_MAX);
+
+    // Start with all reporting vertices.
+    deque<LitTrieVertex> q;
+    for (auto v : vertices_range(trie)) {
+        if (!trie[v].reports.empty()) {
+            q.push_back(v);
+            dist[trie[v].index] = 0;
         }
+    }
 
-        CharReach cr; 
-        if (nocase) { 
-            cr.set(mytoupper(trie[v].c)); 
-            cr.set(mytolower(trie[v].c)); 
-        } else { 
-            cr.set(trie[v].c); 
-        } 
- 
-        for (size_t i = 0; i < esets.size(); i++) { 
-            if (esets[i].count() == 1) { 
-                continue; 
-            } 
- 
-            CharReach t = cr & esets[i]; 
-            if (t.any() && t != esets[i]) { 
-                esets[i] &= ~t; 
-                esets.push_back(t); 
-            } 
-        } 
-    }
-
-    // For deterministic compiles. 
-    sort(esets.begin(), esets.end()); 
-    return esets; 
-} 
-
-static 
-u16 buildAlphabet(const LitTrie &trie, bool nocase, 
-                  array<u16, ALPHABET_SIZE> &alpha, 
-                  array<u16, ALPHABET_SIZE> &unalpha) { 
-    const auto &esets = getAlphabet(trie, nocase); 
- 
-    u16 i = 0; 
-    for (const auto &cr : esets) { 
-        u16 leader = cr.find_first(); 
-        for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { 
-            alpha[s] = i; 
-        } 
-        unalpha[i] = leader; 
-        i++; 
-    }
-
-    for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) { 
-        alpha[j] = i; 
-        unalpha[i] = j; 
-    } 
-
-    DEBUG_PRINTF("alphabet size %u\n", i); 
-    return i; 
-} 
-
-/** 
- * \brief Calculate state mapping, from vertex in trie to state index in BFS 
- * ordering. 
- */ 
-static 
-unordered_map<LitTrieVertex, u32> 
-makeStateMap(const LitTrie &trie, const vector<LitTrieVertex> &ordering) { 
-    unordered_map<LitTrieVertex, u32> state_ids; 
-    state_ids.reserve(num_vertices(trie)); 
-    u32 idx = DEAD_STATE + 1; 
-    state_ids.emplace(trie.root, idx++); 
-    for (auto v : ordering) { 
-        state_ids.emplace(v, idx++); 
-    } 
-    assert(state_ids.size() == num_vertices(trie)); 
-    return state_ids; 
+    // Custom BFS, since we have a pile of sources.
+    while (!q.empty()) {
+        auto v = q.front();
+        q.pop_front();
+        u32 d = dist[trie[v].index];
+
+        for (auto u : inv_adjacent_vertices_range(v, trie)) {
+            auto &u_dist = dist[trie[u].index];
+            if (u_dist == UINT32_MAX) {
+                q.push_back(u);
+                u_dist = d + 1;
+            }
+        }
+    }
+
+    return dist;
+}
+
+/**
+ * \brief Prune all vertices from the trie that do not lie on a path from root
+ * to accept of length <= max_depth.
+ */
+static
+void pruneTrie(LitTrie &trie, u32 max_depth) {
+    DEBUG_PRINTF("pruning trie to %u\n", max_depth);
+
+    auto dist_from_root = findDistFromRoot(trie);
+    auto dist_to_accept = findDistToAccept(trie);
+
+    vector<LitTrieVertex> dead;
+    for (auto v : vertices_range(trie)) {
+        if (v == trie.root) {
+            continue;
+        }
+        auto v_index = trie[v].index;
+        DEBUG_PRINTF("vertex %zu: from_start=%u, to_accept=%u\n", trie[v].index,
+                     dist_from_root[v_index], dist_to_accept[v_index]);
+        assert(dist_from_root[v_index] != UINT32_MAX);
+        assert(dist_to_accept[v_index] != UINT32_MAX);
+        u32 min_path_len = dist_from_root[v_index] + dist_to_accept[v_index];
+        if (min_path_len > max_depth) {
+            DEBUG_PRINTF("pruning vertex %zu (min path len %u)\n",
+                         trie[v].index, min_path_len);
+            clear_vertex(v, trie);
+            dead.push_back(v);
+        }
+    }
+
+    if (dead.empty()) {
+        return;
+    }
+
+    for (auto v : dead) {
+        remove_vertex(v, trie);
+    }
+
+    DEBUG_PRINTF("%zu vertices remain\n", num_vertices(trie));
+
+    renumber_edges(trie);
+    renumber_vertices(trie);
+}
+
+static
+vector<CharReach> getAlphabet(const LitTrie &trie, bool nocase) {
+    vector<CharReach> esets = {CharReach::dot()};
+    for (auto v : vertices_range(trie)) {
+        if (v == trie.root) {
+            continue;
+        }
+
+        CharReach cr;
+        if (nocase) {
+            cr.set(mytoupper(trie[v].c));
+            cr.set(mytolower(trie[v].c));
+        } else {
+            cr.set(trie[v].c);
+        }
+
+        for (size_t i = 0; i < esets.size(); i++) {
+            if (esets[i].count() == 1) {
+                continue;
+            }
+
+            CharReach t = cr & esets[i];
+            if (t.any() && t != esets[i]) {
+                esets[i] &= ~t;
+                esets.push_back(t);
+            }
+        }
+    }
+
+    // For deterministic compiles.
+    sort(esets.begin(), esets.end());
+    return esets;
+}
+
+static
+u16 buildAlphabet(const LitTrie &trie, bool nocase,
+                  array<u16, ALPHABET_SIZE> &alpha,
+                  array<u16, ALPHABET_SIZE> &unalpha) {
+    const auto &esets = getAlphabet(trie, nocase);
+
+    u16 i = 0;
+    for (const auto &cr : esets) {
+        u16 leader = cr.find_first();
+        for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
+            alpha[s] = i;
+        }
+        unalpha[i] = leader;
+        i++;
+    }
+
+    for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) {
+        alpha[j] = i;
+        unalpha[i] = j;
+    }
+
+    DEBUG_PRINTF("alphabet size %u\n", i);
+    return i;
+}
+
+/**
+ * \brief Calculate state mapping, from vertex in trie to state index in BFS
+ * ordering.
+ */
+static
+unordered_map<LitTrieVertex, u32>
+makeStateMap(const LitTrie &trie, const vector<LitTrieVertex> &ordering) {
+    unordered_map<LitTrieVertex, u32> state_ids;
+    state_ids.reserve(num_vertices(trie));
+    u32 idx = DEAD_STATE + 1;
+    state_ids.emplace(trie.root, idx++);
+    for (auto v : ordering) {
+        state_ids.emplace(v, idx++);
+    }
+    assert(state_ids.size() == num_vertices(trie));
+    return state_ids;
+}
+
+/** \brief Construct a raw_dfa from a literal trie. */
+static
+unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
+    DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie));
+
+    vector<LitTrieVertex> ordering;
+    unordered_map<LitTrieVertex, LitTrieVertex> failure_map;
+    buildAutomaton(trie, failure_map, ordering);
+
+    // Construct DFA states in BFS order.
+    const auto state_ids = makeStateMap(trie, ordering);
+
+    auto rdfa = std::make_unique<raw_dfa>(NFA_OUTFIX);
+
+    // Calculate alphabet.
+    array<u16, ALPHABET_SIZE> unalpha;
+    auto &alpha = rdfa->alpha_remap;
+    rdfa->alpha_size = buildAlphabet(trie, nocase, alpha, unalpha);
+
+    // Construct states and transitions.
+    const u16 root_state = state_ids.at(trie.root);
+    assert(root_state == DEAD_STATE + 1);
+    rdfa->start_anchored = root_state;
+    rdfa->start_floating = root_state;
+    rdfa->states.resize(num_vertices(trie) + 1, dstate(rdfa->alpha_size));
+
+    // Dead state.
+    fill(rdfa->states[DEAD_STATE].next.begin(),
+         rdfa->states[DEAD_STATE].next.end(), DEAD_STATE);
+
+    for (auto u : vertices_range(trie)) {
+        auto u_state = state_ids.at(u);
+        DEBUG_PRINTF("state %u\n", u_state);
+        assert(u_state < rdfa->states.size());
+        auto &ds = rdfa->states[u_state];
+        ds.reports = trie[u].reports;
+        if (!ds.reports.empty()) {
+            DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str());
+        }
+
+        // Set daddy state from failure map.
+        if (u == trie.root) {
+            ds.daddy = DEAD_STATE;
+        } else {
+            assert(contains(failure_map, u));
+            ds.daddy = state_ids.at(failure_map.at(u));
+        }
+
+        // By default, transition back to the root.
+        fill(ds.next.begin(), ds.next.end(), root_state);
+        // TOP should be a self-loop.
+        ds.next[alpha[TOP]] = u_state;
+
+        // Add in the real transitions.
+        for (auto v : adjacent_vertices_range(u, trie)) {
+            if (v == trie.root) {
+                continue;
+            }
+            auto v_state = state_ids.at(v);
+            u16 sym = alpha[trie[v].c];
+            DEBUG_PRINTF("edge to %u on 0x%02x (sym %u)\n", v_state,
+                         trie[v].c, sym);
+            assert(sym < ds.next.size());
+            assert(ds.next[sym] == root_state);
+            ds.next[sym] = v_state;
+        }
+    }
+
+    return rdfa;
 }
 
-/** \brief Construct a raw_dfa from a literal trie. */ 
-static 
-unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) { 
-    DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie)); 
- 
-    vector<LitTrieVertex> ordering; 
-    unordered_map<LitTrieVertex, LitTrieVertex> failure_map; 
-    buildAutomaton(trie, failure_map, ordering); 
- 
-    // Construct DFA states in BFS order. 
-    const auto state_ids = makeStateMap(trie, ordering); 
- 
-    auto rdfa = std::make_unique<raw_dfa>(NFA_OUTFIX); 
- 
-    // Calculate alphabet. 
-    array<u16, ALPHABET_SIZE> unalpha; 
-    auto &alpha = rdfa->alpha_remap; 
-    rdfa->alpha_size = buildAlphabet(trie, nocase, alpha, unalpha); 
- 
-    // Construct states and transitions. 
-    const u16 root_state = state_ids.at(trie.root); 
-    assert(root_state == DEAD_STATE + 1); 
-    rdfa->start_anchored = root_state; 
-    rdfa->start_floating = root_state; 
-    rdfa->states.resize(num_vertices(trie) + 1, dstate(rdfa->alpha_size)); 
- 
-    // Dead state. 
-    fill(rdfa->states[DEAD_STATE].next.begin(), 
-         rdfa->states[DEAD_STATE].next.end(), DEAD_STATE); 
- 
-    for (auto u : vertices_range(trie)) { 
-        auto u_state = state_ids.at(u); 
-        DEBUG_PRINTF("state %u\n", u_state); 
-        assert(u_state < rdfa->states.size()); 
-        auto &ds = rdfa->states[u_state]; 
-        ds.reports = trie[u].reports; 
-        if (!ds.reports.empty()) { 
-            DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str()); 
-        } 
- 
-        // Set daddy state from failure map. 
-        if (u == trie.root) { 
-            ds.daddy = DEAD_STATE; 
-        } else { 
-            assert(contains(failure_map, u)); 
-            ds.daddy = state_ids.at(failure_map.at(u)); 
-        } 
- 
-        // By default, transition back to the root. 
-        fill(ds.next.begin(), ds.next.end(), root_state); 
-        // TOP should be a self-loop. 
-        ds.next[alpha[TOP]] = u_state; 
- 
-        // Add in the real transitions. 
-        for (auto v : adjacent_vertices_range(u, trie)) { 
-            if (v == trie.root) { 
-                continue; 
-            } 
-            auto v_state = state_ids.at(v); 
-            u16 sym = alpha[trie[v].c]; 
-            DEBUG_PRINTF("edge to %u on 0x%02x (sym %u)\n", v_state, 
-                         trie[v].c, sym); 
-            assert(sym < ds.next.size()); 
-            assert(ds.next[sym] == root_state); 
-            ds.next[sym] = v_state; 
-        } 
-    } 
- 
-    return rdfa; 
-} 
- 
 #define MAX_GOOD_ACCEL_DEPTH 4
 
 static
@@ -782,72 +782,72 @@ bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel,
 }
 
 static
-bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc, 
-                         const ReportManager &rm, bool has_non_literals, 
-                         set<dstate_id_t> &accel_states) { 
-    // If we determinised only literals, then we only need to consider the init 
-    // states for acceleration. 
-    bool only_accel_init = !has_non_literals; 
-    bool trust_daddy_states = !has_non_literals; 
- 
-    bytecode_ptr<NFA> dfa = nullptr; 
-    if (cc.grey.allowSmallWriteSheng) { 
-        dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states); 
+bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
+                         const ReportManager &rm, bool has_non_literals,
+                         set<dstate_id_t> &accel_states) {
+    // If we determinised only literals, then we only need to consider the init
+    // states for acceleration.
+    bool only_accel_init = !has_non_literals;
+    bool trust_daddy_states = !has_non_literals;
+
+    bytecode_ptr<NFA> dfa = nullptr;
+    if (cc.grey.allowSmallWriteSheng) {
+        dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states);
         if (!dfa) {
             dfa = sheng32Compile(rdfa, cc, rm, only_accel_init, &accel_states);
         }
         if (!dfa) {
             dfa = sheng64Compile(rdfa, cc, rm, only_accel_init, &accel_states);
         }
-    } 
-    if (!dfa) { 
-        dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, 
-                               trust_daddy_states, &accel_states); 
-    } 
-    return dfa; 
-} 
- 
-static 
-bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality, 
-                             const CompileContext &cc, const ReportManager &rm, 
-                             bool has_non_literals, u32 *start_offset, 
-                             u32 *small_region) { 
+    }
+    if (!dfa) {
+        dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init,
+                               trust_daddy_states, &accel_states);
+    }
+    return dfa;
+}
+
+static
+bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
+                             const CompileContext &cc, const ReportManager &rm,
+                             bool has_non_literals, u32 *start_offset,
+                             u32 *small_region) {
     *start_offset = remove_leading_dots(rdfa);
 
     // Unleash the McClellan!
     set<dstate_id_t> accel_states;
 
-    auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); 
+    auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states);
     if (!nfa) {
-        DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); 
+        DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
         return nullptr;
     }
 
     if (is_slow(rdfa, accel_states, roseQuality)) {
-        DEBUG_PRINTF("is slow\n"); 
+        DEBUG_PRINTF("is slow\n");
         *small_region = cc.grey.smallWriteLargestBufferBad;
         if (*small_region <= *start_offset) {
             return nullptr;
         }
-        if (clear_deeper_reports(rdfa, *small_region - *start_offset)) { 
-            minimize_hopcroft(rdfa, cc.grey); 
-            if (rdfa.start_anchored == DEAD_STATE) { 
-                DEBUG_PRINTF("all patterns pruned out\n"); 
-                return nullptr; 
-            } 
-
-            nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); 
-            if (!nfa) { 
-                DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); 
-                assert(0); /* able to build orig dfa but not the trimmed? */ 
-                return nullptr; 
-            } 
+        if (clear_deeper_reports(rdfa, *small_region - *start_offset)) {
+            minimize_hopcroft(rdfa, cc.grey);
+            if (rdfa.start_anchored == DEAD_STATE) {
+                DEBUG_PRINTF("all patterns pruned out\n");
+                return nullptr;
+            }
+
+            nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states);
+            if (!nfa) {
+                DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
+                assert(0); /* able to build orig dfa but not the trimmed? */
+                return nullptr;
+            }
         }
     } else {
         *small_region = cc.grey.smallWriteLargestBuffer;
     }
 
-    assert(isDfaType(nfa->type)); 
+    assert(isDfaType(nfa->type));
     if (nfa->length > cc.grey.limitSmallWriteOutfixSize
         || nfa->length > cc.grey.limitDFASize) {
         DEBUG_PRINTF("smallwrite outfix size too large\n");
@@ -859,16 +859,16 @@ bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
 }
 
 // SmallWriteBuild factory
-unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns, 
-                                                  const ReportManager &rm, 
+unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
+                                                  const ReportManager &rm,
                                                   const CompileContext &cc) {
-    return ue2::make_unique<SmallWriteBuildImpl>(num_patterns, rm, cc); 
+    return ue2::make_unique<SmallWriteBuildImpl>(num_patterns, rm, cc);
 }
 
-bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) { 
-    const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase); 
-    const bool has_non_literals = !dfas.empty(); 
-    if (dfas.empty() && !has_literals) { 
+bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
+    const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
+    const bool has_non_literals = !dfas.empty();
+    if (dfas.empty() && !has_literals) {
         DEBUG_PRINTF("no smallwrite engine\n");
         poisoned = true;
         return nullptr;
@@ -879,49 +879,49 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
         return nullptr;
     }
 
-    // We happen to know that if the rose is high quality, we're going to limit 
-    // depth further. 
-    if (roseQuality) { 
-        u32 max_depth = cc.grey.smallWriteLargestBufferBad; 
-        if (!is_empty(lit_trie)) { 
-            pruneTrie(lit_trie, max_depth); 
-        } 
-        if (!is_empty(lit_trie_nocase)) { 
-            pruneTrie(lit_trie_nocase, max_depth); 
-        } 
-    } 
- 
-    if (!is_empty(lit_trie)) { 
-        dfas.push_back(buildDfa(lit_trie, false)); 
-        DEBUG_PRINTF("caseful literal dfa with %zu states\n", 
-                     dfas.back()->states.size()); 
-    } 
-    if (!is_empty(lit_trie_nocase)) { 
-        dfas.push_back(buildDfa(lit_trie_nocase, true)); 
-        DEBUG_PRINTF("nocase literal dfa with %zu states\n", 
-                     dfas.back()->states.size()); 
-    } 
- 
-    if (dfas.empty()) { 
-        DEBUG_PRINTF("no dfa, pruned everything away\n"); 
+    // We happen to know that if the rose is high quality, we're going to limit
+    // depth further.
+    if (roseQuality) {
+        u32 max_depth = cc.grey.smallWriteLargestBufferBad;
+        if (!is_empty(lit_trie)) {
+            pruneTrie(lit_trie, max_depth);
+        }
+        if (!is_empty(lit_trie_nocase)) {
+            pruneTrie(lit_trie_nocase, max_depth);
+        }
+    }
+
+    if (!is_empty(lit_trie)) {
+        dfas.push_back(buildDfa(lit_trie, false));
+        DEBUG_PRINTF("caseful literal dfa with %zu states\n",
+                     dfas.back()->states.size());
+    }
+    if (!is_empty(lit_trie_nocase)) {
+        dfas.push_back(buildDfa(lit_trie_nocase, true));
+        DEBUG_PRINTF("nocase literal dfa with %zu states\n",
+                     dfas.back()->states.size());
+    }
+
+    if (dfas.empty()) {
+        DEBUG_PRINTF("no dfa, pruned everything away\n");
+        return nullptr;
+    }
+
+    if (!mergeDfas(dfas, rm, cc)) {
+        dfas.clear();
         return nullptr;
     }
 
-    if (!mergeDfas(dfas, rm, cc)) { 
-        dfas.clear(); 
-        return nullptr; 
-    } 
- 
-    assert(dfas.size() == 1); 
-    auto rdfa = std::move(dfas.front()); 
-    dfas.clear(); 
- 
+    assert(dfas.size() == 1);
+    auto rdfa = std::move(dfas.front());
+    dfas.clear();
+
     DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
 
     u32 start_offset;
     u32 small_region;
-    auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals, 
-                          &start_offset, &small_region); 
+    auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals,
+                          &start_offset, &small_region);
     if (!nfa) {
         DEBUG_PRINTF("some smallwrite outfix could not be prepped\n");
         /* just skip the smallwrite optimization */
@@ -930,7 +930,7 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
     }
 
     u32 size = sizeof(SmallWriteEngine) + nfa->length;
-    auto smwr = make_zeroed_bytecode_ptr<SmallWriteEngine>(size); 
+    auto smwr = make_zeroed_bytecode_ptr<SmallWriteEngine>(size);
 
     smwr->size = size;
     smwr->start_offset = start_offset;
@@ -944,20 +944,20 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
     return smwr;
 }
 
-set<ReportID> SmallWriteBuildImpl::all_reports() const { 
-    set<ReportID> reports; 
-    if (poisoned) { 
-        return reports; 
-    } 
- 
-    for (const auto &rdfa : dfas) { 
-        insert(&reports, ::ue2::all_reports(*rdfa)); 
-    } 
- 
-    insert(&reports, ::ue2::all_reports(lit_trie)); 
-    insert(&reports, ::ue2::all_reports(lit_trie_nocase)); 
- 
-    return reports; 
+set<ReportID> SmallWriteBuildImpl::all_reports() const {
+    set<ReportID> reports;
+    if (poisoned) {
+        return reports;
+    }
+
+    for (const auto &rdfa : dfas) {
+        insert(&reports, ::ue2::all_reports(*rdfa));
+    }
+
+    insert(&reports, ::ue2::all_reports(lit_trie));
+    insert(&reports, ::ue2::all_reports(lit_trie_nocase));
+
+    return reports;
 }
 
 } // namespace ue2
diff --git a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h
index 436c5181c7..648b13db79 100644
--- a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h
+++ b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation 
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -30,19 +30,19 @@
 #define SMWR_BUILD_H
 
 /**
- * \file 
- * \brief Small-write engine build interface. 
- * 
- * Everything you ever needed to feed literals in and get a SmallWriteEngine 
- * out. This header should be everything needed by the rest of UE2. 
+ * \file
+ * \brief Small-write engine build interface.
+ *
+ * Everything you ever needed to feed literals in and get a SmallWriteEngine
+ * out. This header should be everything needed by the rest of UE2.
  */
 
 #include "ue2common.h"
-#include "util/bytecode_ptr.h" 
-#include "util/noncopyable.h" 
+#include "util/bytecode_ptr.h"
+#include "util/noncopyable.h"
 
-#include <memory> 
-#include <set> 
+#include <memory>
+#include <set>
 
 struct SmallWriteEngine;
 
@@ -50,30 +50,30 @@ namespace ue2 {
 
 struct CompileContext;
 struct ue2_literal;
-class ExpressionInfo; 
-class NGHolder; 
-class ReportManager; 
+class ExpressionInfo;
+class NGHolder;
+class ReportManager;
 
-/** 
- * Abstract interface intended for callers from elsewhere in the tree, real 
- * underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. 
- */ 
-class SmallWriteBuild : noncopyable { 
+/**
+ * Abstract interface intended for callers from elsewhere in the tree, real
+ * underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h.
+ */
+class SmallWriteBuild : noncopyable {
 public:
     virtual ~SmallWriteBuild();
 
-    virtual bytecode_ptr<SmallWriteEngine> build(u32 roseQuality) = 0; 
+    virtual bytecode_ptr<SmallWriteEngine> build(u32 roseQuality) = 0;
 
-    virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0; 
+    virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0;
     virtual void add(const ue2_literal &literal, ReportID r) = 0;
- 
-    virtual std::set<ReportID> all_reports() const = 0; 
+
+    virtual std::set<ReportID> all_reports() const = 0;
 };
 
-/** \brief Construct a usable SmallWrite builder. */ 
-std::unique_ptr<SmallWriteBuild> 
-makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm, 
-                      const CompileContext &cc); 
+/** \brief Construct a usable SmallWrite builder. */
+std::unique_ptr<SmallWriteBuild>
+makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm,
+                      const CompileContext &cc);
 
 } // namespace ue2
author	Ivan Blinkov <ivan@blinkov.ru>	2022-02-10 16:47:11 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:47:11 +0300
commit	5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree	339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/smallwrite
parent	1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download	ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz