Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.

author: Ivan Blinkov <ivan@blinkov.ru> 2022-02-10 16:47:11 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:11 +0300
commit: 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree: 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp
parent: 1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download: ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
1 files changed, 707 insertions, 707 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp
index 9dd19f05de..78fd862919 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp
@@ -1,707 +1,707 @@
-/* 
- * Copyright (c) 2015-2017, Intel Corporation 
- * 
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met: 
- * 
- *  * Redistributions of source code must retain the above copyright notice, 
- *    this list of conditions and the following disclaimer. 
- *  * Redistributions in binary form must reproduce the above copyright 
- *    notice, this list of conditions and the following disclaimer in the 
- *    documentation and/or other materials provided with the distribution. 
- *  * Neither the name of Intel Corporation nor the names of its contributors 
- *    may be used to endorse or promote products derived from this software 
- *    without specific prior written permission. 
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
- * POSSIBILITY OF SUCH DAMAGE. 
- */ 
- 
-/** \file 
- * \brief Graph fuzzer for approximate matching 
- */ 
- 
-#include "ng_fuzzy.h" 
- 
-#include "ng.h" 
-#include "ng_depth.h" 
-#include "ng_util.h" 
- 
-#include <map> 
-#include <vector> 
-using namespace std; 
- 
-namespace ue2 { 
- 
-// returns all successors up to a given depth in a vector of sets, indexed by 
-// zero-based depth from source vertex 
-static 
-vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g, 
-                                                    NFAVertex src, u32 depth) { 
-    vector<flat_set<NFAVertex>> result(depth); 
-    flat_set<NFAVertex> cur, next; 
- 
-    assert(depth > 0); 
- 
-    // populate current set of successors 
-    for (auto v : adjacent_vertices_range(src, g)) { 
-        // ignore self-loops 
-        if (src == v) { 
-            continue; 
-        } 
-        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index); 
-        cur.insert(v); 
-    } 
-    result[0] = cur; 
- 
-    for (unsigned d = 1; d < depth; d++) { 
-        // collect all successors for all current level vertices 
-        for (auto v : cur) { 
-            // don't go past special nodes 
-            if (is_special(v, g)) { 
-                continue; 
-            } 
- 
-            for (auto succ : adjacent_vertices_range(v, g)) { 
-                // ignore self-loops 
-                if (v == succ) { 
-                    continue; 
-                } 
-                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1); 
-                next.insert(succ); 
-            } 
-        } 
-        result[d] = next; 
-        next.swap(cur); 
-        next.clear(); 
-    } 
- 
-    return result; 
-} 
- 
-// returns all predecessors up to a given depth in a vector of sets, indexed by 
-// zero-based depth from source vertex 
-static 
-vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g, 
-                                                      NFAVertex src, 
-                                                      u32 depth) { 
-    vector<flat_set<NFAVertex>> result(depth); 
-    flat_set<NFAVertex> cur, next; 
- 
-    assert(depth > 0); 
- 
-    // populate current set of successors 
-    for (auto v : inv_adjacent_vertices_range(src, g)) { 
-        // ignore self-loops 
-        if (src == v) { 
-            continue; 
-        } 
-        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index); 
-        cur.insert(v); 
-    } 
-    result[0] = cur; 
- 
-    for (unsigned d = 1; d < depth; d++) { 
-        // collect all successors for all current level vertices 
-        for (auto v : cur) { 
-            for (auto pred : inv_adjacent_vertices_range(v, g)) { 
-                // ignore self-loops 
-                if (v == pred) { 
-                    continue; 
-                } 
-                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1); 
-                next.insert(pred); 
-            } 
-        } 
-        result[d] = next; 
-        next.swap(cur); 
-        next.clear(); 
-    } 
- 
-    return result; 
-} 
- 
-/* 
- * This struct produces a fuzzed graph; that is, a graph that is able to match 
- * the original pattern, as well as input data within a certain edit distance. 
- * Construct the struct, then call fuzz_graph() to transform the graph. 
- * 
- * Terminology used: 
- * - Shadow vertices: vertices mirroring the original graph at various edit 
- * distances 
- * - Shadow graph level: edit distance of a particular shadow graph 
- * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace 
- */ 
-struct ShadowGraph { 
-    NGHolder &g; 
-    u32 edit_distance; 
-    bool hamming; 
-    map<pair<NFAVertex, u32>, NFAVertex> shadow_map; 
-    map<pair<NFAVertex, u32>, NFAVertex> helper_map; 
-    map<NFAVertex, NFAVertex> clones; 
-    // edge creation is deferred 
-    vector<pair<NFAVertex, NFAVertex>> edges_to_be_added; 
-    flat_set<NFAVertex> orig; 
- 
-    ShadowGraph(NGHolder &g_in, u32 ed_in, bool hamm_in) 
-        : g(g_in), edit_distance(ed_in), hamming(hamm_in) {} 
- 
-    void fuzz_graph() { 
-        if (edit_distance == 0) { 
-            return; 
-        } 
- 
-        DEBUG_PRINTF("edit distance = %u hamming = %s\n", edit_distance, 
-                     hamming ? "true" : "false"); 
- 
-        // step 1: prepare the vertices, helpers and shadows according to 
-        // the original graph 
-        prepare_graph(); 
- 
-        // step 2: add shadow and helper nodes 
-        build_shadow_graph(); 
- 
-        // step 3: set up reports for newly created vertices (and make clones 
-        // if necessary) 
-        if (!hamming) { 
-            create_reports(); 
-        } 
- 
-        // step 4: wire up shadow graph and helpers for insert/replace/remove 
-        connect_shadow_graph(); 
- 
-        // step 5: commit all the edge wirings 
-        DEBUG_PRINTF("Committing edge wirings\n"); 
-        for (const auto &p : edges_to_be_added) { 
-            add_edge_if_not_present(p.first, p.second, g); 
-        } 
- 
-        DEBUG_PRINTF("Done!\n"); 
-    } 
- 
-private: 
-    const NFAVertex& get_clone(const NFAVertex &v) { 
-        return contains(clones, v) ? 
-                    clones[v] : v; 
-    } 
- 
-    void connect_to_clones(const NFAVertex &u, const NFAVertex &v) { 
-        const NFAVertex &clone_u = get_clone(u); 
-        const NFAVertex &clone_v = get_clone(v); 
- 
-        edges_to_be_added.emplace_back(u, v); 
-        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index); 
- 
-        // do not connect clones to accepts, we do it during cloning 
-        if (is_any_accept(clone_v, g)) { 
-            return; 
-        } 
-        edges_to_be_added.emplace_back(clone_u, clone_v); 
-        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index, 
-                     g[clone_v].index); 
-    } 
- 
-    void prepare_graph() { 
-        DEBUG_PRINTF("Building shadow graphs\n"); 
- 
-        for (auto v : vertices_range(g)) { 
-            // all level 0 vertices are their own helpers and their own shadows 
-            helper_map[make_pair(v, 0)] = v; 
-            shadow_map[make_pair(v, 0)] = v; 
- 
-            // find special nodes 
-            if (is_any_accept(v, g)) { 
-                DEBUG_PRINTF("Node %zu is a special node\n", g[v].index); 
-                for (unsigned edit = 1; edit <= edit_distance; edit++) { 
-                    // all accepts are their own shadows and helpers at all 
-                    // levels 
-                    shadow_map[make_pair(v, edit)] = v; 
-                    helper_map[make_pair(v, edit)] = v; 
-                } 
-                continue; 
-            } 
-            DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index); 
-            orig.insert(v); 
-        } 
-    } 
- 
-    void build_shadow_graph() { 
-        for (auto v : orig) { 
-            DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n", 
-                         g[v].index); 
-            for (unsigned dist = 1; dist <= edit_distance; dist++) { 
-                auto shadow_v = v; 
- 
-                // start and startDs cannot have shadows but do have helpers 
-                if (!is_any_start(v, g)) { 
-                    shadow_v = clone_vertex(g, v); 
-                    DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", 
-                                 g[shadow_v].index, dist); 
-                } 
-                shadow_map[make_pair(v, dist)] = shadow_v; 
- 
-                // if there's nowhere to go from this vertex, no helper needed 
-                if (proper_out_degree(v, g) < 1) { 
-                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n", 
-                                 g[shadow_v].index, dist); 
-                    helper_map[make_pair(v, dist)] = shadow_v; 
-                    continue; 
-                } 
- 
-                // start and startDs only have helpers for insert, so not Hamming 
-                if (hamming && is_any_start(v, g)) { 
-                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n", 
-                                 g[shadow_v].index, dist); 
-                    helper_map[make_pair(v, dist)] = shadow_v; 
-                    continue; 
-                } 
- 
-                auto helper_v = clone_vertex(g, v); 
-                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", 
-                             g[helper_v].index, dist); 
- 
-                // this is a helper, so make it a dot 
-                g[helper_v].char_reach = CharReach::dot(); 
-                // do not copy virtual start's assert flags 
-                if (is_virtual_start(v, g)) { 
-                    DEBUG_PRINTF("Helper node ID is virtual start: %zu (level %u)\n", 
-                             g[helper_v].index, dist); 
-                    g[helper_v].assert_flags = 0; 
-                } 
-                helper_map[make_pair(v, dist)] = helper_v; 
-            } 
-        } 
-    } 
- 
-    // wire up successors according to the original graph, wire helpers 
-    // to shadow successors (insert/replace) 
-    void connect_succs(NFAVertex v, u32 dist) { 
-        DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n", 
-                     g[v].index, dist); 
-        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; 
-        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; 
- 
-        // multiple insert 
-        if (!hamming && dist > 1) { 
-            const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)]; 
-            connect_to_clones(prev_level_helper, cur_shadow_helper); 
-        } 
- 
-        for (auto orig_dst : adjacent_vertices_range(v, g)) { 
-            const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)]; 
- 
-            connect_to_clones(cur_shadow_v, shadow_dst); 
- 
-            // ignore startDs for insert/replace 
-            if (orig_dst == g.startDs) { 
-                continue; 
-            } 
- 
-            connect_to_clones(cur_shadow_helper, shadow_dst); 
-        } 
-    } 
- 
-    // wire up predecessors according to the original graph, wire 
-    // predecessors to helpers (replace), wire predecessor helpers to 
-    // helpers (multiple replace) 
-    void connect_preds(NFAVertex v, u32 dist) { 
-        DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n", 
-                     g[v].index, dist); 
-        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; 
-        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; 
- 
-        auto orig_src_vertices = inv_adjacent_vertices_range(v, g); 
-        for (auto orig_src : orig_src_vertices) { 
-            // ignore edges from start to startDs 
-            if (v == g.startDs && orig_src == g.start) { 
-                continue; 
-            } 
-            // ignore self-loops for replace 
-            if (orig_src != v) { 
-                // do not wire a replace node for start vertices if we 
-                // have a virtual start 
-                if (is_virtual_start(v, g) && is_any_start(orig_src, g)) { 
-                    continue; 
-                } 
- 
-                if (dist) { 
-                    const auto &prev_level_src = 
-                        shadow_map[make_pair(orig_src, dist - 1)]; 
-                    const auto &prev_level_helper = 
-                        helper_map[make_pair(orig_src, dist - 1)]; 
- 
-                    connect_to_clones(prev_level_src, cur_shadow_helper); 
-                    connect_to_clones(prev_level_helper, cur_shadow_helper); 
-                } 
-            } 
-            // wire predecessor according to original graph 
-            const auto &shadow_src = shadow_map[make_pair(orig_src, dist)]; 
- 
-            connect_to_clones(shadow_src, cur_shadow_v); 
-        } 
-    } 
- 
-    // wire up previous level helper to current shadow (insert) 
-    void connect_helpers(NFAVertex v, u32 dist) { 
-        DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n", 
-                     g[v].index, dist); 
-        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; 
-        auto prev_level_v = shadow_map[make_pair(v, dist - 1)]; 
- 
-        connect_to_clones(prev_level_v, cur_shadow_helper); 
-    } 
- 
-    /* 
-     * wiring edges for removal is a special case. 
-     * 
-     * when wiring edges for removal, as well as wiring up immediate 
-     * predecessors to immediate successors, we also need to wire up more 
-     * distant successors to their respective shadow graph levels. 
-     * 
-     * for example, consider graph start->a->b->c->d->accept. 
-     * 
-     * at edit distance 1, we need remove edges start->b, a->c, b->d, and 
-     * c->accept, all going from original graph (level 0) to shadow graph 
-     * level 1. 
-     * 
-     * at edit distance 2, we also need edges start->c, a->d and b->accept, 
-     * all going from level 0 to shadow graph level 2. 
-     * 
-     * this is propagated to all shadow levels; that is, given edit 
-     * distance 3, we will have edges from shadow levels 0->1, 0->2, 
-     * 0->3, 1->2, 1->3, and 2->3. 
-     * 
-     * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2, 
-     * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc. 
-     * 
-     * we also have to wire helpers to their removal successors, to 
-     * accommodate for a replace followed by a remove, on all shadow levels. 
-     * 
-     * and finally, we also have to wire source shadows into removal 
-     * successor helpers on a level above, to accommodate for a remove 
-     * followed by a replace. 
-     */ 
-    void connect_removals(NFAVertex v) { 
-        DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index); 
- 
-        // vertices returned by this function don't include self-loops 
-        auto dst_vertices_by_depth = 
-            gatherSuccessorsByDepth(g, v, edit_distance); 
-        auto orig_src_vertices = inv_adjacent_vertices_range(v, g); 
-        for (auto orig_src : orig_src_vertices) { 
-            // ignore self-loops 
-            if (orig_src == v) { 
-                continue; 
-            } 
-            for (unsigned step = 1; step <= edit_distance; step++) { 
-                for (unsigned dist = step; dist <= edit_distance; dist++) { 
-                    auto &dst_vertices = dst_vertices_by_depth[step - 1]; 
-                    for (auto &orig_dst : dst_vertices) { 
-                        const auto &shadow_src = 
-                            shadow_map[make_pair(orig_src, dist - step)]; 
-                        const auto &shadow_helper = 
-                            helper_map[make_pair(orig_src, dist - step)]; 
-                        const auto &shadow_dst = 
-                                shadow_map[make_pair(orig_dst, dist)]; 
- 
-                        // removal 
-                        connect_to_clones(shadow_src, shadow_dst); 
- 
-                        // removal from helper vertex 
-                        connect_to_clones(shadow_helper, shadow_dst); 
- 
-                        // removal into helper, requires additional edit 
-                        if ((dist + 1) <= edit_distance) { 
-                            const auto &next_level_helper = 
-                                    helper_map[make_pair(orig_dst, dist + 1)]; 
- 
-                            connect_to_clones(shadow_src, next_level_helper); 
-                        } 
-                    } 
-                } 
-            } 
-        } 
-    } 
- 
-    void connect_shadow_graph() { 
-        DEBUG_PRINTF("Wiring up the graph\n"); 
- 
-        for (auto v : orig) { 
- 
-            DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index); 
- 
-            for (unsigned dist = 0; dist <= edit_distance; dist++) { 
- 
-                // handle insert/replace 
-                connect_succs(v, dist); 
- 
-                // handle replace/multiple insert 
-                connect_preds(v, dist); 
- 
-                // handle helpers 
-                if (!hamming && dist > 0) { 
-                    connect_helpers(v, dist); 
-                } 
-            } 
- 
-            // handle removals 
-            if (!hamming) { 
-                connect_removals(v); 
-            } 
-        } 
-    } 
- 
-    void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) { 
-        for (auto dst : targets) { 
-            DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index, 
-                         g[dst].index); 
-            edges_to_be_added.emplace_back(src, dst); 
-        } 
-    } 
- 
-    // create a clone of the vertex, but overwrite its report set 
-    void create_clone(NFAVertex v, const flat_set<ReportID> &reports, 
-                      unsigned max_edit_distance, 
-                      const flat_set<NFAVertex> &targets) { 
-        // some vertices may have the same reports, but different successors; 
-        // therefore, we may need to connect them multiple times, but still only 
-        // clone once 
-        bool needs_cloning = !contains(clones, v); 
- 
-        DEBUG_PRINTF("Cloning node %zu\n", g[v].index); 
-        // go through all shadows and helpers, including 
-        // original vertex 
-        for (unsigned d = 0; d < max_edit_distance; d++) { 
-            auto shadow_v = shadow_map[make_pair(v, d)]; 
-            auto helper_v = helper_map[make_pair(v, d)]; 
- 
-            NFAVertex new_shadow_v, new_helper_v; 
- 
-            // make sure we don't clone the same vertex twice 
-            if (needs_cloning) { 
-                new_shadow_v = clone_vertex(g, shadow_v); 
-                DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", 
-                             g[new_shadow_v].index, d); 
-                clones[shadow_v] = new_shadow_v; 
-            } else { 
-                new_shadow_v = clones[shadow_v]; 
-            } 
-            g[new_shadow_v].reports = reports; 
- 
-            connect_to_targets(new_shadow_v, targets); 
- 
-            if (shadow_v == helper_v) { 
-                continue; 
-            } 
-            if (needs_cloning) { 
-                new_helper_v = clone_vertex(g, helper_v); 
-                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", 
-                             g[new_helper_v].index, d); 
-                clones[helper_v] = new_helper_v; 
-            } else { 
-                new_helper_v = clones[helper_v]; 
-            } 
-            g[new_helper_v].reports = reports; 
- 
-            connect_to_targets(new_helper_v, targets); 
-        } 
-    } 
- 
-    void write_reports(NFAVertex v, const flat_set<ReportID> &reports, 
-                       unsigned max_edit_distance, 
-                       const flat_set<NFAVertex> &targets) { 
-        // we're overwriting reports, but we're not losing any 
-        // information as we already cached all the different report 
-        // sets, so vertices having different reports will be cloned and set up 
-        // with the correct report set 
- 
-        // go through all shadows and helpers, including original 
-        // vertex 
-        for (unsigned d = 0; d < max_edit_distance; d++) { 
-            auto shadow_v = shadow_map[make_pair(v, d)]; 
-            auto helper_v = helper_map[make_pair(v, d)]; 
-            DEBUG_PRINTF("Setting up reports for shadow node: %zu " 
-                         "(level %u)\n", 
-                         g[shadow_v].index, d); 
-            DEBUG_PRINTF("Setting up reports for helper node: %zu " 
-                         "(level %u)\n", 
-                         g[helper_v].index, d); 
-            g[shadow_v].reports = reports; 
-            g[helper_v].reports = reports; 
- 
-            connect_to_targets(shadow_v, targets); 
-            connect_to_targets(helper_v, targets); 
-        } 
-    } 
- 
-    /* 
-     * we may have multiple report sets per graph. that means, whenever we 
-     * construct additional paths through the graph (alternations, removals), we 
-     * have to account for the fact that some vertices are predecessors to 
-     * vertices with different report sets. 
-     * 
-     * whenever that happens, we have to clone the paths for both report sets, 
-     * and set up these new vertices with their respective report sets as well. 
-     * 
-     * in order to do that, we first have to get all the predecessors for accept 
-     * and acceptEod vertices. then, go through them one by one, and take note 
-     * of the report lists. the first report set we find, wins, the rest we 
-     * clone. 
-     * 
-     * we also have to do this in two passes, because there may be vertices that 
-     * are predecessors to vertices with different report sets, so to avoid 
-     * overwriting reports we will be caching reports info instead. 
-     */ 
-    void create_reports() { 
-        map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices; 
-        flat_set<NFAVertex> accepts{g.accept, g.acceptEod}; 
- 
-        // gather reports info from all vertices connected to accept 
-        for (auto accept : accepts) { 
-            for (auto src : inv_adjacent_vertices_range(accept, g)) { 
-                // skip special vertices 
-                if (is_special(src, g)) { 
-                    continue; 
-                } 
-                reports_to_vertices[g[src].reports].insert(src); 
-            } 
-        } 
- 
-        // we expect to see at most two report sets 
-        assert(reports_to_vertices.size() > 0 && 
-               reports_to_vertices.size() <= 2); 
- 
-        // set up all reports 
-        bool clone = false; 
-        for (auto &pair : reports_to_vertices) { 
-            const auto &reports = pair.first; 
-            const auto &vertices = pair.second; 
- 
-            for (auto src : vertices) { 
-                // get all predecessors up to edit distance 
-                auto src_vertices_by_depth = 
-                        gatherPredecessorsByDepth(g, src, edit_distance); 
- 
-                // find which accepts source vertex connects to 
-                flat_set<NFAVertex> targets; 
-                for (const auto &accept : accepts) { 
-                    NFAEdge e = edge(src, accept, g); 
-                    if (e) { 
-                        targets.insert(accept); 
-                    } 
-                } 
-                assert(targets.size()); 
- 
-                for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) { 
-                    const auto &preds = src_vertices_by_depth[d]; 
-                    for (auto v : preds) { 
-                        // only clone a node if it already contains reports 
-                        if (clone && !g[v].reports.empty()) { 
-                            create_clone(v, reports, edit_distance - d, 
-                                         targets); 
-                        } else { 
-                            write_reports(v, reports, edit_distance - d, 
-                                          targets); 
-                        } 
-                    } 
-                } 
-            } 
-            // clone vertices only if it's not our first report set 
-            clone = true; 
-        } 
-    } 
-}; 
- 
-// check if we will edit our way into a vacuous pattern 
-static 
-bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) { 
-    auto depths = calcRevDepths(g); 
- 
-    depth min_depth = depth::infinity(); 
-    auto idx = g[g.start].index; 
- 
-    // check distance from start to accept/acceptEod 
-    if (depths[idx].toAccept.min.is_finite()) { 
-        min_depth = min(depths[idx].toAccept.min, min_depth); 
-    } 
-    if (depths[idx].toAcceptEod.min.is_finite()) { 
-        min_depth = min(depths[idx].toAcceptEod.min, min_depth); 
-    } 
- 
-    idx = g[g.startDs].index; 
- 
-    // check distance from startDs to accept/acceptEod 
-    if (depths[idx].toAccept.min.is_finite()) { 
-        min_depth = min(depths[idx].toAccept.min, min_depth); 
-    } 
-    if (depths[idx].toAcceptEod.min.is_finite()) { 
-        min_depth = min(depths[idx].toAcceptEod.min, min_depth); 
-    } 
- 
-    assert(min_depth.is_finite()); 
- 
-    // now, check if we can edit our way into a vacuous pattern 
-    if (min_depth <= (u64a) edit_distance + 1) { 
-        DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n"); 
-        return true; 
-    } 
-    return false; 
-} 
- 
-void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming, 
-                            bool utf8, const Grey &grey) { 
-    if (edit_distance == 0) { 
-        return; 
-    } 
-    if (!grey.allowApproximateMatching) { 
-        throw CompileError("Approximate matching is disabled."); 
-    } 
-    if (edit_distance > grey.maxEditDistance) { 
-        throw CompileError("Edit distance is too big."); 
-    } 
-    if (utf8) { 
-        throw CompileError("UTF-8 is disallowed for approximate matching."); 
-    } 
-    // graph isn't fuzzable if there are edge assertions anywhere in the graph 
-    for (auto e : edges_range(g)) { 
-        if (g[e].assert_flags) { 
-            throw CompileError("Zero-width assertions are disallowed for " 
-                               "approximate matching."); 
-        } 
-    } 
-    if (!hamming && will_turn_vacuous(g, edit_distance)) { 
-        throw CompileError("Approximate matching patterns that reduce to " 
-                           "vacuous patterns are disallowed."); 
-    } 
-} 
- 
-void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming, 
-                const Grey &grey) { 
-    if (edit_distance == 0) { 
-        return; 
-    } 
- 
-    assert(grey.allowApproximateMatching); 
-    assert(grey.maxEditDistance >= edit_distance); 
- 
-    ShadowGraph sg(g, edit_distance, hamming); 
-    sg.fuzz_graph(); 
- 
-    // For safety, enforce limit on actual vertex count. 
-    if (num_vertices(g) > grey.limitApproxMatchingVertices) { 
-        DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g), 
-                     grey.limitApproxMatchingVertices); 
-        throw ResourceLimitError(); 
-    } 
-} 
- 
-} // namespace ue2 
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Graph fuzzer for approximate matching
+ */
+
+#include "ng_fuzzy.h"
+
+#include "ng.h"
+#include "ng_depth.h"
+#include "ng_util.h"
+
+#include <map>
+#include <vector>
+using namespace std;
+
+namespace ue2 {
+
+// returns all successors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g,
+                                                    NFAVertex src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of successors
+    for (auto v : adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (unsigned d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            // don't go past special nodes
+            if (is_special(v, g)) {
+                continue;
+            }
+
+            for (auto succ : adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == succ) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
+                next.insert(succ);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// returns all predecessors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
+                                                      NFAVertex src,
+                                                      u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of successors
+    for (auto v : inv_adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (unsigned d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            for (auto pred : inv_adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == pred) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
+                next.insert(pred);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+/*
+ * This struct produces a fuzzed graph; that is, a graph that is able to match
+ * the original pattern, as well as input data within a certain edit distance.
+ * Construct the struct, then call fuzz_graph() to transform the graph.
+ *
+ * Terminology used:
+ * - Shadow vertices: vertices mirroring the original graph at various edit
+ * distances
+ * - Shadow graph level: edit distance of a particular shadow graph
+ * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace
+ */
+struct ShadowGraph {
+    NGHolder &g;
+    u32 edit_distance;
+    bool hamming;
+    map<pair<NFAVertex, u32>, NFAVertex> shadow_map;
+    map<pair<NFAVertex, u32>, NFAVertex> helper_map;
+    map<NFAVertex, NFAVertex> clones;
+    // edge creation is deferred
+    vector<pair<NFAVertex, NFAVertex>> edges_to_be_added;
+    flat_set<NFAVertex> orig;
+
+    ShadowGraph(NGHolder &g_in, u32 ed_in, bool hamm_in)
+        : g(g_in), edit_distance(ed_in), hamming(hamm_in) {}
+
+    void fuzz_graph() {
+        if (edit_distance == 0) {
+            return;
+        }
+
+        DEBUG_PRINTF("edit distance = %u hamming = %s\n", edit_distance,
+                     hamming ? "true" : "false");
+
+        // step 1: prepare the vertices, helpers and shadows according to
+        // the original graph
+        prepare_graph();
+
+        // step 2: add shadow and helper nodes
+        build_shadow_graph();
+
+        // step 3: set up reports for newly created vertices (and make clones
+        // if necessary)
+        if (!hamming) {
+            create_reports();
+        }
+
+        // step 4: wire up shadow graph and helpers for insert/replace/remove
+        connect_shadow_graph();
+
+        // step 5: commit all the edge wirings
+        DEBUG_PRINTF("Committing edge wirings\n");
+        for (const auto &p : edges_to_be_added) {
+            add_edge_if_not_present(p.first, p.second, g);
+        }
+
+        DEBUG_PRINTF("Done!\n");
+    }
+
+private:
+    const NFAVertex& get_clone(const NFAVertex &v) {
+        return contains(clones, v) ?
+                    clones[v] : v;
+    }
+
+    void connect_to_clones(const NFAVertex &u, const NFAVertex &v) {
+        const NFAVertex &clone_u = get_clone(u);
+        const NFAVertex &clone_v = get_clone(v);
+
+        edges_to_be_added.emplace_back(u, v);
+        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index);
+
+        // do not connect clones to accepts, we do it during cloning
+        if (is_any_accept(clone_v, g)) {
+            return;
+        }
+        edges_to_be_added.emplace_back(clone_u, clone_v);
+        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index,
+                     g[clone_v].index);
+    }
+
+    void prepare_graph() {
+        DEBUG_PRINTF("Building shadow graphs\n");
+
+        for (auto v : vertices_range(g)) {
+            // all level 0 vertices are their own helpers and their own shadows
+            helper_map[make_pair(v, 0)] = v;
+            shadow_map[make_pair(v, 0)] = v;
+
+            // find special nodes
+            if (is_any_accept(v, g)) {
+                DEBUG_PRINTF("Node %zu is a special node\n", g[v].index);
+                for (unsigned edit = 1; edit <= edit_distance; edit++) {
+                    // all accepts are their own shadows and helpers at all
+                    // levels
+                    shadow_map[make_pair(v, edit)] = v;
+                    helper_map[make_pair(v, edit)] = v;
+                }
+                continue;
+            }
+            DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index);
+            orig.insert(v);
+        }
+    }
+
+    void build_shadow_graph() {
+        for (auto v : orig) {
+            DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n",
+                         g[v].index);
+            for (unsigned dist = 1; dist <= edit_distance; dist++) {
+                auto shadow_v = v;
+
+                // start and startDs cannot have shadows but do have helpers
+                if (!is_any_start(v, g)) {
+                    shadow_v = clone_vertex(g, v);
+                    DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
+                                 g[shadow_v].index, dist);
+                }
+                shadow_map[make_pair(v, dist)] = shadow_v;
+
+                // if there's nowhere to go from this vertex, no helper needed
+                if (proper_out_degree(v, g) < 1) {
+                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n",
+                                 g[shadow_v].index, dist);
+                    helper_map[make_pair(v, dist)] = shadow_v;
+                    continue;
+                }
+
+                // start and startDs only have helpers for insert, so not Hamming
+                if (hamming && is_any_start(v, g)) {
+                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n",
+                                 g[shadow_v].index, dist);
+                    helper_map[make_pair(v, dist)] = shadow_v;
+                    continue;
+                }
+
+                auto helper_v = clone_vertex(g, v);
+                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
+                             g[helper_v].index, dist);
+
+                // this is a helper, so make it a dot
+                g[helper_v].char_reach = CharReach::dot();
+                // do not copy virtual start's assert flags
+                if (is_virtual_start(v, g)) {
+                    DEBUG_PRINTF("Helper node ID is virtual start: %zu (level %u)\n",
+                             g[helper_v].index, dist);
+                    g[helper_v].assert_flags = 0;
+                }
+                helper_map[make_pair(v, dist)] = helper_v;
+            }
+        }
+    }
+
+    // wire up successors according to the original graph, wire helpers
+    // to shadow successors (insert/replace)
+    void connect_succs(NFAVertex v, u32 dist) {
+        DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n",
+                     g[v].index, dist);
+        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)];
+        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];
+
+        // multiple insert
+        if (!hamming && dist > 1) {
+            const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)];
+            connect_to_clones(prev_level_helper, cur_shadow_helper);
+        }
+
+        for (auto orig_dst : adjacent_vertices_range(v, g)) {
+            const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)];
+
+            connect_to_clones(cur_shadow_v, shadow_dst);
+
+            // ignore startDs for insert/replace
+            if (orig_dst == g.startDs) {
+                continue;
+            }
+
+            connect_to_clones(cur_shadow_helper, shadow_dst);
+        }
+    }
+
+    // wire up predecessors according to the original graph, wire
+    // predecessors to helpers (replace), wire predecessor helpers to
+    // helpers (multiple replace)
+    void connect_preds(NFAVertex v, u32 dist) {
+        DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n",
+                     g[v].index, dist);
+        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)];
+        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];
+
+        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
+        for (auto orig_src : orig_src_vertices) {
+            // ignore edges from start to startDs
+            if (v == g.startDs && orig_src == g.start) {
+                continue;
+            }
+            // ignore self-loops for replace
+            if (orig_src != v) {
+                // do not wire a replace node for start vertices if we
+                // have a virtual start
+                if (is_virtual_start(v, g) && is_any_start(orig_src, g)) {
+                    continue;
+                }
+
+                if (dist) {
+                    const auto &prev_level_src =
+                        shadow_map[make_pair(orig_src, dist - 1)];
+                    const auto &prev_level_helper =
+                        helper_map[make_pair(orig_src, dist - 1)];
+
+                    connect_to_clones(prev_level_src, cur_shadow_helper);
+                    connect_to_clones(prev_level_helper, cur_shadow_helper);
+                }
+            }
+            // wire predecessor according to original graph
+            const auto &shadow_src = shadow_map[make_pair(orig_src, dist)];
+
+            connect_to_clones(shadow_src, cur_shadow_v);
+        }
+    }
+
+    // wire up previous level helper to current shadow (insert)
+    void connect_helpers(NFAVertex v, u32 dist) {
+        DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n",
+                     g[v].index, dist);
+        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];
+        auto prev_level_v = shadow_map[make_pair(v, dist - 1)];
+
+        connect_to_clones(prev_level_v, cur_shadow_helper);
+    }
+
+    /*
+     * wiring edges for removal is a special case.
+     *
+     * when wiring edges for removal, as well as wiring up immediate
+     * predecessors to immediate successors, we also need to wire up more
+     * distant successors to their respective shadow graph levels.
+     *
+     * for example, consider graph start->a->b->c->d->accept.
+     *
+     * at edit distance 1, we need remove edges start->b, a->c, b->d, and
+     * c->accept, all going from original graph (level 0) to shadow graph
+     * level 1.
+     *
+     * at edit distance 2, we also need edges start->c, a->d and b->accept,
+     * all going from level 0 to shadow graph level 2.
+     *
+     * this is propagated to all shadow levels; that is, given edit
+     * distance 3, we will have edges from shadow levels 0->1, 0->2,
+     * 0->3, 1->2, 1->3, and 2->3.
+     *
+     * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2,
+     * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc.
+     *
+     * we also have to wire helpers to their removal successors, to
+     * accommodate for a replace followed by a remove, on all shadow levels.
+     *
+     * and finally, we also have to wire source shadows into removal
+     * successor helpers on a level above, to accommodate for a remove
+     * followed by a replace.
+     */
+    void connect_removals(NFAVertex v) {
+        DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index);
+
+        // vertices returned by this function don't include self-loops
+        auto dst_vertices_by_depth =
+            gatherSuccessorsByDepth(g, v, edit_distance);
+        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
+        for (auto orig_src : orig_src_vertices) {
+            // ignore self-loops
+            if (orig_src == v) {
+                continue;
+            }
+            for (unsigned step = 1; step <= edit_distance; step++) {
+                for (unsigned dist = step; dist <= edit_distance; dist++) {
+                    auto &dst_vertices = dst_vertices_by_depth[step - 1];
+                    for (auto &orig_dst : dst_vertices) {
+                        const auto &shadow_src =
+                            shadow_map[make_pair(orig_src, dist - step)];
+                        const auto &shadow_helper =
+                            helper_map[make_pair(orig_src, dist - step)];
+                        const auto &shadow_dst =
+                                shadow_map[make_pair(orig_dst, dist)];
+
+                        // removal
+                        connect_to_clones(shadow_src, shadow_dst);
+
+                        // removal from helper vertex
+                        connect_to_clones(shadow_helper, shadow_dst);
+
+                        // removal into helper, requires additional edit
+                        if ((dist + 1) <= edit_distance) {
+                            const auto &next_level_helper =
+                                    helper_map[make_pair(orig_dst, dist + 1)];
+
+                            connect_to_clones(shadow_src, next_level_helper);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    void connect_shadow_graph() {
+        DEBUG_PRINTF("Wiring up the graph\n");
+
+        for (auto v : orig) {
+
+            DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index);
+
+            for (unsigned dist = 0; dist <= edit_distance; dist++) {
+
+                // handle insert/replace
+                connect_succs(v, dist);
+
+                // handle replace/multiple insert
+                connect_preds(v, dist);
+
+                // handle helpers
+                if (!hamming && dist > 0) {
+                    connect_helpers(v, dist);
+                }
+            }
+
+            // handle removals
+            if (!hamming) {
+                connect_removals(v);
+            }
+        }
+    }
+
+    void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) {
+        for (auto dst : targets) {
+            DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index,
+                         g[dst].index);
+            edges_to_be_added.emplace_back(src, dst);
+        }
+    }
+
+    // create a clone of the vertex, but overwrite its report set
+    void create_clone(NFAVertex v, const flat_set<ReportID> &reports,
+                      unsigned max_edit_distance,
+                      const flat_set<NFAVertex> &targets) {
+        // some vertices may have the same reports, but different successors;
+        // therefore, we may need to connect them multiple times, but still only
+        // clone once
+        bool needs_cloning = !contains(clones, v);
+
+        DEBUG_PRINTF("Cloning node %zu\n", g[v].index);
+        // go through all shadows and helpers, including
+        // original vertex
+        for (unsigned d = 0; d < max_edit_distance; d++) {
+            auto shadow_v = shadow_map[make_pair(v, d)];
+            auto helper_v = helper_map[make_pair(v, d)];
+
+            NFAVertex new_shadow_v, new_helper_v;
+
+            // make sure we don't clone the same vertex twice
+            if (needs_cloning) {
+                new_shadow_v = clone_vertex(g, shadow_v);
+                DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
+                             g[new_shadow_v].index, d);
+                clones[shadow_v] = new_shadow_v;
+            } else {
+                new_shadow_v = clones[shadow_v];
+            }
+            g[new_shadow_v].reports = reports;
+
+            connect_to_targets(new_shadow_v, targets);
+
+            if (shadow_v == helper_v) {
+                continue;
+            }
+            if (needs_cloning) {
+                new_helper_v = clone_vertex(g, helper_v);
+                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
+                             g[new_helper_v].index, d);
+                clones[helper_v] = new_helper_v;
+            } else {
+                new_helper_v = clones[helper_v];
+            }
+            g[new_helper_v].reports = reports;
+
+            connect_to_targets(new_helper_v, targets);
+        }
+    }
+
+    void write_reports(NFAVertex v, const flat_set<ReportID> &reports,
+                       unsigned max_edit_distance,
+                       const flat_set<NFAVertex> &targets) {
+        // we're overwriting reports, but we're not losing any
+        // information as we already cached all the different report
+        // sets, so vertices having different reports will be cloned and set up
+        // with the correct report set
+
+        // go through all shadows and helpers, including original
+        // vertex
+        for (unsigned d = 0; d < max_edit_distance; d++) {
+            auto shadow_v = shadow_map[make_pair(v, d)];
+            auto helper_v = helper_map[make_pair(v, d)];
+            DEBUG_PRINTF("Setting up reports for shadow node: %zu "
+                         "(level %u)\n",
+                         g[shadow_v].index, d);
+            DEBUG_PRINTF("Setting up reports for helper node: %zu "
+                         "(level %u)\n",
+                         g[helper_v].index, d);
+            g[shadow_v].reports = reports;
+            g[helper_v].reports = reports;
+
+            connect_to_targets(shadow_v, targets);
+            connect_to_targets(helper_v, targets);
+        }
+    }
+
+    /*
+     * we may have multiple report sets per graph. that means, whenever we
+     * construct additional paths through the graph (alternations, removals), we
+     * have to account for the fact that some vertices are predecessors to
+     * vertices with different report sets.
+     *
+     * whenever that happens, we have to clone the paths for both report sets,
+     * and set up these new vertices with their respective report sets as well.
+     *
+     * in order to do that, we first have to get all the predecessors for accept
+     * and acceptEod vertices. then, go through them one by one, and take note
+     * of the report lists. the first report set we find, wins, the rest we
+     * clone.
+     *
+     * we also have to do this in two passes, because there may be vertices that
+     * are predecessors to vertices with different report sets, so to avoid
+     * overwriting reports we will be caching reports info instead.
+     */
+    void create_reports() {
+        map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices;
+        flat_set<NFAVertex> accepts{g.accept, g.acceptEod};
+
+        // gather reports info from all vertices connected to accept
+        for (auto accept : accepts) {
+            for (auto src : inv_adjacent_vertices_range(accept, g)) {
+                // skip special vertices
+                if (is_special(src, g)) {
+                    continue;
+                }
+                reports_to_vertices[g[src].reports].insert(src);
+            }
+        }
+
+        // we expect to see at most two report sets
+        assert(reports_to_vertices.size() > 0 &&
+               reports_to_vertices.size() <= 2);
+
+        // set up all reports
+        bool clone = false;
+        for (auto &pair : reports_to_vertices) {
+            const auto &reports = pair.first;
+            const auto &vertices = pair.second;
+
+            for (auto src : vertices) {
+                // get all predecessors up to edit distance
+                auto src_vertices_by_depth =
+                        gatherPredecessorsByDepth(g, src, edit_distance);
+
+                // find which accepts source vertex connects to
+                flat_set<NFAVertex> targets;
+                for (const auto &accept : accepts) {
+                    NFAEdge e = edge(src, accept, g);
+                    if (e) {
+                        targets.insert(accept);
+                    }
+                }
+                assert(targets.size());
+
+                for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
+                    const auto &preds = src_vertices_by_depth[d];
+                    for (auto v : preds) {
+                        // only clone a node if it already contains reports
+                        if (clone && !g[v].reports.empty()) {
+                            create_clone(v, reports, edit_distance - d,
+                                         targets);
+                        } else {
+                            write_reports(v, reports, edit_distance - d,
+                                          targets);
+                        }
+                    }
+                }
+            }
+            // clone vertices only if it's not our first report set
+            clone = true;
+        }
+    }
+};
+
+// check if we will edit our way into a vacuous pattern
+static
+bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) {
+    auto depths = calcRevDepths(g);
+
+    depth min_depth = depth::infinity();
+    auto idx = g[g.start].index;
+
+    // check distance from start to accept/acceptEod
+    if (depths[idx].toAccept.min.is_finite()) {
+        min_depth = min(depths[idx].toAccept.min, min_depth);
+    }
+    if (depths[idx].toAcceptEod.min.is_finite()) {
+        min_depth = min(depths[idx].toAcceptEod.min, min_depth);
+    }
+
+    idx = g[g.startDs].index;
+
+    // check distance from startDs to accept/acceptEod
+    if (depths[idx].toAccept.min.is_finite()) {
+        min_depth = min(depths[idx].toAccept.min, min_depth);
+    }
+    if (depths[idx].toAcceptEod.min.is_finite()) {
+        min_depth = min(depths[idx].toAcceptEod.min, min_depth);
+    }
+
+    assert(min_depth.is_finite());
+
+    // now, check if we can edit our way into a vacuous pattern
+    if (min_depth <= (u64a) edit_distance + 1) {
+        DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n");
+        return true;
+    }
+    return false;
+}
+
+void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming,
+                            bool utf8, const Grey &grey) {
+    if (edit_distance == 0) {
+        return;
+    }
+    if (!grey.allowApproximateMatching) {
+        throw CompileError("Approximate matching is disabled.");
+    }
+    if (edit_distance > grey.maxEditDistance) {
+        throw CompileError("Edit distance is too big.");
+    }
+    if (utf8) {
+        throw CompileError("UTF-8 is disallowed for approximate matching.");
+    }
+    // graph isn't fuzzable if there are edge assertions anywhere in the graph
+    for (auto e : edges_range(g)) {
+        if (g[e].assert_flags) {
+            throw CompileError("Zero-width assertions are disallowed for "
+                               "approximate matching.");
+        }
+    }
+    if (!hamming && will_turn_vacuous(g, edit_distance)) {
+        throw CompileError("Approximate matching patterns that reduce to "
+                           "vacuous patterns are disallowed.");
+    }
+}
+
+void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming,
+                const Grey &grey) {
+    if (edit_distance == 0) {
+        return;
+    }
+
+    assert(grey.allowApproximateMatching);
+    assert(grey.maxEditDistance >= edit_distance);
+
+    ShadowGraph sg(g, edit_distance, hamming);
+    sg.fuzz_graph();
+
+    // For safety, enforce limit on actual vertex count.
+    if (num_vertices(g) > grey.limitApproxMatchingVertices) {
+        DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g),
+                     grey.limitApproxMatchingVertices);
+        throw ResourceLimitError();
+    }
+}
+
+} // namespace ue2
author	Ivan Blinkov <ivan@blinkov.ru>	2022-02-10 16:47:11 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:47:11 +0300
commit	5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree	339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_fuzzy.cpp
parent	1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download	ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz