aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
diff options
context:
space:
mode:
authorbnagaev <bnagaev@yandex-team.ru>2022-02-10 16:47:04 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:04 +0300
commitc74559fb88da8adac0d9186cfa55a6b13c47695f (patch)
treeb83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs/hyperscan/src/rose/rose_build_add.cpp
parentd6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff)
downloadydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_add.cpp')
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_add.cpp3438
1 files changed, 1719 insertions, 1719 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
index 2dc136c0f1..4929c95fce 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
@@ -1,675 +1,675 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_add_internal.h"
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "rose_build_anchored.h"
-#include "rose_in_util.h"
-#include "hwlm/hwlm_literal.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_prefilter.h"
-#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_region.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_add_internal.h"
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "rose_build_anchored.h"
+#include "rose_in_util.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_prefilter.h"
+#include "nfagraph/ng_prune.h"
+#include "nfagraph/ng_region.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/insertion_ordered.h"
-#include "util/make_unique.h"
+#include "util/make_unique.h"
#include "util/noncopyable.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-/**
- * \brief Data used by most of the construction code in this file.
- */
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+/**
+ * \brief Data used by most of the construction code in this file.
+ */
struct RoseBuildData : noncopyable {
- RoseBuildData(const RoseInGraph &ig_in, bool som_in)
- : ig(ig_in), som(som_in) {}
-
- /** Input rose graph. */
- const RoseInGraph &ig;
-
- /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
- * require ANCH history to prevent overlap. */
+ RoseBuildData(const RoseInGraph &ig_in, bool som_in)
+ : ig(ig_in), som(som_in) {}
+
+ /** Input rose graph. */
+ const RoseInGraph &ig;
+
+ /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
+ * require ANCH history to prevent overlap. */
unordered_set<RoseInEdge> anch_history_edges;
-
- /** True if we're tracking Start of Match. */
- bool som;
-};
-
-static
-ReportID findReportId(const NGHolder &g) {
- /* prefix/infix always have an edge to accept and only 1 reportid initially
- */
- assert(in_degree(g.accept, g));
- const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
- assert(!rep.empty());
- return *rep.begin();
-}
-
-static
-RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
- u32 max_offset) {
- RoseGraph &g = build->g;
- // add to tree
- RoseVertex v = add_vertex(g);
- g[v].min_offset = min_offset;
- g[v].max_offset = max_offset;
-
+
+ /** True if we're tracking Start of Match. */
+ bool som;
+};
+
+static
+ReportID findReportId(const NGHolder &g) {
+ /* prefix/infix always have an edge to accept and only 1 reportid initially
+ */
+ assert(in_degree(g.accept, g));
+ const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
+ assert(!rep.empty());
+ return *rep.begin();
+}
+
+static
+RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
+ u32 max_offset) {
+ RoseGraph &g = build->g;
+ // add to tree
+ RoseVertex v = add_vertex(g);
+ g[v].min_offset = min_offset;
+ g[v].max_offset = max_offset;
+
DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index,
- literalId);
- g[v].literals.insert(literalId);
- build->literal_info[literalId].vertices.insert(v);
-
- return v;
-}
-
-RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
- u32 minBound, u32 maxBound, u32 literalId,
- size_t literalLength,
+ literalId);
+ g[v].literals.insert(literalId);
+ build->literal_info[literalId].vertices.insert(v);
+
+ return v;
+}
+
+RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
+ u32 minBound, u32 maxBound, u32 literalId,
+ size_t literalLength,
const flat_set<ReportID> &reports) {
- assert(parent != RoseGraph::null_vertex());
-
- RoseGraph &g = build->g;
- // add to tree (offsets set latter)
- RoseVertex v = createVertex(build, literalId, 0U, 0U);
-
- /* fill in report information */
- g[v].reports.insert(reports.begin(), reports.end());
-
+ assert(parent != RoseGraph::null_vertex());
+
+ RoseGraph &g = build->g;
+ // add to tree (offsets set latter)
+ RoseVertex v = createVertex(build, literalId, 0U, 0U);
+
+ /* fill in report information */
+ g[v].reports.insert(reports.begin(), reports.end());
+
RoseEdge e = add_edge(parent, v, g);
- DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
-
- g[e].minBound = minBound;
- g[e].maxBound = maxBound;
- g[e].rose_top = 0;
-
- u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
- u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
-
- /* take literal length into account for offsets */
- const u32 lit_len = verify_u32(literalLength);
- min_offset = add_rose_depth(min_offset, lit_len);
- max_offset = add_rose_depth(max_offset, lit_len);
-
- g[v].min_offset = min_offset;
- g[v].max_offset = max_offset;
-
- return v;
-}
-
-static
-RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
- u32 min_offset, u32 max_offset) {
- RoseGraph &g = build->g;
- RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
-
+ DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
+
+ g[e].minBound = minBound;
+ g[e].maxBound = maxBound;
+ g[e].rose_top = 0;
+
+ u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
+ u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
+
+ /* take literal length into account for offsets */
+ const u32 lit_len = verify_u32(literalLength);
+ min_offset = add_rose_depth(min_offset, lit_len);
+ max_offset = add_rose_depth(max_offset, lit_len);
+
+ g[v].min_offset = min_offset;
+ g[v].max_offset = max_offset;
+
+ return v;
+}
+
+static
+RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
+ u32 min_offset, u32 max_offset) {
+ RoseGraph &g = build->g;
+ RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
+
DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index,
- literalId);
-
+ literalId);
+
RoseEdge e = add_edge(build->anchored_root, v, g);
- g[e].minBound = min_offset;
- g[e].maxBound = max_offset;
-
- return v;
-}
-
-static
-RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
- RoseGraph &g = build->g;
- RoseVertex w = add_vertex(g[v], g);
+ g[e].minBound = min_offset;
+ g[e].maxBound = max_offset;
+
+ return v;
+}
+
+static
+RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
+ RoseGraph &g = build->g;
+ RoseVertex w = add_vertex(g[v], g);
DEBUG_PRINTF("added vertex %zu\n", g[w].index);
-
- for (auto lit_id : g[w].literals) {
- build->literal_info[lit_id].vertices.insert(w);
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseVertex s = source(e, g);
- add_edge(s, w, g[e], g);
+
+ for (auto lit_id : g[w].literals) {
+ build->literal_info[lit_id].vertices.insert(w);
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseVertex s = source(e, g);
+ add_edge(s, w, g[e], g);
DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index);
- }
-
- return w;
-}
-
-namespace {
-struct created_key {
- explicit created_key(const RoseInEdgeProps &trep)
+ }
+
+ return w;
+}
+
+namespace {
+struct created_key {
+ explicit created_key(const RoseInEdgeProps &trep)
: prefix(trep.graph.get()), lag(trep.graph_lag) {
- }
- bool operator<(const created_key &b) const {
- const created_key &a = *this;
- ORDER_CHECK(prefix);
- ORDER_CHECK(lag);
- return false;
- }
- NGHolder *prefix;
- u32 lag;
-};
-}
-
-static
-bool isPureAnchored(const NGHolder &h) {
- return !proper_out_degree(h.startDs, h);
-}
-
-static
-RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
- const RoseInEdge &rose_edge, const RoseEdge &e) {
- const RoseGraph &g = tbi.g;
- const RoseVertex u = source(e, g), v = target(e, g);
- const bool fixed_offset_src = g[u].fixedOffset();
- const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
-
- DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
+ }
+ bool operator<(const created_key &b) const {
+ const created_key &a = *this;
+ ORDER_CHECK(prefix);
+ ORDER_CHECK(lag);
+ return false;
+ }
+ NGHolder *prefix;
+ u32 lag;
+};
+}
+
+static
+bool isPureAnchored(const NGHolder &h) {
+ return !proper_out_degree(h.startDs, h);
+}
+
+static
+RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
+ const RoseInEdge &rose_edge, const RoseEdge &e) {
+ const RoseGraph &g = tbi.g;
+ const RoseVertex u = source(e, g), v = target(e, g);
+ const bool fixed_offset_src = g[u].fixedOffset();
+ const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
+
+ DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
- (int)g[u].fixedOffset(), (int)g[v].left);
-
- if (g[v].left) {
- // Roles with prefix engines have their history handled by that prefix.
- assert(!contains(bd.anch_history_edges, rose_edge));
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- if (contains(bd.anch_history_edges, rose_edge)) {
- DEBUG_PRINTF("needs anch history\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- if (fixed_offset_src && has_bounds) {
- DEBUG_PRINTF("needs anch history\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- return ROSE_ROLE_HISTORY_NONE;
-}
-
-static
-bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
- for (auto v : adjacent_vertices_range(iv, ig)) {
- if (ig[v].type != RIV_ACCEPT) {
- return true;
- }
- }
- return false;
-}
-
-static
-void createVertices(RoseBuildImpl *tbi,
- map<RoseInVertex, vector<RoseVertex> > &vertex_map,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, u32 min_offset, u32 max_offset,
- u32 literalId, u32 delay, const RoseBuildData &bd) {
- RoseGraph &g = tbi->g;
-
- DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
-
- map<created_key, RoseVertex> created;
-
- for (const auto &pv : parents) {
- RoseVertex w;
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
- shared_ptr<NGHolder> prefix_graph = edge_props.graph;
- u32 prefix_lag = edge_props.graph_lag;
-
- created_key key(edge_props);
-
- if (!contains(created, key)) {
- assert(prefix_graph || !edge_props.haig);
- w = createVertex(tbi, literalId, min_offset, max_offset);
- created[key] = w;
-
- if (prefix_graph) {
- g[w].left.graph = prefix_graph;
+ (int)g[u].fixedOffset(), (int)g[v].left);
+
+ if (g[v].left) {
+ // Roles with prefix engines have their history handled by that prefix.
+ assert(!contains(bd.anch_history_edges, rose_edge));
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ if (contains(bd.anch_history_edges, rose_edge)) {
+ DEBUG_PRINTF("needs anch history\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ if (fixed_offset_src && has_bounds) {
+ DEBUG_PRINTF("needs anch history\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ return ROSE_ROLE_HISTORY_NONE;
+}
+
+static
+bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
+ for (auto v : adjacent_vertices_range(iv, ig)) {
+ if (ig[v].type != RIV_ACCEPT) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+void createVertices(RoseBuildImpl *tbi,
+ map<RoseInVertex, vector<RoseVertex> > &vertex_map,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, u32 min_offset, u32 max_offset,
+ u32 literalId, u32 delay, const RoseBuildData &bd) {
+ RoseGraph &g = tbi->g;
+
+ DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
+
+ map<created_key, RoseVertex> created;
+
+ for (const auto &pv : parents) {
+ RoseVertex w;
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+ shared_ptr<NGHolder> prefix_graph = edge_props.graph;
+ u32 prefix_lag = edge_props.graph_lag;
+
+ created_key key(edge_props);
+
+ if (!contains(created, key)) {
+ assert(prefix_graph || !edge_props.haig);
+ w = createVertex(tbi, literalId, min_offset, max_offset);
+ created[key] = w;
+
+ if (prefix_graph) {
+ g[w].left.graph = prefix_graph;
if (edge_props.dfa) {
g[w].left.dfa = edge_props.dfa;
- }
- g[w].left.haig = edge_props.haig;
- g[w].left.lag = prefix_lag;
-
- // The graph already has its report id allocated - find it.
- g[w].left.leftfix_report = findReportId(*prefix_graph);
-
- if (g[w].left.dfa || g[w].left.haig) {
- assert(prefix_graph);
- g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
- g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
- }
- }
-
- if (bd.som && !g[w].left.haig) {
- /* no prefix - som based on literal start */
- assert(!prefix_graph);
+ }
+ g[w].left.haig = edge_props.haig;
+ g[w].left.lag = prefix_lag;
+
+ // The graph already has its report id allocated - find it.
+ g[w].left.leftfix_report = findReportId(*prefix_graph);
+
+ if (g[w].left.dfa || g[w].left.haig) {
+ assert(prefix_graph);
+ g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
+ g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
+ }
+ }
+
+ if (bd.som && !g[w].left.haig) {
+ /* no prefix - som based on literal start */
+ assert(!prefix_graph);
g[w].som_adjust = tbi->literals.at(literalId).elength();
- DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
- }
-
+ DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
+ }
+
DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index);
- vertex_map[iv].push_back(w);
- } else {
- w = created[key];
- }
-
+ vertex_map[iv].push_back(w);
+ } else {
+ w = created[key];
+ }
+
RoseVertex p = pv.first;
-
+
RoseEdge e = add_edge(p, w, g);
- DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
- edge_props.maxBound);
- g[e].minBound = edge_props.minBound;
- if (p != tbi->root && g[w].left.graph
- && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
- depth mw = findMaxWidth(*g[w].left.graph);
- if (mw.is_infinite()) {
- g[e].maxBound = ROSE_BOUND_INF;
- } else {
- DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
- prefix_lag);
- g[e].maxBound = prefix_lag + mw;
- }
- } else {
- g[e].maxBound = edge_props.maxBound;
- }
- g[e].rose_top = 0;
- g[e].history = selectHistory(*tbi, bd, pv.second, e);
- }
-
- if (delay && hasSuccessorLiterals(iv, bd.ig)) {
- // Add an undelayed "ghost" vertex for this literal.
- u32 ghostId = tbi->literal_info[literalId].undelayed_id;
- DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
- assert(ghostId != literalId);
+ DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
+ edge_props.maxBound);
+ g[e].minBound = edge_props.minBound;
+ if (p != tbi->root && g[w].left.graph
+ && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
+ depth mw = findMaxWidth(*g[w].left.graph);
+ if (mw.is_infinite()) {
+ g[e].maxBound = ROSE_BOUND_INF;
+ } else {
+ DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
+ prefix_lag);
+ g[e].maxBound = prefix_lag + mw;
+ }
+ } else {
+ g[e].maxBound = edge_props.maxBound;
+ }
+ g[e].rose_top = 0;
+ g[e].history = selectHistory(*tbi, bd, pv.second, e);
+ }
+
+ if (delay && hasSuccessorLiterals(iv, bd.ig)) {
+ // Add an undelayed "ghost" vertex for this literal.
+ u32 ghostId = tbi->literal_info[literalId].undelayed_id;
+ DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
+ assert(ghostId != literalId);
assert(tbi->literals.at(ghostId).delay == 0);
-
- // Adjust offsets, removing delay.
- u32 ghost_min = min_offset, ghost_max = max_offset;
- assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
- ghost_min -= delay;
- ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
-
- RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
-
- for (const auto &pv : parents) {
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+
+ // Adjust offsets, removing delay.
+ u32 ghost_min = min_offset, ghost_max = max_offset;
+ assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
+ ghost_min -= delay;
+ ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
+
+ RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
+
+ for (const auto &pv : parents) {
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
RoseEdge e = add_edge(pv.first, g_v, tbi->g);
- g[e].minBound = edge_props.minBound;
- g[e].maxBound = edge_props.maxBound;
- g[e].history = selectHistory(*tbi, bd, pv.second, e);
- DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
- edge_props.minBound, edge_props.maxBound);
- }
-
- for (auto &m : created) {
- tbi->ghost[m.second] = g_v;
- }
- }
-}
-
-/* ensure the holder does not accept any paths which do not end with lit */
-static
-void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
+ g[e].minBound = edge_props.minBound;
+ g[e].maxBound = edge_props.maxBound;
+ g[e].history = selectHistory(*tbi, bd, pv.second, e);
+ DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
+ edge_props.minBound, edge_props.maxBound);
+ }
+
+ for (auto &m : created) {
+ tbi->ghost[m.second] = g_v;
+ }
+ }
+}
+
+/* ensure the holder does not accept any paths which do not end with lit */
+static
+void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str());
- set<NFAVertex> curr, next;
- curr.insert(g.accept);
- curr.insert(g.acceptEod);
-
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- next.clear();
- for (auto curr_v : curr) {
+ set<NFAVertex> curr, next;
+ curr.insert(g.accept);
+ curr.insert(g.acceptEod);
+
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ next.clear();
+ for (auto curr_v : curr) {
DEBUG_PRINTF("handling %zu\n", g[curr_v].index);
- vector<NFAVertex> next_cand;
- insert(&next_cand, next_cand.end(),
- inv_adjacent_vertices(curr_v, g));
- clear_in_edges(curr_v, g);
- if (curr_v == g.acceptEod) {
- add_edge(g.accept, g.acceptEod, g);
- }
-
- for (auto v : next_cand) {
- assert(v != g.startDs);
- if (v == g.start || v == g.startDs || v == g.accept) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
-
- if (!overlaps(*it, cr)) {
+ vector<NFAVertex> next_cand;
+ insert(&next_cand, next_cand.end(),
+ inv_adjacent_vertices(curr_v, g));
+ clear_in_edges(curr_v, g);
+ if (curr_v == g.acceptEod) {
+ add_edge(g.accept, g.acceptEod, g);
+ }
+
+ for (auto v : next_cand) {
+ assert(v != g.startDs);
+ if (v == g.start || v == g.startDs || v == g.accept) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+
+ if (!overlaps(*it, cr)) {
DEBUG_PRINTF("false edge %zu\n", g[v].index);
- continue;
- }
-
- NFAVertex v2 = clone_vertex(g, v);
- clone_in_edges(g, v, v2);
- add_edge(v2, curr_v, g);
- g[v2].char_reach &= *it;
+ continue;
+ }
+
+ NFAVertex v2 = clone_vertex(g, v);
+ clone_in_edges(g, v, v2);
+ add_edge(v2, curr_v, g);
+ g[v2].char_reach &= *it;
DEBUG_PRINTF("next <- %zu\n", g[v2].index);
- next.insert(v2);
- }
- }
-
- curr.swap(next);
- }
-
- pruneUseless(g);
+ next.insert(v2);
+ }
+ }
+
+ curr.swap(next);
+ }
+
+ pruneUseless(g);
clearReports(g);
- assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
- assert(allMatchStatesHaveReports(g));
-
- DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
-}
-
-static
-RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
- const RoseInVertexProps &iv_info,
- const RoseInEdgeProps &ep) {
- if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
- DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
- return RoseGraph::null_vertex(); /* TODO: better */
- }
-
- const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
- depth min_width(0), max_width(0);
-
- if (ep.graph.get()) {
- const depth graph_lag(ep.graph_lag);
- max_width = findMaxWidth(*ep.graph) + graph_lag;
- min_width = findMinWidth(*ep.graph) + graph_lag;
- if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
- max_width = depth::infinity();
- }
- }
-
- DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
-
- NGHolder h;
-
- if (ep.graph.get() && max_width <= anchored_max_depth) {
- cloneHolder(h, *ep.graph);
-
- /* add literal/dots */
- if (ep.graph_lag) {
- assert(ep.graph_lag == iv_info.s.length());
- appendLiteral(h, iv_info.s);
- } else {
- removeFalsePaths(h, iv_info.s);
- }
- } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
- && iv_info.s.length() + ep.maxBound
- <= tbi->cc.grey.maxAnchoredRegion) {
- if (ep.maxBound || ep.minBound) {
- /* TODO: handle, however these cases are not generated currently by
+ assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
+ assert(allMatchStatesHaveReports(g));
+
+ DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
+}
+
+static
+RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
+ const RoseInVertexProps &iv_info,
+ const RoseInEdgeProps &ep) {
+ if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
+ DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
+ return RoseGraph::null_vertex(); /* TODO: better */
+ }
+
+ const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
+ depth min_width(0), max_width(0);
+
+ if (ep.graph.get()) {
+ const depth graph_lag(ep.graph_lag);
+ max_width = findMaxWidth(*ep.graph) + graph_lag;
+ min_width = findMinWidth(*ep.graph) + graph_lag;
+ if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
+ max_width = depth::infinity();
+ }
+ }
+
+ DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
+
+ NGHolder h;
+
+ if (ep.graph.get() && max_width <= anchored_max_depth) {
+ cloneHolder(h, *ep.graph);
+
+ /* add literal/dots */
+ if (ep.graph_lag) {
+ assert(ep.graph_lag == iv_info.s.length());
+ appendLiteral(h, iv_info.s);
+ } else {
+ removeFalsePaths(h, iv_info.s);
+ }
+ } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
+ && iv_info.s.length() + ep.maxBound
+ <= tbi->cc.grey.maxAnchoredRegion) {
+ if (ep.maxBound || ep.minBound) {
+ /* TODO: handle, however these cases are not generated currently by
ng_violet */
- return RoseGraph::null_vertex();
- }
- max_width = depth(ep.maxBound + iv_info.s.length());
- min_width = depth(ep.minBound + iv_info.s.length());
- add_edge(h.start, h.accept, h);
- appendLiteral(h, iv_info.s);
- } else {
- return RoseGraph::null_vertex();
- }
-
- u32 anchored_exit_id = tbi->getNewLiteralId();
- u32 remap_id = 0;
- DEBUG_PRINTF(" trying to add dfa stuff\n");
- int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
-
- if (rv == ANCHORED_FAIL) {
- return RoseGraph::null_vertex();
- } else if (rv == ANCHORED_REMAP) {
- anchored_exit_id = remap_id;
- } else {
- assert(rv == ANCHORED_SUCCESS);
- }
-
- // Store the literal itself in a side structure so that we can use it for
- // overlap calculations later. This may be obsolete when the old Rose
- // construction path (and its history selection code) goes away.
- rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
- tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
-
- assert(min_width <= anchored_max_depth);
- assert(max_width <= anchored_max_depth);
- assert(min_width <= max_width);
-
- /* Note: bounds are end-to-end as anchored lits are considered
- * to have 0 length. */
- RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
- max_width);
- return v;
-}
-
-static
-u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
- const RoseInVertexProps &succ_vp) {
- /* we need to ensure there is enough history to find the successor literal
- * when we enable its group.
- */
-
- if (!ep.graph.get()) {
- return 0; /* non overlapping */
- }
- depth graph_min_width = findMinWidth(*ep.graph);
- u32 min_width = ep.graph_lag + graph_min_width;
- u32 s_len = succ_vp.s.length();
-
- if (s_len <= min_width) {
- return 0; /* no overlap */
- }
-
- u32 overlap = s_len - min_width;
- DEBUG_PRINTF("found overlap of %u\n", overlap);
- return overlap;
-}
-
-static
-void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
- vector<u8> &cmp) {
- if (lag >= HWLM_MASKLEN) {
- msk.clear(); cmp.clear();
- return;
- }
-
- assert(in_degree(h.acceptEod, h) == 1); // no eod reports
-
- // Start with the set of reporter vertices for this rose.
- set<NFAVertex> curr, next;
- insert(&curr, inv_adjacent_vertices(h.accept, h));
- assert(!curr.empty());
-
- msk.assign(HWLM_MASKLEN, 0);
- cmp.assign(HWLM_MASKLEN, 0);
- size_t i = HWLM_MASKLEN - lag - 1;
- do {
- if (curr.empty() || contains(curr, h.start) ||
- contains(curr, h.startDs)) {
- DEBUG_PRINTF("end of the road\n");
- break;
- }
-
- next.clear();
- CharReach cr;
- for (auto v : curr) {
+ return RoseGraph::null_vertex();
+ }
+ max_width = depth(ep.maxBound + iv_info.s.length());
+ min_width = depth(ep.minBound + iv_info.s.length());
+ add_edge(h.start, h.accept, h);
+ appendLiteral(h, iv_info.s);
+ } else {
+ return RoseGraph::null_vertex();
+ }
+
+ u32 anchored_exit_id = tbi->getNewLiteralId();
+ u32 remap_id = 0;
+ DEBUG_PRINTF(" trying to add dfa stuff\n");
+ int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
+
+ if (rv == ANCHORED_FAIL) {
+ return RoseGraph::null_vertex();
+ } else if (rv == ANCHORED_REMAP) {
+ anchored_exit_id = remap_id;
+ } else {
+ assert(rv == ANCHORED_SUCCESS);
+ }
+
+ // Store the literal itself in a side structure so that we can use it for
+ // overlap calculations later. This may be obsolete when the old Rose
+ // construction path (and its history selection code) goes away.
+ rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
+ tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
+
+ assert(min_width <= anchored_max_depth);
+ assert(max_width <= anchored_max_depth);
+ assert(min_width <= max_width);
+
+ /* Note: bounds are end-to-end as anchored lits are considered
+ * to have 0 length. */
+ RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
+ max_width);
+ return v;
+}
+
+static
+u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
+ const RoseInVertexProps &succ_vp) {
+ /* we need to ensure there is enough history to find the successor literal
+ * when we enable its group.
+ */
+
+ if (!ep.graph.get()) {
+ return 0; /* non overlapping */
+ }
+ depth graph_min_width = findMinWidth(*ep.graph);
+ u32 min_width = ep.graph_lag + graph_min_width;
+ u32 s_len = succ_vp.s.length();
+
+ if (s_len <= min_width) {
+ return 0; /* no overlap */
+ }
+
+ u32 overlap = s_len - min_width;
+ DEBUG_PRINTF("found overlap of %u\n", overlap);
+ return overlap;
+}
+
+static
+void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
+ vector<u8> &cmp) {
+ if (lag >= HWLM_MASKLEN) {
+ msk.clear(); cmp.clear();
+ return;
+ }
+
+ assert(in_degree(h.acceptEod, h) == 1); // no eod reports
+
+ // Start with the set of reporter vertices for this rose.
+ set<NFAVertex> curr, next;
+ insert(&curr, inv_adjacent_vertices(h.accept, h));
+ assert(!curr.empty());
+
+ msk.assign(HWLM_MASKLEN, 0);
+ cmp.assign(HWLM_MASKLEN, 0);
+ size_t i = HWLM_MASKLEN - lag - 1;
+ do {
+ if (curr.empty() || contains(curr, h.start) ||
+ contains(curr, h.startDs)) {
+ DEBUG_PRINTF("end of the road\n");
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+ for (auto v : curr) {
DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index,
- describeClass(h[v].char_reach).c_str());
- cr |= h[v].char_reach;
- insert(&next, inv_adjacent_vertices(v, h));
- }
- make_and_cmp_mask(cr, &msk[i], &cmp[i]);
- DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
- describeClass(cr).c_str(), msk.at(i), cmp.at(i));
- curr.swap(next);
- } while (i-- > 0);
-}
-
-static
-void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
- map<RoseInVertex, vector<RoseVertex> > &vertex_map,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, const RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
- const RoseInVertexProps &iv_info = ig[iv];
- assert(iv_info.type == RIV_LITERAL);
- assert(!parents.empty()); /* start vertices should not be here */
-
+ describeClass(h[v].char_reach).c_str());
+ cr |= h[v].char_reach;
+ insert(&next, inv_adjacent_vertices(v, h));
+ }
+ make_and_cmp_mask(cr, &msk[i], &cmp[i]);
+ DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
+ describeClass(cr).c_str(), msk.at(i), cmp.at(i));
+ curr.swap(next);
+ } while (i-- > 0);
+}
+
+static
+void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
+ map<RoseInVertex, vector<RoseVertex> > &vertex_map,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, const RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+ const RoseInVertexProps &iv_info = ig[iv];
+ assert(iv_info.type == RIV_LITERAL);
+ assert(!parents.empty()); /* start vertices should not be here */
+
// ng_violet should have ensured that mixed-sensitivity literals are no
- // longer than the benefits max width.
- assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
- !mixed_sensitivity(iv_info.s));
-
- // Rose graph construction process should have given us a min_offset.
- assert(iv_info.min_offset > 0);
-
- if (use_eod_table) {
- goto floating;
- }
-
- DEBUG_PRINTF("rose find vertex\n");
- if (parents.size() == 1) {
- const RoseVertex u = parents.front().first;
- const RoseInEdgeProps &ep = ig[parents.front().second];
-
- if (!tbi->isAnyStart(u)) {
- goto floating;
- }
-
- if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
- goto floating;
- }
- if (ep.graph && !isAnchored(*ep.graph)) {
- goto floating;
- }
-
- DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
- ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
-
- /* need to check if putting iv into the anchored table would create
- * any bad_overlap relationships with its successor literals */
- for (const auto &e : out_edges_range(iv, ig)) {
- RoseInVertex t = target(e, ig);
- u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
- DEBUG_PRINTF("found overlap of %u\n", overlap);
- if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
- goto floating;
- }
- }
-
- RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
- if (v != RoseGraph::null_vertex()) {
- DEBUG_PRINTF("add anchored literal vertex\n");
- vertex_map[iv].push_back(v);
- return;
- }
- }
-
-floating:
- vector<u8> msk, cmp;
- if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
- RoseInEdge e = *in_edges(iv, ig).first;
- if (ig[e].graph) {
- findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
- }
- }
-
- u32 delay = iv_info.delay;
- rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
-
- u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
-
- DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
- literalId, iv_info.s.length(), delay, iv_info.min_offset,
- iv_info.max_offset, dumpString(iv_info.s).c_str());
-
- createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
- iv_info.max_offset, literalId, delay, bd);
-}
-
-static
+ // longer than the benefits max width.
+ assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
+ !mixed_sensitivity(iv_info.s));
+
+ // Rose graph construction process should have given us a min_offset.
+ assert(iv_info.min_offset > 0);
+
+ if (use_eod_table) {
+ goto floating;
+ }
+
+ DEBUG_PRINTF("rose find vertex\n");
+ if (parents.size() == 1) {
+ const RoseVertex u = parents.front().first;
+ const RoseInEdgeProps &ep = ig[parents.front().second];
+
+ if (!tbi->isAnyStart(u)) {
+ goto floating;
+ }
+
+ if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
+ goto floating;
+ }
+ if (ep.graph && !isAnchored(*ep.graph)) {
+ goto floating;
+ }
+
+ DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
+ ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
+
+ /* need to check if putting iv into the anchored table would create
+ * any bad_overlap relationships with its successor literals */
+ for (const auto &e : out_edges_range(iv, ig)) {
+ RoseInVertex t = target(e, ig);
+ u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
+ DEBUG_PRINTF("found overlap of %u\n", overlap);
+ if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
+ goto floating;
+ }
+ }
+
+ RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
+ if (v != RoseGraph::null_vertex()) {
+ DEBUG_PRINTF("add anchored literal vertex\n");
+ vertex_map[iv].push_back(v);
+ return;
+ }
+ }
+
+floating:
+ vector<u8> msk, cmp;
+ if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
+ RoseInEdge e = *in_edges(iv, ig).first;
+ if (ig[e].graph) {
+ findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
+ }
+ }
+
+ u32 delay = iv_info.delay;
+ rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
+
+ u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
+
+ DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
+ literalId, iv_info.s.length(), delay, iv_info.min_offset,
+ iv_info.max_offset, dumpString(iv_info.s).c_str());
+
+ createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
+ iv_info.max_offset, literalId, delay, bd);
+}
+
+static
unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build,
map<flat_set<ReportID>, ReportID> &remap) {
- assert(generates_callbacks(h));
+ assert(generates_callbacks(h));
assert(!in_degree(h.accept, h));
auto gg = cloneHolder(h);
NGHolder &g = *gg;
g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
-
- // Move acceptEod edges over to accept.
- vector<NFAEdge> dead;
+
+ // Move acceptEod edges over to accept.
+ vector<NFAEdge> dead;
for (const auto &e : in_edges_range(g.acceptEod, g)) {
NFAVertex u = source(e, g);
if (u == g.accept) {
- continue;
- }
+ continue;
+ }
add_edge_if_not_present(u, g.accept, g);
- dead.push_back(e);
+ dead.push_back(e);
if (!contains(remap, g[u].reports)) {
remap[g[u].reports] = build.getNewNfaReport();
}
g[u].reports = { remap[g[u].reports] };
- }
-
+ }
+
remove_edges(dead, g);
return gg;
-}
-
-static
+}
+
+static
u32 getEodEventID(RoseBuildImpl &build) {
// Allocate the EOD event if it hasn't been already.
if (build.eod_event_literal_id == MO_INVALID_IDX) {
@@ -730,18 +730,18 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
}
static
-void doRoseAcceptVertex(RoseBuildImpl *tbi,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, const RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
- assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
-
- RoseGraph &g = tbi->g;
-
- for (const auto &pv : parents) {
- RoseVertex u = pv.first;
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
-
+void doRoseAcceptVertex(RoseBuildImpl *tbi,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, const RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+ assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
+
+ RoseGraph &g = tbi->g;
+
+ for (const auto &pv : parents) {
+ RoseVertex u = pv.first;
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+
/* We need to duplicate the parent vertices if:
*
* 1) It already has a suffix, etc as we are going to add the specified
@@ -753,42 +753,42 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
* incompatible with normal successors. As accepts are processed last we
* do not need to worry about other normal successors being added later.
*/
- if (g[u].suffix || !g[u].reports.empty()
+ if (g[u].suffix || !g[u].reports.empty()
|| (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g)
&& !edge_props.graph)
- || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
+ || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index);
- assert(!tbi->isAnyStart(u));
- u = duplicate(tbi, u);
- g[u].suffix.reset();
- g[u].eod_accept = false;
- }
-
- assert(!g[u].suffix);
- if (ig[iv].type == RIV_ACCEPT) {
- assert(!tbi->isAnyStart(u));
+ assert(!tbi->isAnyStart(u));
+ u = duplicate(tbi, u);
+ g[u].suffix.reset();
+ g[u].eod_accept = false;
+ }
+
+ assert(!g[u].suffix);
+ if (ig[iv].type == RIV_ACCEPT) {
+ assert(!tbi->isAnyStart(u));
if (edge_props.dfa) {
DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index);
g[u].suffix.rdfa = edge_props.dfa;
- g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
- g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
- } else if (edge_props.graph) {
+ g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
+ g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
+ } else if (edge_props.graph) {
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
- g[u].suffix.graph = edge_props.graph;
- assert(g[u].suffix.graph->kind == NFA_SUFFIX);
- /* TODO: set dfa_(min|max)_width */
- } else if (edge_props.haig) {
+ g[u].suffix.graph = edge_props.graph;
+ assert(g[u].suffix.graph->kind == NFA_SUFFIX);
+ /* TODO: set dfa_(min|max)_width */
+ } else if (edge_props.haig) {
DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index);
- g[u].suffix.haig = edge_props.haig;
- } else {
+ g[u].suffix.haig = edge_props.haig;
+ } else {
DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index);
- assert(!g[u].eod_accept);
- g[u].reports = ig[iv].reports;
- }
- } else {
- assert(ig[iv].type == RIV_ACCEPT_EOD);
+ assert(!g[u].eod_accept);
+ g[u].reports = ig[iv].reports;
+ }
+ } else {
+ assert(ig[iv].type == RIV_ACCEPT_EOD);
assert(!edge_props.haig);
-
+
if (!edge_props.graph) {
RoseVertex w = add_vertex(g);
g[w].eod_accept = true;
@@ -809,129 +809,129 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
if (tbi->isInETable(u)) {
assert(h.kind == NFA_SUFFIX);
- assert(!tbi->isAnyStart(u));
- /* etable can't/shouldn't use eod event */
+ assert(!tbi->isAnyStart(u));
+ /* etable can't/shouldn't use eod event */
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
- g[u].suffix.graph = edge_props.graph;
- continue;
- }
-
+ g[u].suffix.graph = edge_props.graph;
+ continue;
+ }
+
makeEodEventLeftfix(*tbi, u, h);
- }
- }
-}
-
-static
-bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
- u32 *max_len, const CompileContext &cc) {
- map<RoseInVertex, u32> max_depth_from_eod;
- *max_len = 0;
-
- reverse(topo.begin(), topo.end()); /* we want to start at accept end */
-
- for (auto v : topo) {
- u32 v_depth = 0;
-
- if (ig[v].type == RIV_ACCEPT) {
- DEBUG_PRINTF("[ACCEPT]\n");
- for (const auto &e : in_edges_range(v, ig)) {
- if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
- DEBUG_PRINTF("floating accept\n");
- return false;
- }
- }
- }
-
- switch (ig[v].type) {
- case RIV_LITERAL:
- DEBUG_PRINTF("[LITERAL]\n");
- break;
- case RIV_START:
- DEBUG_PRINTF("[START]\n");
- break;
- case RIV_ANCHORED_START:
- DEBUG_PRINTF("[ANCHOR]\n");
- break;
- case RIV_ACCEPT:
- break;
- case RIV_ACCEPT_EOD:
- DEBUG_PRINTF("[EOD]\n");
- break;
- default:
- assert(0);
- DEBUG_PRINTF("????\n");
- return false;
- }
-
- for (const auto &e : out_edges_range(v, ig)) {
- RoseInVertex t = target(e, ig);
-
- assert(contains(max_depth_from_eod, t));
- u64a max_width;
-
- if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
- /* start itself doesn't need to be in history buffer
- * just need to make sure all succ literals are ok */
- if (ig[t].type == RIV_LITERAL) {
- max_width = ig[t].s.length();
- } else {
- max_width = 0;
- }
- if (ig[e].graph) {
- depth graph_max_width = findMaxWidth(*ig[e].graph);
- DEBUG_PRINTF("graph max width %s, lag %u\n",
- graph_max_width.str().c_str(),
- ig[e].graph_lag);
- if (!graph_max_width.is_finite()) {
- DEBUG_PRINTF("fail due to graph with inf max width\n");
- return false;
- }
- max_width += graph_max_width;
- }
- } else if (ig[e].haig) {
- DEBUG_PRINTF("fail due to haig\n");
- return false;
- } else if (ig[e].graph) {
- depth graph_max_width = findMaxWidth(*ig[e].graph);
- DEBUG_PRINTF("graph max width %s, lag %u\n",
- graph_max_width.str().c_str(), ig[e].graph_lag);
- if (!graph_max_width.is_finite()) {
- DEBUG_PRINTF("fail due to graph with inf max width\n");
- return false;
- }
- max_width = ig[e].graph_lag + graph_max_width;
- } else {
- max_width = ig[e].maxBound;
- if (ig[t].type == RIV_LITERAL) {
- max_width += ig[t].s.length();
- }
- }
-
- max_width += max_depth_from_eod[t];
- if (max_width > ROSE_BOUND_INF) {
- max_width = ROSE_BOUND_INF;
- }
-
- DEBUG_PRINTF("max_width=%llu\n", max_width);
-
- ENSURE_AT_LEAST(&v_depth, (u32)max_width);
- }
-
+ }
+ }
+}
+
+static
+bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
+ u32 *max_len, const CompileContext &cc) {
+ map<RoseInVertex, u32> max_depth_from_eod;
+ *max_len = 0;
+
+ reverse(topo.begin(), topo.end()); /* we want to start at accept end */
+
+ for (auto v : topo) {
+ u32 v_depth = 0;
+
+ if (ig[v].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("[ACCEPT]\n");
+ for (const auto &e : in_edges_range(v, ig)) {
+ if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
+ DEBUG_PRINTF("floating accept\n");
+ return false;
+ }
+ }
+ }
+
+ switch (ig[v].type) {
+ case RIV_LITERAL:
+ DEBUG_PRINTF("[LITERAL]\n");
+ break;
+ case RIV_START:
+ DEBUG_PRINTF("[START]\n");
+ break;
+ case RIV_ANCHORED_START:
+ DEBUG_PRINTF("[ANCHOR]\n");
+ break;
+ case RIV_ACCEPT:
+ break;
+ case RIV_ACCEPT_EOD:
+ DEBUG_PRINTF("[EOD]\n");
+ break;
+ default:
+ assert(0);
+ DEBUG_PRINTF("????\n");
+ return false;
+ }
+
+ for (const auto &e : out_edges_range(v, ig)) {
+ RoseInVertex t = target(e, ig);
+
+ assert(contains(max_depth_from_eod, t));
+ u64a max_width;
+
+ if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
+ /* start itself doesn't need to be in history buffer
+ * just need to make sure all succ literals are ok */
+ if (ig[t].type == RIV_LITERAL) {
+ max_width = ig[t].s.length();
+ } else {
+ max_width = 0;
+ }
+ if (ig[e].graph) {
+ depth graph_max_width = findMaxWidth(*ig[e].graph);
+ DEBUG_PRINTF("graph max width %s, lag %u\n",
+ graph_max_width.str().c_str(),
+ ig[e].graph_lag);
+ if (!graph_max_width.is_finite()) {
+ DEBUG_PRINTF("fail due to graph with inf max width\n");
+ return false;
+ }
+ max_width += graph_max_width;
+ }
+ } else if (ig[e].haig) {
+ DEBUG_PRINTF("fail due to haig\n");
+ return false;
+ } else if (ig[e].graph) {
+ depth graph_max_width = findMaxWidth(*ig[e].graph);
+ DEBUG_PRINTF("graph max width %s, lag %u\n",
+ graph_max_width.str().c_str(), ig[e].graph_lag);
+ if (!graph_max_width.is_finite()) {
+ DEBUG_PRINTF("fail due to graph with inf max width\n");
+ return false;
+ }
+ max_width = ig[e].graph_lag + graph_max_width;
+ } else {
+ max_width = ig[e].maxBound;
+ if (ig[t].type == RIV_LITERAL) {
+ max_width += ig[t].s.length();
+ }
+ }
+
+ max_width += max_depth_from_eod[t];
+ if (max_width > ROSE_BOUND_INF) {
+ max_width = ROSE_BOUND_INF;
+ }
+
+ DEBUG_PRINTF("max_width=%llu\n", max_width);
+
+ ENSURE_AT_LEAST(&v_depth, (u32)max_width);
+ }
+
if (v_depth == ROSE_BOUND_INF
|| v_depth > cc.grey.maxHistoryAvailable) {
- DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
- return false;
- }
-
- max_depth_from_eod[v] = v_depth;
- ENSURE_AT_LEAST(max_len, v_depth);
- }
-
- DEBUG_PRINTF("to the eod table and beyond\n");
- return true;
-}
-
-static
+ DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
+ return false;
+ }
+
+ max_depth_from_eod[v] = v_depth;
+ ENSURE_AT_LEAST(max_len, v_depth);
+ }
+
+ DEBUG_PRINTF("to the eod table and beyond\n");
+ return true;
+}
+
+static
void shift_accepts_to_end(const RoseInGraph &ig,
vector<RoseInVertex> &topo_order) {
stable_partition(begin(topo_order), end(topo_order),
@@ -939,761 +939,761 @@ void shift_accepts_to_end(const RoseInGraph &ig,
}
static
-void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
-
- /* add the pattern in to the main rose graph */
- DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
-
- /* Note: an input vertex may need to create several rose vertices. This is
- * primarily because a RoseVertex can only have 1 one leftfix */
- map<RoseInVertex, vector<RoseVertex> > vertex_map;
-
- vector<RoseInVertex> v_order = topo_order(ig);
+void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+
+ /* add the pattern in to the main rose graph */
+ DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
+
+ /* Note: an input vertex may need to create several rose vertices. This is
+ * primarily because a RoseVertex can only have 1 one leftfix */
+ map<RoseInVertex, vector<RoseVertex> > vertex_map;
+
+ vector<RoseInVertex> v_order = topo_order(ig);
shift_accepts_to_end(ig, v_order);
-
- u32 eod_space_required;
- bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
- tbi->cc);
- if (use_eod_table) {
- ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
- }
-
- assert(ig[v_order.front()].type == RIV_START
- || ig[v_order.front()].type == RIV_ANCHORED_START);
-
- for (RoseInVertex iv : v_order) {
+
+ u32 eod_space_required;
+ bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
+ tbi->cc);
+ if (use_eod_table) {
+ ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
+ }
+
+ assert(ig[v_order.front()].type == RIV_START
+ || ig[v_order.front()].type == RIV_ANCHORED_START);
+
+ for (RoseInVertex iv : v_order) {
DEBUG_PRINTF("vertex %zu\n", ig[iv].index);
-
- if (ig[iv].type == RIV_START) {
- DEBUG_PRINTF("is root\n");
- vertex_map[iv].push_back(tbi->root);
- continue;
- } else if (ig[iv].type == RIV_ANCHORED_START) {
- DEBUG_PRINTF("is anchored root\n");
- vertex_map[iv].push_back(tbi->anchored_root);
- continue;
- }
-
- vector<pair<RoseVertex, RoseInEdge> > parents;
- for (const auto &e : in_edges_range(iv, ig)) {
- RoseInVertex u = source(e, ig);
- assert(contains(vertex_map, u));
- const vector<RoseVertex> &images = vertex_map[u];
-
- // We should have no dupes.
+
+ if (ig[iv].type == RIV_START) {
+ DEBUG_PRINTF("is root\n");
+ vertex_map[iv].push_back(tbi->root);
+ continue;
+ } else if (ig[iv].type == RIV_ANCHORED_START) {
+ DEBUG_PRINTF("is anchored root\n");
+ vertex_map[iv].push_back(tbi->anchored_root);
+ continue;
+ }
+
+ vector<pair<RoseVertex, RoseInEdge> > parents;
+ for (const auto &e : in_edges_range(iv, ig)) {
+ RoseInVertex u = source(e, ig);
+ assert(contains(vertex_map, u));
+ const vector<RoseVertex> &images = vertex_map[u];
+
+ // We should have no dupes.
assert(set<RoseVertex>(images.begin(), images.end()).size()
- == images.size());
-
- for (auto v_image : images) {
- // v_image should NOT already be in our parents list.
- assert(find_if(parents.begin(), parents.end(),
- [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
- return p.first == v_image;
- }) == parents.end());
-
- parents.emplace_back(v_image, e);
-
- if (tbi->isAnchored(v_image)) {
- assert(!use_eod_table);
- u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
- assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
- ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
- overlap);
- }
- }
- }
-
- if (ig[iv].type == RIV_LITERAL) {
- DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
- assert(!isLeafNode(iv, ig));
- doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
- bd);
- } else {
- if (ig[iv].type == RIV_ACCEPT) {
- DEBUG_PRINTF("ACCEPT\n");
- } else {
- assert(ig[iv].type == RIV_ACCEPT_EOD);
- DEBUG_PRINTF("ACCEPT_EOD\n");
- }
- assert(isLeafNode(iv, ig)); /* accepts are final */
- doRoseAcceptVertex(tbi, parents, iv, bd);
- }
- }
- DEBUG_PRINTF("done\n");
-}
-
-template<typename GraphT>
-static
-bool empty(const GraphT &g) {
- typename GraphT::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(g);
- return vi == ve;
-}
-
-static
+ == images.size());
+
+ for (auto v_image : images) {
+ // v_image should NOT already be in our parents list.
+ assert(find_if(parents.begin(), parents.end(),
+ [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
+ return p.first == v_image;
+ }) == parents.end());
+
+ parents.emplace_back(v_image, e);
+
+ if (tbi->isAnchored(v_image)) {
+ assert(!use_eod_table);
+ u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
+ assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
+ ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
+ overlap);
+ }
+ }
+ }
+
+ if (ig[iv].type == RIV_LITERAL) {
+ DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
+ assert(!isLeafNode(iv, ig));
+ doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
+ bd);
+ } else {
+ if (ig[iv].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("ACCEPT\n");
+ } else {
+ assert(ig[iv].type == RIV_ACCEPT_EOD);
+ DEBUG_PRINTF("ACCEPT_EOD\n");
+ }
+ assert(isLeafNode(iv, ig)); /* accepts are final */
+ doRoseAcceptVertex(tbi, parents, iv, bd);
+ }
+ }
+ DEBUG_PRINTF("done\n");
+}
+
+template<typename GraphT>
+static
+bool empty(const GraphT &g) {
+ typename GraphT::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(g);
+ return vi == ve;
+}
+
+static
bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm,
const CompileContext &cc) {
- if (isImplementableNFA(h, &rm, cc)) {
- return true;
- }
-
- if (prefilter && cc.grey.prefilterReductions) {
- // If we're prefiltering, we can have another go with a reduced graph.
- UNUSED size_t numBefore = num_vertices(h);
- prefilterReductions(h, cc);
- UNUSED size_t numAfter = num_vertices(h);
- DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
-
- if (isImplementableNFA(h, &rm, cc)) {
- return true;
- }
- }
-
- DEBUG_PRINTF("unable to build engine\n");
- return false;
-}
-
-static
-bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
- assert(in_degree(v, ig));
-
- for (const auto &e : in_edges_range(v, ig)) {
- if (ig[e].graph || ig[e].haig) {
- DEBUG_PRINTF("edge graph\n");
- return true;
- }
- if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
- DEBUG_PRINTF("edge bounds\n");
- return true;
- }
-
- RoseInVertex u = source(e, ig);
- if (ig[u].type == RIV_START) {
- continue;
- }
- if (ig[u].type != RIV_LITERAL) {
- DEBUG_PRINTF("unsafe pred vertex\n");
- return true;
- }
- if (ig[u].delay) {
- DEBUG_PRINTF("pred has delay\n");
- return true;
- }
- }
-
- return false;
-}
-
-static
-u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
- /* overly conservative if only part of the string is nocase */
- string pred = pred_key.get_string();
- string lit = lit_key.get_string();
-
- if (pred_key.any_nocase() || lit_key.any_nocase()) {
- upperString(pred);
- upperString(lit);
- }
-
- string::size_type last = pred.rfind(lit);
- if (last == string::npos) {
- return MAX_DELAY;
- }
-
- u32 raw = pred.size() - last - 1;
- return MIN(raw, MAX_DELAY);
-}
-
-static
+ if (isImplementableNFA(h, &rm, cc)) {
+ return true;
+ }
+
+ if (prefilter && cc.grey.prefilterReductions) {
+ // If we're prefiltering, we can have another go with a reduced graph.
+ UNUSED size_t numBefore = num_vertices(h);
+ prefilterReductions(h, cc);
+ UNUSED size_t numAfter = num_vertices(h);
+ DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
+
+ if (isImplementableNFA(h, &rm, cc)) {
+ return true;
+ }
+ }
+
+ DEBUG_PRINTF("unable to build engine\n");
+ return false;
+}
+
+static
+bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
+ assert(in_degree(v, ig));
+
+ for (const auto &e : in_edges_range(v, ig)) {
+ if (ig[e].graph || ig[e].haig) {
+ DEBUG_PRINTF("edge graph\n");
+ return true;
+ }
+ if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
+ DEBUG_PRINTF("edge bounds\n");
+ return true;
+ }
+
+ RoseInVertex u = source(e, ig);
+ if (ig[u].type == RIV_START) {
+ continue;
+ }
+ if (ig[u].type != RIV_LITERAL) {
+ DEBUG_PRINTF("unsafe pred vertex\n");
+ return true;
+ }
+ if (ig[u].delay) {
+ DEBUG_PRINTF("pred has delay\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
+ /* overly conservative if only part of the string is nocase */
+ string pred = pred_key.get_string();
+ string lit = lit_key.get_string();
+
+ if (pred_key.any_nocase() || lit_key.any_nocase()) {
+ upperString(pred);
+ upperString(lit);
+ }
+
+ string::size_type last = pred.rfind(lit);
+ if (last == string::npos) {
+ return MAX_DELAY;
+ }
+
+ u32 raw = pred.size() - last - 1;
+ return MIN(raw, MAX_DELAY);
+}
+
+static
u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) {
- // First, check the overlap constraints on (u,v).
- size_t max_delay;
- if (ig[v].type == RIV_LITERAL) {
- DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
- max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
- } else if (ig[v].type == RIV_ACCEPT) {
- DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
- escapeString(ig[u].s).c_str());
- max_delay = MAX_DELAY;
- } else {
- assert(0);
- return 0;
- }
-
- DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
-
- // Now consider the predecessors of u.
- for (const auto &e : in_edges_range(u, ig)) {
- RoseInVertex w = source(e, ig);
- if (ig[w].type == RIV_START) {
- continue;
- }
- assert(ig[w].type == RIV_LITERAL);
- assert(ig[w].delay == 0);
-
- DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
- escapeString(ig[w].s).c_str(),
- escapeString(ig[u].s).c_str());
-
- // We cannot delay the literal on u so much that a predecessor literal
- // could occur in the delayed region. For example, consider
- // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
- // 'barman' could occur in the input string and race with 'foobar', as
- // in 'foobarman'.
-
- const size_t pred_len = ig[w].s.length();
- size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
- DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
- assert(overlap <= pred_len);
- size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
- DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
- max_lit_delay);
- max_delay = min(max_delay, max_lit_delay);
- }
-
- DEBUG_PRINTF("max_delay=%zu\n", max_delay);
- assert(max_delay <= MAX_DELAY);
- return max_delay;
-}
-
-static
-bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
- const CompileContext &cc, u32 *delay_out) {
- const u32 max_history =
- cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
-
- const RoseInVertex u = source(e, ig), v = target(e, ig);
- const u32 graph_lag = ig[e].graph_lag;
-
- // Clone a copy of the graph, as we need to be able to roll back this
- // operation.
- NGHolder h;
- cloneHolder(h, *ig[e].graph);
-
- DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
- DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
- graph_lag);
-
- assert(graph_lag <= ig[v].s.length());
- if (graph_lag < ig[v].s.length()) {
- size_t len = ig[v].s.length() - graph_lag;
- ue2_literal lit(ig[v].s.substr(0, len));
- DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
- u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
- if (delay2 == MO_INVALID_IDX) {
- DEBUG_PRINTF("couldn't remove trailing literal\n");
- return false;
- }
- if (delay2 != len) {
- DEBUG_PRINTF("couldn't remove entire trailing literal\n");
- return false;
- }
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("graph is not repeat\n");
- return false;
- }
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- if (!repeat.bounds.max.is_infinite()) {
- DEBUG_PRINTF("not inf\n");
- return false;
- }
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- return false;
- }
-
- u32 delay = ig[v].s.length() + repeat.bounds.min;
- if (delay > MAX_DELAY) {
- DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
- return false;
- }
-
- if (delay + ig[u].s.length() - 1 > max_history) {
- DEBUG_PRINTF("delay too large for history\n");
- return false;
- }
-
- *delay_out = delay;
- return true;
-}
-
-static
-void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- for (auto u : vertices_range(ig)) {
- if (ig[u].type != RIV_LITERAL) {
- continue;
- }
- if (out_degree(u, ig) != 1) {
- continue;
- }
-
- RoseInEdge e = *out_edges(u, ig).first;
- RoseInVertex v = target(e, ig);
- if (ig[v].type != RIV_LITERAL) {
- continue;
- }
- if (ig[e].haig) {
- continue;
- }
- if (!ig[e].graph) {
- continue;
- }
-
- if (predsAreDelaySensitive(ig, u)) {
- DEBUG_PRINTF("preds are delay sensitive\n");
- continue;
- }
-
- u32 max_delay = findMaxSafeDelay(ig, u, v);
-
- DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
-
- u32 delay = 0;
- if (!transformInfixToDelay(ig, e, cc, &delay)) {
- continue;
- }
-
- if (delay > max_delay) {
- DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
- continue;
- }
-
- DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
- ig[u].delay = delay;
- ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
- ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
- ig[e].graph_lag = 0;
- ig[e].graph.reset();
- ig[e].minBound = 0;
- ig[e].maxBound = ROSE_BOUND_INF;
- }
-}
-
-static
-bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
- const CompileContext &cc, DepthMinMax *bounds) {
- const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
- : ROSE_BOUND_INF;
-
- const RoseInVertex v = target(e, ig);
- const u32 graph_lag = ig[e].graph_lag;
-
- // Clone a copy of the graph, as we need to be able to roll back this
- // operation.
- NGHolder h;
- cloneHolder(h, *ig[e].graph);
-
- DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
- graph_lag);
-
- assert(graph_lag <= ig[v].s.length());
- if (graph_lag < ig[v].s.length()) {
- size_t len = ig[v].s.length() - graph_lag;
- ue2_literal lit(ig[v].s.substr(0, len));
- DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
- u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
- if (delay2 == MO_INVALID_IDX) {
- DEBUG_PRINTF("couldn't remove trailing literal\n");
- return false;
- }
- if (delay2 != len) {
- DEBUG_PRINTF("couldn't remove entire trailing literal\n");
- return false;
- }
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("graph is not repeat\n");
- return false;
- }
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- if (!repeat.bounds.max.is_infinite()) {
- DEBUG_PRINTF("not inf\n");
- return false;
- }
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- return false;
- }
-
- *bounds = repeat.bounds;
- return true;
-}
-
-static
-void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
- const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- for (const auto &e : edges_range(ig)) {
- const RoseInVertex u = source(e, ig);
- const RoseInVertex v = target(e, ig);
-
- if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
- continue;
- }
- if (ig[e].haig || !ig[e].graph) {
- continue;
- }
-
- if (ig[u].min_offset != ig[u].max_offset) {
- DEBUG_PRINTF("u not fixed depth\n");
- continue;
- }
-
- DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
-
- DepthMinMax bounds;
- if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
- continue;
- }
-
- DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
- bounds.str().c_str());
- ig[e].graph_lag = 0;
- ig[e].graph.reset();
- ig[e].minBound = bounds.min;
- ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
- : ROSE_BOUND_INF;
- bd.anch_history_edges.insert(e);
- }
-}
-
-/**
- * \brief Transform small trailing dot repeat suffixes into delay on the last
- * literal.
- *
- * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
- * rather than having a suffix to handle the dot.
- *
- * This transformation looks for literal->accept edges and transforms them if
- * appropriate. It doesn't handle complex cases where the literal has more than
- * one successor.
- */
-static
-void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
- : ROSE_BOUND_INF;
-
- set<RoseInVertex> modified_accepts; // may be dead after transform
-
- for (auto u : vertices_range(ig)) {
- if (ig[u].type != RIV_LITERAL) {
- continue;
- }
- if (out_degree(u, ig) != 1) {
- continue;
- }
-
- RoseInEdge e = *out_edges(u, ig).first;
- RoseInVertex v = target(e, ig);
- if (ig[v].type != RIV_ACCEPT) {
- continue;
- }
- if (ig[e].haig) {
- continue;
- }
- if (!ig[e].graph) {
- continue;
- }
-
- if (predsAreDelaySensitive(ig, u)) {
- DEBUG_PRINTF("preds are delay sensitive\n");
- continue;
- }
-
- DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
- escapeString(ig[u].s).c_str());
-
- const NGHolder &h = *ig[e].graph;
- const set<ReportID> reports = all_reports(h);
- if (reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- continue;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("suffix graph is not repeat\n");
- continue;
- }
- DEBUG_PRINTF("suffix graph is %s repeat\n",
- repeat.bounds.str().c_str());
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- continue;
- }
-
- if (repeat.bounds.min != repeat.bounds.max ||
- repeat.bounds.min > depth(MAX_DELAY)) {
- DEBUG_PRINTF("repeat is variable or too large\n");
- continue;
- }
-
- u32 max_delay = findMaxSafeDelay(ig, u, v);
-
- u32 delay = repeat.bounds.min;
- if (delay > max_delay) {
- DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
- continue;
- }
-
- if (delay + ig[u].s.length() - 1 > max_history) {
- DEBUG_PRINTF("delay too large for history\n");
- continue;
- }
-
- DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
- ig[u].delay = delay;
- ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
- ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
-
- // Construct a new accept vertex for this report and remove edge e.
- // (This allows us to cope if v has more than one in-edge).
- RoseInVertex v2 =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(u, v2, ig);
- remove_edge(e, ig);
- modified_accepts.insert(v);
- }
-
- DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
-
- for (auto v : modified_accepts) {
- if (in_degree(v, ig) == 0) {
- DEBUG_PRINTF("removing accept vertex with no preds\n");
- remove_vertex(v, ig);
- }
- }
-}
-
-#ifndef NDEBUG
-static
-bool validateKinds(const RoseInGraph &g) {
- for (const auto &e : edges_range(g)) {
- if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
- return false;
- }
- }
-
- return true;
-}
-#endif
-
+ // First, check the overlap constraints on (u,v).
+ size_t max_delay;
+ if (ig[v].type == RIV_LITERAL) {
+ DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+ max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
+ } else if (ig[v].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
+ escapeString(ig[u].s).c_str());
+ max_delay = MAX_DELAY;
+ } else {
+ assert(0);
+ return 0;
+ }
+
+ DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
+
+ // Now consider the predecessors of u.
+ for (const auto &e : in_edges_range(u, ig)) {
+ RoseInVertex w = source(e, ig);
+ if (ig[w].type == RIV_START) {
+ continue;
+ }
+ assert(ig[w].type == RIV_LITERAL);
+ assert(ig[w].delay == 0);
+
+ DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
+ escapeString(ig[w].s).c_str(),
+ escapeString(ig[u].s).c_str());
+
+ // We cannot delay the literal on u so much that a predecessor literal
+ // could occur in the delayed region. For example, consider
+ // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
+ // 'barman' could occur in the input string and race with 'foobar', as
+ // in 'foobarman'.
+
+ const size_t pred_len = ig[w].s.length();
+ size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
+ DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
+ assert(overlap <= pred_len);
+ size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
+ DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
+ max_lit_delay);
+ max_delay = min(max_delay, max_lit_delay);
+ }
+
+ DEBUG_PRINTF("max_delay=%zu\n", max_delay);
+ assert(max_delay <= MAX_DELAY);
+ return max_delay;
+}
+
+static
+bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
+ const CompileContext &cc, u32 *delay_out) {
+ const u32 max_history =
+ cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
+
+ const RoseInVertex u = source(e, ig), v = target(e, ig);
+ const u32 graph_lag = ig[e].graph_lag;
+
+ // Clone a copy of the graph, as we need to be able to roll back this
+ // operation.
+ NGHolder h;
+ cloneHolder(h, *ig[e].graph);
+
+ DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
+ DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
+ graph_lag);
+
+ assert(graph_lag <= ig[v].s.length());
+ if (graph_lag < ig[v].s.length()) {
+ size_t len = ig[v].s.length() - graph_lag;
+ ue2_literal lit(ig[v].s.substr(0, len));
+ DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
+ u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
+ if (delay2 == MO_INVALID_IDX) {
+ DEBUG_PRINTF("couldn't remove trailing literal\n");
+ return false;
+ }
+ if (delay2 != len) {
+ DEBUG_PRINTF("couldn't remove entire trailing literal\n");
+ return false;
+ }
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("graph is not repeat\n");
+ return false;
+ }
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ if (!repeat.bounds.max.is_infinite()) {
+ DEBUG_PRINTF("not inf\n");
+ return false;
+ }
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ return false;
+ }
+
+ u32 delay = ig[v].s.length() + repeat.bounds.min;
+ if (delay > MAX_DELAY) {
+ DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
+ return false;
+ }
+
+ if (delay + ig[u].s.length() - 1 > max_history) {
+ DEBUG_PRINTF("delay too large for history\n");
+ return false;
+ }
+
+ *delay_out = delay;
+ return true;
+}
+
+static
+void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ for (auto u : vertices_range(ig)) {
+ if (ig[u].type != RIV_LITERAL) {
+ continue;
+ }
+ if (out_degree(u, ig) != 1) {
+ continue;
+ }
+
+ RoseInEdge e = *out_edges(u, ig).first;
+ RoseInVertex v = target(e, ig);
+ if (ig[v].type != RIV_LITERAL) {
+ continue;
+ }
+ if (ig[e].haig) {
+ continue;
+ }
+ if (!ig[e].graph) {
+ continue;
+ }
+
+ if (predsAreDelaySensitive(ig, u)) {
+ DEBUG_PRINTF("preds are delay sensitive\n");
+ continue;
+ }
+
+ u32 max_delay = findMaxSafeDelay(ig, u, v);
+
+ DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+
+ u32 delay = 0;
+ if (!transformInfixToDelay(ig, e, cc, &delay)) {
+ continue;
+ }
+
+ if (delay > max_delay) {
+ DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
+ continue;
+ }
+
+ DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
+ ig[u].delay = delay;
+ ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
+ ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
+ ig[e].graph_lag = 0;
+ ig[e].graph.reset();
+ ig[e].minBound = 0;
+ ig[e].maxBound = ROSE_BOUND_INF;
+ }
+}
+
+static
+bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
+ const CompileContext &cc, DepthMinMax *bounds) {
+ const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
+ : ROSE_BOUND_INF;
+
+ const RoseInVertex v = target(e, ig);
+ const u32 graph_lag = ig[e].graph_lag;
+
+ // Clone a copy of the graph, as we need to be able to roll back this
+ // operation.
+ NGHolder h;
+ cloneHolder(h, *ig[e].graph);
+
+ DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
+ graph_lag);
+
+ assert(graph_lag <= ig[v].s.length());
+ if (graph_lag < ig[v].s.length()) {
+ size_t len = ig[v].s.length() - graph_lag;
+ ue2_literal lit(ig[v].s.substr(0, len));
+ DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
+ u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
+ if (delay2 == MO_INVALID_IDX) {
+ DEBUG_PRINTF("couldn't remove trailing literal\n");
+ return false;
+ }
+ if (delay2 != len) {
+ DEBUG_PRINTF("couldn't remove entire trailing literal\n");
+ return false;
+ }
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("graph is not repeat\n");
+ return false;
+ }
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ if (!repeat.bounds.max.is_infinite()) {
+ DEBUG_PRINTF("not inf\n");
+ return false;
+ }
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ return false;
+ }
+
+ *bounds = repeat.bounds;
+ return true;
+}
+
+static
+void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
+ const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ for (const auto &e : edges_range(ig)) {
+ const RoseInVertex u = source(e, ig);
+ const RoseInVertex v = target(e, ig);
+
+ if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
+ continue;
+ }
+ if (ig[e].haig || !ig[e].graph) {
+ continue;
+ }
+
+ if (ig[u].min_offset != ig[u].max_offset) {
+ DEBUG_PRINTF("u not fixed depth\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+
+ DepthMinMax bounds;
+ if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
+ bounds.str().c_str());
+ ig[e].graph_lag = 0;
+ ig[e].graph.reset();
+ ig[e].minBound = bounds.min;
+ ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
+ : ROSE_BOUND_INF;
+ bd.anch_history_edges.insert(e);
+ }
+}
+
+/**
+ * \brief Transform small trailing dot repeat suffixes into delay on the last
+ * literal.
+ *
+ * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
+ * rather than having a suffix to handle the dot.
+ *
+ * This transformation looks for literal->accept edges and transforms them if
+ * appropriate. It doesn't handle complex cases where the literal has more than
+ * one successor.
+ */
+static
+void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
+ : ROSE_BOUND_INF;
+
+ set<RoseInVertex> modified_accepts; // may be dead after transform
+
+ for (auto u : vertices_range(ig)) {
+ if (ig[u].type != RIV_LITERAL) {
+ continue;
+ }
+ if (out_degree(u, ig) != 1) {
+ continue;
+ }
+
+ RoseInEdge e = *out_edges(u, ig).first;
+ RoseInVertex v = target(e, ig);
+ if (ig[v].type != RIV_ACCEPT) {
+ continue;
+ }
+ if (ig[e].haig) {
+ continue;
+ }
+ if (!ig[e].graph) {
+ continue;
+ }
+
+ if (predsAreDelaySensitive(ig, u)) {
+ DEBUG_PRINTF("preds are delay sensitive\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
+ escapeString(ig[u].s).c_str());
+
+ const NGHolder &h = *ig[e].graph;
+ const set<ReportID> reports = all_reports(h);
+ if (reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ continue;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("suffix graph is not repeat\n");
+ continue;
+ }
+ DEBUG_PRINTF("suffix graph is %s repeat\n",
+ repeat.bounds.str().c_str());
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ continue;
+ }
+
+ if (repeat.bounds.min != repeat.bounds.max ||
+ repeat.bounds.min > depth(MAX_DELAY)) {
+ DEBUG_PRINTF("repeat is variable or too large\n");
+ continue;
+ }
+
+ u32 max_delay = findMaxSafeDelay(ig, u, v);
+
+ u32 delay = repeat.bounds.min;
+ if (delay > max_delay) {
+ DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
+ continue;
+ }
+
+ if (delay + ig[u].s.length() - 1 > max_history) {
+ DEBUG_PRINTF("delay too large for history\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
+ ig[u].delay = delay;
+ ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
+ ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
+
+ // Construct a new accept vertex for this report and remove edge e.
+ // (This allows us to cope if v has more than one in-edge).
+ RoseInVertex v2 =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(u, v2, ig);
+ remove_edge(e, ig);
+ modified_accepts.insert(v);
+ }
+
+ DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
+
+ for (auto v : modified_accepts) {
+ if (in_degree(v, ig) == 0) {
+ DEBUG_PRINTF("removing accept vertex with no preds\n");
+ remove_vertex(v, ig);
+ }
+ }
+}
+
+#ifndef NDEBUG
+static
+bool validateKinds(const RoseInGraph &g) {
+ for (const auto &e : edges_range(g)) {
+ if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) {
- DEBUG_PRINTF("trying to rose\n");
- assert(validateKinds(ig));
+ DEBUG_PRINTF("trying to rose\n");
+ assert(validateKinds(ig));
assert(hasCorrectlyNumberedVertices(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
- const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
- RoseInGraph &in = *in_ptr;
-
- RoseBuildData bd(in, false);
-
- transformLiteralDelay(in, cc);
- transformAnchoredLiteralOverlap(in, bd, cc);
- transformSuffixDelay(in, cc);
-
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
+ const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
+ RoseInGraph &in = *in_ptr;
+
+ RoseBuildData bd(in, false);
+
+ transformLiteralDelay(in, cc);
+ transformAnchoredLiteralOverlap(in, bd, cc);
+ transformSuffixDelay(in, cc);
+
renumber_vertices(in);
assert(validateKinds(in));
-
+
insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs;
-
- for (const auto &e : edges_range(in)) {
- if (!in[e].graph) {
+
+ for (const auto &e : edges_range(in)) {
+ if (!in[e].graph) {
assert(!in[e].dfa);
assert(!in[e].haig);
- continue; // no graph
- }
-
+ continue; // no graph
+ }
+
if (in[e].haig || in[e].dfa) {
/* Early DFAs/Haigs are always implementable (we've already built
* the raw DFA). */
- continue;
- }
-
- NGHolder *h = in[e].graph.get();
+ continue;
+ }
+
+ NGHolder *h = in[e].graph.get();
assert(isCorrectlyTopped(*h));
- graphs[h].push_back(e);
- }
-
- vector<RoseInEdge> graph_edges;
-
+ graphs[h].push_back(e);
+ }
+
+ vector<RoseInEdge> graph_edges;
+
for (const auto &m : graphs) {
NGHolder *h = m.first;
if (!canImplementGraph(*h, prefilter, rm, cc)) {
- return false;
- }
+ return false;
+ }
insert(&graph_edges, graph_edges.end(), m.second);
- }
-
- /* we are now past the point of no return. We can start making irreversible
- changes to the rose graph, etc */
-
- for (const auto &e : graph_edges) {
- assert(in[e].graph);
- assert(!in[e].haig);
- NGHolder &h = *in[e].graph;
- DEBUG_PRINTF("handling %p\n", &h);
- assert(allMatchStatesHaveReports(h));
-
- if (!generates_callbacks(whatRoseIsThis(in, e))
- && in[target(e, in)].type != RIV_ACCEPT_EOD) {
+ }
+
+ /* we are now past the point of no return. We can start making irreversible
+ changes to the rose graph, etc */
+
+ for (const auto &e : graph_edges) {
+ assert(in[e].graph);
+ assert(!in[e].haig);
+ NGHolder &h = *in[e].graph;
+ DEBUG_PRINTF("handling %p\n", &h);
+ assert(allMatchStatesHaveReports(h));
+
+ if (!generates_callbacks(whatRoseIsThis(in, e))
+ && in[target(e, in)].type != RIV_ACCEPT_EOD) {
set_report(h, getNewNfaReport());
- }
- }
-
- populateRoseGraph(this, bd);
-
- return true;
-}
-
-bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
- DEBUG_PRINTF("rose is trying to consume a sombe\n");
- assert(validateKinds(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
- RoseBuildData bd(ig, true);
-
- for (const auto &e : edges_range(ig)) {
- if (!ig[e].graph) {
- continue; // no graph
- }
- DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
- assert(allMatchStatesHaveReports(*ig[e].graph));
- assert(ig[e].haig);
- }
-
- populateRoseGraph(this, bd);
-
- return true;
-}
-
-bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
- const ReportManager &rm, const CompileContext &cc) {
- assert(validateKinds(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
+ }
+ }
+
+ populateRoseGraph(this, bd);
+
+ return true;
+}
+
+bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
+ DEBUG_PRINTF("rose is trying to consume a sombe\n");
+ assert(validateKinds(ig));
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
+ RoseBuildData bd(ig, true);
+
+ for (const auto &e : edges_range(ig)) {
+ if (!ig[e].graph) {
+ continue; // no graph
+ }
+ DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
+ assert(allMatchStatesHaveReports(*ig[e].graph));
+ assert(ig[e].haig);
+ }
+
+ populateRoseGraph(this, bd);
+
+ return true;
+}
+
+bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
+ const ReportManager &rm, const CompileContext &cc) {
+ assert(validateKinds(ig));
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
vector<NGHolder *> graphs;
-
- for (const auto &e : edges_range(ig)) {
- if (!ig[e].graph) {
- continue; // no graph
- }
-
- if (ig[e].haig) {
- // Haigs are always implementable (we've already built the raw DFA).
- continue;
- }
-
+
+ for (const auto &e : edges_range(ig)) {
+ if (!ig[e].graph) {
+ continue; // no graph
+ }
+
+ if (ig[e].haig) {
+ // Haigs are always implementable (we've already built the raw DFA).
+ continue;
+ }
+
graphs.push_back(ig[e].graph.get());
- }
-
+ }
+
for (const auto &g : graphs) {
if (!canImplementGraph(*g, prefilter, rm, cc)) {
- return false;
- }
- }
-
- return true;
-}
-
-void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &reports) {
- assert(!reports.empty());
-
- if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
- lit.length() > 3) {
- DEBUG_PRINTF("adding as puffette\n");
- const CharReach &cr = *lit.begin();
- for (const auto &report : reports) {
- addOutfix(raw_puff(lit.length(), true, report, cr, true));
- }
-
- return;
- }
-
- RoseInGraph ig;
- RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
- RoseInVertex accept = add_vertex(
- eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
- : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
- add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
-
- calcVertexOffsets(ig);
-
- ig[accept].reports.insert(reports.begin(), reports.end());
-
- addRose(ig, false);
-}
-
-static
-u32 findMaxBAWidth(const NGHolder &h) {
- // Must be bi-anchored: no out-edges from startDs (other than its
- // self-loop), no in-edges to accept.
+ assert(!reports.empty());
+
+ if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
+ lit.length() > 3) {
+ DEBUG_PRINTF("adding as puffette\n");
+ const CharReach &cr = *lit.begin();
+ for (const auto &report : reports) {
+ addOutfix(raw_puff(lit.length(), true, report, cr, true));
+ }
+
+ return;
+ }
+
+ RoseInGraph ig;
+ RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
+ RoseInVertex accept = add_vertex(
+ eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
+ : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
+ add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
+
+ calcVertexOffsets(ig);
+
+ ig[accept].reports.insert(reports.begin(), reports.end());
+
+ addRose(ig, false);
+}
+
+static
+u32 findMaxBAWidth(const NGHolder &h) {
+ // Must be bi-anchored: no out-edges from startDs (other than its
+ // self-loop), no in-edges to accept.
if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) {
- return ROSE_BOUND_INF;
- }
- depth d = findMaxWidth(h);
- assert(d.is_reachable());
-
- if (!d.is_finite()) {
- return ROSE_BOUND_INF;
- }
- return d;
-}
-
-static
-void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
- const RoseBuildImpl &tbi) {
- outfix.maxBAWidth = findMaxBAWidth(h);
- outfix.minWidth = findMinWidth(h);
- outfix.maxWidth = findMaxWidth(h);
- outfix.maxOffset = findMaxOffset(h, tbi.rm);
- populateReverseAccelerationInfo(outfix.rev_info, h);
-}
-
+ return ROSE_BOUND_INF;
+ }
+ depth d = findMaxWidth(h);
+ assert(d.is_reachable());
+
+ if (!d.is_finite()) {
+ return ROSE_BOUND_INF;
+ }
+ return d;
+}
+
+static
+void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
+ const RoseBuildImpl &tbi) {
+ outfix.maxBAWidth = findMaxBAWidth(h);
+ outfix.minWidth = findMinWidth(h);
+ outfix.maxWidth = findMaxWidth(h);
+ outfix.maxOffset = findMaxOffset(h, tbi.rm);
+ populateReverseAccelerationInfo(outfix.rev_info, h);
+}
+
static
bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
map<flat_set<ReportID>, ReportID> report_remap;
@@ -1748,9 +1748,9 @@ bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
return true;
}
-bool RoseBuildImpl::addOutfix(const NGHolder &h) {
- DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
-
+bool RoseBuildImpl::addOutfix(const NGHolder &h) {
+ DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
+
/* TODO: handle more than one report */
if (!in_degree(h.accept, h)
&& all_reports(h).size() == 1
@@ -1758,241 +1758,241 @@ bool RoseBuildImpl::addOutfix(const NGHolder &h) {
return true;
}
- const u32 nfa_states = isImplementableNFA(h, &rm, cc);
- if (nfa_states) {
- DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
- } else {
- DEBUG_PRINTF("not implementable as an NFA\n");
- }
-
- bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
- || can_exhaust(h, rm); /* can be pruned */
-
- unique_ptr<raw_dfa> rdfa;
-
- if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
- (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
- rdfa = buildMcClellan(h, &rm, cc.grey);
- }
-
- if (!nfa_states && !rdfa) {
- DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
- return false;
- }
-
- if (rdfa) {
- outfixes.push_back(OutfixInfo(move(rdfa)));
- } else {
- outfixes.push_back(OutfixInfo(cloneHolder(h)));
- }
-
- populateOutfixInfo(outfixes.back(), h, *this);
-
- return true;
-}
-
-bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
- DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
-
- outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
- populateOutfixInfo(outfixes.back(), h, *this);
-
- return true; /* failure is not yet an option */
-}
-
-bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
- if (!mpv_outfix) {
+ const u32 nfa_states = isImplementableNFA(h, &rm, cc);
+ if (nfa_states) {
+ DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
+ } else {
+ DEBUG_PRINTF("not implementable as an NFA\n");
+ }
+
+ bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
+ || can_exhaust(h, rm); /* can be pruned */
+
+ unique_ptr<raw_dfa> rdfa;
+
+ if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
+ (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
+ rdfa = buildMcClellan(h, &rm, cc.grey);
+ }
+
+ if (!nfa_states && !rdfa) {
+ DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
+ return false;
+ }
+
+ if (rdfa) {
+ outfixes.push_back(OutfixInfo(move(rdfa)));
+ } else {
+ outfixes.push_back(OutfixInfo(cloneHolder(h)));
+ }
+
+ populateOutfixInfo(outfixes.back(), h, *this);
+
+ return true;
+}
+
+bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
+ DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
+
+ outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
+ populateOutfixInfo(outfixes.back(), h, *this);
+
+ return true; /* failure is not yet an option */
+}
+
+bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
+ if (!mpv_outfix) {
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
- }
-
+ }
+
auto *mpv = mpv_outfix->mpv();
assert(mpv);
mpv->puffettes.push_back(rp);
-
- mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
- mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
- mpv_outfix->maxWidth = rp.unbounded
- ? depth::infinity()
- : max(mpv_outfix->maxWidth, depth(rp.repeats));
-
- if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
- mpv_outfix->maxOffset = ROSE_BOUND_INF;
- } else {
- mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
- }
-
- return true; /* failure is not yet an option */
-}
-
-bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
- u32 *event_out) {
- if (!mpv_outfix) {
+
+ mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
+ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
+ mpv_outfix->maxWidth = rp.unbounded
+ ? depth::infinity()
+ : max(mpv_outfix->maxWidth, depth(rp.repeats));
+
+ if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
+ mpv_outfix->maxOffset = ROSE_BOUND_INF;
+ } else {
+ mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
+ }
+
+ return true; /* failure is not yet an option */
+}
+
+bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
+ u32 *event_out) {
+ if (!mpv_outfix) {
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
- }
-
+ }
+
auto *mpv = mpv_outfix->mpv();
assert(mpv);
mpv->triggered_puffettes.push_back(rp);
-
- mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
- mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
- mpv_outfix->maxWidth = rp.unbounded
- ? depth::infinity()
- : max(mpv_outfix->maxWidth, depth(rp.repeats));
-
- mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
- * the caller */
-
- *queue_out = mpv_outfix->get_queue(qif);
+
+ mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
+ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
+ mpv_outfix->maxWidth = rp.unbounded
+ ? depth::infinity()
+ : max(mpv_outfix->maxWidth, depth(rp.repeats));
+
+ mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
+ * the caller */
+
+ *queue_out = mpv_outfix->get_queue(qif);
*event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1;
-
- return true; /* failure is not yet an option */
-}
-
-static
-bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
+
+ return true; /* failure is not yet an option */
+}
+
+static
+bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
NFAVertex u,
- const vector<DepthMinMax> &vertexDepths,
- map<u32, DepthMinMax> &depthMap,
+ const vector<DepthMinMax> &vertexDepths,
+ map<u32, DepthMinMax> &depthMap,
map<NFAVertex, set<u32>> &reportMap,
- map<ReportID, u32> &allocated_reports,
- flat_set<u32> &added_lit_ids) {
- const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
+ map<ReportID, u32> &allocated_reports,
+ flat_set<u32> &added_lit_ids) {
+ const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
const size_t index = w[u].index;
assert(index < vertexDepths.size());
const DepthMinMax &d = vertexDepths.at(index);
-
- for (const auto &int_report : w[u].reports) {
- assert(int_report != MO_INVALID_IDX);
-
- u32 lit_id;
- if (!contains(allocated_reports, int_report)) {
- lit_id = tbi.getNewLiteralId();
- added_lit_ids.insert(lit_id);
- allocated_reports[int_report] = lit_id;
- } else {
- lit_id = allocated_reports[int_report];
- }
-
- reportMap[u].insert(lit_id);
-
- if (!contains(depthMap, lit_id)) {
- depthMap[lit_id] = d;
- } else {
- depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
- }
-
+
+ for (const auto &int_report : w[u].reports) {
+ assert(int_report != MO_INVALID_IDX);
+
+ u32 lit_id;
+ if (!contains(allocated_reports, int_report)) {
+ lit_id = tbi.getNewLiteralId();
+ added_lit_ids.insert(lit_id);
+ allocated_reports[int_report] = lit_id;
+ } else {
+ lit_id = allocated_reports[int_report];
+ }
+
+ reportMap[u].insert(lit_id);
+
+ if (!contains(depthMap, lit_id)) {
+ depthMap[lit_id] = d;
+ } else {
+ depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
+ }
+
if (depthMap[lit_id].max > max_anchored_depth) {
- DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
+ DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
depthMap[lit_id].max.str().c_str(),
- tbi.cc.grey.maxAnchoredRegion);
- return false;
- }
- }
-
- return true;
-}
-
-// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
-// added to support anchored NFAs. Assumes that they are a contiguous range at
-// the end of the RoseBuildImpl::literal_info vector.
-static
-void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
- if (lit_ids.empty()) {
- return;
- }
-
+ tbi.cc.grey.maxAnchoredRegion);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
+// added to support anchored NFAs. Assumes that they are a contiguous range at
+// the end of the RoseBuildImpl::literal_info vector.
+static
+void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
+ if (lit_ids.empty()) {
+ return;
+ }
+
DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size());
- // lit_ids should be a contiguous range.
- assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
+ // lit_ids should be a contiguous range.
+ assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
assert(*lit_ids.rbegin() == tbi.literals.size() - 1);
-
+
assert(all_of_in(lit_ids, [&](u32 lit_id) {
return lit_id < tbi.literal_info.size() &&
tbi.literals.at(lit_id).table == ROSE_ANCHORED &&
tbi.literal_info[lit_id].vertices.empty();
}));
-
+
tbi.literals.erase_back(lit_ids.size());
assert(tbi.literals.size() == *lit_ids.begin());
-
- // lit_ids should be at the end of tbi.literal_info.
- assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
- tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
-}
-
-bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
+
+ // lit_ids should be at the end of tbi.literal_info.
+ assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
+ tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
+}
+
+bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
auto vertexDepths = calcDepthsFrom(h, h.start);
-
- map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
- map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
- map<ReportID, u32> allocated_reports; /* report -> literal id */
- flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
-
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+
+ map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
+ map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
+ map<ReportID, u32> allocated_reports; /* report -> literal id */
+ flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
+
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
- reportMap, allocated_reports,
- added_lit_ids)) {
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
- }
-
- map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
-
- for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
- if (v == h.accept) {
- continue;
- }
+ reportMap, allocated_reports,
+ added_lit_ids)) {
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+ }
+
+ map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
+
+ for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
+ if (v == h.accept) {
+ continue;
+ }
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
- reportMap, allocated_reports_eod,
- added_lit_ids)) {
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
- }
-
- assert(!reportMap.empty());
-
- int rv = addAnchoredNFA(*this, h, reportMap);
- if (rv != ANCHORED_FAIL) {
- assert(rv != ANCHORED_REMAP);
- DEBUG_PRINTF("added anchored nfa\n");
- /* add edges to the rose graph to bubble the match up */
- for (const auto &m : allocated_reports) {
- const ReportID &report = m.first;
- const u32 &lit_id = m.second;
- assert(depthMap[lit_id].max.is_finite());
- u32 minBound = depthMap[lit_id].min;
- u32 maxBound = depthMap[lit_id].max;
- RoseVertex v
- = createAnchoredVertex(this, lit_id, minBound, maxBound);
- g[v].reports.insert(report);
- }
-
- for (const auto &m : allocated_reports_eod) {
- const ReportID &report = m.first;
- const u32 &lit_id = m.second;
- assert(depthMap[lit_id].max.is_finite());
- u32 minBound = depthMap[lit_id].min;
- u32 maxBound = depthMap[lit_id].max;
- RoseVertex v
- = createAnchoredVertex(this, lit_id, minBound, maxBound);
- RoseVertex eod = add_vertex(g);
- g[eod].eod_accept = true;
- g[eod].reports.insert(report);
- g[eod].min_offset = g[v].min_offset;
- g[eod].max_offset = g[v].max_offset;
- add_edge(v, eod, g);
- }
-
- return true;
- } else {
- DEBUG_PRINTF("failed to add anchored nfa\n");
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
-}
-
-} // namespace ue2
+ reportMap, allocated_reports_eod,
+ added_lit_ids)) {
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+ }
+
+ assert(!reportMap.empty());
+
+ int rv = addAnchoredNFA(*this, h, reportMap);
+ if (rv != ANCHORED_FAIL) {
+ assert(rv != ANCHORED_REMAP);
+ DEBUG_PRINTF("added anchored nfa\n");
+ /* add edges to the rose graph to bubble the match up */
+ for (const auto &m : allocated_reports) {
+ const ReportID &report = m.first;
+ const u32 &lit_id = m.second;
+ assert(depthMap[lit_id].max.is_finite());
+ u32 minBound = depthMap[lit_id].min;
+ u32 maxBound = depthMap[lit_id].max;
+ RoseVertex v
+ = createAnchoredVertex(this, lit_id, minBound, maxBound);
+ g[v].reports.insert(report);
+ }
+
+ for (const auto &m : allocated_reports_eod) {
+ const ReportID &report = m.first;
+ const u32 &lit_id = m.second;
+ assert(depthMap[lit_id].max.is_finite());
+ u32 minBound = depthMap[lit_id].min;
+ u32 maxBound = depthMap[lit_id].max;
+ RoseVertex v
+ = createAnchoredVertex(this, lit_id, minBound, maxBound);
+ RoseVertex eod = add_vertex(g);
+ g[eod].eod_accept = true;
+ g[eod].reports.insert(report);
+ g[eod].min_offset = g[v].min_offset;
+ g[eod].max_offset = g[v].max_offset;
+ add_edge(v, eod, g);
+ }
+
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add anchored nfa\n");
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+}
+
+} // namespace ue2