aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
diff options
context:
space:
mode:
authorIvan Blinkov <ivan@blinkov.ru>2022-02-10 16:47:11 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:11 +0300
commit5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
parent1aeb9a455974457866f78722ad98114bafc84e8a (diff)
downloadydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp')
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp476
1 files changed, 238 insertions, 238 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
index e7b9db416f..d25ac43e87 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -40,8 +40,8 @@
#include "util/depth.h"
#include "util/graph.h"
#include "util/graph_range.h"
-#include "util/graph_small_color_map.h"
-#include "util/ue2_graph.h"
+#include "util/graph_small_color_map.h"
+#include "util/ue2_graph.h"
#include "util/ue2string.h"
#include <algorithm>
@@ -66,31 +66,31 @@ namespace {
* compressAndScore. */
struct LitGraphVertexProps {
- LitGraphVertexProps() = default;
- explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {}
+ LitGraphVertexProps() = default;
+ explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {}
ue2_literal::elem c; // string element (char + bool)
size_t index = 0; // managed by ue2_graph
};
struct LitGraphEdgeProps {
- LitGraphEdgeProps() = default;
+ LitGraphEdgeProps() = default;
explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
u64a score = NO_LITERAL_AT_EDGE_SCORE;
size_t index = 0; // managed by ue2_graph
};
-struct LitGraph
- : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> {
-
- LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {}
-
- const vertex_descriptor root;
- const vertex_descriptor sink;
-};
-
-typedef LitGraph::vertex_descriptor LitVertex;
-typedef LitGraph::edge_descriptor LitEdge;
-
+struct LitGraph
+ : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> {
+
+ LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {}
+
+ const vertex_descriptor root;
+ const vertex_descriptor sink;
+};
+
+typedef LitGraph::vertex_descriptor LitVertex;
+typedef LitGraph::edge_descriptor LitEdge;
+
typedef pair<LitVertex, NFAVertex> VertexPair;
typedef std::queue<VertexPair> LitVertexQ;
@@ -100,16 +100,16 @@ typedef std::queue<VertexPair> LitVertexQ;
/** \brief Dump the literal graph in Graphviz format. */
static UNUSED
-void dumpGraph(const char *filename, const LitGraph &lg) {
+void dumpGraph(const char *filename, const LitGraph &lg) {
ofstream fout(filename);
fout << "digraph G {" << endl;
for (auto v : vertices_range(lg)) {
- fout << lg[v].index;
- if (v == lg.root) {
+ fout << lg[v].index;
+ if (v == lg.root) {
fout << "[label=\"ROOT\"];";
- } else if (v == lg.sink) {
+ } else if (v == lg.sink) {
fout << "[label=\"SINK\"];";
} else {
ue2_literal s;
@@ -121,9 +121,9 @@ void dumpGraph(const char *filename, const LitGraph &lg) {
for (const auto &e : edges_range(lg)) {
LitVertex u = source(e, lg), v = target(e, lg);
- fout << lg[u].index << " -> " << lg[v].index << "[label=\""
- << lg[e].score << "\"]"
- << ";" << endl;
+ fout << lg[u].index << " -> " << lg[v].index << "[label=\""
+ << lg[e].score << "\"]"
+ << ";" << endl;
}
fout << "}" << endl;
@@ -145,11 +145,11 @@ bool allowExpand(size_t numItems, size_t totalPathsSoFar) {
}
static
-LitVertex addToLitGraph(LitGraph &lg, LitVertex pred,
- const ue2_literal::elem &c) {
+LitVertex addToLitGraph(LitGraph &lg, LitVertex pred,
+ const ue2_literal::elem &c) {
// Check if we already have this in the graph.
for (auto v : adjacent_vertices_range(pred, lg)) {
- if (v == lg.sink) {
+ if (v == lg.sink) {
continue;
}
if (lg[v].c == c) {
@@ -163,10 +163,10 @@ LitVertex addToLitGraph(LitGraph &lg, LitVertex pred,
}
static
-void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred,
- const CharReach &cr, NFAVertex v) {
- for (size_t i = cr.find_first(); i != CharReach::npos;
- i = cr.find_next(i)) {
+void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred,
+ const CharReach &cr, NFAVertex v) {
+ for (size_t i = cr.find_first(); i != CharReach::npos;
+ i = cr.find_next(i)) {
if (myisupper(i) && cr.test(mytolower(i))) {
// ignore upper half of a nocase pair
continue;
@@ -174,14 +174,14 @@ void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred,
bool nocase = myislower(i) && cr.test(mytoupper(i));
ue2_literal::elem c((char)i, nocase);
- LitVertex lv = addToLitGraph(lg, pred, c);
+ LitVertex lv = addToLitGraph(lg, pred, c);
workQ.push(VertexPair(lv, v));
}
}
static
-void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g,
- const NFAEdge &e) {
+void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g,
+ const NFAEdge &e) {
NFAVertex u = source(e, g);
NFAVertex v = target(e, g);
const CharReach &cr = g[v].char_reach;
@@ -190,7 +190,7 @@ void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g,
return;
}
- addToQueue(workQ, lg, lg.root, cr, u);
+ addToQueue(workQ, lg, lg.root, cr, u);
}
static
@@ -202,8 +202,8 @@ u32 crCardinality(const CharReach &cr) {
}
u32 rv = 0;
- for (size_t i = cr.find_first(); i != CharReach::npos;
- i = cr.find_next(i)) {
+ for (size_t i = cr.find_first(); i != CharReach::npos;
+ i = cr.find_next(i)) {
if (myisupper(i) && cr.test(mytolower(i))) {
// ignore upper half of a nocase pair
continue;
@@ -218,10 +218,10 @@ u32 crCardinality(const CharReach &cr) {
* identifying vertices connected to the sink and removing their other
* out-edges. */
static
-void filterLitGraph(LitGraph &lg) {
- for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) {
- remove_out_edge_if(v, [&lg](const LitEdge &e) {
- return target(e, lg) != lg.sink;
+void filterLitGraph(LitGraph &lg) {
+ for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) {
+ remove_out_edge_if(v, [&lg](const LitEdge &e) {
+ return target(e, lg) != lg.sink;
}, lg);
}
@@ -234,12 +234,12 @@ void filterLitGraph(LitGraph &lg) {
* from each predecessor of the sink (note: it's a suffix tree except for this
* convenience) towards the source, storing each string as we go. */
static
-void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) {
+void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) {
ue2_literal lit;
- for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) {
+ for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) {
lit.clear();
- while (u != lg.root) {
+ while (u != lg.root) {
lit.push_back(lg[u].c);
assert(in_degree(u, lg) <= 1);
LitGraph::inv_adjacency_iterator ai2, ae2;
@@ -283,7 +283,7 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e,
LitGraph lg;
LitVertexQ workQ;
- initWorkQueue(workQ, lg, g, e);
+ initWorkQueue(workQ, lg, g, e);
while (!workQ.empty()) {
const LitVertex lv = workQ.front().first;
@@ -292,18 +292,18 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e,
u32 cr_card = crCardinality(cr);
size_t numItems = cr_card * in_degree(t, g);
- size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1;
+ size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1;
if (g[t].index == NODE_START) {
// reached start, add to literal set
- add_edge_if_not_present(lv, lg.sink, lg);
+ add_edge_if_not_present(lv, lg.sink, lg);
goto next_work_elem;
}
// Expand next vertex
if (allowExpand(numItems, committed_count)) {
for (auto u : inv_adjacent_vertices_range(t, g)) {
- addToQueue(workQ, lg, lv, cr, u);
+ addToQueue(workQ, lg, lv, cr, u);
}
goto next_work_elem;
}
@@ -319,35 +319,35 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e,
bool nocase = myislower(i) && cr.test(mytoupper(i));
ue2_literal::elem c((char)i, nocase);
- LitVertex lt = addToLitGraph(lg, lv, c);
- add_edge_if_not_present(lt, lg.sink, lg);
+ LitVertex lt = addToLitGraph(lg, lv, c);
+ add_edge_if_not_present(lt, lg.sink, lg);
}
goto next_work_elem;
}
// add to literal set
- add_edge_if_not_present(lv, lg.sink, lg);
+ add_edge_if_not_present(lv, lg.sink, lg);
next_work_elem:
workQ.pop();
}
- filterLitGraph(lg);
- //dumpGraph("litgraph.dot", lg);
- extractLiterals(lg, s);
+ filterLitGraph(lg);
+ //dumpGraph("litgraph.dot", lg);
+ extractLiterals(lg, s);
// Our literal set should contain no literal that is a suffix of another.
assert(!hasSuffixLiterals(s));
- DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index,
+ DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index,
g[source(e, g)].index, g[target(e, g)].index, s.size());
}
-bool bad_mixed_sensitivity(const ue2_literal &s) {
- /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end,
- * we should be able to handle it */
- return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH;
-}
-
+bool bad_mixed_sensitivity(const ue2_literal &s) {
+ /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end,
+ * we should be able to handle it */
+ return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH;
+}
+
static
u64a litUniqueness(const string &s) {
CharReach seen(s);
@@ -412,15 +412,15 @@ u64a calculateScore(const ue2_literal &s) {
/** Adds a literal in reverse order, building up a suffix tree. */
static
-void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) {
+void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) {
DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str());
ue2_literal suffix;
- LitVertex v = lg.root;
+ LitVertex v = lg.root;
for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
suffix.push_back(*it);
LitVertex w;
for (auto v2 : adjacent_vertices_range(v, lg)) {
- if (v2 != lg.sink && lg[v2].c == *it) {
+ if (v2 != lg.sink && lg[v2].c == *it) {
w = v2;
goto next_char;
}
@@ -432,18 +432,18 @@ next_char:
}
// Wire the last vertex to the sink.
- add_edge(v, lg.sink, lg);
+ add_edge(v, lg.sink, lg);
}
static
void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg,
- set<ue2_literal> &s) {
+ set<ue2_literal> &s) {
for (const auto &e : cutset) {
- LitVertex u = source(e, lg);
- LitVertex v = target(e, lg);
+ LitVertex u = source(e, lg);
+ LitVertex v = target(e, lg);
ue2_literal lit;
lit.push_back(lg[v].c);
- while (u != lg.root) {
+ while (u != lg.root) {
lit.push_back(lg[u].c);
assert(in_degree(u, lg) == 1);
LitGraph::inv_adjacency_iterator ai, ae;
@@ -463,13 +463,13 @@ next_literal:
#ifdef DEBUG
static UNUSED
-const char *describeColor(small_color c) {
+const char *describeColor(small_color c) {
switch (c) {
- case small_color::white:
+ case small_color::white:
return "white";
- case small_color::gray:
+ case small_color::gray:
return "gray";
- case small_color::black:
+ case small_color::black:
return "black";
default:
return "unknown";
@@ -479,90 +479,90 @@ const char *describeColor(small_color c) {
/**
* The BGL's boykov_kolmogorov_max_flow requires that all edges have their
- * reverse edge in the graph. This function adds them, returning a vector
- * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there
- * should be no existing reverse_edges.
+ * reverse edge in the graph. This function adds them, returning a vector
+ * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there
+ * should be no existing reverse_edges.
*/
static
-vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
- const size_t edge_count = num_edges(lg);
- vector<LitEdge> fwd_edges;
- fwd_edges.reserve(edge_count);
- for (const auto &e : edges_range(lg)) {
- fwd_edges.push_back(e);
- }
+vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
+ const size_t edge_count = num_edges(lg);
+ vector<LitEdge> fwd_edges;
+ fwd_edges.reserve(edge_count);
+ for (const auto &e : edges_range(lg)) {
+ fwd_edges.push_back(e);
+ }
- vector<LitEdge> rev_map(2 * edge_count);
+ vector<LitEdge> rev_map(2 * edge_count);
- for (const auto &e : fwd_edges) {
- LitVertex u = source(e, lg);
- LitVertex v = target(e, lg);
+ for (const auto &e : fwd_edges) {
+ LitVertex u = source(e, lg);
+ LitVertex v = target(e, lg);
- assert(!edge(v, u, lg).second);
+ assert(!edge(v, u, lg).second);
- LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first;
- rev_map[lg[e].index] = rev;
- rev_map[lg[rev].index] = e;
+ LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first;
+ rev_map[lg[e].index] = rev;
+ rev_map[lg[rev].index] = e;
}
- return rev_map;
+ return rev_map;
}
static
-void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) {
+void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) {
cutset.clear();
- //dumpGraph("litgraph.dot", lg);
+ //dumpGraph("litgraph.dot", lg);
- assert(!in_degree(lg.root, lg));
- assert(!out_degree(lg.sink, lg));
- size_t num_real_edges = num_edges(lg);
+ assert(!in_degree(lg.root, lg));
+ assert(!out_degree(lg.sink, lg));
+ size_t num_real_edges = num_edges(lg);
// Add reverse edges for the convenience of the BGL's max flow algorithm.
- vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg);
+ vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg);
- const auto v_index_map = get(&LitGraphVertexProps::index, lg);
- const auto e_index_map = get(&LitGraphEdgeProps::index, lg);
+ const auto v_index_map = get(&LitGraphVertexProps::index, lg);
+ const auto e_index_map = get(&LitGraphEdgeProps::index, lg);
const size_t num_verts = num_vertices(lg);
- auto colors = make_small_color_map(lg);
+ auto colors = make_small_color_map(lg);
vector<s32> distances(num_verts);
vector<LitEdge> predecessors(num_verts);
- vector<u64a> residuals(num_edges(lg));
+ vector<u64a> residuals(num_edges(lg));
UNUSED u64a flow = boykov_kolmogorov_max_flow(lg,
get(&LitGraphEdgeProps::score, lg),
- make_iterator_property_map(residuals.begin(), e_index_map),
- make_iterator_property_map(rev_edges.begin(), e_index_map),
+ make_iterator_property_map(residuals.begin(), e_index_map),
+ make_iterator_property_map(rev_edges.begin(), e_index_map),
make_iterator_property_map(predecessors.begin(), v_index_map),
- colors,
+ colors,
make_iterator_property_map(distances.begin(), v_index_map),
- v_index_map, lg.root, lg.sink);
+ v_index_map, lg.root, lg.sink);
DEBUG_PRINTF("done, flow = %llu\n", flow);
- /* remove reverse edges */
- remove_edge_if([&](const LitEdge &e) {
- return lg[e].index >= num_real_edges;
- }, lg);
+ /* remove reverse edges */
+ remove_edge_if([&](const LitEdge &e) {
+ return lg[e].index >= num_real_edges;
+ }, lg);
vector<LitEdge> white_cut, black_cut;
u64a white_flow = 0, black_flow = 0;
for (const auto &e : edges_range(lg)) {
const LitVertex u = source(e, lg), v = target(e, lg);
- const auto ucolor = get(colors, u);
- const auto vcolor = get(colors, v);
+ const auto ucolor = get(colors, u);
+ const auto vcolor = get(colors, v);
- DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index,
- describeColor(ucolor), lg[v].index, describeColor(vcolor),
+ DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index,
+ describeColor(ucolor), lg[v].index, describeColor(vcolor),
lg[e].score);
- if (ucolor != small_color::white && vcolor == small_color::white) {
- assert(v != lg.sink);
+ if (ucolor != small_color::white && vcolor == small_color::white) {
+ assert(v != lg.sink);
white_cut.push_back(e);
white_flow += lg[e].score;
}
- if (ucolor == small_color::black && vcolor != small_color::black) {
- assert(v != lg.sink);
+ if (ucolor == small_color::black && vcolor != small_color::black) {
+ assert(v != lg.sink);
black_cut.push_back(e);
black_flow += lg[e].score;
}
@@ -604,17 +604,17 @@ u64a compressAndScore(set<ue2_literal> &s) {
LitGraph lg;
for (const auto &lit : s) {
- addReversedLiteral(lit, lg);
+ addReversedLiteral(lit, lg);
}
DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n",
num_vertices(lg), num_edges(lg));
vector<LitEdge> cutset;
- findMinCut(lg, cutset);
+ findMinCut(lg, cutset);
s.clear();
- extractLiterals(cutset, lg, s);
+ extractLiterals(cutset, lg, s);
u64a score = scoreSet(s);
DEBUG_PRINTF("compressed score is %llu\n", score);
@@ -622,48 +622,48 @@ u64a compressAndScore(set<ue2_literal> &s) {
return score;
}
-/* like compressAndScore, but replaces long mixed sensitivity literals with
- * something weaker. */
-u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
- const size_t maxExploded = 8; // only case-explode this far
-
- /* TODO: the whole compression thing could be made better by systematically
- * considering replacing literal sets not just by common suffixes but also
- * by nocase literals. */
-
- vector<ue2_literal> replacements;
-
- for (auto it = lits.begin(); it != lits.end();) {
- auto jt = it;
- ++it;
-
- if (!bad_mixed_sensitivity(*jt)) {
- continue;
- }
-
- /* we have to replace *jt with something... */
- ue2_literal s = *jt;
- lits.erase(jt);
-
- vector<ue2_literal> exploded;
- for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) {
- exploded.emplace_back(*cit, false);
- if (exploded.size() > maxExploded) {
- goto dont_explode;
- }
- }
- insert(&replacements, replacements.end(), exploded);
-
- continue;
- dont_explode:
- make_nocase(&s);
- replacements.push_back(s);
- }
-
- insert(&lits, replacements);
- return compressAndScore(lits);
-}
-
+/* like compressAndScore, but replaces long mixed sensitivity literals with
+ * something weaker. */
+u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
+ const size_t maxExploded = 8; // only case-explode this far
+
+ /* TODO: the whole compression thing could be made better by systematically
+ * considering replacing literal sets not just by common suffixes but also
+ * by nocase literals. */
+
+ vector<ue2_literal> replacements;
+
+ for (auto it = lits.begin(); it != lits.end();) {
+ auto jt = it;
+ ++it;
+
+ if (!bad_mixed_sensitivity(*jt)) {
+ continue;
+ }
+
+ /* we have to replace *jt with something... */
+ ue2_literal s = *jt;
+ lits.erase(jt);
+
+ vector<ue2_literal> exploded;
+ for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) {
+ exploded.emplace_back(*cit, false);
+ if (exploded.size() > maxExploded) {
+ goto dont_explode;
+ }
+ }
+ insert(&replacements, replacements.end(), exploded);
+
+ continue;
+ dont_explode:
+ make_nocase(&s);
+ replacements.push_back(s);
+ }
+
+ insert(&lits, replacements);
+ return compressAndScore(lits);
+}
+
u64a scoreSet(const set<ue2_literal> &s) {
if (s.empty()) {
return NO_LITERAL_AT_EDGE_SCORE;
@@ -714,7 +714,7 @@ set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
return s;
}
-vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) {
+vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) {
assert(hasCorrectlyNumberedEdges(g));
vector<u64a> scores(num_edges(g));
@@ -722,43 +722,43 @@ vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) {
for (const auto &e : edges_range(g)) {
u32 eidx = g[e].index;
assert(eidx < scores.size());
- if (contains(known_bad, e)) {
- scores[eidx] = NO_LITERAL_AT_EDGE_SCORE;
- } else {
- set<ue2_literal> ls = getLiteralSet(g, e);
- scores[eidx] = compressAndScore(ls);
- }
+ if (contains(known_bad, e)) {
+ scores[eidx] = NO_LITERAL_AT_EDGE_SCORE;
+ } else {
+ set<ue2_literal> ls = getLiteralSet(g, e);
+ scores[eidx] = compressAndScore(ls);
+ }
}
return scores;
}
-bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
- NGHolder *rhs) {
- DEBUG_PRINTF("looking for leading floating literal\n");
- set<NFAVertex> s_succ;
- insert(&s_succ, adjacent_vertices(g.start, g));
+bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
+ NGHolder *rhs) {
+ DEBUG_PRINTF("looking for leading floating literal\n");
+ set<NFAVertex> s_succ;
+ insert(&s_succ, adjacent_vertices(g.start, g));
- set<NFAVertex> sds_succ;
- insert(&sds_succ, adjacent_vertices(g.startDs, g));
+ set<NFAVertex> sds_succ;
+ insert(&sds_succ, adjacent_vertices(g.startDs, g));
- bool floating = is_subset_of(s_succ, sds_succ);
- if (!floating) {
- DEBUG_PRINTF("not floating\n");
- return false;
- }
+ bool floating = is_subset_of(s_succ, sds_succ);
+ if (!floating) {
+ DEBUG_PRINTF("not floating\n");
+ return false;
+ }
- sds_succ.erase(g.startDs);
- if (sds_succ.size() != 1) {
- DEBUG_PRINTF("branchy root\n");
- return false;
- }
+ sds_succ.erase(g.startDs);
+ if (sds_succ.size() != 1) {
+ DEBUG_PRINTF("branchy root\n");
+ return false;
+ }
- NFAVertex u = g.startDs;
- NFAVertex v = *sds_succ.begin();
+ NFAVertex u = g.startDs;
+ NFAVertex v = *sds_succ.begin();
while (true) {
- DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
+ DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
assert(v != g.acceptEod && v != g.accept);
@@ -811,8 +811,8 @@ bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
}
assert(u != g.startDs);
- unordered_map<NFAVertex, NFAVertex> rhs_map;
- vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g));
+ unordered_map<NFAVertex, NFAVertex> rhs_map;
+ vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g));
splitRHS(g, pivots, rhs, &rhs_map);
DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(),
@@ -849,49 +849,49 @@ bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
return true;
}
-bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
- NFAVertex v = g.accept;
-
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- NGHolder::inv_adjacency_iterator ai, ae;
- tie(ai, ae) = inv_adjacent_vertices(v, g);
- if (ai == ae) {
- assert(0); // no predecessors?
- return false;
- }
- v = *ai++;
- if (ai != ae) {
- DEBUG_PRINTF("branch, fail\n");
- return false;
- }
-
- if (is_special(v, g)) {
- DEBUG_PRINTF("special found, fail\n");
- return false;
- }
-
- const CharReach &cr_g = g[v].char_reach;
- const CharReach &cr_l = *it;
-
- if (!cr_l.isSubsetOf(cr_g)) {
- /* running over the prefix is needed to prevent false postives */
- DEBUG_PRINTF("reach fail\n");
- return false;
- }
- }
-
- // Our last value for v should have only start states for predecessors.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_any_start(u, g)) {
- DEBUG_PRINTF("pred is not start\n");
- return false;
- }
- }
-
- assert(num_vertices(g) == lit.length() + N_SPECIALS);
-
- DEBUG_PRINTF("ok\n");
- return true;
-}
-
+bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
+ NFAVertex v = g.accept;
+
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ NGHolder::inv_adjacency_iterator ai, ae;
+ tie(ai, ae) = inv_adjacent_vertices(v, g);
+ if (ai == ae) {
+ assert(0); // no predecessors?
+ return false;
+ }
+ v = *ai++;
+ if (ai != ae) {
+ DEBUG_PRINTF("branch, fail\n");
+ return false;
+ }
+
+ if (is_special(v, g)) {
+ DEBUG_PRINTF("special found, fail\n");
+ return false;
+ }
+
+ const CharReach &cr_g = g[v].char_reach;
+ const CharReach &cr_l = *it;
+
+ if (!cr_l.isSubsetOf(cr_g)) {
+ /* running over the prefix is needed to prevent false postives */
+ DEBUG_PRINTF("reach fail\n");
+ return false;
+ }
+ }
+
+ // Our last value for v should have only start states for predecessors.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_any_start(u, g)) {
+ DEBUG_PRINTF("pred is not start\n");
+ return false;
+ }
+ }
+
+ assert(num_vertices(g) == lit.length() + N_SPECIALS);
+
+ DEBUG_PRINTF("ok\n");
+ return true;
+}
+
} // namespace ue2