diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:11 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:11 +0300 |
commit | 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch) | |
tree | 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp | |
parent | 1aeb9a455974457866f78722ad98114bafc84e8a (diff) | |
download | ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp | 476 |
1 files changed, 238 insertions, 238 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp index e7b9db416f..d25ac43e87 100644 --- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp +++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,8 @@ #include "util/depth.h" #include "util/graph.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" -#include "util/ue2_graph.h" +#include "util/graph_small_color_map.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include <algorithm> @@ -66,31 +66,31 @@ namespace { * compressAndScore. */ struct LitGraphVertexProps { - LitGraphVertexProps() = default; - explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} + LitGraphVertexProps() = default; + explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} ue2_literal::elem c; // string element (char + bool) size_t index = 0; // managed by ue2_graph }; struct LitGraphEdgeProps { - LitGraphEdgeProps() = default; + LitGraphEdgeProps() = default; explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} u64a score = NO_LITERAL_AT_EDGE_SCORE; size_t index = 0; // managed by ue2_graph }; -struct LitGraph - : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> { - - LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} - - const vertex_descriptor root; - const vertex_descriptor sink; -}; - -typedef LitGraph::vertex_descriptor LitVertex; -typedef LitGraph::edge_descriptor LitEdge; - +struct LitGraph + : public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> { + + LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} + + const vertex_descriptor root; + const vertex_descriptor sink; +}; + +typedef LitGraph::vertex_descriptor LitVertex; +typedef LitGraph::edge_descriptor LitEdge; + typedef pair<LitVertex, NFAVertex> VertexPair; typedef std::queue<VertexPair> LitVertexQ; @@ -100,16 +100,16 @@ typedef std::queue<VertexPair> LitVertexQ; /** \brief Dump the literal graph in Graphviz format. */ static UNUSED -void dumpGraph(const char *filename, const LitGraph &lg) { +void dumpGraph(const char *filename, const LitGraph &lg) { ofstream fout(filename); fout << "digraph G {" << endl; for (auto v : vertices_range(lg)) { - fout << lg[v].index; - if (v == lg.root) { + fout << lg[v].index; + if (v == lg.root) { fout << "[label=\"ROOT\"];"; - } else if (v == lg.sink) { + } else if (v == lg.sink) { fout << "[label=\"SINK\"];"; } else { ue2_literal s; @@ -121,9 +121,9 @@ void dumpGraph(const char *filename, const LitGraph &lg) { for (const auto &e : edges_range(lg)) { LitVertex u = source(e, lg), v = target(e, lg); - fout << lg[u].index << " -> " << lg[v].index << "[label=\"" - << lg[e].score << "\"]" - << ";" << endl; + fout << lg[u].index << " -> " << lg[v].index << "[label=\"" + << lg[e].score << "\"]" + << ";" << endl; } fout << "}" << endl; @@ -145,11 +145,11 @@ bool allowExpand(size_t numItems, size_t totalPathsSoFar) { } static -LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, - const ue2_literal::elem &c) { +LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, + const ue2_literal::elem &c) { // Check if we already have this in the graph. for (auto v : adjacent_vertices_range(pred, lg)) { - if (v == lg.sink) { + if (v == lg.sink) { continue; } if (lg[v].c == c) { @@ -163,10 +163,10 @@ LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, } static -void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, - const CharReach &cr, NFAVertex v) { - for (size_t i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { +void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, + const CharReach &cr, NFAVertex v) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -174,14 +174,14 @@ void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lv = addToLitGraph(lg, pred, c); + LitVertex lv = addToLitGraph(lg, pred, c); workQ.push(VertexPair(lv, v)); } } static -void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, - const NFAEdge &e) { +void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, + const NFAEdge &e) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); const CharReach &cr = g[v].char_reach; @@ -190,7 +190,7 @@ void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, return; } - addToQueue(workQ, lg, lg.root, cr, u); + addToQueue(workQ, lg, lg.root, cr, u); } static @@ -202,8 +202,8 @@ u32 crCardinality(const CharReach &cr) { } u32 rv = 0; - for (size_t i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -218,10 +218,10 @@ u32 crCardinality(const CharReach &cr) { * identifying vertices connected to the sink and removing their other * out-edges. */ static -void filterLitGraph(LitGraph &lg) { - for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { - remove_out_edge_if(v, [&lg](const LitEdge &e) { - return target(e, lg) != lg.sink; +void filterLitGraph(LitGraph &lg) { + for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { + remove_out_edge_if(v, [&lg](const LitEdge &e) { + return target(e, lg) != lg.sink; }, lg); } @@ -234,12 +234,12 @@ void filterLitGraph(LitGraph &lg) { * from each predecessor of the sink (note: it's a suffix tree except for this * convenience) towards the source, storing each string as we go. */ static -void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) { +void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) { ue2_literal lit; - for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { + for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { lit.clear(); - while (u != lg.root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) <= 1); LitGraph::inv_adjacency_iterator ai2, ae2; @@ -283,7 +283,7 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, LitGraph lg; LitVertexQ workQ; - initWorkQueue(workQ, lg, g, e); + initWorkQueue(workQ, lg, g, e); while (!workQ.empty()) { const LitVertex lv = workQ.front().first; @@ -292,18 +292,18 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, u32 cr_card = crCardinality(cr); size_t numItems = cr_card * in_degree(t, g); - size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; + size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; if (g[t].index == NODE_START) { // reached start, add to literal set - add_edge_if_not_present(lv, lg.sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); goto next_work_elem; } // Expand next vertex if (allowExpand(numItems, committed_count)) { for (auto u : inv_adjacent_vertices_range(t, g)) { - addToQueue(workQ, lg, lv, cr, u); + addToQueue(workQ, lg, lv, cr, u); } goto next_work_elem; } @@ -319,35 +319,35 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lt = addToLitGraph(lg, lv, c); - add_edge_if_not_present(lt, lg.sink, lg); + LitVertex lt = addToLitGraph(lg, lv, c); + add_edge_if_not_present(lt, lg.sink, lg); } goto next_work_elem; } // add to literal set - add_edge_if_not_present(lv, lg.sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); next_work_elem: workQ.pop(); } - filterLitGraph(lg); - //dumpGraph("litgraph.dot", lg); - extractLiterals(lg, s); + filterLitGraph(lg); + //dumpGraph("litgraph.dot", lg); + extractLiterals(lg, s); // Our literal set should contain no literal that is a suffix of another. assert(!hasSuffixLiterals(s)); - DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, + DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, g[source(e, g)].index, g[target(e, g)].index, s.size()); } -bool bad_mixed_sensitivity(const ue2_literal &s) { - /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end, - * we should be able to handle it */ - return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH; -} - +bool bad_mixed_sensitivity(const ue2_literal &s) { + /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end, + * we should be able to handle it */ + return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH; +} + static u64a litUniqueness(const string &s) { CharReach seen(s); @@ -412,15 +412,15 @@ u64a calculateScore(const ue2_literal &s) { /** Adds a literal in reverse order, building up a suffix tree. */ static -void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { +void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); ue2_literal suffix; - LitVertex v = lg.root; + LitVertex v = lg.root; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { suffix.push_back(*it); LitVertex w; for (auto v2 : adjacent_vertices_range(v, lg)) { - if (v2 != lg.sink && lg[v2].c == *it) { + if (v2 != lg.sink && lg[v2].c == *it) { w = v2; goto next_char; } @@ -432,18 +432,18 @@ next_char: } // Wire the last vertex to the sink. - add_edge(v, lg.sink, lg); + add_edge(v, lg.sink, lg); } static void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg, - set<ue2_literal> &s) { + set<ue2_literal> &s) { for (const auto &e : cutset) { - LitVertex u = source(e, lg); - LitVertex v = target(e, lg); + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); ue2_literal lit; lit.push_back(lg[v].c); - while (u != lg.root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) == 1); LitGraph::inv_adjacency_iterator ai, ae; @@ -463,13 +463,13 @@ next_literal: #ifdef DEBUG static UNUSED -const char *describeColor(small_color c) { +const char *describeColor(small_color c) { switch (c) { - case small_color::white: + case small_color::white: return "white"; - case small_color::gray: + case small_color::gray: return "gray"; - case small_color::black: + case small_color::black: return "black"; default: return "unknown"; @@ -479,90 +479,90 @@ const char *describeColor(small_color c) { /** * The BGL's boykov_kolmogorov_max_flow requires that all edges have their - * reverse edge in the graph. This function adds them, returning a vector - * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there - * should be no existing reverse_edges. + * reverse edge in the graph. This function adds them, returning a vector + * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there + * should be no existing reverse_edges. */ static -vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) { - const size_t edge_count = num_edges(lg); - vector<LitEdge> fwd_edges; - fwd_edges.reserve(edge_count); - for (const auto &e : edges_range(lg)) { - fwd_edges.push_back(e); - } +vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) { + const size_t edge_count = num_edges(lg); + vector<LitEdge> fwd_edges; + fwd_edges.reserve(edge_count); + for (const auto &e : edges_range(lg)) { + fwd_edges.push_back(e); + } - vector<LitEdge> rev_map(2 * edge_count); + vector<LitEdge> rev_map(2 * edge_count); - for (const auto &e : fwd_edges) { - LitVertex u = source(e, lg); - LitVertex v = target(e, lg); + for (const auto &e : fwd_edges) { + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); - assert(!edge(v, u, lg).second); + assert(!edge(v, u, lg).second); - LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; - rev_map[lg[e].index] = rev; - rev_map[lg[rev].index] = e; + LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; + rev_map[lg[e].index] = rev; + rev_map[lg[rev].index] = e; } - return rev_map; + return rev_map; } static -void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) { +void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) { cutset.clear(); - //dumpGraph("litgraph.dot", lg); + //dumpGraph("litgraph.dot", lg); - assert(!in_degree(lg.root, lg)); - assert(!out_degree(lg.sink, lg)); - size_t num_real_edges = num_edges(lg); + assert(!in_degree(lg.root, lg)); + assert(!out_degree(lg.sink, lg)); + size_t num_real_edges = num_edges(lg); // Add reverse edges for the convenience of the BGL's max flow algorithm. - vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg); + vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg); - const auto v_index_map = get(&LitGraphVertexProps::index, lg); - const auto e_index_map = get(&LitGraphEdgeProps::index, lg); + const auto v_index_map = get(&LitGraphVertexProps::index, lg); + const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); - auto colors = make_small_color_map(lg); + auto colors = make_small_color_map(lg); vector<s32> distances(num_verts); vector<LitEdge> predecessors(num_verts); - vector<u64a> residuals(num_edges(lg)); + vector<u64a> residuals(num_edges(lg)); UNUSED u64a flow = boykov_kolmogorov_max_flow(lg, get(&LitGraphEdgeProps::score, lg), - make_iterator_property_map(residuals.begin(), e_index_map), - make_iterator_property_map(rev_edges.begin(), e_index_map), + make_iterator_property_map(residuals.begin(), e_index_map), + make_iterator_property_map(rev_edges.begin(), e_index_map), make_iterator_property_map(predecessors.begin(), v_index_map), - colors, + colors, make_iterator_property_map(distances.begin(), v_index_map), - v_index_map, lg.root, lg.sink); + v_index_map, lg.root, lg.sink); DEBUG_PRINTF("done, flow = %llu\n", flow); - /* remove reverse edges */ - remove_edge_if([&](const LitEdge &e) { - return lg[e].index >= num_real_edges; - }, lg); + /* remove reverse edges */ + remove_edge_if([&](const LitEdge &e) { + return lg[e].index >= num_real_edges; + }, lg); vector<LitEdge> white_cut, black_cut; u64a white_flow = 0, black_flow = 0; for (const auto &e : edges_range(lg)) { const LitVertex u = source(e, lg), v = target(e, lg); - const auto ucolor = get(colors, u); - const auto vcolor = get(colors, v); + const auto ucolor = get(colors, u); + const auto vcolor = get(colors, v); - DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, - describeColor(ucolor), lg[v].index, describeColor(vcolor), + DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, + describeColor(ucolor), lg[v].index, describeColor(vcolor), lg[e].score); - if (ucolor != small_color::white && vcolor == small_color::white) { - assert(v != lg.sink); + if (ucolor != small_color::white && vcolor == small_color::white) { + assert(v != lg.sink); white_cut.push_back(e); white_flow += lg[e].score; } - if (ucolor == small_color::black && vcolor != small_color::black) { - assert(v != lg.sink); + if (ucolor == small_color::black && vcolor != small_color::black) { + assert(v != lg.sink); black_cut.push_back(e); black_flow += lg[e].score; } @@ -604,17 +604,17 @@ u64a compressAndScore(set<ue2_literal> &s) { LitGraph lg; for (const auto &lit : s) { - addReversedLiteral(lit, lg); + addReversedLiteral(lit, lg); } DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", num_vertices(lg), num_edges(lg)); vector<LitEdge> cutset; - findMinCut(lg, cutset); + findMinCut(lg, cutset); s.clear(); - extractLiterals(cutset, lg, s); + extractLiterals(cutset, lg, s); u64a score = scoreSet(s); DEBUG_PRINTF("compressed score is %llu\n", score); @@ -622,48 +622,48 @@ u64a compressAndScore(set<ue2_literal> &s) { return score; } -/* like compressAndScore, but replaces long mixed sensitivity literals with - * something weaker. */ -u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) { - const size_t maxExploded = 8; // only case-explode this far - - /* TODO: the whole compression thing could be made better by systematically - * considering replacing literal sets not just by common suffixes but also - * by nocase literals. */ - - vector<ue2_literal> replacements; - - for (auto it = lits.begin(); it != lits.end();) { - auto jt = it; - ++it; - - if (!bad_mixed_sensitivity(*jt)) { - continue; - } - - /* we have to replace *jt with something... */ - ue2_literal s = *jt; - lits.erase(jt); - - vector<ue2_literal> exploded; - for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) { - exploded.emplace_back(*cit, false); - if (exploded.size() > maxExploded) { - goto dont_explode; - } - } - insert(&replacements, replacements.end(), exploded); - - continue; - dont_explode: - make_nocase(&s); - replacements.push_back(s); - } - - insert(&lits, replacements); - return compressAndScore(lits); -} - +/* like compressAndScore, but replaces long mixed sensitivity literals with + * something weaker. */ +u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) { + const size_t maxExploded = 8; // only case-explode this far + + /* TODO: the whole compression thing could be made better by systematically + * considering replacing literal sets not just by common suffixes but also + * by nocase literals. */ + + vector<ue2_literal> replacements; + + for (auto it = lits.begin(); it != lits.end();) { + auto jt = it; + ++it; + + if (!bad_mixed_sensitivity(*jt)) { + continue; + } + + /* we have to replace *jt with something... */ + ue2_literal s = *jt; + lits.erase(jt); + + vector<ue2_literal> exploded; + for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) { + exploded.emplace_back(*cit, false); + if (exploded.size() > maxExploded) { + goto dont_explode; + } + } + insert(&replacements, replacements.end(), exploded); + + continue; + dont_explode: + make_nocase(&s); + replacements.push_back(s); + } + + insert(&lits, replacements); + return compressAndScore(lits); +} + u64a scoreSet(const set<ue2_literal> &s) { if (s.empty()) { return NO_LITERAL_AT_EDGE_SCORE; @@ -714,7 +714,7 @@ set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, return s; } -vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { +vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { assert(hasCorrectlyNumberedEdges(g)); vector<u64a> scores(num_edges(g)); @@ -722,43 +722,43 @@ vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) { for (const auto &e : edges_range(g)) { u32 eidx = g[e].index; assert(eidx < scores.size()); - if (contains(known_bad, e)) { - scores[eidx] = NO_LITERAL_AT_EDGE_SCORE; - } else { - set<ue2_literal> ls = getLiteralSet(g, e); - scores[eidx] = compressAndScore(ls); - } + if (contains(known_bad, e)) { + scores[eidx] = NO_LITERAL_AT_EDGE_SCORE; + } else { + set<ue2_literal> ls = getLiteralSet(g, e); + scores[eidx] = compressAndScore(ls); + } } return scores; } -bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs) { - DEBUG_PRINTF("looking for leading floating literal\n"); - set<NFAVertex> s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); +bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, + NGHolder *rhs) { + DEBUG_PRINTF("looking for leading floating literal\n"); + set<NFAVertex> s_succ; + insert(&s_succ, adjacent_vertices(g.start, g)); - set<NFAVertex> sds_succ; - insert(&sds_succ, adjacent_vertices(g.startDs, g)); + set<NFAVertex> sds_succ; + insert(&sds_succ, adjacent_vertices(g.startDs, g)); - bool floating = is_subset_of(s_succ, sds_succ); - if (!floating) { - DEBUG_PRINTF("not floating\n"); - return false; - } + bool floating = is_subset_of(s_succ, sds_succ); + if (!floating) { + DEBUG_PRINTF("not floating\n"); + return false; + } - sds_succ.erase(g.startDs); - if (sds_succ.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } + sds_succ.erase(g.startDs); + if (sds_succ.size() != 1) { + DEBUG_PRINTF("branchy root\n"); + return false; + } - NFAVertex u = g.startDs; - NFAVertex v = *sds_succ.begin(); + NFAVertex u = g.startDs; + NFAVertex v = *sds_succ.begin(); while (true) { - DEBUG_PRINTF("validating vertex %zu\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != g.acceptEod && v != g.accept); @@ -811,8 +811,8 @@ bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, } assert(u != g.startDs); - unordered_map<NFAVertex, NFAVertex> rhs_map; - vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g)); + unordered_map<NFAVertex, NFAVertex> rhs_map; + vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g)); splitRHS(g, pivots, rhs, &rhs_map); DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(), @@ -849,49 +849,49 @@ bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { return true; } -bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { - NFAVertex v = g.accept; - - for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { - NGHolder::inv_adjacency_iterator ai, ae; - tie(ai, ae) = inv_adjacent_vertices(v, g); - if (ai == ae) { - assert(0); // no predecessors? - return false; - } - v = *ai++; - if (ai != ae) { - DEBUG_PRINTF("branch, fail\n"); - return false; - } - - if (is_special(v, g)) { - DEBUG_PRINTF("special found, fail\n"); - return false; - } - - const CharReach &cr_g = g[v].char_reach; - const CharReach &cr_l = *it; - - if (!cr_l.isSubsetOf(cr_g)) { - /* running over the prefix is needed to prevent false postives */ - DEBUG_PRINTF("reach fail\n"); - return false; - } - } - - // Our last value for v should have only start states for predecessors. - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_any_start(u, g)) { - DEBUG_PRINTF("pred is not start\n"); - return false; - } - } - - assert(num_vertices(g) == lit.length() + N_SPECIALS); - - DEBUG_PRINTF("ok\n"); - return true; -} - +bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { + NFAVertex v = g.accept; + + for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { + NGHolder::inv_adjacency_iterator ai, ae; + tie(ai, ae) = inv_adjacent_vertices(v, g); + if (ai == ae) { + assert(0); // no predecessors? + return false; + } + v = *ai++; + if (ai != ae) { + DEBUG_PRINTF("branch, fail\n"); + return false; + } + + if (is_special(v, g)) { + DEBUG_PRINTF("special found, fail\n"); + return false; + } + + const CharReach &cr_g = g[v].char_reach; + const CharReach &cr_l = *it; + + if (!cr_l.isSubsetOf(cr_g)) { + /* running over the prefix is needed to prevent false postives */ + DEBUG_PRINTF("reach fail\n"); + return false; + } + } + + // Our last value for v should have only start states for predecessors. + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_any_start(u, g)) { + DEBUG_PRINTF("pred is not start\n"); + return false; + } + } + + assert(num_vertices(g) == lit.length() + N_SPECIALS); + + DEBUG_PRINTF("ok\n"); + return true; +} + } // namespace ue2 |