aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
diff options
context:
space:
mode:
authorbnagaev <bnagaev@yandex-team.ru>2022-02-10 16:47:04 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:04 +0300
commitd6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
treed5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
parent1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
downloadydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp')
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp1176
1 files changed, 588 insertions, 588 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
index d0540d79b0..10e1cbfa5f 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
@@ -1,68 +1,68 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose compile-time analysis for lookaround masks.
- */
-#include "rose_build_lookaround.h"
-
-#include "rose_build_impl.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/rdfa.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose compile-time analysis for lookaround masks.
+ */
+#include "rose_build_lookaround.h"
+
+#include "rose_build_impl.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/rdfa.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/flat_containers.h"
-#include "util/verify_types.h"
-
-#include <cstdlib>
-#include <queue>
+#include "util/verify_types.h"
+
+#include <cstdlib>
+#include <queue>
#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Max search distance for reachability in front of a role. */
-static const u32 MAX_FWD_LEN = 64;
-
-/** \brief Max search distance for reachability behind a role. */
-static const u32 MAX_BACK_LEN = 64;
-
-/** \brief Max lookaround entries for a role. */
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Max search distance for reachability in front of a role. */
+static const u32 MAX_FWD_LEN = 64;
+
+/** \brief Max search distance for reachability behind a role. */
+static const u32 MAX_BACK_LEN = 64;
+
+/** \brief Max lookaround entries for a role. */
static const u32 MAX_LOOKAROUND_ENTRIES = 32;
-
-/** \brief We would rather have lookarounds with smaller reach than this. */
-static const u32 LOOKAROUND_WIDE_REACH = 200;
-
+
+/** \brief We would rather have lookarounds with smaller reach than this. */
+static const u32 LOOKAROUND_WIDE_REACH = 200;
+
#if defined(DEBUG) || defined(DUMP_SUPPORT)
static UNUSED
string dump(const map<s32, CharReach> &look) {
@@ -77,389 +77,389 @@ string dump(const map<s32, CharReach> &look) {
}
#endif
-static
-void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
+static
+void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
flat_set<NFAVertex> curr, next;
-
- // Consider only successors of start with the required top.
- for (const auto &e : out_edges_range(g.start, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
+
+ // Consider only successors of start with the required top.
+ for (const auto &e : out_edges_range(g.start, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
if (contains(g[e].tops, top)) {
- curr.insert(v);
- }
- }
-
- for (u32 i = 0; i < MAX_FWD_LEN; i++) {
- if (curr.empty() || contains(curr, g.accept) ||
- contains(curr, g.acceptEod)) {
- break;
- }
-
- next.clear();
- CharReach cr;
-
- for (auto v : curr) {
- assert(!is_special(v, g));
- cr |= g[v].char_reach;
- insert(&next, adjacent_vertices(v, g));
- }
-
- assert(cr.any());
- look[i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
- map<s32, CharReach> &look) {
+ curr.insert(v);
+ }
+ }
+
+ for (u32 i = 0; i < MAX_FWD_LEN; i++) {
+ if (curr.empty() || contains(curr, g.accept) ||
+ contains(curr, g.acceptEod)) {
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+
+ for (auto v : curr) {
+ assert(!is_special(v, g));
+ cr |= g[v].char_reach;
+ insert(&next, adjacent_vertices(v, g));
+ }
+
+ assert(cr.any());
+ look[i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
+ map<s32, CharReach> &look) {
flat_set<NFAVertex> curr, next;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (contains(g[v].reports, report)) {
- curr.insert(v);
- }
- }
-
- for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
- if (curr.empty() || contains(curr, g.start) ||
- contains(curr, g.startDs)) {
- break;
- }
-
- next.clear();
- CharReach cr;
-
- for (auto v : curr) {
- assert(!is_special(v, g));
- cr |= g[v].char_reach;
- insert(&next, inv_adjacent_vertices(v, g));
- }
-
- assert(cr.any());
- look[0 - i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getForwardReach(const CastleProto &castle, u32 top,
- map<s32, CharReach> &look) {
- depth len = castle.repeats.at(top).bounds.min;
- len = min(len, depth(MAX_FWD_LEN));
- assert(len.is_finite());
-
- const CharReach &cr = castle.reach();
- for (u32 i = 0; i < len; i++) {
- look[i] |= cr;
- }
-}
-
-static
-void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
- map<s32, CharReach> &look) {
- depth min_depth = depth::infinity();
- for (const auto &m : castle.repeats) {
- const PureRepeat &pr = m.second;
- if (contains(pr.reports, report)) {
- min_depth = min(min_depth, pr.bounds.min);
- }
- }
-
- if (!min_depth.is_finite()) {
- assert(0);
- return;
- }
-
- const CharReach &cr = castle.reach();
- for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
- i++) {
- look[0 - i] |= cr;
- }
-}
-
-static
-void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
- if (rdfa.states.size() < 2) {
- return;
- }
-
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (contains(g[v].reports, report)) {
+ curr.insert(v);
+ }
+ }
+
+ for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
+ if (curr.empty() || contains(curr, g.start) ||
+ contains(curr, g.startDs)) {
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+
+ for (auto v : curr) {
+ assert(!is_special(v, g));
+ cr |= g[v].char_reach;
+ insert(&next, inv_adjacent_vertices(v, g));
+ }
+
+ assert(cr.any());
+ look[0 - i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getForwardReach(const CastleProto &castle, u32 top,
+ map<s32, CharReach> &look) {
+ depth len = castle.repeats.at(top).bounds.min;
+ len = min(len, depth(MAX_FWD_LEN));
+ assert(len.is_finite());
+
+ const CharReach &cr = castle.reach();
+ for (u32 i = 0; i < len; i++) {
+ look[i] |= cr;
+ }
+}
+
+static
+void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
+ map<s32, CharReach> &look) {
+ depth min_depth = depth::infinity();
+ for (const auto &m : castle.repeats) {
+ const PureRepeat &pr = m.second;
+ if (contains(pr.reports, report)) {
+ min_depth = min(min_depth, pr.bounds.min);
+ }
+ }
+
+ if (!min_depth.is_finite()) {
+ assert(0);
+ return;
+ }
+
+ const CharReach &cr = castle.reach();
+ for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
+ i++) {
+ look[0 - i] |= cr;
+ }
+}
+
+static
+void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
+ if (rdfa.states.size() < 2) {
+ return;
+ }
+
flat_set<dstate_id_t> curr, next;
- curr.insert(rdfa.start_anchored);
-
- for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
- next.clear();
- CharReach cr;
-
- for (const auto state_id : curr) {
- const dstate &ds = rdfa.states[state_id];
-
- if (!ds.reports.empty() || !ds.reports_eod.empty()) {
- return;
- }
-
- for (unsigned c = 0; c < N_CHARS; c++) {
- dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
- if (succ != DEAD_STATE) {
- cr.set(c);
- next.insert(succ);
- }
- }
- }
-
- assert(cr.any());
- look[i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getSuffixForwardReach(const suffix_id &suff, u32 top,
- map<s32, CharReach> &look) {
- if (suff.graph()) {
- getForwardReach(*suff.graph(), top, look);
- } else if (suff.castle()) {
- getForwardReach(*suff.castle(), top, look);
- } else if (suff.dfa()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*suff.dfa(), look);
- } else if (suff.haig()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*suff.haig(), look);
- }
-}
-
-static
-void getRoseForwardReach(const left_id &left, u32 top,
- map<s32, CharReach> &look) {
- if (left.graph()) {
- getForwardReach(*left.graph(), top, look);
- } else if (left.castle()) {
- getForwardReach(*left.castle(), top, look);
- } else if (left.dfa()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*left.dfa(), look);
- } else if (left.haig()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*left.haig(), look);
- }
-}
-
-static
-void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
- map<s32, CharReach> &look) {
- for (u32 i = 0; i < MAX_FWD_LEN; i++) {
- for (const auto &rlook : rose_look) {
- if (contains(rlook, i)) {
- look[i] |= rlook.at(i);
- } else {
- look[i].setall();
- }
- }
- }
-}
-
-static
-void findForwardReach(const RoseGraph &g, const RoseVertex v,
- map<s32, CharReach> &look) {
- if (!g[v].reports.empty()) {
- DEBUG_PRINTF("acceptor\n");
- return;
- }
-
- // Non-leaf vertices can pick up a mask per successor prefix rose
- // engine.
- vector<map<s32, CharReach>> rose_look;
- for (const auto &e : out_edges_range(v, g)) {
- RoseVertex t = target(e, g);
- if (!g[t].left) {
+ curr.insert(rdfa.start_anchored);
+
+ for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
+ next.clear();
+ CharReach cr;
+
+ for (const auto state_id : curr) {
+ const dstate &ds = rdfa.states[state_id];
+
+ if (!ds.reports.empty() || !ds.reports_eod.empty()) {
+ return;
+ }
+
+ for (unsigned c = 0; c < N_CHARS; c++) {
+ dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
+ if (succ != DEAD_STATE) {
+ cr.set(c);
+ next.insert(succ);
+ }
+ }
+ }
+
+ assert(cr.any());
+ look[i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getSuffixForwardReach(const suffix_id &suff, u32 top,
+ map<s32, CharReach> &look) {
+ if (suff.graph()) {
+ getForwardReach(*suff.graph(), top, look);
+ } else if (suff.castle()) {
+ getForwardReach(*suff.castle(), top, look);
+ } else if (suff.dfa()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*suff.dfa(), look);
+ } else if (suff.haig()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*suff.haig(), look);
+ }
+}
+
+static
+void getRoseForwardReach(const left_id &left, u32 top,
+ map<s32, CharReach> &look) {
+ if (left.graph()) {
+ getForwardReach(*left.graph(), top, look);
+ } else if (left.castle()) {
+ getForwardReach(*left.castle(), top, look);
+ } else if (left.dfa()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*left.dfa(), look);
+ } else if (left.haig()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*left.haig(), look);
+ }
+}
+
+static
+void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
+ map<s32, CharReach> &look) {
+ for (u32 i = 0; i < MAX_FWD_LEN; i++) {
+ for (const auto &rlook : rose_look) {
+ if (contains(rlook, i)) {
+ look[i] |= rlook.at(i);
+ } else {
+ look[i].setall();
+ }
+ }
+ }
+}
+
+static
+void findForwardReach(const RoseGraph &g, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ if (!g[v].reports.empty()) {
+ DEBUG_PRINTF("acceptor\n");
+ return;
+ }
+
+ // Non-leaf vertices can pick up a mask per successor prefix rose
+ // engine.
+ vector<map<s32, CharReach>> rose_look;
+ for (const auto &e : out_edges_range(v, g)) {
+ RoseVertex t = target(e, g);
+ if (!g[t].left) {
DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index);
- return;
- }
- rose_look.push_back(map<s32, CharReach>());
- getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
- }
-
- if (g[v].suffix) {
- DEBUG_PRINTF("suffix engine\n");
- rose_look.push_back(map<s32, CharReach>());
- getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
- }
-
- combineForwardMasks(rose_look, look);
-}
-
-static
-void findBackwardReach(const RoseGraph &g, const RoseVertex v,
- map<s32, CharReach> &look) {
- if (!g[v].left) {
- return;
- }
-
- DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
- g[v].left.lag);
-
- if (g[v].left.graph) {
- getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
- g[v].left.lag, look);
- } else if (g[v].left.castle) {
- getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
- g[v].left.lag, look);
- }
-
- // TODO: implement DFA variants if necessary.
-}
-
-static
-void normalise(map<s32, CharReach> &look) {
- // We can erase entries where the reach is "all characters".
- vector<s32> dead;
- for (const auto &m : look) {
- if (m.second.all()) {
- dead.push_back(m.first);
- }
- }
- erase_all(&look, dead);
-}
-
-namespace {
-
-struct LookPriority {
- explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
-
- bool operator()(s32 a, s32 b) const {
- const CharReach &a_reach = look.at(a);
- const CharReach &b_reach = look.at(b);
- if (a_reach.count() != b_reach.count()) {
- return a_reach.count() < b_reach.count();
- }
- return abs(a) < abs(b);
- }
-
-private:
- const map<s32, CharReach> &look;
-};
-
-} // namespace
-
-static
-bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
- for (const auto &m : look) {
- const CharReach &look_cr = m.second;
- if (!overlaps(look_cr, flood_cr)) {
- return false;
- }
- }
- DEBUG_PRINTF("look can't escape flood on %s\n",
- describeClass(flood_cr).c_str());
- return true;
-}
-
-static
-bool isFloodProne(const map<s32, CharReach> &look,
- const set<CharReach> &flood_reach) {
- if (flood_reach.empty()) {
- return false;
- }
-
- for (const CharReach &flood_cr : flood_reach) {
- if (isFloodProne(look, flood_cr)) {
- return true;
- }
- }
-
- return false;
-}
-
-static
-void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
- if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
- return;
- }
-
- DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
-
- // First, remove floods that we already can't escape; they shouldn't affect
- // the analysis below.
- for (auto it = flood_reach.begin(); it != flood_reach.end();) {
- if (isFloodProne(look, *it)) {
- DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
- describeClass(*it).c_str());
- flood_reach.erase(it++);
- } else {
- ++it;
- }
- }
-
- LookPriority cmp(look);
- priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
- for (const auto &m : look) {
- pq.push(m.first);
- }
-
- while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
- s32 d = pq.top();
- assert(contains(look, d));
- const CharReach cr(look[d]); // copy
- pq.pop();
-
- DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
- look.erase(d);
-
- // If removing this entry would result in us becoming flood_prone on a
- // particular flood_reach case, reinstate it and move on.
- if (isFloodProne(look, flood_reach)) {
- DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
- describeClass(cr).c_str());
- look.insert(make_pair(d, cr));
- }
- }
-
- while (!pq.empty()) {
- s32 d = pq.top();
- assert(contains(look, d));
- const CharReach cr(look[d]); // copy
- pq.pop();
-
- if (cr.count() < LOOKAROUND_WIDE_REACH) {
- continue;
- }
-
- DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
- look.erase(d);
-
- // If removing this entry would result in us becoming flood_prone on a
- // particular flood_reach case, reinstate it and move on.
- if (isFloodProne(look, flood_reach)) {
- DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
- describeClass(cr).c_str());
- look.insert(make_pair(d, cr));
- }
- }
-
- DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
-}
-
-static
-void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
- set<CharReach> &flood_reach) {
- for (u32 lit_id : tbi.g[v].literals) {
+ return;
+ }
+ rose_look.push_back(map<s32, CharReach>());
+ getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
+ }
+
+ if (g[v].suffix) {
+ DEBUG_PRINTF("suffix engine\n");
+ rose_look.push_back(map<s32, CharReach>());
+ getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
+ }
+
+ combineForwardMasks(rose_look, look);
+}
+
+static
+void findBackwardReach(const RoseGraph &g, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ if (!g[v].left) {
+ return;
+ }
+
+ DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
+ g[v].left.lag);
+
+ if (g[v].left.graph) {
+ getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
+ g[v].left.lag, look);
+ } else if (g[v].left.castle) {
+ getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
+ g[v].left.lag, look);
+ }
+
+ // TODO: implement DFA variants if necessary.
+}
+
+static
+void normalise(map<s32, CharReach> &look) {
+ // We can erase entries where the reach is "all characters".
+ vector<s32> dead;
+ for (const auto &m : look) {
+ if (m.second.all()) {
+ dead.push_back(m.first);
+ }
+ }
+ erase_all(&look, dead);
+}
+
+namespace {
+
+struct LookPriority {
+ explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
+
+ bool operator()(s32 a, s32 b) const {
+ const CharReach &a_reach = look.at(a);
+ const CharReach &b_reach = look.at(b);
+ if (a_reach.count() != b_reach.count()) {
+ return a_reach.count() < b_reach.count();
+ }
+ return abs(a) < abs(b);
+ }
+
+private:
+ const map<s32, CharReach> &look;
+};
+
+} // namespace
+
+static
+bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
+ for (const auto &m : look) {
+ const CharReach &look_cr = m.second;
+ if (!overlaps(look_cr, flood_cr)) {
+ return false;
+ }
+ }
+ DEBUG_PRINTF("look can't escape flood on %s\n",
+ describeClass(flood_cr).c_str());
+ return true;
+}
+
+static
+bool isFloodProne(const map<s32, CharReach> &look,
+ const set<CharReach> &flood_reach) {
+ if (flood_reach.empty()) {
+ return false;
+ }
+
+ for (const CharReach &flood_cr : flood_reach) {
+ if (isFloodProne(look, flood_cr)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
+ if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
+ return;
+ }
+
+ DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
+
+ // First, remove floods that we already can't escape; they shouldn't affect
+ // the analysis below.
+ for (auto it = flood_reach.begin(); it != flood_reach.end();) {
+ if (isFloodProne(look, *it)) {
+ DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
+ describeClass(*it).c_str());
+ flood_reach.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+
+ LookPriority cmp(look);
+ priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
+ for (const auto &m : look) {
+ pq.push(m.first);
+ }
+
+ while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
+ s32 d = pq.top();
+ assert(contains(look, d));
+ const CharReach cr(look[d]); // copy
+ pq.pop();
+
+ DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
+ look.erase(d);
+
+ // If removing this entry would result in us becoming flood_prone on a
+ // particular flood_reach case, reinstate it and move on.
+ if (isFloodProne(look, flood_reach)) {
+ DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
+ describeClass(cr).c_str());
+ look.insert(make_pair(d, cr));
+ }
+ }
+
+ while (!pq.empty()) {
+ s32 d = pq.top();
+ assert(contains(look, d));
+ const CharReach cr(look[d]); // copy
+ pq.pop();
+
+ if (cr.count() < LOOKAROUND_WIDE_REACH) {
+ continue;
+ }
+
+ DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
+ look.erase(d);
+
+ // If removing this entry would result in us becoming flood_prone on a
+ // particular flood_reach case, reinstate it and move on.
+ if (isFloodProne(look, flood_reach)) {
+ DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
+ describeClass(cr).c_str());
+ look.insert(make_pair(d, cr));
+ }
+ }
+
+ DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
+}
+
+static
+void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
+ set<CharReach> &flood_reach) {
+ for (u32 lit_id : tbi.g[v].literals) {
const ue2_literal &s = tbi.literals.at(lit_id).s;
- if (s.empty()) {
- continue;
- }
- if (is_flood(s)) {
- CharReach cr(*s.begin());
- DEBUG_PRINTF("flood-prone with reach: %s\n",
- describeClass(cr).c_str());
- flood_reach.insert(cr);
- }
- }
-}
-
+ if (s.empty()) {
+ continue;
+ }
+ if (is_flood(s)) {
+ CharReach cr(*s.begin());
+ DEBUG_PRINTF("flood-prone with reach: %s\n",
+ describeClass(cr).c_str());
+ flood_reach.insert(cr);
+ }
+ }
+}
+
namespace {
struct LookProto {
@@ -470,7 +470,7 @@ struct LookProto {
};
}
-static
+static
vector<LookProto> findLiteralReach(const rose_literal_id &lit) {
vector<LookProto> look;
look.reserve(lit.s.length());
@@ -490,15 +490,15 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
bool first = true;
vector<LookProto> look;
- for (u32 lit_id : build.g[v].literals) {
+ for (u32 lit_id : build.g[v].literals) {
const rose_literal_id &lit = build.literals.at(lit_id);
auto lit_look = findLiteralReach(lit);
-
+
if (first) {
look = std::move(lit_look);
first = false;
continue;
- }
+ }
// Erase elements from look with keys not in lit_look. Where a key is
// in both maps, union its reach with the lookaround.
@@ -523,34 +523,34 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
++jt;
}
}
- }
-
- return look;
-}
-
-/**
- * Trim lookaround checks from the prefix that overlap with the literals
- * themselves.
- */
-static
-void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
- map<s32, CharReach> &look) {
- DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
-
- for (const auto &m : findLiteralReach(build, v)) {
+ }
+
+ return look;
+}
+
+/**
+ * Trim lookaround checks from the prefix that overlap with the literals
+ * themselves.
+ */
+static
+void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
+
+ for (const auto &m : findLiteralReach(build, v)) {
auto it = look.find(m.offset);
- if (it == end(look)) {
- continue;
- }
+ if (it == end(look)) {
+ continue;
+ }
if (m.reach.isSubsetOf(it->second)) {
- DEBUG_PRINTF("can trim entry at %d\n", it->first);
- look.erase(it);
- }
- }
-
- DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
-}
-
+ DEBUG_PRINTF("can trim entry at %d\n", it->first);
+ look.erase(it);
+ }
+ }
+
+ DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
+}
+
static
void normaliseLeftfix(map<s32, CharReach> &look) {
// We can erase entries where the reach is "all characters", except for the
@@ -621,44 +621,44 @@ void transToLookaround(const vector<map<s32, CharReach>> &looks,
}
}
-void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
- vector<LookEntry> &lookaround) {
- lookaround.clear();
-
- const RoseGraph &g = tbi.g;
-
- map<s32, CharReach> look;
- findBackwardReach(g, v, look);
- findForwardReach(g, v, look);
- trimLiterals(tbi, v, look);
-
- if (look.empty()) {
- return;
- }
-
- normalise(look);
-
- if (look.empty()) {
- return;
- }
-
- set<CharReach> flood_reach;
- findFloodReach(tbi, v, flood_reach);
- reduce(look, flood_reach);
-
- if (look.empty()) {
- return;
- }
-
- DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
- lookaround.reserve(look.size());
- for (const auto &m : look) {
- s8 offset = verify_s8(m.first);
- lookaround.emplace_back(offset, m.second);
- }
-}
-
-static
+void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
+ vector<LookEntry> &lookaround) {
+ lookaround.clear();
+
+ const RoseGraph &g = tbi.g;
+
+ map<s32, CharReach> look;
+ findBackwardReach(g, v, look);
+ findForwardReach(g, v, look);
+ trimLiterals(tbi, v, look);
+
+ if (look.empty()) {
+ return;
+ }
+
+ normalise(look);
+
+ if (look.empty()) {
+ return;
+ }
+
+ set<CharReach> flood_reach;
+ findFloodReach(tbi, v, flood_reach);
+ reduce(look, flood_reach);
+
+ if (look.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
+ lookaround.reserve(look.size());
+ for (const auto &m : look) {
+ s8 offset = verify_s8(m.first);
+ lookaround.emplace_back(offset, m.second);
+ }
+}
+
+static
bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
u32 bucket_size) {
set<u32> bucket;
@@ -685,25 +685,25 @@ bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
bucket.insert(hi_lo);
}
}
- }
+ }
DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size());
return bucket.size() <= bucket_size;
}
-
+
static
bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
vector<map<s32, CharReach>> &looks) {
if (!isAcyclic(g)) {
DEBUG_PRINTF("contains back-edge\n");
- return false;
- }
-
+ return false;
+ }
+
// Must be floating chains wired to startDs.
if (!isFloating(g)) {
DEBUG_PRINTF("not a floating start\n");
- return false;
- }
-
+ return false;
+ }
+
vector<NFAVertex> curr;
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (v == g.start || v == g.startDs) {
@@ -737,16 +737,16 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
size_t curr_size = curr.size();
if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
DEBUG_PRINTF("range is larger than 16 in multi-path\n");
- return false;
- }
-
+ return false;
+ }
+
for (size_t idx = 0; idx < curr_size; idx++) {
NFAVertex v = curr[idx];
if (v == g.startDs) {
continue;
}
assert(!is_special(v, g));
-
+
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (u == g.start || u == g.startDs) {
curr[idx] = g.startDs;
@@ -792,88 +792,88 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
u32 bucket_size = total_len > 32 ? 8 : 16;
if (!checkShuftiBuckets(looks, bucket_size)) {
DEBUG_PRINTF("shufti has too many buckets\n");
- return false;
- }
+ return false;
+ }
}
-
+
assert(!looks.empty());
if (looks.size() == 1) {
DEBUG_PRINTF("single lookaround\n");
} else {
DEBUG_PRINTF("multi-path lookaround\n");
- }
- DEBUG_PRINTF("done\n");
- return true;
-}
-
-bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
+ }
+ DEBUG_PRINTF("done\n");
+ return true;
+}
+
+bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
vector<vector<LookEntry>> &lookaround) {
- lookaround.clear();
-
- const RoseGraph &g = build.g;
- const left_id leftfix(g[v].left);
-
- if (!contains(build.transient, leftfix)) {
- DEBUG_PRINTF("not transient\n");
- return false;
- }
-
- if (!leftfix.graph()) {
- DEBUG_PRINTF("only supported for graphs so far\n");
- return false;
- }
-
+ lookaround.clear();
+
+ const RoseGraph &g = build.g;
+ const left_id leftfix(g[v].left);
+
+ if (!contains(build.transient, leftfix)) {
+ DEBUG_PRINTF("not transient\n");
+ return false;
+ }
+
+ if (!leftfix.graph()) {
+ DEBUG_PRINTF("only supported for graphs so far\n");
+ return false;
+ }
+
vector<map<s32, CharReach>> looks;
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
g[v].left.lag, looks)) {
DEBUG_PRINTF("graph has loop or too large\n");
- return false;
- }
-
+ return false;
+ }
+
if (!trimMultipathLeftfix(build, v, looks)) {
- return false;
- }
+ return false;
+ }
transToLookaround(looks, lookaround);
-
+
return !lookaround.empty();
-}
-
-void mergeLookaround(vector<LookEntry> &lookaround,
- const vector<LookEntry> &more_lookaround) {
- if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
- DEBUG_PRINTF("big enough!\n");
- return;
- }
-
- // Don't merge lookarounds at offsets we already have entries for.
+}
+
+void mergeLookaround(vector<LookEntry> &lookaround,
+ const vector<LookEntry> &more_lookaround) {
+ if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
+ DEBUG_PRINTF("big enough!\n");
+ return;
+ }
+
+ // Don't merge lookarounds at offsets we already have entries for.
flat_set<s8> offsets;
- for (const auto &e : lookaround) {
- offsets.insert(e.offset);
- }
-
- map<s32, CharReach> more;
- LookPriority cmp(more);
- priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
- for (const auto &e : more_lookaround) {
- if (!contains(offsets, e.offset)) {
- more.emplace(e.offset, e.reach);
- pq.push(e.offset);
- }
- }
-
- while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
- const s32 offset = pq.top();
- pq.pop();
- const auto &cr = more.at(offset);
- DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
- lookaround.emplace_back(verify_s8(offset), cr);
- }
-
- // Order by offset.
- sort(begin(lookaround), end(lookaround),
- [](const LookEntry &a, const LookEntry &b) {
- return a.offset < b.offset;
- });
-}
-
-} // namespace ue2
+ for (const auto &e : lookaround) {
+ offsets.insert(e.offset);
+ }
+
+ map<s32, CharReach> more;
+ LookPriority cmp(more);
+ priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
+ for (const auto &e : more_lookaround) {
+ if (!contains(offsets, e.offset)) {
+ more.emplace(e.offset, e.reach);
+ pq.push(e.offset);
+ }
+ }
+
+ while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
+ const s32 offset = pq.top();
+ pq.pop();
+ const auto &cr = more.at(offset);
+ DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
+ lookaround.emplace_back(verify_s8(offset), cr);
+ }
+
+ // Order by offset.
+ sort(begin(lookaround), end(lookaround),
+ [](const LookEntry &a, const LookEntry &b) {
+ return a.offset < b.offset;
+ });
+}
+
+} // namespace ue2