diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch) | |
tree | d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp | |
parent | 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff) | |
download | ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp | 1176 |
1 files changed, 588 insertions, 588 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp index d0540d79b0..10e1cbfa5f 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp @@ -1,68 +1,68 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose compile-time analysis for lookaround masks. - */ -#include "rose_build_lookaround.h" - -#include "rose_build_impl.h" -#include "nfa/castlecompile.h" -#include "nfa/goughcompile.h" -#include "nfa/rdfa.h" -#include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_util.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose compile-time analysis for lookaround masks. + */ +#include "rose_build_lookaround.h" + +#include "rose_build_impl.h" +#include "nfa/castlecompile.h" +#include "nfa/goughcompile.h" +#include "nfa/rdfa.h" +#include "nfagraph/ng_repeat.h" +#include "nfagraph/ng_util.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" #include "util/flat_containers.h" -#include "util/verify_types.h" - -#include <cstdlib> -#include <queue> +#include "util/verify_types.h" + +#include <cstdlib> +#include <queue> #include <sstream> - -using namespace std; - -namespace ue2 { - -/** \brief Max search distance for reachability in front of a role. */ -static const u32 MAX_FWD_LEN = 64; - -/** \brief Max search distance for reachability behind a role. */ -static const u32 MAX_BACK_LEN = 64; - -/** \brief Max lookaround entries for a role. */ + +using namespace std; + +namespace ue2 { + +/** \brief Max search distance for reachability in front of a role. */ +static const u32 MAX_FWD_LEN = 64; + +/** \brief Max search distance for reachability behind a role. */ +static const u32 MAX_BACK_LEN = 64; + +/** \brief Max lookaround entries for a role. */ static const u32 MAX_LOOKAROUND_ENTRIES = 32; - -/** \brief We would rather have lookarounds with smaller reach than this. */ -static const u32 LOOKAROUND_WIDE_REACH = 200; - + +/** \brief We would rather have lookarounds with smaller reach than this. */ +static const u32 LOOKAROUND_WIDE_REACH = 200; + #if defined(DEBUG) || defined(DUMP_SUPPORT) static UNUSED string dump(const map<s32, CharReach> &look) { @@ -77,389 +77,389 @@ string dump(const map<s32, CharReach> &look) { } #endif -static -void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) { +static +void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) { flat_set<NFAVertex> curr, next; - - // Consider only successors of start with the required top. - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } + + // Consider only successors of start with the required top. + for (const auto &e : out_edges_range(g.start, g)) { + NFAVertex v = target(e, g); + if (v == g.startDs) { + continue; + } if (contains(g[e].tops, top)) { - curr.insert(v); - } - } - - for (u32 i = 0; i < MAX_FWD_LEN; i++) { - if (curr.empty() || contains(curr, g.accept) || - contains(curr, g.acceptEod)) { - break; - } - - next.clear(); - CharReach cr; - - for (auto v : curr) { - assert(!is_special(v, g)); - cr |= g[v].char_reach; - insert(&next, adjacent_vertices(v, g)); - } - - assert(cr.any()); - look[i] |= cr; - curr.swap(next); - } -} - -static -void getBackwardReach(const NGHolder &g, ReportID report, u32 lag, - map<s32, CharReach> &look) { + curr.insert(v); + } + } + + for (u32 i = 0; i < MAX_FWD_LEN; i++) { + if (curr.empty() || contains(curr, g.accept) || + contains(curr, g.acceptEod)) { + break; + } + + next.clear(); + CharReach cr; + + for (auto v : curr) { + assert(!is_special(v, g)); + cr |= g[v].char_reach; + insert(&next, adjacent_vertices(v, g)); + } + + assert(cr.any()); + look[i] |= cr; + curr.swap(next); + } +} + +static +void getBackwardReach(const NGHolder &g, ReportID report, u32 lag, + map<s32, CharReach> &look) { flat_set<NFAVertex> curr, next; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (contains(g[v].reports, report)) { - curr.insert(v); - } - } - - for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) { - if (curr.empty() || contains(curr, g.start) || - contains(curr, g.startDs)) { - break; - } - - next.clear(); - CharReach cr; - - for (auto v : curr) { - assert(!is_special(v, g)); - cr |= g[v].char_reach; - insert(&next, inv_adjacent_vertices(v, g)); - } - - assert(cr.any()); - look[0 - i] |= cr; - curr.swap(next); - } -} - -static -void getForwardReach(const CastleProto &castle, u32 top, - map<s32, CharReach> &look) { - depth len = castle.repeats.at(top).bounds.min; - len = min(len, depth(MAX_FWD_LEN)); - assert(len.is_finite()); - - const CharReach &cr = castle.reach(); - for (u32 i = 0; i < len; i++) { - look[i] |= cr; - } -} - -static -void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag, - map<s32, CharReach> &look) { - depth min_depth = depth::infinity(); - for (const auto &m : castle.repeats) { - const PureRepeat &pr = m.second; - if (contains(pr.reports, report)) { - min_depth = min(min_depth, pr.bounds.min); - } - } - - if (!min_depth.is_finite()) { - assert(0); - return; - } - - const CharReach &cr = castle.reach(); - for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN); - i++) { - look[0 - i] |= cr; - } -} - -static -void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) { - if (rdfa.states.size() < 2) { - return; - } - + + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (contains(g[v].reports, report)) { + curr.insert(v); + } + } + + for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) { + if (curr.empty() || contains(curr, g.start) || + contains(curr, g.startDs)) { + break; + } + + next.clear(); + CharReach cr; + + for (auto v : curr) { + assert(!is_special(v, g)); + cr |= g[v].char_reach; + insert(&next, inv_adjacent_vertices(v, g)); + } + + assert(cr.any()); + look[0 - i] |= cr; + curr.swap(next); + } +} + +static +void getForwardReach(const CastleProto &castle, u32 top, + map<s32, CharReach> &look) { + depth len = castle.repeats.at(top).bounds.min; + len = min(len, depth(MAX_FWD_LEN)); + assert(len.is_finite()); + + const CharReach &cr = castle.reach(); + for (u32 i = 0; i < len; i++) { + look[i] |= cr; + } +} + +static +void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag, + map<s32, CharReach> &look) { + depth min_depth = depth::infinity(); + for (const auto &m : castle.repeats) { + const PureRepeat &pr = m.second; + if (contains(pr.reports, report)) { + min_depth = min(min_depth, pr.bounds.min); + } + } + + if (!min_depth.is_finite()) { + assert(0); + return; + } + + const CharReach &cr = castle.reach(); + for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN); + i++) { + look[0 - i] |= cr; + } +} + +static +void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) { + if (rdfa.states.size() < 2) { + return; + } + flat_set<dstate_id_t> curr, next; - curr.insert(rdfa.start_anchored); - - for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) { - next.clear(); - CharReach cr; - - for (const auto state_id : curr) { - const dstate &ds = rdfa.states[state_id]; - - if (!ds.reports.empty() || !ds.reports_eod.empty()) { - return; - } - - for (unsigned c = 0; c < N_CHARS; c++) { - dstate_id_t succ = ds.next[rdfa.alpha_remap[c]]; - if (succ != DEAD_STATE) { - cr.set(c); - next.insert(succ); - } - } - } - - assert(cr.any()); - look[i] |= cr; - curr.swap(next); - } -} - -static -void getSuffixForwardReach(const suffix_id &suff, u32 top, - map<s32, CharReach> &look) { - if (suff.graph()) { - getForwardReach(*suff.graph(), top, look); - } else if (suff.castle()) { - getForwardReach(*suff.castle(), top, look); - } else if (suff.dfa()) { - assert(top == 0); // DFA isn't multi-top capable. - getForwardReach(*suff.dfa(), look); - } else if (suff.haig()) { - assert(top == 0); // DFA isn't multi-top capable. - getForwardReach(*suff.haig(), look); - } -} - -static -void getRoseForwardReach(const left_id &left, u32 top, - map<s32, CharReach> &look) { - if (left.graph()) { - getForwardReach(*left.graph(), top, look); - } else if (left.castle()) { - getForwardReach(*left.castle(), top, look); - } else if (left.dfa()) { - assert(top == 0); // DFA isn't multi-top capable. - getForwardReach(*left.dfa(), look); - } else if (left.haig()) { - assert(top == 0); // DFA isn't multi-top capable. - getForwardReach(*left.haig(), look); - } -} - -static -void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look, - map<s32, CharReach> &look) { - for (u32 i = 0; i < MAX_FWD_LEN; i++) { - for (const auto &rlook : rose_look) { - if (contains(rlook, i)) { - look[i] |= rlook.at(i); - } else { - look[i].setall(); - } - } - } -} - -static -void findForwardReach(const RoseGraph &g, const RoseVertex v, - map<s32, CharReach> &look) { - if (!g[v].reports.empty()) { - DEBUG_PRINTF("acceptor\n"); - return; - } - - // Non-leaf vertices can pick up a mask per successor prefix rose - // engine. - vector<map<s32, CharReach>> rose_look; - for (const auto &e : out_edges_range(v, g)) { - RoseVertex t = target(e, g); - if (!g[t].left) { + curr.insert(rdfa.start_anchored); + + for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) { + next.clear(); + CharReach cr; + + for (const auto state_id : curr) { + const dstate &ds = rdfa.states[state_id]; + + if (!ds.reports.empty() || !ds.reports_eod.empty()) { + return; + } + + for (unsigned c = 0; c < N_CHARS; c++) { + dstate_id_t succ = ds.next[rdfa.alpha_remap[c]]; + if (succ != DEAD_STATE) { + cr.set(c); + next.insert(succ); + } + } + } + + assert(cr.any()); + look[i] |= cr; + curr.swap(next); + } +} + +static +void getSuffixForwardReach(const suffix_id &suff, u32 top, + map<s32, CharReach> &look) { + if (suff.graph()) { + getForwardReach(*suff.graph(), top, look); + } else if (suff.castle()) { + getForwardReach(*suff.castle(), top, look); + } else if (suff.dfa()) { + assert(top == 0); // DFA isn't multi-top capable. + getForwardReach(*suff.dfa(), look); + } else if (suff.haig()) { + assert(top == 0); // DFA isn't multi-top capable. + getForwardReach(*suff.haig(), look); + } +} + +static +void getRoseForwardReach(const left_id &left, u32 top, + map<s32, CharReach> &look) { + if (left.graph()) { + getForwardReach(*left.graph(), top, look); + } else if (left.castle()) { + getForwardReach(*left.castle(), top, look); + } else if (left.dfa()) { + assert(top == 0); // DFA isn't multi-top capable. + getForwardReach(*left.dfa(), look); + } else if (left.haig()) { + assert(top == 0); // DFA isn't multi-top capable. + getForwardReach(*left.haig(), look); + } +} + +static +void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look, + map<s32, CharReach> &look) { + for (u32 i = 0; i < MAX_FWD_LEN; i++) { + for (const auto &rlook : rose_look) { + if (contains(rlook, i)) { + look[i] |= rlook.at(i); + } else { + look[i].setall(); + } + } + } +} + +static +void findForwardReach(const RoseGraph &g, const RoseVertex v, + map<s32, CharReach> &look) { + if (!g[v].reports.empty()) { + DEBUG_PRINTF("acceptor\n"); + return; + } + + // Non-leaf vertices can pick up a mask per successor prefix rose + // engine. + vector<map<s32, CharReach>> rose_look; + for (const auto &e : out_edges_range(v, g)) { + RoseVertex t = target(e, g); + if (!g[t].left) { DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index); - return; - } - rose_look.push_back(map<s32, CharReach>()); - getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back()); - } - - if (g[v].suffix) { - DEBUG_PRINTF("suffix engine\n"); - rose_look.push_back(map<s32, CharReach>()); - getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back()); - } - - combineForwardMasks(rose_look, look); -} - -static -void findBackwardReach(const RoseGraph &g, const RoseVertex v, - map<s32, CharReach> &look) { - if (!g[v].left) { - return; - } - - DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report, - g[v].left.lag); - - if (g[v].left.graph) { - getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report, - g[v].left.lag, look); - } else if (g[v].left.castle) { - getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report, - g[v].left.lag, look); - } - - // TODO: implement DFA variants if necessary. -} - -static -void normalise(map<s32, CharReach> &look) { - // We can erase entries where the reach is "all characters". - vector<s32> dead; - for (const auto &m : look) { - if (m.second.all()) { - dead.push_back(m.first); - } - } - erase_all(&look, dead); -} - -namespace { - -struct LookPriority { - explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {} - - bool operator()(s32 a, s32 b) const { - const CharReach &a_reach = look.at(a); - const CharReach &b_reach = look.at(b); - if (a_reach.count() != b_reach.count()) { - return a_reach.count() < b_reach.count(); - } - return abs(a) < abs(b); - } - -private: - const map<s32, CharReach> &look; -}; - -} // namespace - -static -bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) { - for (const auto &m : look) { - const CharReach &look_cr = m.second; - if (!overlaps(look_cr, flood_cr)) { - return false; - } - } - DEBUG_PRINTF("look can't escape flood on %s\n", - describeClass(flood_cr).c_str()); - return true; -} - -static -bool isFloodProne(const map<s32, CharReach> &look, - const set<CharReach> &flood_reach) { - if (flood_reach.empty()) { - return false; - } - - for (const CharReach &flood_cr : flood_reach) { - if (isFloodProne(look, flood_cr)) { - return true; - } - } - - return false; -} - -static -void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) { - if (look.size() <= MAX_LOOKAROUND_ENTRIES) { - return; - } - - DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str()); - - // First, remove floods that we already can't escape; they shouldn't affect - // the analysis below. - for (auto it = flood_reach.begin(); it != flood_reach.end();) { - if (isFloodProne(look, *it)) { - DEBUG_PRINTF("removing inescapable flood on %s from analysis\n", - describeClass(*it).c_str()); - flood_reach.erase(it++); - } else { - ++it; - } - } - - LookPriority cmp(look); - priority_queue<s32, vector<s32>, LookPriority> pq(cmp); - for (const auto &m : look) { - pq.push(m.first); - } - - while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) { - s32 d = pq.top(); - assert(contains(look, d)); - const CharReach cr(look[d]); // copy - pq.pop(); - - DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str()); - look.erase(d); - - // If removing this entry would result in us becoming flood_prone on a - // particular flood_reach case, reinstate it and move on. - if (isFloodProne(look, flood_reach)) { - DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d, - describeClass(cr).c_str()); - look.insert(make_pair(d, cr)); - } - } - - while (!pq.empty()) { - s32 d = pq.top(); - assert(contains(look, d)); - const CharReach cr(look[d]); // copy - pq.pop(); - - if (cr.count() < LOOKAROUND_WIDE_REACH) { - continue; - } - - DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str()); - look.erase(d); - - // If removing this entry would result in us becoming flood_prone on a - // particular flood_reach case, reinstate it and move on. - if (isFloodProne(look, flood_reach)) { - DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d, - describeClass(cr).c_str()); - look.insert(make_pair(d, cr)); - } - } - - DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str()); -} - -static -void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, - set<CharReach> &flood_reach) { - for (u32 lit_id : tbi.g[v].literals) { + return; + } + rose_look.push_back(map<s32, CharReach>()); + getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back()); + } + + if (g[v].suffix) { + DEBUG_PRINTF("suffix engine\n"); + rose_look.push_back(map<s32, CharReach>()); + getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back()); + } + + combineForwardMasks(rose_look, look); +} + +static +void findBackwardReach(const RoseGraph &g, const RoseVertex v, + map<s32, CharReach> &look) { + if (!g[v].left) { + return; + } + + DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report, + g[v].left.lag); + + if (g[v].left.graph) { + getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report, + g[v].left.lag, look); + } else if (g[v].left.castle) { + getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report, + g[v].left.lag, look); + } + + // TODO: implement DFA variants if necessary. +} + +static +void normalise(map<s32, CharReach> &look) { + // We can erase entries where the reach is "all characters". + vector<s32> dead; + for (const auto &m : look) { + if (m.second.all()) { + dead.push_back(m.first); + } + } + erase_all(&look, dead); +} + +namespace { + +struct LookPriority { + explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {} + + bool operator()(s32 a, s32 b) const { + const CharReach &a_reach = look.at(a); + const CharReach &b_reach = look.at(b); + if (a_reach.count() != b_reach.count()) { + return a_reach.count() < b_reach.count(); + } + return abs(a) < abs(b); + } + +private: + const map<s32, CharReach> &look; +}; + +} // namespace + +static +bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) { + for (const auto &m : look) { + const CharReach &look_cr = m.second; + if (!overlaps(look_cr, flood_cr)) { + return false; + } + } + DEBUG_PRINTF("look can't escape flood on %s\n", + describeClass(flood_cr).c_str()); + return true; +} + +static +bool isFloodProne(const map<s32, CharReach> &look, + const set<CharReach> &flood_reach) { + if (flood_reach.empty()) { + return false; + } + + for (const CharReach &flood_cr : flood_reach) { + if (isFloodProne(look, flood_cr)) { + return true; + } + } + + return false; +} + +static +void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) { + if (look.size() <= MAX_LOOKAROUND_ENTRIES) { + return; + } + + DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str()); + + // First, remove floods that we already can't escape; they shouldn't affect + // the analysis below. + for (auto it = flood_reach.begin(); it != flood_reach.end();) { + if (isFloodProne(look, *it)) { + DEBUG_PRINTF("removing inescapable flood on %s from analysis\n", + describeClass(*it).c_str()); + flood_reach.erase(it++); + } else { + ++it; + } + } + + LookPriority cmp(look); + priority_queue<s32, vector<s32>, LookPriority> pq(cmp); + for (const auto &m : look) { + pq.push(m.first); + } + + while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) { + s32 d = pq.top(); + assert(contains(look, d)); + const CharReach cr(look[d]); // copy + pq.pop(); + + DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str()); + look.erase(d); + + // If removing this entry would result in us becoming flood_prone on a + // particular flood_reach case, reinstate it and move on. + if (isFloodProne(look, flood_reach)) { + DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d, + describeClass(cr).c_str()); + look.insert(make_pair(d, cr)); + } + } + + while (!pq.empty()) { + s32 d = pq.top(); + assert(contains(look, d)); + const CharReach cr(look[d]); // copy + pq.pop(); + + if (cr.count() < LOOKAROUND_WIDE_REACH) { + continue; + } + + DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str()); + look.erase(d); + + // If removing this entry would result in us becoming flood_prone on a + // particular flood_reach case, reinstate it and move on. + if (isFloodProne(look, flood_reach)) { + DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d, + describeClass(cr).c_str()); + look.insert(make_pair(d, cr)); + } + } + + DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str()); +} + +static +void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, + set<CharReach> &flood_reach) { + for (u32 lit_id : tbi.g[v].literals) { const ue2_literal &s = tbi.literals.at(lit_id).s; - if (s.empty()) { - continue; - } - if (is_flood(s)) { - CharReach cr(*s.begin()); - DEBUG_PRINTF("flood-prone with reach: %s\n", - describeClass(cr).c_str()); - flood_reach.insert(cr); - } - } -} - + if (s.empty()) { + continue; + } + if (is_flood(s)) { + CharReach cr(*s.begin()); + DEBUG_PRINTF("flood-prone with reach: %s\n", + describeClass(cr).c_str()); + flood_reach.insert(cr); + } + } +} + namespace { struct LookProto { @@ -470,7 +470,7 @@ struct LookProto { }; } -static +static vector<LookProto> findLiteralReach(const rose_literal_id &lit) { vector<LookProto> look; look.reserve(lit.s.length()); @@ -490,15 +490,15 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build, bool first = true; vector<LookProto> look; - for (u32 lit_id : build.g[v].literals) { + for (u32 lit_id : build.g[v].literals) { const rose_literal_id &lit = build.literals.at(lit_id); auto lit_look = findLiteralReach(lit); - + if (first) { look = std::move(lit_look); first = false; continue; - } + } // Erase elements from look with keys not in lit_look. Where a key is // in both maps, union its reach with the lookaround. @@ -523,34 +523,34 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build, ++jt; } } - } - - return look; -} - -/** - * Trim lookaround checks from the prefix that overlap with the literals - * themselves. - */ -static -void trimLiterals(const RoseBuildImpl &build, const RoseVertex v, - map<s32, CharReach> &look) { - DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str()); - - for (const auto &m : findLiteralReach(build, v)) { + } + + return look; +} + +/** + * Trim lookaround checks from the prefix that overlap with the literals + * themselves. + */ +static +void trimLiterals(const RoseBuildImpl &build, const RoseVertex v, + map<s32, CharReach> &look) { + DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str()); + + for (const auto &m : findLiteralReach(build, v)) { auto it = look.find(m.offset); - if (it == end(look)) { - continue; - } + if (it == end(look)) { + continue; + } if (m.reach.isSubsetOf(it->second)) { - DEBUG_PRINTF("can trim entry at %d\n", it->first); - look.erase(it); - } - } - - DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str()); -} - + DEBUG_PRINTF("can trim entry at %d\n", it->first); + look.erase(it); + } + } + + DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str()); +} + static void normaliseLeftfix(map<s32, CharReach> &look) { // We can erase entries where the reach is "all characters", except for the @@ -621,44 +621,44 @@ void transToLookaround(const vector<map<s32, CharReach>> &looks, } } -void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, - vector<LookEntry> &lookaround) { - lookaround.clear(); - - const RoseGraph &g = tbi.g; - - map<s32, CharReach> look; - findBackwardReach(g, v, look); - findForwardReach(g, v, look); - trimLiterals(tbi, v, look); - - if (look.empty()) { - return; - } - - normalise(look); - - if (look.empty()) { - return; - } - - set<CharReach> flood_reach; - findFloodReach(tbi, v, flood_reach); - reduce(look, flood_reach); - - if (look.empty()) { - return; - } - - DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str()); - lookaround.reserve(look.size()); - for (const auto &m : look) { - s8 offset = verify_s8(m.first); - lookaround.emplace_back(offset, m.second); - } -} - -static +void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, + vector<LookEntry> &lookaround) { + lookaround.clear(); + + const RoseGraph &g = tbi.g; + + map<s32, CharReach> look; + findBackwardReach(g, v, look); + findForwardReach(g, v, look); + trimLiterals(tbi, v, look); + + if (look.empty()) { + return; + } + + normalise(look); + + if (look.empty()) { + return; + } + + set<CharReach> flood_reach; + findFloodReach(tbi, v, flood_reach); + reduce(look, flood_reach); + + if (look.empty()) { + return; + } + + DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str()); + lookaround.reserve(look.size()); + for (const auto &m : look) { + s8 offset = verify_s8(m.first); + lookaround.emplace_back(offset, m.second); + } +} + +static bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks, u32 bucket_size) { set<u32> bucket; @@ -685,25 +685,25 @@ bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks, bucket.insert(hi_lo); } } - } + } DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size()); return bucket.size() <= bucket_size; } - + static bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag, vector<map<s32, CharReach>> &looks) { if (!isAcyclic(g)) { DEBUG_PRINTF("contains back-edge\n"); - return false; - } - + return false; + } + // Must be floating chains wired to startDs. if (!isFloating(g)) { DEBUG_PRINTF("not a floating start\n"); - return false; - } - + return false; + } + vector<NFAVertex> curr; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (v == g.start || v == g.startDs) { @@ -737,16 +737,16 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag, size_t curr_size = curr.size(); if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) { DEBUG_PRINTF("range is larger than 16 in multi-path\n"); - return false; - } - + return false; + } + for (size_t idx = 0; idx < curr_size; idx++) { NFAVertex v = curr[idx]; if (v == g.startDs) { continue; } assert(!is_special(v, g)); - + for (auto u : inv_adjacent_vertices_range(v, g)) { if (u == g.start || u == g.startDs) { curr[idx] = g.startDs; @@ -792,88 +792,88 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag, u32 bucket_size = total_len > 32 ? 8 : 16; if (!checkShuftiBuckets(looks, bucket_size)) { DEBUG_PRINTF("shufti has too many buckets\n"); - return false; - } + return false; + } } - + assert(!looks.empty()); if (looks.size() == 1) { DEBUG_PRINTF("single lookaround\n"); } else { DEBUG_PRINTF("multi-path lookaround\n"); - } - DEBUG_PRINTF("done\n"); - return true; -} - -bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, + } + DEBUG_PRINTF("done\n"); + return true; +} + +bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, vector<vector<LookEntry>> &lookaround) { - lookaround.clear(); - - const RoseGraph &g = build.g; - const left_id leftfix(g[v].left); - - if (!contains(build.transient, leftfix)) { - DEBUG_PRINTF("not transient\n"); - return false; - } - - if (!leftfix.graph()) { - DEBUG_PRINTF("only supported for graphs so far\n"); - return false; - } - + lookaround.clear(); + + const RoseGraph &g = build.g; + const left_id leftfix(g[v].left); + + if (!contains(build.transient, leftfix)) { + DEBUG_PRINTF("not transient\n"); + return false; + } + + if (!leftfix.graph()) { + DEBUG_PRINTF("only supported for graphs so far\n"); + return false; + } + vector<map<s32, CharReach>> looks; if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report, g[v].left.lag, looks)) { DEBUG_PRINTF("graph has loop or too large\n"); - return false; - } - + return false; + } + if (!trimMultipathLeftfix(build, v, looks)) { - return false; - } + return false; + } transToLookaround(looks, lookaround); - + return !lookaround.empty(); -} - -void mergeLookaround(vector<LookEntry> &lookaround, - const vector<LookEntry> &more_lookaround) { - if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) { - DEBUG_PRINTF("big enough!\n"); - return; - } - - // Don't merge lookarounds at offsets we already have entries for. +} + +void mergeLookaround(vector<LookEntry> &lookaround, + const vector<LookEntry> &more_lookaround) { + if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) { + DEBUG_PRINTF("big enough!\n"); + return; + } + + // Don't merge lookarounds at offsets we already have entries for. flat_set<s8> offsets; - for (const auto &e : lookaround) { - offsets.insert(e.offset); - } - - map<s32, CharReach> more; - LookPriority cmp(more); - priority_queue<s32, vector<s32>, LookPriority> pq(cmp); - for (const auto &e : more_lookaround) { - if (!contains(offsets, e.offset)) { - more.emplace(e.offset, e.reach); - pq.push(e.offset); - } - } - - while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) { - const s32 offset = pq.top(); - pq.pop(); - const auto &cr = more.at(offset); - DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str()); - lookaround.emplace_back(verify_s8(offset), cr); - } - - // Order by offset. - sort(begin(lookaround), end(lookaround), - [](const LookEntry &a, const LookEntry &b) { - return a.offset < b.offset; - }); -} - -} // namespace ue2 + for (const auto &e : lookaround) { + offsets.insert(e.offset); + } + + map<s32, CharReach> more; + LookPriority cmp(more); + priority_queue<s32, vector<s32>, LookPriority> pq(cmp); + for (const auto &e : more_lookaround) { + if (!contains(offsets, e.offset)) { + more.emplace(e.offset, e.reach); + pq.push(e.offset); + } + } + + while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) { + const s32 offset = pq.top(); + pq.pop(); + const auto &cr = more.at(offset); + DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str()); + lookaround.emplace_back(verify_s8(offset), cr); + } + + // Order by offset. + sort(begin(lookaround), end(lookaround), + [](const LookEntry &a, const LookEntry &b) { + return a.offset < b.offset; + }); +} + +} // namespace ue2 |