aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
diff options
context:
space:
mode:
authorIvan Blinkov <ivan@blinkov.ru>2022-02-10 16:47:11 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:11 +0300
commit5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
parent1aeb9a455974457866f78722ad98114bafc84e8a (diff)
downloadydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp')
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp778
1 files changed, 389 insertions, 389 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
index 271e14fb9c..f1f829f2c1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -40,20 +40,20 @@
#include "util/bitutils.h" // for CASE_CLEAR
#include "util/charreach.h"
-#include "util/compile_context.h"
+#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
-#include "util/small_vector.h"
-#include "util/target_info.h"
+#include "util/small_vector.h"
+#include "util/target_info.h"
#include <algorithm>
#include <map>
-#include <boost/range/adaptor/map.hpp>
-
+#include <boost/range/adaptor/map.hpp>
+
using namespace std;
-using boost::adaptors::map_keys;
+using boost::adaptors::map_keys;
namespace ue2 {
@@ -72,7 +72,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
}
const CharReach &acr = g[v].char_reach;
- DEBUG_PRINTF("checking %zu\n", g[v].index);
+ DEBUG_PRINTF("checking %zu\n", g[v].index);
if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
DEBUG_PRINTF("bad reach %zu\n", acr.count());
@@ -89,7 +89,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
next_preds->insert(v);
insert(next_cands, adjacent_vertices(v, g));
- DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
+ DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
friends->insert(v);
next_cand:;
}
@@ -136,321 +136,321 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
}
static
-void findPaths(const NGHolder &g, NFAVertex v,
- const vector<CharReach> &refined_cr,
- vector<vector<CharReach>> *paths,
- const flat_set<NFAVertex> &forbidden, u32 depth) {
- static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
- if (!depth) {
- paths->push_back({});
- return;
- }
- if (v == g.accept || v == g.acceptEod) {
- paths->push_back({});
- if (!generates_callbacks(g) || v == g.acceptEod) {
- paths->back().push_back(CharReach()); /* red tape options */
- }
- return;
- }
-
- /* for the escape 'literals' we want to use the minimal cr so we
- * can be more selective */
- const CharReach &cr = refined_cr[g[v].index];
-
- if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
- || hasSelfLoop(v, g)) {
- /* give up on pushing past this point */
- paths->push_back({cr});
- return;
- }
-
- vector<vector<CharReach>> curr;
+void findPaths(const NGHolder &g, NFAVertex v,
+ const vector<CharReach> &refined_cr,
+ vector<vector<CharReach>> *paths,
+ const flat_set<NFAVertex> &forbidden, u32 depth) {
+ static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
+ if (!depth) {
+ paths->push_back({});
+ return;
+ }
+ if (v == g.accept || v == g.acceptEod) {
+ paths->push_back({});
+ if (!generates_callbacks(g) || v == g.acceptEod) {
+ paths->back().push_back(CharReach()); /* red tape options */
+ }
+ return;
+ }
+
+ /* for the escape 'literals' we want to use the minimal cr so we
+ * can be more selective */
+ const CharReach &cr = refined_cr[g[v].index];
+
+ if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
+ || hasSelfLoop(v, g)) {
+ /* give up on pushing past this point */
+ paths->push_back({cr});
+ return;
+ }
+
+ vector<vector<CharReach>> curr;
for (auto w : adjacent_vertices_range(v, g)) {
- if (contains(forbidden, w)) {
- /* path has looped back to one of the active+boring acceleration
- * states. We can ignore this path if we have sufficient back-
- * off. */
+ if (contains(forbidden, w)) {
+ /* path has looped back to one of the active+boring acceleration
+ * states. We can ignore this path if we have sufficient back-
+ * off. */
paths->push_back({cr});
continue;
}
-
- u32 new_depth = depth - 1;
- do {
- curr.clear();
- findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
- } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
-
- for (auto &c : curr) {
- c.push_back(cr);
- paths->push_back(std::move(c));
+
+ u32 new_depth = depth - 1;
+ do {
+ curr.clear();
+ findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
+ } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
+
+ for (auto &c : curr) {
+ c.push_back(cr);
+ paths->push_back(std::move(c));
}
}
}
-namespace {
-struct SAccelScheme {
- SAccelScheme(CharReach cr_in, u32 offset_in)
- : cr(std::move(cr_in)), offset(offset_in) {
- assert(offset <= MAX_ACCEL_DEPTH);
+namespace {
+struct SAccelScheme {
+ SAccelScheme(CharReach cr_in, u32 offset_in)
+ : cr(std::move(cr_in)), offset(offset_in) {
+ assert(offset <= MAX_ACCEL_DEPTH);
}
- SAccelScheme() {}
+ SAccelScheme() {}
+
+ bool operator<(const SAccelScheme &b) const {
+ const SAccelScheme &a = *this;
- bool operator<(const SAccelScheme &b) const {
- const SAccelScheme &a = *this;
-
- const size_t a_count = cr.count(), b_count = b.cr.count();
- if (a_count != b_count) {
- return a_count < b_count;
+ const size_t a_count = cr.count(), b_count = b.cr.count();
+ if (a_count != b_count) {
+ return a_count < b_count;
}
- /* TODO: give bonus if one is a 'caseless' character */
- ORDER_CHECK(offset);
- ORDER_CHECK(cr);
+ /* TODO: give bonus if one is a 'caseless' character */
+ ORDER_CHECK(offset);
+ ORDER_CHECK(cr);
return false;
}
- CharReach cr = CharReach::dot();
- u32 offset = MAX_ACCEL_DEPTH + 1;
-};
+ CharReach cr = CharReach::dot();
+ u32 offset = MAX_ACCEL_DEPTH + 1;
+};
}
-/**
- * \brief Limit on the number of (recursive) calls to findBestInternal().
- */
-static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
-
+/**
+ * \brief Limit on the number of (recursive) calls to findBestInternal().
+ */
+static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
+
static
-void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
- vector<vector<CharReach>>::const_iterator pe,
- size_t *num_calls, const SAccelScheme &curr,
- SAccelScheme *best) {
- assert(curr.offset <= MAX_ACCEL_DEPTH);
-
- if (++(*num_calls) > MAX_FINDBEST_CALLS) {
- DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
- return;
- }
-
- DEBUG_PRINTF("paths left %zu\n", pe - pb);
- if (pb == pe) {
- if (curr < *best) {
- *best = curr;
- DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
- best->cr.count(), describeClass(best->cr).c_str(),
- best->offset);
- }
- return;
- }
-
- DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
-
- small_vector<SAccelScheme, 10> priority_path;
- priority_path.reserve(pb->size());
- u32 i = 0;
- for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
- SAccelScheme as(*p | curr.cr, max(i, curr.offset));
- if (*best < as) {
- DEBUG_PRINTF("worse\n");
- continue;
- }
- priority_path.push_back(move(as));
- }
-
- sort(priority_path.begin(), priority_path.end());
- for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
- auto jt = next(it);
- for (; jt != priority_path.end(); ++jt) {
- if (!it->cr.isSubsetOf(jt->cr)) {
- break;
- }
- }
- priority_path.erase(next(it), jt);
- DEBUG_PRINTF("||%zu\n", it->cr.count());
- }
- DEBUG_PRINTF("---\n");
-
- for (const SAccelScheme &in : priority_path) {
- DEBUG_PRINTF("in: count %zu\n", in.cr.count());
- if (*best < in) {
- DEBUG_PRINTF("worse\n");
- continue;
- }
- findBestInternal(pb + 1, pe, num_calls, in, best);
-
- if (curr.cr == best->cr) {
- return; /* could only get better by offset */
- }
+void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
+ vector<vector<CharReach>>::const_iterator pe,
+ size_t *num_calls, const SAccelScheme &curr,
+ SAccelScheme *best) {
+ assert(curr.offset <= MAX_ACCEL_DEPTH);
+
+ if (++(*num_calls) > MAX_FINDBEST_CALLS) {
+ DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
+ return;
+ }
+
+ DEBUG_PRINTF("paths left %zu\n", pe - pb);
+ if (pb == pe) {
+ if (curr < *best) {
+ *best = curr;
+ DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
+ best->cr.count(), describeClass(best->cr).c_str(),
+ best->offset);
+ }
+ return;
+ }
+
+ DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
+
+ small_vector<SAccelScheme, 10> priority_path;
+ priority_path.reserve(pb->size());
+ u32 i = 0;
+ for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
+ SAccelScheme as(*p | curr.cr, max(i, curr.offset));
+ if (*best < as) {
+ DEBUG_PRINTF("worse\n");
+ continue;
+ }
+ priority_path.push_back(move(as));
+ }
+
+ sort(priority_path.begin(), priority_path.end());
+ for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
+ auto jt = next(it);
+ for (; jt != priority_path.end(); ++jt) {
+ if (!it->cr.isSubsetOf(jt->cr)) {
+ break;
+ }
+ }
+ priority_path.erase(next(it), jt);
+ DEBUG_PRINTF("||%zu\n", it->cr.count());
+ }
+ DEBUG_PRINTF("---\n");
+
+ for (const SAccelScheme &in : priority_path) {
+ DEBUG_PRINTF("in: count %zu\n", in.cr.count());
+ if (*best < in) {
+ DEBUG_PRINTF("worse\n");
+ continue;
+ }
+ findBestInternal(pb + 1, pe, num_calls, in, best);
+
+ if (curr.cr == best->cr) {
+ return; /* could only get better by offset */
+ }
}
}
static
-SAccelScheme findBest(const vector<vector<CharReach>> &paths,
- const CharReach &terminating) {
- SAccelScheme curr(terminating, 0U);
- SAccelScheme best;
- size_t num_calls = 0;
- findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
- DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
- DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
- best.cr.count(), describeClass(best.cr).c_str(), best.offset);
- return best;
-}
-
-namespace {
-struct DAccelScheme {
- DAccelScheme(CharReach cr_in, u32 offset_in)
- : double_cr(std::move(cr_in)), double_offset(offset_in) {
- assert(double_offset <= MAX_ACCEL_DEPTH);
- }
-
- bool operator<(const DAccelScheme &b) const {
- const DAccelScheme &a = *this;
-
- size_t a_dcount = a.double_cr.count();
- size_t b_dcount = b.double_cr.count();
-
- assert(!a.double_byte.empty() || a_dcount || a.double_offset);
- assert(!b.double_byte.empty() || b_dcount || b.double_offset);
-
- if (a_dcount != b_dcount) {
- return a_dcount < b_dcount;
- }
-
- if (!a_dcount) {
- bool cd_a = buildDvermMask(a.double_byte);
- bool cd_b = buildDvermMask(b.double_byte);
- if (cd_a != cd_b) {
- return cd_a > cd_b;
+SAccelScheme findBest(const vector<vector<CharReach>> &paths,
+ const CharReach &terminating) {
+ SAccelScheme curr(terminating, 0U);
+ SAccelScheme best;
+ size_t num_calls = 0;
+ findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
+ DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
+ DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
+ best.cr.count(), describeClass(best.cr).c_str(), best.offset);
+ return best;
+}
+
+namespace {
+struct DAccelScheme {
+ DAccelScheme(CharReach cr_in, u32 offset_in)
+ : double_cr(std::move(cr_in)), double_offset(offset_in) {
+ assert(double_offset <= MAX_ACCEL_DEPTH);
+ }
+
+ bool operator<(const DAccelScheme &b) const {
+ const DAccelScheme &a = *this;
+
+ size_t a_dcount = a.double_cr.count();
+ size_t b_dcount = b.double_cr.count();
+
+ assert(!a.double_byte.empty() || a_dcount || a.double_offset);
+ assert(!b.double_byte.empty() || b_dcount || b.double_offset);
+
+ if (a_dcount != b_dcount) {
+ return a_dcount < b_dcount;
+ }
+
+ if (!a_dcount) {
+ bool cd_a = buildDvermMask(a.double_byte);
+ bool cd_b = buildDvermMask(b.double_byte);
+ if (cd_a != cd_b) {
+ return cd_a > cd_b;
}
}
- ORDER_CHECK(double_byte.size());
- ORDER_CHECK(double_offset);
+ ORDER_CHECK(double_byte.size());
+ ORDER_CHECK(double_offset);
- /* TODO: give bonus if one is a 'caseless' character */
- ORDER_CHECK(double_byte);
- ORDER_CHECK(double_cr);
+ /* TODO: give bonus if one is a 'caseless' character */
+ ORDER_CHECK(double_byte);
+ ORDER_CHECK(double_cr);
- return false;
+ return false;
}
- flat_set<pair<u8, u8>> double_byte;
- CharReach double_cr;
- u32 double_offset = 0;
-};
+ flat_set<pair<u8, u8>> double_byte;
+ CharReach double_cr;
+ u32 double_offset = 0;
+};
}
static
-DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
- const CharReach &cr_2_in, u32 offset_in) {
- cr_1 &= ~as.double_cr;
- CharReach cr_2 = cr_2_in & ~as.double_cr;
- u32 offset = offset_in;
-
- if (cr_1.none()) {
- DEBUG_PRINTF("empty first element\n");
- ENSURE_AT_LEAST(&as.double_offset, offset);
- return as;
- }
-
- if (cr_2_in != cr_2 || cr_2.none()) {
- offset = offset_in + 1;
- }
-
- size_t two_count = cr_1.count() * cr_2.count();
-
- DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
-
- if (!two_count) {
- DEBUG_PRINTF("empty element\n");
- ENSURE_AT_LEAST(&as.double_offset, offset);
- return as;
- }
-
- if (two_count > DOUBLE_SHUFTI_LIMIT) {
- if (cr_2.count() < cr_1.count()) {
- as.double_cr |= cr_2;
- offset = offset_in + 1;
- } else {
- as.double_cr |= cr_1;
- }
- } else {
- for (auto i = cr_1.find_first(); i != CharReach::npos;
- i = cr_1.find_next(i)) {
- for (auto j = cr_2.find_first(); j != CharReach::npos;
- j = cr_2.find_next(j)) {
- as.double_byte.emplace(i, j);
- }
- }
- }
-
- ENSURE_AT_LEAST(&as.double_offset, offset);
- DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
- as.double_byte.size(), as.double_cr.count(), as.double_offset);
- return as;
+DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
+ const CharReach &cr_2_in, u32 offset_in) {
+ cr_1 &= ~as.double_cr;
+ CharReach cr_2 = cr_2_in & ~as.double_cr;
+ u32 offset = offset_in;
+
+ if (cr_1.none()) {
+ DEBUG_PRINTF("empty first element\n");
+ ENSURE_AT_LEAST(&as.double_offset, offset);
+ return as;
+ }
+
+ if (cr_2_in != cr_2 || cr_2.none()) {
+ offset = offset_in + 1;
+ }
+
+ size_t two_count = cr_1.count() * cr_2.count();
+
+ DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
+
+ if (!two_count) {
+ DEBUG_PRINTF("empty element\n");
+ ENSURE_AT_LEAST(&as.double_offset, offset);
+ return as;
+ }
+
+ if (two_count > DOUBLE_SHUFTI_LIMIT) {
+ if (cr_2.count() < cr_1.count()) {
+ as.double_cr |= cr_2;
+ offset = offset_in + 1;
+ } else {
+ as.double_cr |= cr_1;
+ }
+ } else {
+ for (auto i = cr_1.find_first(); i != CharReach::npos;
+ i = cr_1.find_next(i)) {
+ for (auto j = cr_2.find_first(); j != CharReach::npos;
+ j = cr_2.find_next(j)) {
+ as.double_byte.emplace(i, j);
+ }
+ }
+ }
+
+ ENSURE_AT_LEAST(&as.double_offset, offset);
+ DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
+ as.double_byte.size(), as.double_cr.count(), as.double_offset);
+ return as;
}
static
-void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
+void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
vector<vector<CharReach> >::const_iterator pe,
- const DAccelScheme &curr, DAccelScheme *best) {
- assert(curr.double_offset <= MAX_ACCEL_DEPTH);
+ const DAccelScheme &curr, DAccelScheme *best) {
+ assert(curr.double_offset <= MAX_ACCEL_DEPTH);
DEBUG_PRINTF("paths left %zu\n", pe - pb);
- DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
- curr.double_byte.size(), curr.double_cr.count(),
- curr.double_offset);
+ DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
+ curr.double_byte.size(), curr.double_cr.count(),
+ curr.double_offset);
if (pb == pe) {
- if (curr < *best) {
- *best = curr;
- DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
- best->double_byte.size(), best->double_cr.count(),
- best->double_offset);
- }
+ if (curr < *best) {
+ *best = curr;
+ DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
+ best->double_byte.size(), best->double_cr.count(),
+ best->double_offset);
+ }
return;
}
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
- small_vector<DAccelScheme, 10> priority_path;
- priority_path.reserve(pb->size());
+ small_vector<DAccelScheme, 10> priority_path;
+ priority_path.reserve(pb->size());
u32 i = 0;
- for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
+ for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
++p, i++) {
- DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
- if (*best < as) {
- DEBUG_PRINTF("worse\n");
- continue;
- }
- priority_path.push_back(move(as));
+ DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
+ if (*best < as) {
+ DEBUG_PRINTF("worse\n");
+ continue;
+ }
+ priority_path.push_back(move(as));
}
sort(priority_path.begin(), priority_path.end());
- DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
- DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
- best->double_byte.size(), best->double_cr.count(),
- best->double_offset);
-
- for (const DAccelScheme &in : priority_path) {
- DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
- in.double_byte.size(), in.double_cr.count(),
- in.double_offset);
- if (*best < in) {
+ DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
+ DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
+ best->double_byte.size(), best->double_cr.count(),
+ best->double_offset);
+
+ for (const DAccelScheme &in : priority_path) {
+ DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
+ in.double_byte.size(), in.double_cr.count(),
+ in.double_offset);
+ if (*best < in) {
DEBUG_PRINTF("worse\n");
continue;
}
- findDoubleBest(pb + 1, pe, in, best);
+ findDoubleBest(pb + 1, pe, in, best);
}
}
#ifdef DEBUG
static
-void dumpPaths(const vector<vector<CharReach>> &paths) {
- for (const auto &path : paths) {
+void dumpPaths(const vector<vector<CharReach>> &paths) {
+ for (const auto &path : paths) {
DEBUG_PRINTF("path: [");
- for (const auto &cr : path) {
+ for (const auto &cr : path) {
printf(" [");
- describeClass(stdout, cr, 20, CC_OUT_TEXT);
+ describeClass(stdout, cr, 20, CC_OUT_TEXT);
printf("]");
}
printf(" ]\n");
@@ -459,13 +459,13 @@ void dumpPaths(const vector<vector<CharReach>> &paths) {
#endif
static
-void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
+void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
/* paths segments which are a superset of an earlier segment should never be
* picked as an acceleration segment -> to improve processing just replace
* with dot */
- for (auto &p : paths) {
- for (auto it = p.begin(); it != p.end(); ++it) {
- for (auto jt = next(it); jt != p.end(); ++jt) {
+ for (auto &p : paths) {
+ for (auto it = p.begin(); it != p.end(); ++it) {
+ for (auto jt = next(it); jt != p.end(); ++jt) {
if (it->isSubsetOf(*jt)) {
*jt = CharReach::dot();
}
@@ -475,10 +475,10 @@ void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
}
static
-void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
+void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
/* try to unify paths which only differ in the last segment */
- for (vector<vector<CharReach> >::iterator p = paths.begin();
- p != paths.end() && p + 1 != paths.end();) {
+ for (vector<vector<CharReach> >::iterator p = paths.begin();
+ p != paths.end() && p + 1 != paths.end();) {
vector<CharReach> &a = *p;
vector<CharReach> &b = *(p + 1);
@@ -496,7 +496,7 @@ void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
if (i == a.size() - 1) {
/* we can unify these paths */
a[i] |= b[i];
- paths.erase(p + 1);
+ paths.erase(p + 1);
} else {
++p;
}
@@ -504,117 +504,117 @@ void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
}
static
-void improvePaths(vector<vector<CharReach> > &paths) {
+void improvePaths(vector<vector<CharReach> > &paths) {
#ifdef DEBUG
DEBUG_PRINTF("orig paths\n");
- dumpPaths(paths);
+ dumpPaths(paths);
#endif
blowoutPathsLessStrictSegment(paths);
- sort(paths.begin(), paths.end());
+ sort(paths.begin(), paths.end());
unifyPathsLastSegment(paths);
#ifdef DEBUG
DEBUG_PRINTF("opt paths\n");
- dumpPaths(paths);
+ dumpPaths(paths);
#endif
}
-#define MAX_DOUBLE_ACCEL_PATHS 10
-
-static
-DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
- const CharReach &terminating) {
- DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
- terminating.count());
- unifyPathsLastSegment(paths);
-
-#ifdef DEBUG
- DEBUG_PRINTF("paths:\n");
- dumpPaths(paths);
-#endif
-
- /* if there are too many paths, shorten the paths to reduce the number of
- * distinct paths we have to consider */
- while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
- for (auto &p : paths) {
- if (p.empty()) {
- return DAccelScheme(terminating, 0U);
- }
- p.pop_back();
- }
- unifyPathsLastSegment(paths);
- }
-
- if (paths.empty()) {
- return DAccelScheme(terminating, 0U);
- }
-
- DAccelScheme curr(terminating, 0U);
- DAccelScheme best(CharReach::dot(), 0U);
- findDoubleBest(paths.begin(), paths.end(), curr, &best);
- DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
- best.double_cr.count());
- return best;
-}
-
-#define MAX_EXPLORE_PATHS 40
-
-AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
- const CharReach &terminating,
- bool look_for_double_byte) {
- AccelScheme rv;
- if (look_for_double_byte) {
- DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
- if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
- rv.double_byte = std::move(da.double_byte);
- rv.double_cr = move(da.double_cr);
- rv.double_offset = da.double_offset;
- }
- }
-
- improvePaths(paths);
-
- DEBUG_PRINTF("we have %zu paths\n", paths.size());
- if (paths.size() > MAX_EXPLORE_PATHS) {
- return rv; /* too many paths to explore */
- }
-
- /* if we were smart we would do something netflowy on the paths to find the
- * best cut. But we aren't, so we will just brute force it.
- */
- SAccelScheme best = findBest(paths, terminating);
-
- /* find best is a bit lazy in terms of minimising the offset, see if we can
- * make it better. need to find the min max offset that we need.*/
- u32 offset = 0;
- for (const auto &path : paths) {
- u32 i = 0;
- for (const auto &cr : path) {
- if (cr.isSubsetOf(best.cr)) {
- break;
- }
- i++;
- }
- offset = MAX(offset, i);
- }
- assert(offset <= best.offset);
- best.offset = offset;
-
- rv.offset = best.offset;
- rv.cr = best.cr;
- if (rv.cr.count() < rv.double_cr.count()) {
- rv.double_byte.clear();
- }
-
- return rv;
-}
-
+#define MAX_DOUBLE_ACCEL_PATHS 10
+
+static
+DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
+ const CharReach &terminating) {
+ DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
+ terminating.count());
+ unifyPathsLastSegment(paths);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("paths:\n");
+ dumpPaths(paths);
+#endif
+
+ /* if there are too many paths, shorten the paths to reduce the number of
+ * distinct paths we have to consider */
+ while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
+ for (auto &p : paths) {
+ if (p.empty()) {
+ return DAccelScheme(terminating, 0U);
+ }
+ p.pop_back();
+ }
+ unifyPathsLastSegment(paths);
+ }
+
+ if (paths.empty()) {
+ return DAccelScheme(terminating, 0U);
+ }
+
+ DAccelScheme curr(terminating, 0U);
+ DAccelScheme best(CharReach::dot(), 0U);
+ findDoubleBest(paths.begin(), paths.end(), curr, &best);
+ DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
+ best.double_cr.count());
+ return best;
+}
+
+#define MAX_EXPLORE_PATHS 40
+
+AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
+ const CharReach &terminating,
+ bool look_for_double_byte) {
+ AccelScheme rv;
+ if (look_for_double_byte) {
+ DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
+ if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
+ rv.double_byte = std::move(da.double_byte);
+ rv.double_cr = move(da.double_cr);
+ rv.double_offset = da.double_offset;
+ }
+ }
+
+ improvePaths(paths);
+
+ DEBUG_PRINTF("we have %zu paths\n", paths.size());
+ if (paths.size() > MAX_EXPLORE_PATHS) {
+ return rv; /* too many paths to explore */
+ }
+
+ /* if we were smart we would do something netflowy on the paths to find the
+ * best cut. But we aren't, so we will just brute force it.
+ */
+ SAccelScheme best = findBest(paths, terminating);
+
+ /* find best is a bit lazy in terms of minimising the offset, see if we can
+ * make it better. need to find the min max offset that we need.*/
+ u32 offset = 0;
+ for (const auto &path : paths) {
+ u32 i = 0;
+ for (const auto &cr : path) {
+ if (cr.isSubsetOf(best.cr)) {
+ break;
+ }
+ i++;
+ }
+ offset = MAX(offset, i);
+ }
+ assert(offset <= best.offset);
+ best.offset = offset;
+
+ rv.offset = best.offset;
+ rv.cr = best.cr;
+ if (rv.cr.count() < rv.double_cr.count()) {
+ rv.double_byte.clear();
+ }
+
+ return rv;
+}
+
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
const vector<CharReach> &refined_cr,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- bool allow_wide, bool look_for_double_byte) {
+ bool allow_wide, bool look_for_double_byte) {
CharReach terminating;
for (auto v : verts) {
if (!hasSelfLoop(v, g)) {
@@ -633,15 +633,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
return AccelScheme(); /* invalid scheme */
}
- vector<vector<CharReach>> paths;
+ vector<vector<CharReach>> paths;
flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
/* Note: we can not in general (TODO: ignore when possible) ignore entries
* into the bounded repeat cyclic states as that is when the magic happens
*/
- for (auto v : br_cyclic | map_keys) {
+ for (auto v : br_cyclic | map_keys) {
/* TODO: can allow if repeatMin <= 1 ? */
- ignore_vert_set.erase(v);
+ ignore_vert_set.erase(v);
}
for (auto v : verts) {
@@ -654,12 +654,12 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
}
/* paths built wrong: reverse them */
- for (auto &path : paths) {
- reverse(path.begin(), path.end());
+ for (auto &path : paths) {
+ reverse(path.begin(), path.end());
}
- return findBestAccelScheme(std::move(paths), terminating,
- look_for_double_byte);
+ return findBestAccelScheme(std::move(paths), terminating,
+ look_for_double_byte);
}
NFAVertex get_sds_or_proxy(const NGHolder &g) {
@@ -668,7 +668,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
return g.startDs;
}
- NFAVertex v = NGHolder::null_vertex();
+ NFAVertex v = NGHolder::null_vertex();
for (auto w : adjacent_vertices_range(g.start, g)) {
if (w != g.startDs) {
if (!v) {
@@ -685,7 +685,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
while (true) {
if (hasSelfLoop(v, g)) {
- DEBUG_PRINTF("woot %zu\n", g[v].index);
+ DEBUG_PRINTF("woot %zu\n", g[v].index);
return v;
}
if (out_degree(v, g) != 1) {
@@ -719,7 +719,7 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
CharReach terminating = g[v].char_reach;
terminating.flip();
- DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
+ DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
g[v].index, terminating.count(),
allow_wide ? " (w)" : "");
@@ -789,9 +789,9 @@ depth_done:
for (unsigned int i = 0; i < depth; i++) {
if (depthReach[i].none()) {
DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
- *as = AccelScheme();
- as->offset = i;
- as->cr = CharReach();
+ *as = AccelScheme();
+ as->offset = i;
+ as->cr = CharReach();
return true;
}
}
@@ -806,8 +806,8 @@ depth_done:
|| (cra.count() == 2 && crb.count() == 2
&& cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
- *as = AccelScheme();
- as->offset = i;
+ *as = AccelScheme();
+ as->offset = i;
return true;
}
}
@@ -817,19 +817,19 @@ depth_done:
// literals)
if (depth > 1) {
for (unsigned int i = 0; i < (depth - 1); i++) {
- if (depthReach[i].count() * depthReach[i+1].count()
- <= DOUBLE_SHUFTI_LIMIT) {
+ if (depthReach[i].count() * depthReach[i+1].count()
+ <= DOUBLE_SHUFTI_LIMIT) {
DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
- *as = AccelScheme();
- as->offset = i;
+ *as = AccelScheme();
+ as->offset = i;
return true;
}
}
}
- // Look for offset accel schemes verm/shufti;
+ // Look for offset accel schemes verm/shufti;
vector<NFAVertex> verts(1, v);
- *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
+ *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
DEBUG_PRINTF("as width %zu\n", as->cr.count());
return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
}