aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfa
diff options
context:
space:
mode:
authorthegeorg <thegeorg@yandex-team.ru>2022-02-10 16:45:12 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:12 +0300
commit49116032d905455a7b1c994e4a696afc885c1e71 (patch)
treebe835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/hyperscan/src/nfa
parent4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff)
downloadydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa')
-rw-r--r--contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp2
-rw-r--r--contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h16
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile.cpp6
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_compile.cpp270
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_compile.h4
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_exceptional.h146
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_internal.h10
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan.c344
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h212
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan_internal.h106
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp1142
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellancompile.h4
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellandump.h124
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcsheng.c2672
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcsheng.h148
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp928
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcsheng_compile.h4
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcsheng_data.c26
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcsheng_internal.h62
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c10
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp120
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_build_util.h2
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_internal.h46
-rw-r--r--contrib/libs/hyperscan/src/nfa/sheng.c2412
-rw-r--r--contrib/libs/hyperscan/src/nfa/sheng.h166
-rw-r--r--contrib/libs/hyperscan/src/nfa/sheng_defs.h804
-rw-r--r--contrib/libs/hyperscan/src/nfa/sheng_impl.h250
-rw-r--r--contrib/libs/hyperscan/src/nfa/sheng_impl4.h856
-rw-r--r--contrib/libs/hyperscan/src/nfa/sheng_internal.h76
-rw-r--r--contrib/libs/hyperscan/src/nfa/shengcompile.cpp626
-rw-r--r--contrib/libs/hyperscan/src/nfa/shengcompile.h20
-rw-r--r--contrib/libs/hyperscan/src/nfa/vermicelli.h274
-rw-r--r--contrib/libs/hyperscan/src/nfa/vermicelli_sse.h994
33 files changed, 6441 insertions, 6441 deletions
diff --git a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp
index 842665f1cc..ae71e141a2 100644
--- a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp
+++ b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp
@@ -214,7 +214,7 @@ static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty() &&
info.double_cr.count() < info.double_byte.size() &&
- info.double_cr.count() <= 2;
+ info.double_cr.count() <= 2;
}
static
diff --git a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h
index cb47d38cc5..53a6f35b3d 100644
--- a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h
+++ b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -40,11 +40,11 @@ namespace ue2 {
class ReportManager;
struct Grey;
-enum DfaType {
- McClellan,
- Sheng,
- Gough
-};
+enum DfaType {
+ McClellan,
+ Sheng,
+ Gough
+};
class accel_dfa_build_strat : public dfa_build_strat {
public:
@@ -58,8 +58,8 @@ public:
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out);
virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
- virtual DfaType getType() const = 0;
-
+ virtual DfaType getType() const = 0;
+
private:
bool only_accel_init;
};
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp
index cb7f0eafc9..d41c6f4235 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -91,7 +91,7 @@ public:
void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) override;
u32 max_allowed_offset_accel() const override { return 0; }
- DfaType getType() const override { return Gough; }
+ DfaType getType() const override { return Gough; }
raw_som_dfa &rdfa;
const GoughGraph &gg;
@@ -375,7 +375,7 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
}
u16 top_sym = raw.alpha_remap[TOP];
- DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str());
+ DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str());
/* create edges, JOIN variables (on edge targets) */
map<dstate_id_t, GoughEdge> seen;
diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp
index bad7434d79..9233ae515e 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -85,18 +85,18 @@ namespace ue2 {
*/
static constexpr u32 NO_STATE = ~0;
-/* Maximum number of states taken as a small NFA */
-static constexpr u32 MAX_SMALL_NFA_STATES = 64;
-
-/* Maximum bounded repeat upper bound to consider as a fast NFA */
-static constexpr u64a MAX_REPEAT_SIZE = 200;
-
-/* Maximum bounded repeat char reach size to consider as a fast NFA */
-static constexpr u32 MAX_REPEAT_CHAR_REACH = 26;
-
-/* Minimum bounded repeat trigger distance to consider as a fast NFA */
-static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6;
-
+/* Maximum number of states taken as a small NFA */
+static constexpr u32 MAX_SMALL_NFA_STATES = 64;
+
+/* Maximum bounded repeat upper bound to consider as a fast NFA */
+static constexpr u64a MAX_REPEAT_SIZE = 200;
+
+/* Maximum bounded repeat char reach size to consider as a fast NFA */
+static constexpr u32 MAX_REPEAT_CHAR_REACH = 26;
+
+/* Minimum bounded repeat trigger distance to consider as a fast NFA */
+static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6;
+
namespace {
struct precalcAccel {
@@ -992,7 +992,7 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v,
// see if we've already seen it, otherwise add a new one.
auto it = find(squash.begin(), squash.end(), sit->second);
if (it != squash.end()) {
- return verify_u32(std::distance(squash.begin(), it));
+ return verify_u32(std::distance(squash.begin(), it));
}
u32 idx = verify_u32(squash.size());
squash.push_back(sit->second);
@@ -1019,7 +1019,7 @@ u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
auto it = search(begin(reports), end(reports), begin(my_reports),
end(my_reports));
if (it != end(reports)) {
- u32 offset = verify_u32(std::distance(begin(reports), it));
+ u32 offset = verify_u32(std::distance(begin(reports), it));
DEBUG_PRINTF("reusing found report list at %u\n", offset);
return offset;
}
@@ -1922,8 +1922,8 @@ struct Factory {
}
static
- void writeExceptions(const build_info &args,
- const map<ExceptionProto, vector<u32>> &exceptionMap,
+ void writeExceptions(const build_info &args,
+ const map<ExceptionProto, vector<u32>> &exceptionMap,
const vector<u32> &repeatOffsets, implNFA_t *limex,
const u32 exceptionsOffset,
const u32 reportListOffset) {
@@ -1975,59 +1975,59 @@ struct Factory {
limex->exceptionOffset = exceptionsOffset;
limex->exceptionCount = ecount;
-
- if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) {
- const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask);
- u8 *shufMask = (u8 *)&limex->exceptionShufMask;
- u8 *bitMask = (u8 *)&limex->exceptionBitMask;
- u8 *andMask = (u8 *)&limex->exceptionAndMask;
-
- u32 tot_cnt = 0;
- u32 pos = 0;
- bool valid = true;
- size_t tot = sizeof(limex->exceptionMask);
- size_t base = 0;
-
- // We normally have up to 64 exceptions to handle,
- // but treat 384 state Limex differently to simplify operations
- size_t limit = 64;
- if (args.num_states > 256 && args.num_states <= 384) {
- limit = 48;
- }
-
- for (size_t i = 0; i < tot; i++) {
- if (!exceptionMask[i]) {
- continue;
- }
- u32 bit_cnt = popcount32(exceptionMask[i]);
-
- tot_cnt += bit_cnt;
- if (tot_cnt > limit) {
- valid = false;
- break;
- }
-
- u32 emsk = exceptionMask[i];
- while (emsk) {
- u32 t = findAndClearLSB_32(&emsk);
- bitMask[pos] = 1U << t;
- andMask[pos] = 1U << t;
- shufMask[pos++] = i + base;
-
- if (pos == 32 &&
- (args.num_states > 128 && args.num_states <= 256)) {
- base += 32;
- }
- }
- }
- // Avoid matching unused bytes
- for (u32 i = pos; i < 64; i++) {
- bitMask[i] = 0xff;
- }
- if (valid) {
- setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP);
- }
- }
+
+ if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) {
+ const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask);
+ u8 *shufMask = (u8 *)&limex->exceptionShufMask;
+ u8 *bitMask = (u8 *)&limex->exceptionBitMask;
+ u8 *andMask = (u8 *)&limex->exceptionAndMask;
+
+ u32 tot_cnt = 0;
+ u32 pos = 0;
+ bool valid = true;
+ size_t tot = sizeof(limex->exceptionMask);
+ size_t base = 0;
+
+ // We normally have up to 64 exceptions to handle,
+ // but treat 384 state Limex differently to simplify operations
+ size_t limit = 64;
+ if (args.num_states > 256 && args.num_states <= 384) {
+ limit = 48;
+ }
+
+ for (size_t i = 0; i < tot; i++) {
+ if (!exceptionMask[i]) {
+ continue;
+ }
+ u32 bit_cnt = popcount32(exceptionMask[i]);
+
+ tot_cnt += bit_cnt;
+ if (tot_cnt > limit) {
+ valid = false;
+ break;
+ }
+
+ u32 emsk = exceptionMask[i];
+ while (emsk) {
+ u32 t = findAndClearLSB_32(&emsk);
+ bitMask[pos] = 1U << t;
+ andMask[pos] = 1U << t;
+ shufMask[pos++] = i + base;
+
+ if (pos == 32 &&
+ (args.num_states > 128 && args.num_states <= 256)) {
+ base += 32;
+ }
+ }
+ }
+ // Avoid matching unused bytes
+ for (u32 i = pos; i < 64; i++) {
+ bitMask[i] = 0xff;
+ }
+ if (valid) {
+ setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP);
+ }
+ }
}
static
@@ -2353,7 +2353,7 @@ struct Factory {
writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
repeatsOffset);
- writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset,
+ writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset,
reportListOffset);
writeLimexMasks(args, limex);
@@ -2489,68 +2489,68 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
#endif // NDEBUG
static
-bool isFast(const build_info &args) {
- const NGHolder &h = args.h;
- const u32 num_states = args.num_states;
-
- if (num_states > MAX_SMALL_NFA_STATES) {
- return false;
- }
-
- unordered_map<NFAVertex, bool> pos_trigger;
- for (u32 i = 0; i < args.repeats.size(); i++) {
- const BoundedRepeatData &br = args.repeats[i];
- assert(!contains(pos_trigger, br.pos_trigger));
- pos_trigger[br.pos_trigger] = br.repeatMax <= MAX_REPEAT_SIZE;
- }
-
- // Small NFA without bounded repeat should be fast.
- if (pos_trigger.empty()) {
- return true;
- }
-
- vector<NFAVertex> cur;
- unordered_set<NFAVertex> visited;
- for (const auto &m : args.tops) {
- for (NFAVertex v : m.second) {
- cur.push_back(v);
- visited.insert(v);
- }
- }
-
- u8 pos_dist = 0;
- while (!cur.empty()) {
- vector<NFAVertex> next;
- for (const auto &v : cur) {
- if (contains(pos_trigger, v)) {
- const CharReach &cr = h[v].char_reach;
- if (!pos_trigger[v] && cr.count() > MAX_REPEAT_CHAR_REACH) {
- return false;
- }
- }
- for (const auto &w : adjacent_vertices_range(v, h)) {
- if (w == v) {
- continue;
- }
- u32 j = args.state_ids.at(w);
- if (j == NO_STATE) {
- continue;
- }
- if (!contains(visited, w)) {
- next.push_back(w);
- visited.insert(w);
- }
- }
- }
- if (++pos_dist >= MIN_REPEAT_TRIGGER_DISTANCE) {
- break;
- }
- swap(cur, next);
- }
- return true;
-}
-
-static
+bool isFast(const build_info &args) {
+ const NGHolder &h = args.h;
+ const u32 num_states = args.num_states;
+
+ if (num_states > MAX_SMALL_NFA_STATES) {
+ return false;
+ }
+
+ unordered_map<NFAVertex, bool> pos_trigger;
+ for (u32 i = 0; i < args.repeats.size(); i++) {
+ const BoundedRepeatData &br = args.repeats[i];
+ assert(!contains(pos_trigger, br.pos_trigger));
+ pos_trigger[br.pos_trigger] = br.repeatMax <= MAX_REPEAT_SIZE;
+ }
+
+ // Small NFA without bounded repeat should be fast.
+ if (pos_trigger.empty()) {
+ return true;
+ }
+
+ vector<NFAVertex> cur;
+ unordered_set<NFAVertex> visited;
+ for (const auto &m : args.tops) {
+ for (NFAVertex v : m.second) {
+ cur.push_back(v);
+ visited.insert(v);
+ }
+ }
+
+ u8 pos_dist = 0;
+ while (!cur.empty()) {
+ vector<NFAVertex> next;
+ for (const auto &v : cur) {
+ if (contains(pos_trigger, v)) {
+ const CharReach &cr = h[v].char_reach;
+ if (!pos_trigger[v] && cr.count() > MAX_REPEAT_CHAR_REACH) {
+ return false;
+ }
+ }
+ for (const auto &w : adjacent_vertices_range(v, h)) {
+ if (w == v) {
+ continue;
+ }
+ u32 j = args.state_ids.at(w);
+ if (j == NO_STATE) {
+ continue;
+ }
+ if (!contains(visited, w)) {
+ next.push_back(w);
+ visited.insert(w);
+ }
+ }
+ }
+ if (++pos_dist >= MIN_REPEAT_TRIGGER_DISTANCE) {
+ break;
+ }
+ swap(cur, next);
+ }
+ return true;
+}
+
+static
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
u32 rv = 0;
for (const auto &m : state_ids) {
@@ -2570,7 +2570,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies, bool do_accel,
- bool stateCompression, bool &fast, u32 hint,
+ bool stateCompression, bool &fast, u32 hint,
const CompileContext &cc) {
const u32 num_states = max_state(states) + 1;
DEBUG_PRINTF("total states: %u\n", num_states);
@@ -2625,7 +2625,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
if (nfa) {
DEBUG_PRINTF("successful build with NFA engine: %s\n",
nfa_type_name(limex_model));
- fast = isFast(arg);
+ fast = isFast(arg);
return nfa;
}
}
diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.h b/contrib/libs/hyperscan/src/nfa/limex_compile.h
index 2562727d68..4afdcdb3e4 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_compile.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_compile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -78,7 +78,7 @@ bytecode_ptr<NFA> generate(NGHolder &g,
const std::set<NFAVertex> &zombies,
bool do_accel,
bool stateCompression,
- bool &fast,
+ bool &fast,
u32 hint,
const CompileContext &cc);
diff --git a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h
index 65bc9d97cd..6c7335f1b9 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -47,8 +47,8 @@
#define AND_STATE JOIN(and_, STATE_T)
#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
#define OR_STATE JOIN(or_, STATE_T)
-#define EXPAND_STATE JOIN(expand_, STATE_T)
-#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T)
+#define EXPAND_STATE JOIN(expand_, STATE_T)
+#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T)
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define EXCEPTION_T JOIN(struct NFAException, SIZE)
#define CONTEXT_T JOIN(NFAContext, SIZE)
@@ -210,7 +210,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
/** \brief Process all of the exceptions associated with the states in the \a
* estate. */
static really_inline
-int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
+int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
assert(diffmask > 0); // guaranteed by caller macro
@@ -235,72 +235,72 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
ctx->local_succ = ZERO_STATE;
#endif
- struct proto_cache new_cache = {0, NULL};
- enum CacheResult cacheable = CACHE_RESULT;
-
-#if defined(HAVE_AVX512VBMI) && SIZE > 64
- if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) {
- m512 emask = EXPAND_STATE(*STATE_ARG_P);
- emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask);
- emask = and512(emask, load_m512(&limex->exceptionAndMask));
- u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask));
-
- do {
- u32 bit = FIND_AND_CLEAR_FN(&word);
- const EXCEPTION_T *e = &exceptions[bit];
-
- if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
-#ifndef BIG_MODEL
- &local_succ,
-#endif
- limex, offset, ctx, &new_cache, &cacheable,
- in_rev, flags)) {
- return PE_RV_HALT;
- }
- } while (word);
- } else {
- // A copy of the estate as an array of GPR-sized chunks.
- CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
- CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
-#ifdef ESTATE_ON_STACK
- memcpy(chunks, &estate, sizeof(STATE_T));
-#else
- memcpy(chunks, estatep, sizeof(STATE_T));
-#endif
- memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
-
- u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
- base_index[0] = 0;
- for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
- base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
- }
-
- do {
- u32 t = findAndClearLSB_32(&diffmask);
-#ifdef ARCH_64_BIT
- t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
-#endif
- assert(t < ARRAY_LENGTH(chunks));
- CHUNK_T word = chunks[t];
- assert(word != 0);
- do {
- u32 bit = FIND_AND_CLEAR_FN(&word);
- u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
- u32 idx = local_index + base_index[t];
- const EXCEPTION_T *e = &exceptions[idx];
-
- if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
-#ifndef BIG_MODEL
- &local_succ,
-#endif
- limex, offset, ctx, &new_cache, &cacheable,
- in_rev, flags)) {
- return PE_RV_HALT;
- }
- } while (word);
- } while (diffmask);
- }
-#else
+ struct proto_cache new_cache = {0, NULL};
+ enum CacheResult cacheable = CACHE_RESULT;
+
+#if defined(HAVE_AVX512VBMI) && SIZE > 64
+ if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) {
+ m512 emask = EXPAND_STATE(*STATE_ARG_P);
+ emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask);
+ emask = and512(emask, load_m512(&limex->exceptionAndMask));
+ u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask));
+
+ do {
+ u32 bit = FIND_AND_CLEAR_FN(&word);
+ const EXCEPTION_T *e = &exceptions[bit];
+
+ if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+ &local_succ,
+#endif
+ limex, offset, ctx, &new_cache, &cacheable,
+ in_rev, flags)) {
+ return PE_RV_HALT;
+ }
+ } while (word);
+ } else {
+ // A copy of the estate as an array of GPR-sized chunks.
+ CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+ CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+#ifdef ESTATE_ON_STACK
+ memcpy(chunks, &estate, sizeof(STATE_T));
+#else
+ memcpy(chunks, estatep, sizeof(STATE_T));
+#endif
+ memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
+
+ u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
+ base_index[0] = 0;
+ for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
+ base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
+ }
+
+ do {
+ u32 t = findAndClearLSB_32(&diffmask);
+#ifdef ARCH_64_BIT
+ t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
+#endif
+ assert(t < ARRAY_LENGTH(chunks));
+ CHUNK_T word = chunks[t];
+ assert(word != 0);
+ do {
+ u32 bit = FIND_AND_CLEAR_FN(&word);
+ u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
+ u32 idx = local_index + base_index[t];
+ const EXCEPTION_T *e = &exceptions[idx];
+
+ if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+ &local_succ,
+#endif
+ limex, offset, ctx, &new_cache, &cacheable,
+ in_rev, flags)) {
+ return PE_RV_HALT;
+ }
+ } while (word);
+ } while (diffmask);
+ }
+#else
// A copy of the estate as an array of GPR-sized chunks.
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
@@ -341,7 +341,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
}
} while (word);
} while (diffmask);
-#endif
+#endif
#ifndef BIG_MODEL
*succ = OR_STATE(*succ, local_succ);
@@ -373,8 +373,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
#undef AND_STATE
#undef EQ_STATE
#undef OR_STATE
-#undef EXPAND_STATE
-#undef SHUFFLE_BYTE_STATE
+#undef EXPAND_STATE
+#undef SHUFFLE_BYTE_STATE
#undef TESTBIT_STATE
#undef PE_FN
#undef RUN_EXCEPTION_FN
diff --git a/contrib/libs/hyperscan/src/nfa/limex_internal.h b/contrib/libs/hyperscan/src/nfa/limex_internal.h
index 59795a65b7..23b1bd9707 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -86,7 +86,7 @@
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */
-#define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */
+#define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */
enum LimExTrigger {
LIMEX_TRIGGER_NONE = 0,
@@ -158,9 +158,9 @@ struct LimExNFA##size { \
u_##size shift[MAX_SHIFT_COUNT]; \
u32 shiftCount; /**< number of shift masks used */ \
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
- m512 exceptionShufMask; /**< exception byte shuffle mask */ \
- m512 exceptionBitMask; /**< exception bit mask */ \
- m512 exceptionAndMask; /**< exception and mask */ \
+ m512 exceptionShufMask; /**< exception byte shuffle mask */ \
+ m512 exceptionBitMask; /**< exception bit mask */ \
+ m512 exceptionAndMask; /**< exception and mask */ \
};
CREATE_NFA_LIMEX(32)
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.c b/contrib/libs/hyperscan/src/nfa/mcclellan.c
index fe21700632..71f71e3275 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan.c
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -167,68 +167,68 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
}
static really_inline
-u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout,
- const u8 *end, u32 s, char *qstate, u16 *offset,
- char do_accel, enum MatchMode mode) {
- const u8 *c = *c_inout;
-
- u32 wide_limit = m->wide_limit;
- const char *wide_base
- = (const char *)m - sizeof(struct NFA) + m->wide_offset;
-
- const u16 *succ_table
- = (const u16 *)((const char *)m + sizeof(struct mcclellan));
- assert(ISALIGNED_N(succ_table, 2));
- u32 sherman_base = m->sherman_limit;
- const char *sherman_base_offset
- = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
- u32 as = m->alphaShift;
-
- s &= STATE_MASK;
-
- while (c < end && s) {
- u8 cprime = m->remap[*c];
- DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c,
- ourisprint(*c) ? *c : '?', cprime, s, c);
-
- if (unlikely(s >= wide_limit)) {
- const char *wide_entry
- = findWideEntry16(m, wide_base, wide_limit, s);
- DEBUG_PRINTF("doing wide head (%u)\n", s);
- s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate,
- offset);
- } else if (s >= sherman_base) {
- const char *sherman_state
- = findShermanState(m, sherman_base_offset, sherman_base, s);
- DEBUG_PRINTF("doing sherman (%u)\n", s);
- s = doSherman16(sherman_state, cprime, succ_table, as);
- } else {
- DEBUG_PRINTF("doing normal\n");
- s = succ_table[(s << as) + cprime];
- }
-
- DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
- c++;
-
- if (do_accel && (s & ACCEL_FLAG)) {
- break;
- }
- if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
- break;
- }
-
- s &= STATE_MASK;
- }
-
- *c_inout = c;
- return s;
-}
-
-static really_inline
-char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate,
- const u8 *buf, size_t len, u64a offAdj, NfaCallback cb,
- void *ctxt, char single, const u8 **c_final,
- enum MatchMode mode) {
+u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout,
+ const u8 *end, u32 s, char *qstate, u16 *offset,
+ char do_accel, enum MatchMode mode) {
+ const u8 *c = *c_inout;
+
+ u32 wide_limit = m->wide_limit;
+ const char *wide_base
+ = (const char *)m - sizeof(struct NFA) + m->wide_offset;
+
+ const u16 *succ_table
+ = (const u16 *)((const char *)m + sizeof(struct mcclellan));
+ assert(ISALIGNED_N(succ_table, 2));
+ u32 sherman_base = m->sherman_limit;
+ const char *sherman_base_offset
+ = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
+ u32 as = m->alphaShift;
+
+ s &= STATE_MASK;
+
+ while (c < end && s) {
+ u8 cprime = m->remap[*c];
+ DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c,
+ ourisprint(*c) ? *c : '?', cprime, s, c);
+
+ if (unlikely(s >= wide_limit)) {
+ const char *wide_entry
+ = findWideEntry16(m, wide_base, wide_limit, s);
+ DEBUG_PRINTF("doing wide head (%u)\n", s);
+ s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate,
+ offset);
+ } else if (s >= sherman_base) {
+ const char *sherman_state
+ = findShermanState(m, sherman_base_offset, sherman_base, s);
+ DEBUG_PRINTF("doing sherman (%u)\n", s);
+ s = doSherman16(sherman_state, cprime, succ_table, as);
+ } else {
+ DEBUG_PRINTF("doing normal\n");
+ s = succ_table[(s << as) + cprime];
+ }
+
+ DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
+ c++;
+
+ if (do_accel && (s & ACCEL_FLAG)) {
+ break;
+ }
+ if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
+ break;
+ }
+
+ s &= STATE_MASK;
+ }
+
+ *c_inout = c;
+ return s;
+}
+
+static really_inline
+char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate,
+ const u8 *buf, size_t len, u64a offAdj, NfaCallback cb,
+ void *ctxt, char single, const u8 **c_final,
+ enum MatchMode mode) {
assert(ISALIGNED_N(state, 2));
if (!len) {
if (mode == STOP_AT_MATCH) {
@@ -238,7 +238,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate,
}
u32 s = *state;
- u16 offset = 0;
+ u16 offset = 0;
const u8 *c = buf;
const u8 *c_end = buf + len;
const struct mstate_aux *aux
@@ -267,12 +267,12 @@ without_accel:
goto exit;
}
- if (unlikely(m->has_wide)) {
- s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0,
- mode);
- } else {
- s = doNormal16(m, &c, min_accel_offset, s, 0, mode);
- }
+ if (unlikely(m->has_wide)) {
+ s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0,
+ mode);
+ } else {
+ s = doNormal16(m, &c, min_accel_offset, s, 0, mode);
+ }
if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
if (mode == STOP_AT_MATCH) {
@@ -324,11 +324,11 @@ with_accel:
}
}
- if (unlikely(m->has_wide)) {
- s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode);
- } else {
- s = doNormal16(m, &c, c_end, s, 1, mode);
- }
+ if (unlikely(m->has_wide)) {
+ s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode);
+ } else {
+ s = doNormal16(m, &c, c_end, s, 1, mode);
+ }
if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
if (mode == STOP_AT_MATCH) {
@@ -366,47 +366,47 @@ exit:
}
static never_inline
-char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate,
- const u8 *buf, size_t len, u64a offAdj,
- NfaCallback cb, void *ctxt, char single,
- const u8 **final_point) {
- return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
- single, final_point, CALLBACK_OUTPUT);
+char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate,
+ const u8 *buf, size_t len, u64a offAdj,
+ NfaCallback cb, void *ctxt, char single,
+ const u8 **final_point) {
+ return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
+ single, final_point, CALLBACK_OUTPUT);
}
static never_inline
-char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate,
- const u8 *buf, size_t len, u64a offAdj,
- NfaCallback cb, void *ctxt, char single,
- const u8 **final_point) {
- return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
- single, final_point, STOP_AT_MATCH);
+char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate,
+ const u8 *buf, size_t len, u64a offAdj,
+ NfaCallback cb, void *ctxt, char single,
+ const u8 **final_point) {
+ return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
+ single, final_point, STOP_AT_MATCH);
}
static never_inline
-char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate,
- const u8 *buf, size_t len, u64a offAdj,
- NfaCallback cb, void *ctxt, char single,
- const u8 **final_point) {
- return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
- single, final_point, NO_MATCHES);
+char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate,
+ const u8 *buf, size_t len, u64a offAdj,
+ NfaCallback cb, void *ctxt, char single,
+ const u8 **final_point) {
+ return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
+ single, final_point, NO_MATCHES);
}
static really_inline
-char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate,
- const u8 *buf, size_t len, u64a offAdj,
- NfaCallback cb, void *ctxt, char single,
- const u8 **final_point, enum MatchMode mode) {
+char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate,
+ const u8 *buf, size_t len, u64a offAdj,
+ NfaCallback cb, void *ctxt, char single,
+ const u8 **final_point, enum MatchMode mode) {
if (mode == CALLBACK_OUTPUT) {
- return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb,
- ctxt, single, final_point);
+ return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb,
+ ctxt, single, final_point);
} else if (mode == STOP_AT_MATCH) {
- return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb,
- ctxt, single, final_point);
+ return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb,
+ ctxt, single, final_point);
} else {
assert(mode == NO_MATCHES);
- return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb,
- ctxt, single, final_point);
+ return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb,
+ ctxt, single, final_point);
}
}
@@ -612,10 +612,10 @@ char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset,
const struct mcclellan *m = getImplNfa(nfa);
const struct mstate_aux *aux = get_aux(m, s);
- if (m->has_wide == 1 && s >= m->wide_limit) {
- return MO_CONTINUE_MATCHING;
- }
-
+ if (m->has_wide == 1 && s >= m->wide_limit) {
+ return MO_CONTINUE_MATCHING;
+ }
+
if (!aux->accept_eod) {
return MO_CONTINUE_MATCHING;
}
@@ -688,9 +688,9 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
/* do main buffer region */
const u8 *final_look;
- char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp,
- local_ep - sp, offset + sp, cb, context,
- single, &final_look, mode);
+ char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp,
+ local_ep - sp, offset + sp, cb, context,
+ single, &final_look, mode);
if (rv == MO_DEAD) {
*(u16 *)q->state = 0;
return MO_DEAD;
@@ -760,16 +760,16 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
const struct mcclellan *m = getImplNfa(n);
u32 s = m->start_anchored;
- if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context,
- single, NULL, CALLBACK_OUTPUT)
+ if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context,
+ single, NULL, CALLBACK_OUTPUT)
== MO_DEAD) {
return s ? MO_ALIVE : MO_DEAD;
}
- if (m->has_wide == 1 && s >= m->wide_limit) {
- return MO_ALIVE;
- }
-
+ if (m->has_wide == 1 && s >= m->wide_limit) {
+ return MO_ALIVE;
+ }
+
const struct mstate_aux *aux = get_aux(m, s);
if (aux->accept_eod) {
@@ -848,7 +848,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
-
+
if (rv == MO_HALT_MATCHING) {
*(u8 *)q->state = 0;
return MO_DEAD;
@@ -1097,8 +1097,8 @@ char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);
- return (m->has_wide == 1 && s >= m->wide_limit) ?
- 0 : mcclellanHasAccept(m, get_aux(m, s), report);
+ return (m->has_wide == 1 && s >= m->wide_limit) ?
+ 0 : mcclellanHasAccept(m, get_aux(m, s), report);
}
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
@@ -1108,8 +1108,8 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);
- return (m->has_wide == 1 && s >= m->wide_limit) ?
- 0 : !!get_aux(m, s)->accept;
+ return (m->has_wide == 1 && s >= m->wide_limit) ?
+ 0 : !!get_aux(m, s)->accept;
}
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
@@ -1194,12 +1194,12 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) {
const struct mcclellan *m = getImplNfa(nfa);
u16 s = offset ? m->start_floating : m->start_anchored;
-
- // new byte
- if (m->has_wide) {
- unaligned_store_u16((u16 *)state + 1, 0);
- }
-
+
+ // new byte
+ if (m->has_wide) {
+ unaligned_store_u16((u16 *)state + 1, 0);
+ }
+
if (s) {
unaligned_store_u16(state, s);
return 1;
@@ -1229,24 +1229,24 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
- u32 s;
-
- if (top) {
- s = m->start_anchored;
-
- // new byte
- if (m->has_wide) {
- unaligned_store_u16((u16 *)state + 1, 0);
- }
- } else {
- s = unaligned_load_u16(state);
- }
-
+ u32 s;
+
+ if (top) {
+ s = m->start_anchored;
+
+ // new byte
+ if (m->has_wide) {
+ unaligned_store_u16((u16 *)state + 1, 0);
+ }
+ } else {
+ s = unaligned_load_u16(state);
+ }
+
if (m->flags & MCCLELLAN_FLAG_SINGLE) {
- mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off,
+ mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off,
start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
} else {
- mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off,
+ mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off,
start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
}
@@ -1277,16 +1277,16 @@ char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa,
char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa,
struct mq *q) {
- const struct mcclellan *m = getImplNfa(nfa);
- assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
- : nfa->scratchStateSize == 2);
+ const struct mcclellan *m = getImplNfa(nfa);
+ assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
+ : nfa->scratchStateSize == 2);
assert(ISALIGNED_N(q->state, 2));
*(u16 *)q->state = 0;
-
- // new byte
- if (m->has_wide) {
- unaligned_store_u16((u16 *)q->state + 1, 0);
- }
+
+ // new byte
+ if (m->has_wide) {
+ unaligned_store_u16((u16 *)q->state + 1, 0);
+ }
return 0;
}
@@ -1312,39 +1312,39 @@ char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest,
char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa,
const struct mq *q,
UNUSED s64a loc) {
- const struct mcclellan *m = getImplNfa(nfa);
+ const struct mcclellan *m = getImplNfa(nfa);
void *dest = q->streamState;
const void *src = q->state;
- assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
- : nfa->scratchStateSize == 2);
- assert(m->has_wide == 1 ? nfa->streamStateSize == 4
- : nfa->streamStateSize == 2);
-
+ assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
+ : nfa->scratchStateSize == 2);
+ assert(m->has_wide == 1 ? nfa->streamStateSize == 4
+ : nfa->streamStateSize == 2);
+
assert(ISALIGNED_N(src, 2));
unaligned_store_u16(dest, *(const u16 *)(src));
-
- // new byte
- if (m->has_wide) {
- unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1));
- }
+
+ // new byte
+ if (m->has_wide) {
+ unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1));
+ }
return 0;
}
char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest,
const void *src, UNUSED u64a offset,
UNUSED u8 key) {
- const struct mcclellan *m = getImplNfa(nfa);
- assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
- : nfa->scratchStateSize == 2);
- assert(m->has_wide == 1 ? nfa->streamStateSize == 4
- : nfa->streamStateSize == 2);
-
+ const struct mcclellan *m = getImplNfa(nfa);
+ assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
+ : nfa->scratchStateSize == 2);
+ assert(m->has_wide == 1 ? nfa->streamStateSize == 4
+ : nfa->streamStateSize == 2);
+
assert(ISALIGNED_N(dest, 2));
*(u16 *)dest = unaligned_load_u16(src);
-
- // new byte
- if (m->has_wide) {
- *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1);
- }
+
+ // new byte
+ if (m->has_wide) {
+ *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1);
+ }
return 0;
}
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h
index 431b554693..7b0e7f48cd 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -82,108 +82,108 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
return succ_table[(daddy << as) + cprime];
}
-
-static really_inline
-u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end,
- const u8 *remap, const u16 *s, char *qstate, u16 *offset) {
- // Internal relative offset after the last visit of the wide state.
- if (qstate != NULL) { // stream mode
- *offset = unaligned_load_u16((const u16 *)(qstate + 2));
- }
-
- u8 successful = 0;
- const u8 *c = *c_inout;
- u32 len_c = end - c;
-
- u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET);
- assert(width >= 8);
- const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16);
- const u16 *trans = (const u16 *)(wide_entry +
- WIDE_TRANSITION_OFFSET16(width));
-
- assert(*offset < width);
- u16 len_w = width - *offset;
- const u8 *sym = symbols + *offset;
-
- char tmp[16];
- u16 pos = 0;
-
- if (*offset == 0 && remap[*c] != *sym) {
- goto normal;
- }
-
- // both in (16, +oo).
- while (len_w >= 16 && len_c >= 16) {
- m128 str_w = loadu128(sym);
- for (size_t i = 0; i < 16; i++) {
- tmp[i] = remap[*(c + i)];
- }
- m128 str_c = loadu128(tmp);
-
- u32 z = movemask128(eq128(str_w, str_c));
- pos = ctz32(~z);
- assert(pos <= 16);
-
- if (pos < 16) {
- goto normal;
- }
-
- sym += 16;
- c += 16;
- len_w -= 16;
- len_c -= 16;
- }
-
- pos = 0;
- // at least one in (0, 16).
- u32 loadLength_w = MIN(len_w, 16);
- u32 loadLength_c = MIN(len_c, 16);
- m128 str_w = loadbytes128(sym, loadLength_w);
- for (size_t i = 0; i < loadLength_c; i++) {
- tmp[i] = remap[*(c + i)];
- }
- m128 str_c = loadbytes128(tmp, loadLength_c);
-
- u32 z = movemask128(eq128(str_w, str_c));
- pos = ctz32(~z);
-
- pos = MIN(pos, MIN(loadLength_w, loadLength_c));
-
- if (loadLength_w <= loadLength_c) {
- assert(pos <= loadLength_w);
- // successful matching.
- if (pos == loadLength_w) {
- c -= 1;
- successful = 1;
- }
- // failure, do nothing.
- } else {
- assert(pos <= loadLength_c);
- // successful partial matching.
- if (pos == loadLength_c) {
- c -= 1;
- goto partial;
- }
- // failure, do nothing.
- }
-
-normal:
- *offset = 0;
- if (qstate != NULL) {
- // Internal relative offset.
- unaligned_store_u16(qstate + 2, *offset);
- }
- c += pos;
- *c_inout = c;
- return successful ? *trans : *(trans + 1 + remap[*c]);
-
-partial:
- *offset = sym - symbols + pos;
- if (qstate != NULL) {
- // Internal relative offset.
- unaligned_store_u16(qstate + 2, *offset);
- }
- c += pos;
- *c_inout = c;
- return *s;
-}
+
+static really_inline
+u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end,
+ const u8 *remap, const u16 *s, char *qstate, u16 *offset) {
+ // Internal relative offset after the last visit of the wide state.
+ if (qstate != NULL) { // stream mode
+ *offset = unaligned_load_u16((const u16 *)(qstate + 2));
+ }
+
+ u8 successful = 0;
+ const u8 *c = *c_inout;
+ u32 len_c = end - c;
+
+ u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET);
+ assert(width >= 8);
+ const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16);
+ const u16 *trans = (const u16 *)(wide_entry +
+ WIDE_TRANSITION_OFFSET16(width));
+
+ assert(*offset < width);
+ u16 len_w = width - *offset;
+ const u8 *sym = symbols + *offset;
+
+ char tmp[16];
+ u16 pos = 0;
+
+ if (*offset == 0 && remap[*c] != *sym) {
+ goto normal;
+ }
+
+ // both in (16, +oo).
+ while (len_w >= 16 && len_c >= 16) {
+ m128 str_w = loadu128(sym);
+ for (size_t i = 0; i < 16; i++) {
+ tmp[i] = remap[*(c + i)];
+ }
+ m128 str_c = loadu128(tmp);
+
+ u32 z = movemask128(eq128(str_w, str_c));
+ pos = ctz32(~z);
+ assert(pos <= 16);
+
+ if (pos < 16) {
+ goto normal;
+ }
+
+ sym += 16;
+ c += 16;
+ len_w -= 16;
+ len_c -= 16;
+ }
+
+ pos = 0;
+ // at least one in (0, 16).
+ u32 loadLength_w = MIN(len_w, 16);
+ u32 loadLength_c = MIN(len_c, 16);
+ m128 str_w = loadbytes128(sym, loadLength_w);
+ for (size_t i = 0; i < loadLength_c; i++) {
+ tmp[i] = remap[*(c + i)];
+ }
+ m128 str_c = loadbytes128(tmp, loadLength_c);
+
+ u32 z = movemask128(eq128(str_w, str_c));
+ pos = ctz32(~z);
+
+ pos = MIN(pos, MIN(loadLength_w, loadLength_c));
+
+ if (loadLength_w <= loadLength_c) {
+ assert(pos <= loadLength_w);
+ // successful matching.
+ if (pos == loadLength_w) {
+ c -= 1;
+ successful = 1;
+ }
+ // failure, do nothing.
+ } else {
+ assert(pos <= loadLength_c);
+ // successful partial matching.
+ if (pos == loadLength_c) {
+ c -= 1;
+ goto partial;
+ }
+ // failure, do nothing.
+ }
+
+normal:
+ *offset = 0;
+ if (qstate != NULL) {
+ // Internal relative offset.
+ unaligned_store_u16(qstate + 2, *offset);
+ }
+ c += pos;
+ *c_inout = c;
+ return successful ? *trans : *(trans + 1 + remap[*c]);
+
+partial:
+ *offset = sym - symbols + pos;
+ if (qstate != NULL) {
+ // Internal relative offset.
+ unaligned_store_u16(qstate + 2, *offset);
+ }
+ c += pos;
+ *c_inout = c;
+ return *s;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h
index 60b3cf028e..482fdb1bc9 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -50,16 +50,16 @@ extern "C"
#define SHERMAN_CHARS_OFFSET 4
#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))
-#define WIDE_STATE 2
-#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos))
-#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos))
-
-#define WIDE_WIDTH_OFFSET 0
-#define WIDE_SYMBOL_OFFSET8 1
-#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width))
-#define WIDE_SYMBOL_OFFSET16 2
-#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2))
-
+#define WIDE_STATE 2
+#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos))
+#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos))
+
+#define WIDE_WIDTH_OFFSET 0
+#define WIDE_SYMBOL_OFFSET8 1
+#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width))
+#define WIDE_SYMBOL_OFFSET16 2
+#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2))
+
struct report_list {
u32 count;
ReportID report[];
@@ -89,17 +89,17 @@ struct mcclellan {
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
- u16 wide_limit; /**< 8/16 bit, lowest wide head state */
+ u16 wide_limit; /**< 8/16 bit, lowest wide head state */
u8 alphaShift;
u8 flags;
u8 has_accel; /**< 1 iff there are any accel plans */
- u8 has_wide; /**< 1 iff there exists any wide state */
+ u8 has_wide; /**< 1 iff there exists any wide state */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
- u32 accel_offset; /**< offset of accel structures from start of McClellan */
+ u32 accel_offset; /**< offset of accel structures from start of McClellan */
u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */
- u32 wide_offset; /**< offset of the wide state entries to the start of the
- * nfa structure */
+ u32 wide_offset; /**< offset of the wide state entries to the start of the
+ * nfa structure */
};
static really_inline
@@ -120,43 +120,43 @@ char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base,
return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
}
-static really_inline
-const char *findWideEntry8(UNUSED const struct mcclellan *m,
- const char *wide_base, u32 wide_limit, u32 s) {
- UNUSED u8 type = *(const u8 *)wide_base;
- assert(type == WIDE_STATE);
- const u32 entry_offset
- = *(const u32 *)(wide_base
- + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32)));
-
- const char *rv = wide_base + entry_offset;
- assert(rv < (const char *)m + m->length - sizeof(struct NFA));
- return rv;
-}
-
-static really_inline
-const char *findWideEntry16(UNUSED const struct mcclellan *m,
- const char *wide_base, u32 wide_limit, u32 s) {
- UNUSED u8 type = *(const u8 *)wide_base;
- assert(type == WIDE_STATE);
- const u32 entry_offset
- = *(const u32 *)(wide_base
- + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32)));
-
- const char *rv = wide_base + entry_offset;
- assert(rv < (const char *)m + m->length - sizeof(struct NFA));
- return rv;
-}
-
-static really_inline
-char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) {
- u32 entry_offset
- = *(const u32 *)(wide_base
- + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32)));
-
- return wide_base + entry_offset;
-}
-
+static really_inline
+const char *findWideEntry8(UNUSED const struct mcclellan *m,
+ const char *wide_base, u32 wide_limit, u32 s) {
+ UNUSED u8 type = *(const u8 *)wide_base;
+ assert(type == WIDE_STATE);
+ const u32 entry_offset
+ = *(const u32 *)(wide_base
+ + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32)));
+
+ const char *rv = wide_base + entry_offset;
+ assert(rv < (const char *)m + m->length - sizeof(struct NFA));
+ return rv;
+}
+
+static really_inline
+const char *findWideEntry16(UNUSED const struct mcclellan *m,
+ const char *wide_base, u32 wide_limit, u32 s) {
+ UNUSED u8 type = *(const u8 *)wide_base;
+ assert(type == WIDE_STATE);
+ const u32 entry_offset
+ = *(const u32 *)(wide_base
+ + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32)));
+
+ const char *rv = wide_base + entry_offset;
+ assert(rv < (const char *)m + m->length - sizeof(struct NFA));
+ return rv;
+}
+
+static really_inline
+char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) {
+ u32 entry_offset
+ = *(const u32 *)(wide_base
+ + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32)));
+
+ return wide_base + entry_offset;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp
index 3b73488581..27ec1716e9 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -56,19 +56,19 @@
#include <cstring>
#include <map>
#include <memory>
-#include <queue>
+#include <queue>
#include <set>
#include <vector>
#include <boost/range/adaptor/map.hpp>
-#include "mcclellandump.h"
-#include "util/dump_util.h"
-#include "util/dump_charclass.h"
-
+#include "mcclellandump.h"
+#include "util/dump_util.h"
+#include "util/dump_charclass.h"
+
using namespace std;
using boost::adaptors::map_keys;
-using boost::dynamic_bitset;
+using boost::dynamic_bitset;
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
@@ -88,8 +88,8 @@ namespace /* anon */ {
struct dstate_extra {
u16 daddytaken = 0;
bool shermanState = false;
- bool wideState = false;
- bool wideHead = false;
+ bool wideState = false;
+ bool wideHead = false;
};
struct dfa_info {
@@ -97,8 +97,8 @@ struct dfa_info {
raw_dfa &raw;
vector<dstate> &states;
vector<dstate_extra> extra;
- vector<vector<dstate_id_t>> wide_state_chain;
- vector<vector<symbol_t>> wide_symbol_chain;
+ vector<vector<dstate_id_t>> wide_state_chain;
+ vector<vector<symbol_t>> wide_symbol_chain;
const u16 alpha_size; /* including special symbols */
const array<u16, ALPHABET_SIZE> &alpha_remap;
const u16 impl_alpha_size;
@@ -122,14 +122,14 @@ struct dfa_info {
return extra[raw_id].shermanState;
}
- bool is_widestate(dstate_id_t raw_id) const {
- return extra[raw_id].wideState;
- }
-
- bool is_widehead(dstate_id_t raw_id) const {
- return extra[raw_id].wideHead;
- }
-
+ bool is_widestate(dstate_id_t raw_id) const {
+ return extra[raw_id].wideState;
+ }
+
+ bool is_widehead(dstate_id_t raw_id) const {
+ return extra[raw_id].wideHead;
+ }
+
size_t size(void) const { return states.size(); }
};
@@ -142,35 +142,35 @@ u8 dfa_info::getAlphaShift() const {
}
}
-struct state_prev_info {
- vector<vector<dstate_id_t>> prev_vec;
- explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {}
-};
-
-struct DfaPrevInfo {
- u16 impl_alpha_size;
- u16 state_num;
- vector<state_prev_info> states;
- set<dstate_id_t> accepts;
-
- explicit DfaPrevInfo(raw_dfa &rdfa);
-};
-
-DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa)
- : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()),
- states(state_num, state_prev_info(impl_alpha_size)){
- for (size_t i = 0; i < states.size(); i++) {
- for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
- dstate_id_t curr = rdfa.states[i].next[sym];
- states[curr].prev_vec[sym].push_back(i);
- }
- if (!rdfa.states[i].reports.empty()
- || !rdfa.states[i].reports_eod.empty()) {
- DEBUG_PRINTF("accept raw state: %ld\n", i);
- accepts.insert(i);
- }
- }
-}
+struct state_prev_info {
+ vector<vector<dstate_id_t>> prev_vec;
+ explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {}
+};
+
+struct DfaPrevInfo {
+ u16 impl_alpha_size;
+ u16 state_num;
+ vector<state_prev_info> states;
+ set<dstate_id_t> accepts;
+
+ explicit DfaPrevInfo(raw_dfa &rdfa);
+};
+
+DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa)
+ : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()),
+ states(state_num, state_prev_info(impl_alpha_size)){
+ for (size_t i = 0; i < states.size(); i++) {
+ for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+ dstate_id_t curr = rdfa.states[i].next[sym];
+ states[curr].prev_vec[sym].push_back(i);
+ }
+ if (!rdfa.states[i].reports.empty()
+ || !rdfa.states[i].reports_eod.empty()) {
+ DEBUG_PRINTF("accept raw state: %ld\n", i);
+ accepts.insert(i);
+ }
+ }
+}
} // namespace
static
@@ -198,11 +198,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
for (size_t j = 0; j < alphaSize; j++) {
size_t c_prime = (i << alphaShift) + j;
- // wide state has no aux structure.
- if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
- continue;
- }
-
+ // wide state has no aux structure.
+ if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
+ continue;
+ }
+
mstate_aux *aux = getAux(n, succ_table[c_prime]);
if (aux->accept) {
@@ -217,8 +217,8 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
/* handle the sherman states */
char *sherman_base_offset = (char *)n + m->sherman_offset;
- u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
- for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
+ u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
+ for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
char *sherman_cur
= findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
@@ -227,11 +227,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
for (u8 i = 0; i < len; i++) {
u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
- // wide state has no aux structure.
- if (m->has_wide && succ_i >= m->wide_limit) {
- continue;
- }
-
+ // wide state has no aux structure.
+ if (m->has_wide && succ_i >= m->wide_limit) {
+ continue;
+ }
+
mstate_aux *aux = getAux(n, succ_i);
if (aux->accept) {
@@ -245,51 +245,51 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
unaligned_store_u16((u8 *)&succs[i], succ_i);
}
}
-
- /* handle the wide states */
- if (m->has_wide) {
- u32 wide_limit = m->wide_limit;
- char *wide_base = (char *)n + m->wide_offset;
- assert(*wide_base == WIDE_STATE);
- u16 wide_number = verify_u16(info.wide_symbol_chain.size());
- // traverse over wide head states.
- for (u16 j = wide_limit; j < wide_limit + wide_number; j++) {
- char *wide_cur
- = findMutableWideEntry16(wide_base, wide_limit, j);
- u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET);
- u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width));
-
- // check successful transition
- u16 next = unaligned_load_u16((u8 *)trans);
- if (next < wide_limit) {
- mstate_aux *aux = getAux(n, next);
- if (aux->accept) {
- next |= ACCEPT_FLAG;
- }
- if (aux->accel_offset) {
- next |= ACCEL_FLAG;
- }
- unaligned_store_u16((u8 *)trans, next);
- }
- trans++;
-
- // check failure transition
- for (symbol_t k = 0; k < alphaSize; k++) {
- u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
- if (next_k >= wide_limit) {
- continue;
- }
- mstate_aux *aux_k = getAux(n, next_k);
- if (aux_k->accept) {
- next_k |= ACCEPT_FLAG;
- }
- if (aux_k->accel_offset) {
- next_k |= ACCEL_FLAG;
- }
- unaligned_store_u16((u8 *)&trans[k], next_k);
- }
- }
- }
+
+ /* handle the wide states */
+ if (m->has_wide) {
+ u32 wide_limit = m->wide_limit;
+ char *wide_base = (char *)n + m->wide_offset;
+ assert(*wide_base == WIDE_STATE);
+ u16 wide_number = verify_u16(info.wide_symbol_chain.size());
+ // traverse over wide head states.
+ for (u16 j = wide_limit; j < wide_limit + wide_number; j++) {
+ char *wide_cur
+ = findMutableWideEntry16(wide_base, wide_limit, j);
+ u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET);
+ u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width));
+
+ // check successful transition
+ u16 next = unaligned_load_u16((u8 *)trans);
+ if (next < wide_limit) {
+ mstate_aux *aux = getAux(n, next);
+ if (aux->accept) {
+ next |= ACCEPT_FLAG;
+ }
+ if (aux->accel_offset) {
+ next |= ACCEL_FLAG;
+ }
+ unaligned_store_u16((u8 *)trans, next);
+ }
+ trans++;
+
+ // check failure transition
+ for (symbol_t k = 0; k < alphaSize; k++) {
+ u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
+ if (next_k >= wide_limit) {
+ continue;
+ }
+ mstate_aux *aux_k = getAux(n, next_k);
+ if (aux_k->accept) {
+ next_k |= ACCEPT_FLAG;
+ }
+ if (aux_k->accel_offset) {
+ next_k |= ACCEL_FLAG;
+ }
+ unaligned_store_u16((u8 *)&trans[k], next_k);
+ }
+ }
+ }
}
u32 mcclellan_build_strat::max_allowed_offset_accel() const {
@@ -335,20 +335,20 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
m->start_anchored = info.implId(info.raw.start_anchored);
m->start_floating = info.implId(info.raw.start_floating);
m->has_accel = accel_count ? 1 : 0;
- m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;
-
- if (state_size == sizeof(u8) && m->has_wide == 1) {
- // allocate 1 more byte for wide state use.
- nfa->scratchStateSize += sizeof(u8);
- nfa->streamStateSize += sizeof(u8);
- }
-
- if (state_size == sizeof(u16) && m->has_wide == 1) {
- // allocate 2 more bytes for wide state use.
- nfa->scratchStateSize += sizeof(u16);
- nfa->streamStateSize += sizeof(u16);
- }
-
+ m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;
+
+ if (state_size == sizeof(u8) && m->has_wide == 1) {
+ // allocate 1 more byte for wide state use.
+ nfa->scratchStateSize += sizeof(u8);
+ nfa->streamStateSize += sizeof(u8);
+ }
+
+ if (state_size == sizeof(u16) && m->has_wide == 1) {
+ // allocate 2 more bytes for wide state use.
+ nfa->scratchStateSize += sizeof(u16);
+ nfa->streamStateSize += sizeof(u16);
+ }
+
if (single) {
m->flags |= MCCLELLAN_FLAG_SINGLE;
}
@@ -521,24 +521,24 @@ size_t calcShermanRegionSize(const dfa_info &info) {
}
static
-size_t calcWideRegionSize(const dfa_info &info) {
- if (info.wide_state_chain.empty()) {
- return 0;
- }
-
- // wide info header
- size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;
-
- // wide info body
- for (const auto &chain : info.wide_symbol_chain) {
- rv += ROUNDUP_N(chain.size(), 2) +
- (info.impl_alpha_size + 1) * sizeof(u16) + 2;
- }
-
- return ROUNDUP_16(rv);
-}
-
-static
+size_t calcWideRegionSize(const dfa_info &info) {
+ if (info.wide_state_chain.empty()) {
+ return 0;
+ }
+
+ // wide info header
+ size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;
+
+ // wide info body
+ for (const auto &chain : info.wide_symbol_chain) {
+ rv += ROUNDUP_N(chain.size(), 2) +
+ (info.impl_alpha_size + 1) * sizeof(u16) + 2;
+ }
+
+ return ROUNDUP_16(rv);
+}
+
+static
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
const vector<u32> &reports, const vector<u32> &reports_eod,
vector<u32> &reportOffsets) {
@@ -552,60 +552,60 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
/* returns false on error */
static
-bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
- dstate_id_t *wide_limit) {
+bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
+ dstate_id_t *wide_limit) {
info.states[0].impl_id = 0; /* dead is always 0 */
vector<dstate_id_t> norm;
vector<dstate_id_t> sherm;
- vector<dstate_id_t> wideHead;
- vector<dstate_id_t> wideState;
+ vector<dstate_id_t> wideHead;
+ vector<dstate_id_t> wideState;
if (info.size() > (1 << 16)) {
DEBUG_PRINTF("too many states\n");
- *wide_limit = 0;
+ *wide_limit = 0;
return false;
}
for (u32 i = 1; i < info.size(); i++) {
- if (info.is_widehead(i)) {
- wideHead.push_back(i);
- } else if (info.is_widestate(i)) {
- wideState.push_back(i);
- } else if (info.is_sherman(i)) {
+ if (info.is_widehead(i)) {
+ wideHead.push_back(i);
+ } else if (info.is_widestate(i)) {
+ wideState.push_back(i);
+ } else if (info.is_sherman(i)) {
sherm.push_back(i);
} else {
norm.push_back(i);
}
}
- dstate_id_t next = 1;
+ dstate_id_t next = 1;
for (const dstate_id_t &s : norm) {
- DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next);
- info.states[s].impl_id = next++;
+ DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next);
+ info.states[s].impl_id = next++;
}
- *sherman_base = next;
+ *sherman_base = next;
for (const dstate_id_t &s : sherm) {
- DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next);
- info.states[s].impl_id = next++;
- }
-
- *wide_limit = next;
- for (const dstate_id_t &s : wideHead) {
- DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next);
- info.states[s].impl_id = next++;
- }
-
- for (const dstate_id_t &s : wideState) {
- DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next);
- info.states[s].impl_id = next++;
- }
-
+ DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next);
+ info.states[s].impl_id = next++;
+ }
+
+ *wide_limit = next;
+ for (const dstate_id_t &s : wideHead) {
+ DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next);
+ info.states[s].impl_id = next++;
+ }
+
+ for (const dstate_id_t &s : wideState) {
+ DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next);
+ info.states[s].impl_id = next++;
+ }
+
/* Check to see if we haven't over allocated our states */
- DEBUG_PRINTF("next sherman %u masked %u\n", next,
- (dstate_id_t)(next & STATE_MASK));
- return (next - 1) == ((next - 1) & STATE_MASK);
+ DEBUG_PRINTF("next sherman %u masked %u\n", next,
+ (dstate_id_t)(next & STATE_MASK));
+ return (next - 1) == ((next - 1) & STATE_MASK);
}
static
@@ -622,16 +622,16 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
assert(alphaShift <= 8);
u16 count_real_states;
- u16 wide_limit;
- if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
+ u16 wide_limit;
+ if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size());
return nullptr;
}
- DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
- DEBUG_PRINTF("non_wide_states: %d\n", wide_limit);
-
+ DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
+ DEBUG_PRINTF("non_wide_states: %d\n", wide_limit);
+
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= info.strat.getAccelInfo(cc.grey);
@@ -639,7 +639,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
size_t tran_size = (1 << info.getAlphaShift())
* sizeof(u16) * count_real_states;
- size_t aux_size = sizeof(mstate_aux) * wide_limit;
+ size_t aux_size = sizeof(mstate_aux) * wide_limit;
size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
@@ -647,24 +647,24 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
+ ri->getReportListSize(), 32);
size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
size_t sherman_size = calcShermanRegionSize(info);
- size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size);
- size_t wide_size = calcWideRegionSize(info);
- size_t total_size = wide_offset + wide_size;
+ size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size);
+ size_t wide_size = calcWideRegionSize(info);
+ size_t total_size = wide_offset + wide_size;
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
- DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
- DEBUG_PRINTF("aux_size %zu\n", aux_size);
- DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
- DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA));
- DEBUG_PRINTF("accel_size %zu\n", accel_size);
- DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset);
- DEBUG_PRINTF("sherman_size %zu\n", sherman_size);
- DEBUG_PRINTF("wide_offset %zu\n", wide_offset);
- DEBUG_PRINTF("wide_size %zu\n", wide_size);
- DEBUG_PRINTF("total_size %zu\n", total_size);
-
+ DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
+ DEBUG_PRINTF("aux_size %zu\n", aux_size);
+ DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
+ DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA));
+ DEBUG_PRINTF("accel_size %zu\n", accel_size);
+ DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset);
+ DEBUG_PRINTF("sherman_size %zu\n", sherman_size);
+ DEBUG_PRINTF("wide_offset %zu\n", wide_offset);
+ DEBUG_PRINTF("wide_size %zu\n", wide_size);
+ DEBUG_PRINTF("total_size %zu\n", total_size);
+
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
char *nfa_base = (char *)nfa.get();
@@ -679,9 +679,9 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
- m->wide_limit = wide_limit;
- m->wide_offset = wide_offset;
-
+ m->wide_limit = wide_limit;
+ m->wide_offset = wide_offset;
+
/* copy in the mc header information */
m->sherman_offset = sherman_offset;
m->sherman_end = total_size;
@@ -689,7 +689,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
/* do normal states */
for (size_t i = 0; i < info.size(); i++) {
- if (info.is_sherman(i) || info.is_widestate(i)) {
+ if (info.is_sherman(i) || info.is_widestate(i)) {
continue;
}
@@ -727,7 +727,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
mstate_aux *this_aux = getAux(nfa.get(), fs);
assert(fs >= count_real_states);
- assert(fs < wide_limit);
+ assert(fs < wide_limit);
char *curr_sherman_entry
= sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
@@ -771,71 +771,71 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
}
}
- if (!info.wide_state_chain.empty()) {
- /* do wide states using info */
- u16 wide_number = verify_u16(info.wide_symbol_chain.size());
- char *wide_base = nfa_base + m->wide_offset;
- assert(ISALIGNED_16(wide_base));
-
- char *wide_top = wide_base;
- *(u8 *)(wide_top++) = WIDE_STATE;
- wide_top = ROUNDUP_PTR(wide_top, 2);
- *(u16 *)(wide_top) = wide_number;
- wide_top += 2;
-
- char *curr_wide_entry = wide_top + wide_number * sizeof(u32);
- u32 *wide_offset_list = (u32 *)wide_top;
-
- /* get the order of writing wide states */
- vector<size_t> order(wide_number);
- for (size_t i = 0; i < wide_number; i++) {
- dstate_id_t head = info.wide_state_chain[i].front();
- size_t pos = info.implId(head) - m->wide_limit;
- order[pos] = i;
- }
-
- for (size_t i : order) {
- vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
- vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
-
- u16 width = verify_u16(symbol_chain.size());
- *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
- u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16);
-
- // store wide state symbol chain
- for (size_t j = 0; j < width; j++) {
- *(chars++) = verify_u8(symbol_chain[j]);
- }
-
- // store wide state transition table
- u16 *trans = (u16 *)(curr_wide_entry
- + WIDE_TRANSITION_OFFSET16(width));
- dstate_id_t tail = state_chain[width - 1];
- symbol_t last = symbol_chain[width -1];
- dstate_id_t tran = info.states[tail].next[last];
- // 1. successful transition
- *trans++ = info.implId(tran);
- // 2. failure transition
- for (size_t j = 0; verify_u16(j) < width - 1; j++) {
- if (symbol_chain[j] != last) {
- tran = info.states[state_chain[j]].next[last];
- }
- }
- for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
- if (sym != last) {
- *trans++ = info.implId(info.states[tail].next[sym]);
- }
- else {
- *trans++ = info.implId(tran);
- }
- }
-
- *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base);
-
- curr_wide_entry = (char *)trans;
- }
- }
-
+ if (!info.wide_state_chain.empty()) {
+ /* do wide states using info */
+ u16 wide_number = verify_u16(info.wide_symbol_chain.size());
+ char *wide_base = nfa_base + m->wide_offset;
+ assert(ISALIGNED_16(wide_base));
+
+ char *wide_top = wide_base;
+ *(u8 *)(wide_top++) = WIDE_STATE;
+ wide_top = ROUNDUP_PTR(wide_top, 2);
+ *(u16 *)(wide_top) = wide_number;
+ wide_top += 2;
+
+ char *curr_wide_entry = wide_top + wide_number * sizeof(u32);
+ u32 *wide_offset_list = (u32 *)wide_top;
+
+ /* get the order of writing wide states */
+ vector<size_t> order(wide_number);
+ for (size_t i = 0; i < wide_number; i++) {
+ dstate_id_t head = info.wide_state_chain[i].front();
+ size_t pos = info.implId(head) - m->wide_limit;
+ order[pos] = i;
+ }
+
+ for (size_t i : order) {
+ vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
+ vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
+
+ u16 width = verify_u16(symbol_chain.size());
+ *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
+ u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16);
+
+ // store wide state symbol chain
+ for (size_t j = 0; j < width; j++) {
+ *(chars++) = verify_u8(symbol_chain[j]);
+ }
+
+ // store wide state transition table
+ u16 *trans = (u16 *)(curr_wide_entry
+ + WIDE_TRANSITION_OFFSET16(width));
+ dstate_id_t tail = state_chain[width - 1];
+ symbol_t last = symbol_chain[width -1];
+ dstate_id_t tran = info.states[tail].next[last];
+ // 1. successful transition
+ *trans++ = info.implId(tran);
+ // 2. failure transition
+ for (size_t j = 0; verify_u16(j) < width - 1; j++) {
+ if (symbol_chain[j] != last) {
+ tran = info.states[state_chain[j]].next[last];
+ }
+ }
+ for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
+ if (sym != last) {
+ *trans++ = info.implId(info.states[tail].next[sym]);
+ }
+ else {
+ *trans++ = info.implId(tran);
+ }
+ }
+
+ *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base);
+
+ curr_wide_entry = (char *)trans;
+ }
+ }
+
markEdges(nfa.get(), succ_table, info);
if (accel_states && nfa) {
@@ -997,7 +997,7 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
return nfa;
}
-#define MAX_SHERMAN_LIST_LEN 9
+#define MAX_SHERMAN_LIST_LEN 9
static
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
@@ -1081,16 +1081,16 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
if (trust_daddy_states) {
// Use the daddy already set for this state so long as it isn't already
// a Sherman state.
- dstate_id_t daddy = currState.daddy;
- if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
+ dstate_id_t daddy = currState.daddy;
+ if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
hinted.insert(currState.daddy);
} else {
// Fall back to granddaddy, which has already been processed (due
// to BFS ordering) and cannot be a Sherman state.
dstate_id_t granddaddy = info.states[currState.daddy].daddy;
- if (info.is_widestate(granddaddy)) {
- return;
- }
+ if (info.is_widestate(granddaddy)) {
+ return;
+ }
assert(!info.is_sherman(granddaddy));
hinted.insert(granddaddy);
}
@@ -1102,7 +1102,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
assert(donor < curr_id);
u32 score = 0;
- if (info.is_sherman(donor) || info.is_widestate(donor)) {
+ if (info.is_sherman(donor) || info.is_widestate(donor)) {
continue;
}
@@ -1175,290 +1175,290 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
return false;
}
-/* \brief Test for only-one-predecessor property. */
-static
-bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size,
- const dstate_id_t curr_id, dstate_id_t &prev_id,
- symbol_t &prev_sym) {
- u32 num_prev = 0;
- bool test_p1 = false;
-
- for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
- num_prev += info.states[curr_id].prev_vec[sym].size();
- DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym,
- info.states[curr_id].prev_vec[sym].size());
- if (num_prev == 1 && !test_p1) {
- test_p1 = true;
- prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure???
- prev_sym = sym;
- }
- }
-
- return num_prev == 1;
-}
-
-/* \brief Test for same-failure-action property. */
-static
-bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size,
- const dstate_id_t curr_id, const dstate_id_t prev_id,
- const symbol_t curr_sym, const symbol_t prev_sym) {
- const dstate &prevState = rdfa.states[prev_id];
- const dstate &currState = rdfa.states[curr_id];
-
- // Compare transition tables between currState and prevState.
- u16 score = 0;
- for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
- if (currState.next[sym] == prevState.next[sym]
- && sym != curr_sym && sym != prev_sym) {
- score++;
- }
- }
- DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size);
-
- // 2 cases.
- if (curr_sym != prev_sym && score >= impl_alpha_size - 2
- && currState.next[prev_sym] == prevState.next[curr_sym]) {
- return true;
- } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) {
- return true;
- }
- return false;
-}
-
-/* \brief Check whether adding current prev_id will generate a circle.*/
-static
-bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size,
- const vector<dstate_id_t> &chain, const dstate_id_t id) {
- const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec;
- const dstate_id_t tail = chain.front();
- for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
- auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail);
- if (iter != prev_vec[sym].end()) {
- // Tail is one of id's predecessors, forming a circle.
- return true;
- }
- }
- return false;
-}
-
-/* \brief Returns a chain of state ids and symbols. */
-static
-dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info,
- const dstate_id_t curr_id,
- const symbol_t curr_sym,
- vector<dstate_id_t> &temp_chain) {
- //Record current id first.
- temp_chain.push_back(curr_id);
-
- const u16 size = info.impl_alpha_size;
-
- // Stop when entering root cloud.
- if (rdfa.start_anchored != DEAD_STATE
- && is_cyclic_near(rdfa, rdfa.start_anchored)
- && curr_id < size) {
- return curr_id;
- }
- if (rdfa.start_floating != DEAD_STATE
- && curr_id >= rdfa.start_floating
- && curr_id < rdfa.start_floating + size * 3) {
- return curr_id;
- }
-
- // Stop when reaching anchored or floating.
- if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) {
- return curr_id;
- }
-
- dstate_id_t prev_id = 0;
- symbol_t prev_sym = ALPHABET_SIZE;
-
- // Check the only-one-predecessor property.
- if (!check_property1(info, size, curr_id, prev_id, prev_sym)) {
- return curr_id;
- }
- assert(prev_id != 0 && prev_sym != ALPHABET_SIZE);
- DEBUG_PRINTF("(P1 test passed.)\n");
-
- // Circle testing for the prev_id that passes the P1 test.
- if (check_circle(info, size, temp_chain, prev_id)) {
- DEBUG_PRINTF("(A circle is found.)\n");
- return curr_id;
- }
-
- // Check the same-failure-action property.
- if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) {
- return curr_id;
- }
- DEBUG_PRINTF("(P2 test passed.)\n");
-
- if (!rdfa.states[prev_id].reports.empty()
- || !rdfa.states[prev_id].reports_eod.empty()) {
- return curr_id;
- } else {
- return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain);
- }
-}
-
-/* \brief Always store the non-extensible chains found till now. */
-static
-bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
- vector<dstate_id_t> &temp_chain,
- dynamic_bitset<> &added, bool head_is_new) {
- dstate_id_t head = temp_chain.front();
- u16 length = temp_chain.size();
-
- if (head_is_new) {
- DEBUG_PRINTF("This is a new chain!\n");
-
- // Add this new chain and get it marked.
- candidate_chain.push_back(temp_chain);
-
- for (auto &id : temp_chain) {
- DEBUG_PRINTF("(Marking s%u ...)\n", id);
- added.set(id);
- }
-
- return true;
- }
-
- DEBUG_PRINTF("This is a longer chain!\n");
- assert(!candidate_chain.empty());
-
- auto chain = find_if(candidate_chain.begin(), candidate_chain.end(),
- [&](const vector<dstate_id_t> &it) {
- return it.front() == head;
- });
-
- // Not a valid head, just do nothing and return.
- if (chain == candidate_chain.end()) {
- return false;
- }
-
- u16 len = chain->size();
-
- if (length > len) {
- // Find out the branch node first.
- size_t piv = 0;
- for (; piv < length; piv++) {
- if ((*chain)[piv] != temp_chain[piv]) {
- break;
- }
- }
-
- for (size_t j = piv + 1; j < length; j++) {
- DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]);
- added.set(temp_chain[j]);
- }
-
- // Unmark old unuseful nodes.
- // (Except the tail node, which is in working queue)
- for (size_t j = piv + 1; j < verify_u16(len - 1); j++) {
- DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]);
- added.reset((*chain)[j]);
- }
-
- chain->assign(temp_chain.begin(), temp_chain.end());
- }
-
- return false;
-}
-
-/* \brief Generate wide_symbol_chain from wide_state_chain. */
-static
-void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
- raw_dfa &rdfa = info.raw;
- assert(chain_tail.size() == info.wide_state_chain.size());
-
- for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
- vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
- vector<symbol_t> symbol_chain;
-
- info.extra[state_chain[0]].wideHead = true;
- size_t width = state_chain.size() - 1;
-
- for (size_t j = 0; j < width; j++) {
- dstate_id_t curr_id = state_chain[j];
- dstate_id_t next_id = state_chain[j + 1];
-
- // The last state of the chain doesn't belong to a wide state.
- info.extra[curr_id].wideState = true;
-
- // The tail symbol comes from vector chain_tail;
- if (j == width - 1) {
- symbol_chain.push_back(chain_tail[i]);
- } else {
- for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
- if (rdfa.states[curr_id].next[sym] == next_id) {
- symbol_chain.push_back(sym);
- break;
- }
- }
- }
- }
-
- info.wide_symbol_chain.push_back(symbol_chain);
- }
-}
-
-/* \brief Find potential regions of states to be packed into wide states. */
-static
-void find_wide_state(dfa_info &info) {
- DfaPrevInfo dinfo(info.raw);
- queue<dstate_id_t> work_queue;
-
- dynamic_bitset<> added(info.raw.states.size());
- for (auto it : dinfo.accepts) {
- work_queue.push(it);
- added.set(it);
- }
-
- vector<symbol_t> chain_tail;
- while (!work_queue.empty()) {
- dstate_id_t curr_id = work_queue.front();
- work_queue.pop();
- DEBUG_PRINTF("Newly popped state: s%u\n", curr_id);
-
- for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) {
- for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) {
- if (added.test(info_it)) {
- DEBUG_PRINTF("(s%u already marked.)\n", info_it);
- continue;
- }
-
- vector<dstate_id_t> temp_chain;
- // Head is a state failing the test of the chain.
- dstate_id_t head = find_chain_candidate(info.raw, dinfo,
- info_it, sym,
- temp_chain);
-
- // A candidate chain should contain 8 substates at least.
- if (temp_chain.size() < 8) {
- DEBUG_PRINTF("(Not enough substates, continue.)\n");
- continue;
- }
-
- bool head_is_new = !added.test(head);
- if (head_is_new) {
- added.set(head);
- work_queue.push(head);
- DEBUG_PRINTF("Newly pushed state: s%u\n", head);
- }
-
- reverse(temp_chain.begin(), temp_chain.end());
- temp_chain.push_back(curr_id);
-
- assert(head > 0 && head == temp_chain.front());
- if (store_chain_longest(info.wide_state_chain, temp_chain,
- added, head_is_new)) {
- chain_tail.push_back(sym);
- }
- }
- }
- }
-
- generate_symbol_chain(info, chain_tail);
-}
-
+/* \brief Test for only-one-predecessor property. */
+static
+bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size,
+ const dstate_id_t curr_id, dstate_id_t &prev_id,
+ symbol_t &prev_sym) {
+ u32 num_prev = 0;
+ bool test_p1 = false;
+
+ for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+ num_prev += info.states[curr_id].prev_vec[sym].size();
+ DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym,
+ info.states[curr_id].prev_vec[sym].size());
+ if (num_prev == 1 && !test_p1) {
+ test_p1 = true;
+ prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure???
+ prev_sym = sym;
+ }
+ }
+
+ return num_prev == 1;
+}
+
+/* \brief Test for same-failure-action property. */
+static
+bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size,
+ const dstate_id_t curr_id, const dstate_id_t prev_id,
+ const symbol_t curr_sym, const symbol_t prev_sym) {
+ const dstate &prevState = rdfa.states[prev_id];
+ const dstate &currState = rdfa.states[curr_id];
+
+ // Compare transition tables between currState and prevState.
+ u16 score = 0;
+ for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+ if (currState.next[sym] == prevState.next[sym]
+ && sym != curr_sym && sym != prev_sym) {
+ score++;
+ }
+ }
+ DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size);
+
+ // 2 cases.
+ if (curr_sym != prev_sym && score >= impl_alpha_size - 2
+ && currState.next[prev_sym] == prevState.next[curr_sym]) {
+ return true;
+ } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) {
+ return true;
+ }
+ return false;
+}
+
+/* \brief Check whether adding current prev_id will generate a circle.*/
+static
+bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size,
+ const vector<dstate_id_t> &chain, const dstate_id_t id) {
+ const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec;
+ const dstate_id_t tail = chain.front();
+ for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
+ auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail);
+ if (iter != prev_vec[sym].end()) {
+ // Tail is one of id's predecessors, forming a circle.
+ return true;
+ }
+ }
+ return false;
+}
+
+/* \brief Returns a chain of state ids and symbols. */
+static
+dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info,
+ const dstate_id_t curr_id,
+ const symbol_t curr_sym,
+ vector<dstate_id_t> &temp_chain) {
+ //Record current id first.
+ temp_chain.push_back(curr_id);
+
+ const u16 size = info.impl_alpha_size;
+
+ // Stop when entering root cloud.
+ if (rdfa.start_anchored != DEAD_STATE
+ && is_cyclic_near(rdfa, rdfa.start_anchored)
+ && curr_id < size) {
+ return curr_id;
+ }
+ if (rdfa.start_floating != DEAD_STATE
+ && curr_id >= rdfa.start_floating
+ && curr_id < rdfa.start_floating + size * 3) {
+ return curr_id;
+ }
+
+ // Stop when reaching anchored or floating.
+ if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) {
+ return curr_id;
+ }
+
+ dstate_id_t prev_id = 0;
+ symbol_t prev_sym = ALPHABET_SIZE;
+
+ // Check the only-one-predecessor property.
+ if (!check_property1(info, size, curr_id, prev_id, prev_sym)) {
+ return curr_id;
+ }
+ assert(prev_id != 0 && prev_sym != ALPHABET_SIZE);
+ DEBUG_PRINTF("(P1 test passed.)\n");
+
+ // Circle testing for the prev_id that passes the P1 test.
+ if (check_circle(info, size, temp_chain, prev_id)) {
+ DEBUG_PRINTF("(A circle is found.)\n");
+ return curr_id;
+ }
+
+ // Check the same-failure-action property.
+ if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) {
+ return curr_id;
+ }
+ DEBUG_PRINTF("(P2 test passed.)\n");
+
+ if (!rdfa.states[prev_id].reports.empty()
+ || !rdfa.states[prev_id].reports_eod.empty()) {
+ return curr_id;
+ } else {
+ return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain);
+ }
+}
+
+/* \brief Always store the non-extensible chains found till now. */
+static
+bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
+ vector<dstate_id_t> &temp_chain,
+ dynamic_bitset<> &added, bool head_is_new) {
+ dstate_id_t head = temp_chain.front();
+ u16 length = temp_chain.size();
+
+ if (head_is_new) {
+ DEBUG_PRINTF("This is a new chain!\n");
+
+ // Add this new chain and get it marked.
+ candidate_chain.push_back(temp_chain);
+
+ for (auto &id : temp_chain) {
+ DEBUG_PRINTF("(Marking s%u ...)\n", id);
+ added.set(id);
+ }
+
+ return true;
+ }
+
+ DEBUG_PRINTF("This is a longer chain!\n");
+ assert(!candidate_chain.empty());
+
+ auto chain = find_if(candidate_chain.begin(), candidate_chain.end(),
+ [&](const vector<dstate_id_t> &it) {
+ return it.front() == head;
+ });
+
+ // Not a valid head, just do nothing and return.
+ if (chain == candidate_chain.end()) {
+ return false;
+ }
+
+ u16 len = chain->size();
+
+ if (length > len) {
+ // Find out the branch node first.
+ size_t piv = 0;
+ for (; piv < length; piv++) {
+ if ((*chain)[piv] != temp_chain[piv]) {
+ break;
+ }
+ }
+
+ for (size_t j = piv + 1; j < length; j++) {
+ DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]);
+ added.set(temp_chain[j]);
+ }
+
+ // Unmark old unuseful nodes.
+ // (Except the tail node, which is in working queue)
+ for (size_t j = piv + 1; j < verify_u16(len - 1); j++) {
+ DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]);
+ added.reset((*chain)[j]);
+ }
+
+ chain->assign(temp_chain.begin(), temp_chain.end());
+ }
+
+ return false;
+}
+
+/* \brief Generate wide_symbol_chain from wide_state_chain. */
+static
+void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
+ raw_dfa &rdfa = info.raw;
+ assert(chain_tail.size() == info.wide_state_chain.size());
+
+ for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
+ vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
+ vector<symbol_t> symbol_chain;
+
+ info.extra[state_chain[0]].wideHead = true;
+ size_t width = state_chain.size() - 1;
+
+ for (size_t j = 0; j < width; j++) {
+ dstate_id_t curr_id = state_chain[j];
+ dstate_id_t next_id = state_chain[j + 1];
+
+ // The last state of the chain doesn't belong to a wide state.
+ info.extra[curr_id].wideState = true;
+
+ // The tail symbol comes from vector chain_tail;
+ if (j == width - 1) {
+ symbol_chain.push_back(chain_tail[i]);
+ } else {
+ for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
+ if (rdfa.states[curr_id].next[sym] == next_id) {
+ symbol_chain.push_back(sym);
+ break;
+ }
+ }
+ }
+ }
+
+ info.wide_symbol_chain.push_back(symbol_chain);
+ }
+}
+
+/* \brief Find potential regions of states to be packed into wide states. */
+static
+void find_wide_state(dfa_info &info) {
+ DfaPrevInfo dinfo(info.raw);
+ queue<dstate_id_t> work_queue;
+
+ dynamic_bitset<> added(info.raw.states.size());
+ for (auto it : dinfo.accepts) {
+ work_queue.push(it);
+ added.set(it);
+ }
+
+ vector<symbol_t> chain_tail;
+ while (!work_queue.empty()) {
+ dstate_id_t curr_id = work_queue.front();
+ work_queue.pop();
+ DEBUG_PRINTF("Newly popped state: s%u\n", curr_id);
+
+ for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) {
+ for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) {
+ if (added.test(info_it)) {
+ DEBUG_PRINTF("(s%u already marked.)\n", info_it);
+ continue;
+ }
+
+ vector<dstate_id_t> temp_chain;
+ // Head is a state failing the test of the chain.
+ dstate_id_t head = find_chain_candidate(info.raw, dinfo,
+ info_it, sym,
+ temp_chain);
+
+ // A candidate chain should contain 8 substates at least.
+ if (temp_chain.size() < 8) {
+ DEBUG_PRINTF("(Not enough substates, continue.)\n");
+ continue;
+ }
+
+ bool head_is_new = !added.test(head);
+ if (head_is_new) {
+ added.set(head);
+ work_queue.push(head);
+ DEBUG_PRINTF("Newly pushed state: s%u\n", head);
+ }
+
+ reverse(temp_chain.begin(), temp_chain.end());
+ temp_chain.push_back(curr_id);
+
+ assert(head > 0 && head == temp_chain.front());
+ if (store_chain_longest(info.wide_state_chain, temp_chain,
+ added, head_is_new)) {
+ chain_tail.push_back(sym);
+ }
+ }
+ }
+ }
+
+ generate_symbol_chain(info, chain_tail);
+}
+
bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc,
bool trust_daddy_states,
@@ -1477,31 +1477,31 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
bytecode_ptr<NFA> nfa;
if (!using8bit) {
- // Wide state optimization
- if (cc.grey.allowWideStates && strat.getType() == McClellan
- && !is_triggered(raw.kind)) {
- find_wide_state(info);
- }
-
+ // Wide state optimization
+ if (cc.grey.allowWideStates && strat.getType() == McClellan
+ && !is_triggered(raw.kind)) {
+ find_wide_state(info);
+ }
+
u16 total_daddy = 0;
bool any_cyclic_near_anchored_state
= is_cyclic_near(raw, raw.start_anchored);
- // Sherman optimization
- if (info.impl_alpha_size > 16) {
- for (u32 i = 0; i < info.size(); i++) {
- if (info.is_widestate(i)) {
- continue;
- }
- find_better_daddy(info, i, using8bit,
- any_cyclic_near_anchored_state,
- trust_daddy_states, cc.grey);
- total_daddy += info.extra[i].daddytaken;
- }
-
- DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
- info.size() * info.impl_alpha_size, info.size(),
- info.impl_alpha_size);
+ // Sherman optimization
+ if (info.impl_alpha_size > 16) {
+ for (u32 i = 0; i < info.size(); i++) {
+ if (info.is_widestate(i)) {
+ continue;
+ }
+ find_better_daddy(info, i, using8bit,
+ any_cyclic_near_anchored_state,
+ trust_daddy_states, cc.grey);
+ total_daddy += info.extra[i].daddytaken;
+ }
+
+ DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
+ info.size() * info.impl_alpha_size, info.size(),
+ info.impl_alpha_size);
}
nfa = mcclellanCompile16(info, cc, accel_states);
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h
index a56016018b..73cb9fd775 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -60,7 +60,7 @@ public:
u32 max_allowed_offset_accel() const override;
u32 max_stop_char() const override;
u32 max_floating_stop_char() const override;
- DfaType getType() const override { return McClellan; }
+ DfaType getType() const override { return McClellan; }
private:
raw_dfa &rdfa;
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellandump.h b/contrib/libs/hyperscan/src/nfa/mcclellandump.h
index a4cd81c031..5b63a20634 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellandump.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellandump.h
@@ -1,62 +1,62 @@
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MCCLELLAN_DUMP_H
-#define MCCLELLAN_DUMP_H
-
-#ifdef DUMP_SUPPORT
-
-#include "rdfa.h"
-
-#include <cstdio>
-#include <string>
-
-struct mcclellan;
-struct mstate_aux;
-struct NFA;
-union AccelAux;
-
-namespace ue2 {
-
-void nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base);
-void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base);
-
-/* These functions are shared with the Gough dump code. */
-
-const mstate_aux *getAux(const NFA *n, dstate_id_t i);
-void describeEdge(FILE *f, const u16 *t, u16 i);
-void dumpAccelText(FILE *f, const union AccelAux *accel);
-void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel);
-void describeAlphabet(FILE *f, const mcclellan *m);
-void dumpDotPreambleDfa(FILE *f);
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
-
-#endif // MCCLELLAN_DUMP_H
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCCLELLAN_DUMP_H
+#define MCCLELLAN_DUMP_H
+
+#ifdef DUMP_SUPPORT
+
+#include "rdfa.h"
+
+#include <cstdio>
+#include <string>
+
+struct mcclellan;
+struct mstate_aux;
+struct NFA;
+union AccelAux;
+
+namespace ue2 {
+
+void nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base);
+
+/* These functions are shared with the Gough dump code. */
+
+const mstate_aux *getAux(const NFA *n, dstate_id_t i);
+void describeEdge(FILE *f, const u16 *t, u16 i);
+void dumpAccelText(FILE *f, const union AccelAux *accel);
+void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel);
+void describeAlphabet(FILE *f, const mcclellan *m);
+void dumpDotPreambleDfa(FILE *f);
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
+#endif // MCCLELLAN_DUMP_H
diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng.c b/contrib/libs/hyperscan/src/nfa/mcsheng.c
index d285793483..22cac119fb 100644
--- a/contrib/libs/hyperscan/src/nfa/mcsheng.c
+++ b/contrib/libs/hyperscan/src/nfa/mcsheng.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -173,7 +173,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
u32 sheng_limit_x4 = sheng_limit * 0x01010101;
m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
- DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
+ DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
m->sheng_accel_limit, sheng_stop_limit);
#endif
@@ -181,7 +181,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
m128 shuffle_mask = masks[*(c++)]; \
s = pshufb_m128(shuffle_mask, s); \
u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
- DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
+ DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
if (s_gpr_x4 >= sheng_stop_limit_x4) { \
s_gpr = s_gpr_x4; \
goto exit; \
@@ -191,7 +191,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
u8 s_gpr;
while (c < c_end) {
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
- /* This version uses pext for efficiently bitbashing out scaled
+ /* This version uses pext for efficiently bitbashing out scaled
* versions of the bytes to process from a u64a */
u64a data_bytes = unaligned_load_u64a(c);
@@ -201,7 +201,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
s = pshufb_m128(shuffle_mask0, s);
m128 s_max = s;
m128 s_max0 = s_max;
- DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s));
+ DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s));
#define SHENG_SINGLE_UNROLL_ITER(iter) \
assert(iter); \
@@ -217,7 +217,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
s_max = max_u8_m128(s_max, s); \
} \
m128 s_max##iter = s_max; \
- DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \
+ DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \
movd(s), movd(s_max));
SHENG_SINGLE_UNROLL_ITER(1);
@@ -1184,7 +1184,7 @@ char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) {
static
char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux,
- ReportID report) {
+ ReportID report) {
assert(m && aux);
if (!aux->accept) {
@@ -1405,1332 +1405,1332 @@ char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest,
*(u16 *)dest = unaligned_load_u16(src);
return 0;
}
-
-#if defined(HAVE_AVX512VBMI)
-static really_inline
-const struct mstate_aux *get_aux64(const struct mcsheng64 *m, u32 s) {
- const char *nfa = (const char *)m - sizeof(struct NFA);
- const struct mstate_aux *aux
- = s + (const struct mstate_aux *)(nfa + m->aux_offset);
-
- assert(ISALIGNED(aux));
- return aux;
-}
-
-static really_inline
-u32 mcshengEnableStarts64(const struct mcsheng64 *m, u32 s) {
- const struct mstate_aux *aux = get_aux64(m, s);
-
- DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top);
- return aux->top;
-}
-
-static really_inline
-char doComplexReport64(NfaCallback cb, void *ctxt, const struct mcsheng64 *m,
- u32 s, u64a loc, char eod, u32 *cached_accept_state,
- u32 *cached_accept_id) {
- DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n",
- s & STATE_MASK, loc, eod);
-
- if (!eod && s == *cached_accept_state) {
- if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- const struct mstate_aux *aux = get_aux64(m, s);
- size_t offset = eod ? aux->accept_eod : aux->accept;
-
- assert(offset);
- const struct report_list *rl
- = (const void *)((const char *)m + offset - sizeof(struct NFA));
- assert(ISALIGNED(rl));
-
- DEBUG_PRINTF("report list size %u\n", rl->count);
- u32 count = rl->count;
-
- if (!eod && count == 1) {
- *cached_accept_state = s;
- *cached_accept_id = rl->report[0];
-
- DEBUG_PRINTF("reporting %u\n", rl->report[0]);
- if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- for (u32 i = 0; i < count; i++) {
- DEBUG_PRINTF("reporting %u\n", rl->report[i]);
- if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
-}
-
-static really_inline
-u32 doSheng64(const struct mcsheng64 *m, const u8 **c_inout, const u8 *soft_c_end,
- const u8 *hard_c_end, u32 s_in, char do_accel) {
- assert(s_in < m->sheng_end);
- assert(s_in); /* should not already be dead */
- assert(soft_c_end <= hard_c_end);
- DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1);
- m512 s = set64x8(s_in - 1);
- const u8 *c = *c_inout;
- const u8 *c_end = hard_c_end - SHENG_CHUNK + 1;
- if (!do_accel) {
- c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1);
- }
-
- const m512 *masks = m->sheng_succ_masks;
- u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */
- u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit;
-
- /* When we use movd to get a u32 containing our state, it will have 4 lanes
- * all duplicating the state. We can create versions of our limits with 4
- * copies to directly compare against, this prevents us generating code to
- * extract a single copy of the state from the u32 for checking. */
- u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;
-
-#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
- u32 sheng_limit_x4 = sheng_limit * 0x01010101;
- m512 simd_stop_limit = set16x32(sheng_stop_limit_x4);
- m512 accel_delta = set64x8(sheng_limit - sheng_stop_limit);
- DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
- m->sheng_accel_limit, sheng_stop_limit);
-#endif
-
-#define SHENG64_SINGLE_ITER do { \
- m512 succ_mask = masks[*(c++)]; \
- s = vpermb512(s, succ_mask); \
- u32 s_gpr_x4 = movd512(s); /* convert to u8 */ \
- DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
- if (s_gpr_x4 >= sheng_stop_limit_x4) { \
- s_gpr = s_gpr_x4; \
- goto exit; \
- } \
- } while (0)
-
- u8 s_gpr;
- while (c < c_end) {
-#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
- /* This version uses pext for efficiently bitbashing out scaled
- * versions of the bytes to process from a u64a */
-
- u64a data_bytes = unaligned_load_u64a(c);
- u64a cc0 = pdep64(data_bytes, 0x3fc0); /* extract scaled low byte */
- data_bytes &= ~0xffULL; /* clear low bits for scale space */
-
- m512 succ_mask0 = load512((const char *)masks + cc0);
- s = vpermb512(s, succ_mask0);
- m512 s_max = s;
- m512 s_max0 = s_max;
- DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 6, movd512(s));
-
-#define SHENG64_SINGLE_UNROLL_ITER(iter) \
- assert(iter); \
- u64a cc##iter = pext64(data_bytes, mcsheng64_pext_mask[iter]); \
- assert(cc##iter == (u64a)c[iter] << 6); \
- m512 succ_mask##iter = load512((const char *)masks + cc##iter); \
- s = vpermb512(s, succ_mask##iter); \
- if (do_accel && iter == 7) { \
- /* in the final iteration we also have to check against accel */ \
- m512 s_temp = sadd_u8_m512(s, accel_delta); \
- s_max = max_u8_m512(s_max, s_temp); \
- } else { \
- s_max = max_u8_m512(s_max, s); \
- } \
- m512 s_max##iter = s_max; \
- DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 6, \
- movd512(s), movd512(s_max));
-
- SHENG64_SINGLE_UNROLL_ITER(1);
- SHENG64_SINGLE_UNROLL_ITER(2);
- SHENG64_SINGLE_UNROLL_ITER(3);
- SHENG64_SINGLE_UNROLL_ITER(4);
- SHENG64_SINGLE_UNROLL_ITER(5);
- SHENG64_SINGLE_UNROLL_ITER(6);
- SHENG64_SINGLE_UNROLL_ITER(7);
-
- if (movd512(s_max7) >= sheng_limit_x4) {
- DEBUG_PRINTF("exit found\n");
-
- /* Explicitly check the last byte as it is more likely as it also
- * checks for acceleration. */
- if (movd512(s_max6) < sheng_limit_x4) {
- c += SHENG_CHUNK;
- s_gpr = movq512(s);
- assert(s_gpr >= sheng_stop_limit);
- goto exit;
- }
-
- /* use shift-xor to create a register containing all of the max
- * values */
- m512 blended = rshift64_m512(s_max0, 56);
- blended = xor512(blended, rshift64_m512(s_max1, 48));
- blended = xor512(blended, rshift64_m512(s_max2, 40));
- blended = xor512(blended, rshift64_m512(s_max3, 32));
- blended = xor512(blended, rshift64_m512(s_max4, 24));
- blended = xor512(blended, rshift64_m512(s_max5, 16));
- blended = xor512(blended, rshift64_m512(s_max6, 8));
- blended = xor512(blended, s);
- blended = xor512(blended, rshift64_m512(blended, 8));
- DEBUG_PRINTF("blended %016llx\n", movq512(blended));
-
- m512 final = min_u8_m512(blended, simd_stop_limit);
- m512 cmp = sub_u8_m512(final, simd_stop_limit);
- m128 tmp = cast512to128(cmp);
- u64a stops = ~movemask128(tmp);
- assert(stops);
- u32 earliest = ctz32(stops);
- DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest);
- assert(earliest < 8);
- c += earliest + 1;
- s_gpr = movq512(blended) >> (earliest * 8);
- assert(s_gpr >= sheng_stop_limit);
- goto exit;
- } else {
- c += SHENG_CHUNK;
- }
-#else
- SHENG64_SINGLE_ITER;
- SHENG64_SINGLE_ITER;
- SHENG64_SINGLE_ITER;
- SHENG64_SINGLE_ITER;
-
- SHENG64_SINGLE_ITER;
- SHENG64_SINGLE_ITER;
- SHENG64_SINGLE_ITER;
- SHENG64_SINGLE_ITER;
-#endif
- }
-
- assert(c_end - c < SHENG_CHUNK);
- if (c < soft_c_end) {
- assert(soft_c_end - c < SHENG_CHUNK);
- switch (soft_c_end - c) {
- case 7:
- SHENG64_SINGLE_ITER; // fallthrough
- case 6:
- SHENG64_SINGLE_ITER; // fallthrough
- case 5:
- SHENG64_SINGLE_ITER; // fallthrough
- case 4:
- SHENG64_SINGLE_ITER; // fallthrough
- case 3:
- SHENG64_SINGLE_ITER; // fallthrough
- case 2:
- SHENG64_SINGLE_ITER; // fallthrough
- case 1:
- SHENG64_SINGLE_ITER; // fallthrough
- }
- }
-
- assert(c >= soft_c_end);
-
- s_gpr = movq512(s);
-exit:
- assert(c <= hard_c_end);
- DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr);
- assert(c >= soft_c_end || s_gpr >= sheng_stop_limit);
- /* undo state adjustment to match mcclellan view */
- if (s_gpr == sheng_limit) {
- s_gpr = 0;
- } else if (s_gpr < sheng_limit) {
- s_gpr++;
- }
-
- *c_inout = c;
- return s_gpr;
-}
-
-static really_inline
-const char *findShermanState64(UNUSED const struct mcsheng64 *m,
- const char *sherman_base_offset,
- u32 sherman_base, u32 s) {
- const char *rv
- = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
- assert(rv < (const char *)m + m->length - sizeof(struct NFA));
- UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET);
- assert(type == SHERMAN_STATE);
- return rv;
-}
-
-static really_inline
-const u8 *run_mcsheng_accel64(const struct mcsheng64 *m,
- const struct mstate_aux *aux, u32 s,
- const u8 **min_accel_offset,
- const u8 *c, const u8 *c_end) {
- DEBUG_PRINTF("skipping\n");
- u32 accel_offset = aux[s].accel_offset;
-
- assert(aux[s].accel_offset);
- assert(accel_offset >= m->aux_offset);
- assert(!m->sherman_offset || accel_offset < m->sherman_offset);
-
- const union AccelAux *aaux = (const void *)((const char *)m + accel_offset);
- const u8 *c2 = run_accel(aaux, c, c_end);
-
- if (c2 < *min_accel_offset + BAD_ACCEL_DIST) {
- *min_accel_offset = c2 + BIG_ACCEL_PENALTY;
- } else {
- *min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
- }
-
- if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) {
- *min_accel_offset = c_end;
- }
-
- DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
- c2 - c, *min_accel_offset - c2, c_end - c2);
-
- return c2;
-}
-
-static really_inline
-u32 doNormal64_16(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end,
- u32 s, char do_accel, enum MatchMode mode) {
- const u8 *c = *c_inout;
- const u16 *succ_table
- = (const u16 *)((const char *)m + sizeof(struct mcsheng64));
- assert(ISALIGNED_N(succ_table, 2));
- u32 sheng_end = m->sheng_end;
- u32 sherman_base = m->sherman_limit;
- const char *sherman_base_offset
- = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
- u32 as = m->alphaShift;
-
- /* Adjust start of succ table so we can index into using state id (rather
- * than adjust to normal id). As we will not be processing states with low
- * state ids, we will not be accessing data before the succ table. Note: due
- * to the size of the sheng tables, the succ_table pointer will still be
- * inside the engine.*/
- succ_table -= sheng_end << as;
- s &= STATE_MASK;
- while (c < end && s >= sheng_end) {
- u8 cprime = m->remap[*c];
- DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c,
- ourisprint(*c) ? *c : '?', cprime, s);
- if (s < sherman_base) {
- DEBUG_PRINTF("doing normal\n");
- assert(s < m->state_count);
- s = succ_table[(s << as) + cprime];
- } else {
- const char *sherman_state
- = findShermanState64(m, sherman_base_offset, sherman_base, s);
- DEBUG_PRINTF("doing sherman (%u)\n", s);
- s = doSherman16(sherman_state, cprime, succ_table, as);
- }
-
- DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
- c++;
-
- if (do_accel && (s & ACCEL_FLAG)) {
- break;
- }
- if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
- break;
- }
-
- s &= STATE_MASK;
- }
-
- *c_inout = c;
- return s;
-}
-
-static really_inline
-char mcsheng64Exec16_i(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **c_final, enum MatchMode mode) {
- assert(ISALIGNED_N(state, 2));
- if (!len) {
- if (mode == STOP_AT_MATCH) {
- *c_final = buf;
- }
- return MO_ALIVE;
- }
-
- u32 s = *state;
- const u8 *c = buf;
- const u8 *c_end = buf + len;
- const u8 sheng_end = m->sheng_end;
- const struct mstate_aux *aux
- = (const struct mstate_aux *)((const char *)m + m->aux_offset
- - sizeof(struct NFA));
-
- s &= STATE_MASK;
-
- u32 cached_accept_id = 0;
- u32 cached_accept_state = 0;
-
- DEBUG_PRINTF("s: %u, len %zu\n", s, len);
-
- const u8 *min_accel_offset = c;
- if (!m->has_accel || len < ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- goto without_accel;
- }
-
- goto with_accel;
-
-without_accel:
- do {
- assert(c < min_accel_offset);
- int do_accept;
- if (!s) {
- goto exit;
- } else if (s < sheng_end) {
- s = doSheng64(m, &c, min_accel_offset, c_end, s, 0);
- do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept;
- } else {
- s = doNormal64_16(m, &c, min_accel_offset, s, 0, mode);
-
- do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
- }
-
- if (do_accept) {
- if (mode == STOP_AT_MATCH) {
- *state = s & STATE_MASK;
- *c_final = c - 1;
- return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
-
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
- if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_DEAD; /* termination requested */
- }
- } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0,
- &cached_accept_state,
- &cached_accept_id)
- == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- }
-
- assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
- } while (c < min_accel_offset);
-
- if (c == c_end) {
- goto exit;
- }
-
-with_accel:
- do {
- assert(c < c_end);
- int do_accept;
-
- if (!s) {
- goto exit;
- } else if (s < sheng_end) {
- if (s > m->sheng_accel_limit) {
- c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
- if (c == c_end) {
- goto exit;
- } else {
- goto without_accel;
- }
- }
- s = doSheng64(m, &c, c_end, c_end, s, 1);
- do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept;
- } else {
- if (s & ACCEL_FLAG) {
- DEBUG_PRINTF("skipping\n");
- s &= STATE_MASK;
- c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
- if (c == c_end) {
- goto exit;
- } else {
- goto without_accel;
- }
- }
-
- s = doNormal64_16(m, &c, c_end, s, 1, mode);
- do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
- }
-
- if (do_accept) {
- if (mode == STOP_AT_MATCH) {
- *state = s & STATE_MASK;
- *c_final = c - 1;
- return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
-
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
- if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_DEAD; /* termination requested */
- }
- } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0,
- &cached_accept_state,
- &cached_accept_id)
- == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- }
-
- assert(c <= c_end);
- } while (c < c_end);
-
-exit:
- s &= STATE_MASK;
-
- if (mode == STOP_AT_MATCH) {
- *c_final = c_end;
- }
- *state = s;
-
- return MO_ALIVE;
-}
-
-static never_inline
-char mcsheng64Exec16_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point, CALLBACK_OUTPUT);
-}
-
-static never_inline
-char mcsheng64Exec16_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point, STOP_AT_MATCH);
-}
-
-static never_inline
-char mcsheng64Exec16_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point, NO_MATCHES);
-}
-
-static really_inline
-char mcsheng64Exec16_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point,
- enum MatchMode mode) {
- if (mode == CALLBACK_OUTPUT) {
- return mcsheng64Exec16_i_cb(m, state, buf, len, offAdj, cb, ctxt,
- single, final_point);
- } else if (mode == STOP_AT_MATCH) {
- return mcsheng64Exec16_i_sam(m, state, buf, len, offAdj, cb, ctxt,
- single, final_point);
- } else {
- assert (mode == NO_MATCHES);
- return mcsheng64Exec16_i_nm(m, state, buf, len, offAdj, cb, ctxt,
- single, final_point);
- }
-}
-
-static really_inline
-u32 doNormal64_8(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end, u32 s,
- char do_accel, enum MatchMode mode) {
- const u8 *c = *c_inout;
- u32 sheng_end = m->sheng_end;
- u32 accel_limit = m->accel_limit_8;
- u32 accept_limit = m->accept_limit_8;
-
- const u32 as = m->alphaShift;
- const u8 *succ_table = (const u8 *)((const char *)m
- + sizeof(struct mcsheng64));
- /* Adjust start of succ table so we can index into using state id (rather
- * than adjust to normal id). As we will not be processing states with low
- * state ids, we will not be accessing data before the succ table. Note: due
- * to the size of the sheng tables, the succ_table pointer will still be
- * inside the engine.*/
- succ_table -= sheng_end << as;
-
- assert(s >= sheng_end);
- while (c < end && s >= sheng_end) {
- u8 cprime = m->remap[*c];
- DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c,
- ourisprint(*c) ? *c : '?', cprime);
- s = succ_table[(s << as) + cprime];
-
- DEBUG_PRINTF("s: %u\n", s);
- c++;
- if (do_accel) {
- if (s >= accel_limit) {
- break;
- }
- } else {
- if (mode != NO_MATCHES && s >= accept_limit) {
- break;
- }
- }
- }
- *c_inout = c;
- return s;
-}
-
-static really_inline
-char mcsheng64Exec8_i(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **c_final, enum MatchMode mode) {
- if (!len) {
- *c_final = buf;
- return MO_ALIVE;
- }
- u32 s = *state;
- const u8 *c = buf;
- const u8 *c_end = buf + len;
- const u8 sheng_end = m->sheng_end;
-
- const struct mstate_aux *aux
- = (const struct mstate_aux *)((const char *)m + m->aux_offset
- - sizeof(struct NFA));
- u32 accept_limit = m->accept_limit_8;
-
- u32 cached_accept_id = 0;
- u32 cached_accept_state = 0;
-
- DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
-
- DEBUG_PRINTF("s: %u, len %zu\n", s, len);
-
- const u8 *min_accel_offset = c;
- if (!m->has_accel || len < ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- goto without_accel;
- }
-
- goto with_accel;
-
-without_accel:
- do {
- assert(c < min_accel_offset);
- if (!s) {
- goto exit;
- } else if (s < sheng_end) {
- s = doSheng64(m, &c, min_accel_offset, c_end, s, 0);
- } else {
- s = doNormal64_8(m, &c, min_accel_offset, s, 0, mode);
- assert(c <= min_accel_offset);
- }
-
- if (mode != NO_MATCHES && s >= accept_limit) {
- if (mode == STOP_AT_MATCH) {
- DEBUG_PRINTF("match - pausing\n");
- *state = s;
- *c_final = c - 1;
- return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
- if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- } else if (doComplexReport64(cb, ctxt, m, s, loc, 0,
- &cached_accept_state,
- &cached_accept_id)
- == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- }
-
- assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
- } while (c < min_accel_offset);
-
- if (c == c_end) {
- goto exit;
- }
-
-with_accel:
- do {
- u32 accel_limit = m->accel_limit_8;
-
- assert(c < c_end);
- if (!s) {
- goto exit;
- } else if (s < sheng_end) {
- if (s > m->sheng_accel_limit) {
- c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
- if (c == c_end) {
- goto exit;
- } else {
- goto without_accel;
- }
- }
- s = doSheng64(m, &c, c_end, c_end, s, 1);
- } else {
- if (s >= accel_limit && aux[s].accel_offset) {
- c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
- if (c == c_end) {
- goto exit;
- } else {
- goto without_accel;
- }
- }
- s = doNormal64_8(m, &c, c_end, s, 1, mode);
- }
-
- if (mode != NO_MATCHES && s >= accept_limit) {
- if (mode == STOP_AT_MATCH) {
- DEBUG_PRINTF("match - pausing\n");
- *state = s;
- *c_final = c - 1;
- return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
- if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- } else if (doComplexReport64(cb, ctxt, m, s, loc, 0,
- &cached_accept_state,
- &cached_accept_id)
- == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- }
-
- assert(c <= c_end);
- } while (c < c_end);
-
-exit:
- *state = s;
- if (mode == STOP_AT_MATCH) {
- *c_final = c_end;
- }
- return MO_ALIVE;
-}
-
-static never_inline
-char mcsheng64Exec8_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point, CALLBACK_OUTPUT);
-}
-
-static never_inline
-char mcsheng64Exec8_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point, STOP_AT_MATCH);
-}
-
-static never_inline
-char mcsheng64Exec8_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point, NO_MATCHES);
-}
-
-static really_inline
-char mcsheng64Exec8_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point,
- enum MatchMode mode) {
- if (mode == CALLBACK_OUTPUT) {
- return mcsheng64Exec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point);
- } else if (mode == STOP_AT_MATCH) {
- return mcsheng64Exec8_i_sam(m, state, buf, len, offAdj, cb, ctxt,
- single, final_point);
- } else {
- assert(mode == NO_MATCHES);
- return mcsheng64Exec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point);
- }
-}
-
-static really_inline
-char mcshengCheckEOD64(const struct NFA *nfa, u32 s, u64a offset,
- NfaCallback cb, void *ctxt) {
- const struct mcsheng64 *m = getImplNfa(nfa);
- const struct mstate_aux *aux = get_aux64(m, s);
-
- if (!aux->accept_eod) {
- return MO_CONTINUE_MATCHING;
- }
- return doComplexReport64(cb, ctxt, m, s, offset, 1, NULL, NULL);
-}
-
-static really_inline
-char nfaExecMcSheng64_16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
- const u8 *hend, NfaCallback cb, void *context,
- struct mq *q, char single, s64a end,
- enum MatchMode mode) {
- assert(n->type == MCSHENG_64_NFA_16);
- const struct mcsheng64 *m = getImplNfa(n);
- s64a sp;
-
- assert(ISALIGNED_N(q->state, 2));
- u32 s = *(u16 *)q->state;
-
- if (q->report_current) {
- assert(s);
- assert(get_aux64(m, s)->accept);
-
- int rv;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
- rv = cb(0, q_cur_offset(q), m->arb_report, context);
- } else {
- u32 cached_accept_id = 0;
- u32 cached_accept_state = 0;
-
- rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0,
- &cached_accept_state, &cached_accept_id);
- }
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- }
-
- sp = q_cur_loc(q);
- q->cur++;
-
- const u8 *cur_buf = sp < 0 ? hend : buffer;
-
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u16 *)q->state = s;
- return MO_ALIVE;
- }
-
- while (1) {
- assert(q->cur < q->end);
- s64a ep = q->items[q->cur].location;
- if (mode != NO_MATCHES) {
- ep = MIN(ep, end);
- }
-
- assert(ep >= sp);
-
- s64a local_ep = ep;
- if (sp < 0) {
- local_ep = MIN(0, ep);
- }
-
- /* do main buffer region */
- const u8 *final_look;
- char rv = mcsheng64Exec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
- offset + sp, cb, context, single,
- &final_look, mode);
- if (rv == MO_DEAD) {
- *(u16 *)q->state = 0;
- return MO_DEAD;
- }
- if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("this is as far as we go\n");
- DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
-
- assert(q->cur);
- assert(final_look != cur_buf + local_ep);
-
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = final_look - cur_buf + 1; /* due to
- * early -1 */
- *(u16 *)q->state = s;
- return MO_MATCHES_PENDING;
- }
-
- assert(rv == MO_ALIVE);
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u16 *)q->state = s;
- return MO_ALIVE;
- }
-
- sp = local_ep;
-
- if (sp == 0) {
- cur_buf = buffer;
- }
-
- if (sp != ep) {
- continue;
- }
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- assert(sp + offset || !s);
- if (sp + offset == 0) {
- s = m->start_anchored;
- break;
- }
- s = mcshengEnableStarts64(m, s);
- break;
- case MQE_END:
- *(u16 *)q->state = s;
- q->cur++;
- return s ? MO_ALIVE : MO_DEAD;
- default:
- assert(!"invalid queue event");
- }
-
- q->cur++;
- }
-}
-
-static really_inline
-char nfaExecMcSheng64_8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
- const u8 *hend, NfaCallback cb, void *context,
- struct mq *q, char single, s64a end,
- enum MatchMode mode) {
- assert(n->type == MCSHENG_64_NFA_8);
- const struct mcsheng64 *m = getImplNfa(n);
- s64a sp;
-
- u32 s = *(u8 *)q->state;
-
- if (q->report_current) {
- assert(s);
- assert(s >= m->accept_limit_8);
-
- int rv;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
-
- rv = cb(0, q_cur_offset(q), m->arb_report, context);
- } else {
- u32 cached_accept_id = 0;
- u32 cached_accept_state = 0;
-
- rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0,
- &cached_accept_state, &cached_accept_id);
- }
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- }
-
- sp = q_cur_loc(q);
- q->cur++;
-
- const u8 *cur_buf = sp < 0 ? hend : buffer;
-
- if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u8 *)q->state = s;
- return MO_ALIVE;
- }
-
- while (1) {
- DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :
- q->items[q->cur].type == MQE_END ? "END" : "???",
- q->items[q->cur].location + offset);
- assert(q->cur < q->end);
- s64a ep = q->items[q->cur].location;
- if (mode != NO_MATCHES) {
- ep = MIN(ep, end);
- }
-
- assert(ep >= sp);
-
- s64a local_ep = ep;
- if (sp < 0) {
- local_ep = MIN(0, ep);
- }
-
- const u8 *final_look;
- char rv = mcsheng64Exec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
- offset + sp, cb, context, single,
- &final_look, mode);
- if (rv == MO_HALT_MATCHING) {
- *(u8 *)q->state = 0;
- return MO_DEAD;
- }
- if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("this is as far as we go\n");
- DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
-
- assert(q->cur);
- assert(final_look != cur_buf + local_ep);
-
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = final_look - cur_buf + 1; /* due to
- * early -1 */
- *(u8 *)q->state = s;
- return MO_MATCHES_PENDING;
- }
-
- assert(rv == MO_ALIVE);
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- assert(q->cur);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u8 *)q->state = s;
- return MO_ALIVE;
- }
-
- sp = local_ep;
-
- if (sp == 0) {
- cur_buf = buffer;
- }
-
- if (sp != ep) {
- continue;
- }
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- assert(sp + offset || !s);
- if (sp + offset == 0) {
- s = (u8)m->start_anchored;
- break;
- }
- s = mcshengEnableStarts64(m, s);
- break;
- case MQE_END:
- *(u8 *)q->state = s;
- q->cur++;
- return s ? MO_ALIVE : MO_DEAD;
- default:
- assert(!"invalid queue event");
- }
-
- q->cur++;
- }
-}
-
-char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCSHENG_64_NFA_8);
- const struct mcsheng64 *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCSHENG_FLAG_SINGLE, end,
- CALLBACK_OUTPUT);
-}
-
-char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCSHENG_64_NFA_16);
- const struct mcsheng64 *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCSHENG_FLAG_SINGLE, end,
- CALLBACK_OUTPUT);
-}
-
-char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q) {
- const struct mcsheng64 *m = getImplNfa(n);
- NfaCallback cb = q->cb;
- void *ctxt = q->context;
- u32 s = *(u8 *)q->state;
- u8 single = m->flags & MCSHENG_FLAG_SINGLE;
- u64a offset = q_cur_offset(q);
- assert(q_cur_type(q) == MQE_START);
- assert(s);
-
- if (s >= m->accept_limit_8) {
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
- cb(0, offset, m->arb_report, ctxt);
- } else {
- u32 cached_accept_id = 0;
- u32 cached_accept_state = 0;
-
- doComplexReport64(cb, ctxt, m, s, offset, 0, &cached_accept_state,
- &cached_accept_id);
- }
- }
-
- return 0;
-}
-
-/* If the 16-bit state stored in the queue has a non-zero `accept` entry in
- * its aux record, fire its report(s) at the queue's current offset. Always
- * returns 0. */
-char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q) {
-    const struct mcsheng64 *m = getImplNfa(n);
-    NfaCallback report_cb = q->cb;
-    void *report_ctx = q->context;
-    u32 cur_state = *(u16 *)q->state;
-    const struct mstate_aux *state_aux = get_aux64(m, cur_state);
-    u8 single_report = m->flags & MCSHENG_FLAG_SINGLE;
-    u64a match_offset = q_cur_offset(q);
-    assert(q_cur_type(q) == MQE_START);
-    DEBUG_PRINTF("state %u\n", cur_state);
-    assert(cur_state);
-
-    if (!state_aux->accept) {
-        return 0;
-    }
-
-    if (single_report) {
-        DEBUG_PRINTF("reporting %u\n", m->arb_report);
-        report_cb(0, match_offset, m->arb_report, report_ctx);
-        return 0;
-    }
-
-    u32 cached_accept_id = 0;
-    u32 cached_accept_state = 0;
-    doComplexReport64(report_cb, report_ctx, m, cur_state, match_offset, 0,
-                      &cached_accept_state, &cached_accept_id);
-    return 0;
-}
-
-/* Returns 1 if `report` appears in the (non-EOD) report list referenced by
- * aux->accept, 0 otherwise (including when the state has no accept list). */
-static
-char mcshengHasAccept64(const struct mcsheng64 *m, const struct mstate_aux *aux,
-                        ReportID report) {
-    assert(m && aux);
-
-    if (!aux->accept) {
-        return 0;
-    }
-
-    /* aux->accept is applied to the address sizeof(struct NFA) before m */
-    const char *nfa_base = (const char *)m - sizeof(struct NFA);
-    const struct report_list *rl
-        = (const struct report_list *)(nfa_base + aux->accept);
-    assert(ISALIGNED_N(rl, 4));
-
-    DEBUG_PRINTF("report list has %u entries\n", rl->count);
-
-    u32 idx = 0;
-    while (idx < rl->count) {
-        if (rl->report[idx] == report) {
-            return 1;
-        }
-        idx++;
-    }
-
-    return 0;
-}
-
-/* Reports whether the 8-bit state held in the queue lists `report` among its
- * accepts. */
-char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report,
-                                 struct mq *q) {
-    assert(n && q);
-
-    const struct mcsheng64 *m = getImplNfa(n);
-    u8 s = *(u8 *)q->state;
-    DEBUG_PRINTF("checking accepts for %hhu\n", s);
-
-    const struct mstate_aux *state_aux = get_aux64(m, s);
-    return mcshengHasAccept64(m, state_aux, report);
-}
-
-/* Reports whether the 8-bit state held in the queue has any accept list. */
-char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q) {
-    assert(n && q);
-
-    const struct mcsheng64 *m = getImplNfa(n);
-    u8 s = *(u8 *)q->state;
-    DEBUG_PRINTF("checking accepts for %hhu\n", s);
-
-    const struct mstate_aux *state_aux = get_aux64(m, s);
-    return state_aux->accept != 0;
-}
-
-/* Reports whether the 16-bit state held in the queue lists `report` among its
- * accepts. */
-char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report,
-                                  struct mq *q) {
-    assert(n && q);
-
-    const struct mcsheng64 *m = getImplNfa(n);
-    u16 s = *(u16 *)q->state;
-    DEBUG_PRINTF("checking accepts for %hu\n", s);
-
-    const struct mstate_aux *state_aux = get_aux64(m, s);
-    return mcshengHasAccept64(m, state_aux, report);
-}
-
-/* Reports whether the 16-bit state held in the queue has any accept list. */
-char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q) {
-    assert(n && q);
-
-    const struct mcsheng64 *m = getImplNfa(n);
-    u16 s = *(u16 *)q->state;
-    DEBUG_PRINTF("checking accepts for %hu\n", s);
-
-    const struct mstate_aux *state_aux = get_aux64(m, s);
-    return state_aux->accept != 0;
-}
-
-/* Queue entry point for the 8-bit-state McSheng64 engine in STOP_AT_MATCH
- * mode: unpacks the queue fields and defers to nfaExecMcSheng64_8_Q2i(). */
-char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end) {
-    assert(n->type == MCSHENG_64_NFA_8);
-
-    const struct mcsheng64 *impl = getImplNfa(n);
-    const u8 *history_end = q->history + q->hlength;
-    const char single_report = impl->flags & MCSHENG_FLAG_SINGLE;
-
-    return nfaExecMcSheng64_8_Q2i(n, q->offset, q->buffer, history_end, q->cb,
-                                  q->context, q, single_report, end,
-                                  STOP_AT_MATCH);
-}
-
-/* Queue entry point for the 16-bit-state McSheng64 engine in STOP_AT_MATCH
- * mode: unpacks the queue fields and defers to nfaExecMcSheng64_16_Q2i(). */
-char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end) {
-    assert(n->type == MCSHENG_64_NFA_16);
-
-    const struct mcsheng64 *impl = getImplNfa(n);
-    const u8 *history_end = q->history + q->hlength;
-    const char single_report = impl->flags & MCSHENG_FLAG_SINGLE;
-
-    return nfaExecMcSheng64_16_Q2i(n, q->offset, q->buffer, history_end, q->cb,
-                                   q->context, q, single_report, end,
-                                   STOP_AT_MATCH);
-}
-
-/* Runs the 8-bit engine over the queue in NO_MATCHES mode, then returns
- * MO_MATCHES_PENDING if the run result is non-zero and the final state
- * accepts `report`; otherwise returns the run result unchanged. */
-char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report) {
-    assert(n->type == MCSHENG_64_NFA_8);
-
-    const struct mcsheng64 *impl = getImplNfa(n);
-    const u8 *history_end = q->history + q->hlength;
-    const char single_report = impl->flags & MCSHENG_FLAG_SINGLE;
-
-    char rv = nfaExecMcSheng64_8_Q2i(n, q->offset, q->buffer, history_end,
-                                     q->cb, q->context, q, single_report,
-                                     0 /* end */, NO_MATCHES);
-    if (!rv) {
-        return rv;
-    }
-
-    return nfaExecMcSheng64_8_inAccept(n, report, q) ? MO_MATCHES_PENDING : rv;
-}
-
-/* Runs the 16-bit engine over the queue in NO_MATCHES mode, then returns
- * MO_MATCHES_PENDING if the run result is non-zero and the final state
- * accepts `report`; otherwise returns the run result unchanged. */
-char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report) {
-    assert(n->type == MCSHENG_64_NFA_16);
-
-    const struct mcsheng64 *impl = getImplNfa(n);
-    const u8 *history_end = q->history + q->hlength;
-    const char single_report = impl->flags & MCSHENG_FLAG_SINGLE;
-
-    char rv = nfaExecMcSheng64_16_Q2i(n, q->offset, q->buffer, history_end,
-                                      q->cb, q->context, q, single_report,
-                                      0 /* end */, NO_MATCHES);
-    if (!rv) {
-        return rv;
-    }
-
-    return nfaExecMcSheng64_16_inAccept(n, report, q) ? MO_MATCHES_PENDING : rv;
-}
-
-/* Writes the initial 8-bit state into `state`: start_floating when offset is
- * non-zero, start_anchored otherwise. Returns 1 if a non-zero state was
- * stored, 0 (leaving `state` untouched) if the chosen start state is 0. */
-char nfaExecMcSheng64_8_initCompressedState(const struct NFA *nfa, u64a offset,
-                                            void *state, UNUSED u8 key) {
-    const struct mcsheng64 *m = getImplNfa(nfa);
-    u8 start = offset ? m->start_floating : m->start_anchored;
-    if (!start) {
-        return 0;
-    }
-
-    *(u8 *)state = start;
-    return 1;
-}
-
-/* Writes the initial 16-bit state into `state` with an unaligned store:
- * start_floating when offset is non-zero, start_anchored otherwise. Returns 1
- * if a non-zero state was stored, 0 (leaving `state` untouched) otherwise. */
-char nfaExecMcSheng64_16_initCompressedState(const struct NFA *nfa, u64a offset,
-                                             void *state, UNUSED u8 key) {
-    const struct mcsheng64 *m = getImplNfa(nfa);
-    u16 start = offset ? m->start_floating : m->start_anchored;
-    if (!start) {
-        return 0;
-    }
-
-    unaligned_store_u16(state, start);
-    return 1;
-}
-
-/* End-of-data check for the 8-bit engine: reads the state byte and hands it
- * to mcshengCheckEOD64(). */
-char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state,
-                                UNUSED const char *streamState, u64a offset,
-                                NfaCallback callback, void *context) {
-    const u32 cur_state = *(const u8 *)state;
-    return mcshengCheckEOD64(nfa, cur_state, offset, callback, context);
-}
-
-/* End-of-data check for the 16-bit engine: reads the (2-byte aligned) state
- * and hands it to mcshengCheckEOD64(). */
-char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state,
-                                 UNUSED const char *streamState, u64a offset,
-                                 NfaCallback callback, void *context) {
-    assert(ISALIGNED_N(state, 2));
-    const u32 cur_state = *(const u16 *)state;
-    return mcshengCheckEOD64(nfa, cur_state, offset, callback, context);
-}
-
-/* Resets the queue's 8-bit scratch state to 0. Always returns 0. */
-char nfaExecMcSheng64_8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
-    assert(nfa->scratchStateSize == 1);
-    u8 *scratch = (u8 *)q->state;
-    *scratch = 0;
-    return 0;
-}
-
-/* Resets the queue's 16-bit scratch state (which must be 2-byte aligned) to
- * 0. Always returns 0. */
-char nfaExecMcSheng64_16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
-    assert(nfa->scratchStateSize == 2);
-    assert(ISALIGNED_N(q->state, 2));
-    u16 *scratch = (u16 *)q->state;
-    *scratch = 0;
-    return 0;
-}
-
-/* Copies the single state byte from the queue's scratch state into its
- * stream state. Always returns 0. */
-char nfaExecMcSheng64_8_queueCompressState(UNUSED const struct NFA *nfa,
-                                           const struct mq *q, UNUSED s64a loc) {
-    assert(nfa->scratchStateSize == 1);
-    assert(nfa->streamStateSize == 1);
-
-    const u8 *scratch = (const u8 *)q->state;
-    u8 *stream = (u8 *)q->streamState;
-    *stream = *scratch;
-    return 0;
-}
-
-/* Copies the single state byte from `src` to `dest`. Always returns 0. */
-char nfaExecMcSheng64_8_expandState(UNUSED const struct NFA *nfa, void *dest,
-                                    const void *src, UNUSED u64a offset,
-                                    UNUSED u8 key) {
-    assert(nfa->scratchStateSize == 1);
-    assert(nfa->streamStateSize == 1);
-
-    const u8 packed = *(const u8 *)src;
-    *(u8 *)dest = packed;
-    return 0;
-}
-
-/* Copies the 16-bit scratch state (2-byte aligned) into the queue's stream
- * state using an unaligned store. Always returns 0. */
-char nfaExecMcSheng64_16_queueCompressState(UNUSED const struct NFA *nfa,
-                                            const struct mq *q,
-                                            UNUSED s64a loc) {
-    assert(nfa->scratchStateSize == 2);
-    assert(nfa->streamStateSize == 2);
-
-    const void *scratch = q->state;
-    assert(ISALIGNED_N(scratch, 2));
-
-    const u16 cur_state = *(const u16 *)scratch;
-    unaligned_store_u16(q->streamState, cur_state);
-    return 0;
-}
-
-/* Loads the 16-bit state from `src` with an unaligned load and stores it to
- * the 2-byte aligned `dest`. Always returns 0. */
-char nfaExecMcSheng64_16_expandState(UNUSED const struct NFA *nfa, void *dest,
-                                     const void *src, UNUSED u64a offset,
-                                     UNUSED u8 key) {
-    assert(nfa->scratchStateSize == 2);
-    assert(nfa->streamStateSize == 2);
-    assert(ISALIGNED_N(dest, 2));
-
-    const u16 packed = unaligned_load_u16(src);
-    *(u16 *)dest = packed;
-    return 0;
-}
-#endif
+
+#if defined(HAVE_AVX512VBMI)
+/* Returns the aux record for state `s`. The aux table lives at m->aux_offset
+ * bytes past the address sizeof(struct NFA) before `m`. */
+static really_inline
+const struct mstate_aux *get_aux64(const struct mcsheng64 *m, u32 s) {
+    const char *nfa_base = (const char *)m - sizeof(struct NFA);
+    const struct mstate_aux *aux_table
+        = (const struct mstate_aux *)(nfa_base + m->aux_offset);
+    const struct mstate_aux *entry = &aux_table[s];
+
+    assert(ISALIGNED(entry));
+    return entry;
+}
+
+/* Returns the `top` field of the aux record for state `s`, i.e. the state to
+ * move to when a top event arrives while in `s`. */
+static really_inline
+u32 mcshengEnableStarts64(const struct mcsheng64 *m, u32 s) {
+    const struct mstate_aux *state_aux = get_aux64(m, s);
+    const u32 next = state_aux->top;
+
+    DEBUG_PRINTF("enabling starts %u->%hu\n", s, state_aux->top);
+    return next;
+}
+
+/* Fires every report in the accept list (or, when `eod` is set, the
+ * accept_eod list) of state `s` at location `loc`.
+ *
+ * For non-EOD reporting a one-entry cache is kept in *cached_accept_state /
+ * *cached_accept_id: a state whose list has exactly one report is remembered
+ * so a repeat visit can fire the report without walking the list. The cache
+ * pointers are not dereferenced when `eod` is non-zero.
+ *
+ * Returns MO_HALT_MATCHING as soon as the callback asks to stop, otherwise
+ * MO_CONTINUE_MATCHING. */
+static really_inline
+char doComplexReport64(NfaCallback cb, void *ctxt, const struct mcsheng64 *m,
+                       u32 s, u64a loc, char eod, u32 *cached_accept_state,
+                       u32 *cached_accept_id) {
+    DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n",
+                 s & STATE_MASK, loc, eod);
+
+    /* fast path: same single-report state as last time */
+    if (!eod && s == *cached_accept_state) {
+        return cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING
+                   ? MO_HALT_MATCHING      /* termination requested */
+                   : MO_CONTINUE_MATCHING; /* continue execution */
+    }
+
+    const struct mstate_aux *state_aux = get_aux64(m, s);
+    size_t list_offset = eod ? state_aux->accept_eod : state_aux->accept;
+
+    assert(list_offset);
+    const char *nfa_base = (const char *)m - sizeof(struct NFA);
+    const struct report_list *rl = (const void *)(nfa_base + list_offset);
+    assert(ISALIGNED(rl));
+
+    DEBUG_PRINTF("report list size %u\n", rl->count);
+    u32 count = rl->count;
+
+    if (!eod && count == 1) {
+        /* remember this state for the fast path above */
+        *cached_accept_state = s;
+        *cached_accept_id = rl->report[0];
+
+        DEBUG_PRINTF("reporting %u\n", rl->report[0]);
+        return cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING
+                   ? MO_HALT_MATCHING      /* termination requested */
+                   : MO_CONTINUE_MATCHING; /* continue execution */
+    }
+
+    u32 idx = 0;
+    while (idx < count) {
+        DEBUG_PRINTF("reporting %u\n", rl->report[idx]);
+        if (cb(0, loc, rl->report[idx], ctxt) == MO_HALT_MATCHING) {
+            return MO_HALT_MATCHING; /* termination requested */
+        }
+        idx++;
+    }
+
+    return MO_CONTINUE_MATCHING; /* continue execution */
+}
+
+/* Runs the sheng (vector-shuffle) part of the engine over input bytes.
+ *
+ * The state is held as s_in - 1 broadcast across a 512-bit vector; each input
+ * byte selects a successor mask from m->sheng_succ_masks and the next state is
+ * obtained with vpermb512(). Scanning stops when the state reaches the stop
+ * limit (m->sheng_accel_limit when do_accel is set, otherwise
+ * m->sheng_end - 1) or when the input is exhausted.
+ *
+ * c_inout:    in: first byte to scan; out: first byte not consumed.
+ * soft_c_end: point the caller would like scanning to stop at; the chunked
+ *             main loop only honours it when do_accel is 0, the tail switch
+ *             always scans up to it.
+ * hard_c_end: scanning never goes past this point.
+ * s_in:       starting state; asserted non-zero and below m->sheng_end.
+ *
+ * Returns the resulting state translated back to the caller's numbering:
+ * the value sheng_end - 1 becomes 0, smaller values are incremented by one
+ * and larger values are returned unchanged. */
+static really_inline
+u32 doSheng64(const struct mcsheng64 *m, const u8 **c_inout, const u8 *soft_c_end,
+ const u8 *hard_c_end, u32 s_in, char do_accel) {
+ assert(s_in < m->sheng_end);
+ assert(s_in); /* should not already be dead */
+ assert(soft_c_end <= hard_c_end);
+ DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1);
+ m512 s = set64x8(s_in - 1);
+ const u8 *c = *c_inout;
+ /* the main loop consumes SHENG_CHUNK bytes per iteration, so it may only
+ * start where a whole chunk still fits before hard_c_end */
+ const u8 *c_end = hard_c_end - SHENG_CHUNK + 1;
+ if (!do_accel) {
+ c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1);
+ }
+
+ const m512 *masks = m->sheng_succ_masks;
+ u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */
+ u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit;
+
+ /* When we use movd to get a u32 containing our state, it will have 4 lanes
+ * all duplicating the state. We can create versions of our limits with 4
+ * copies to directly compare against, this prevents us generating code to
+ * extract a single copy of the state from the u32 for checking. */
+ u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;
+
+#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
+ u32 sheng_limit_x4 = sheng_limit * 0x01010101;
+ m512 simd_stop_limit = set16x32(sheng_stop_limit_x4);
+ m512 accel_delta = set64x8(sheng_limit - sheng_stop_limit);
+ DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
+ m->sheng_accel_limit, sheng_stop_limit);
+#endif
+
+ /* One byte step: shuffle to the next state and leave via the exit label
+ * if the stop limit has been reached. */
+#define SHENG64_SINGLE_ITER do { \
+ m512 succ_mask = masks[*(c++)]; \
+ s = vpermb512(s, succ_mask); \
+ u32 s_gpr_x4 = movd512(s); /* convert to u8 */ \
+ DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
+ if (s_gpr_x4 >= sheng_stop_limit_x4) { \
+ s_gpr = s_gpr_x4; \
+ goto exit; \
+ } \
+ } while (0)
+
+ u8 s_gpr;
+ while (c < c_end) {
+#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
+ /* This version uses pext for efficiently bitbashing out scaled
+ * versions of the bytes to process from a u64a */
+
+ u64a data_bytes = unaligned_load_u64a(c);
+ u64a cc0 = pdep64(data_bytes, 0x3fc0); /* extract scaled low byte */
+ data_bytes &= ~0xffULL; /* clear low bits for scale space */
+
+ m512 succ_mask0 = load512((const char *)masks + cc0);
+ s = vpermb512(s, succ_mask0);
+ m512 s_max = s;
+ m512 s_max0 = s_max;
+ DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 6, movd512(s));
+
+#define SHENG64_SINGLE_UNROLL_ITER(iter) \
+ assert(iter); \
+ u64a cc##iter = pext64(data_bytes, mcsheng64_pext_mask[iter]); \
+ assert(cc##iter == (u64a)c[iter] << 6); \
+ m512 succ_mask##iter = load512((const char *)masks + cc##iter); \
+ s = vpermb512(s, succ_mask##iter); \
+ if (do_accel && iter == 7) { \
+ /* in the final iteration we also have to check against accel */ \
+ m512 s_temp = sadd_u8_m512(s, accel_delta); \
+ s_max = max_u8_m512(s_max, s_temp); \
+ } else { \
+ s_max = max_u8_m512(s_max, s); \
+ } \
+ m512 s_max##iter = s_max; \
+ DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 6, \
+ movd512(s), movd512(s_max));
+
+ SHENG64_SINGLE_UNROLL_ITER(1);
+ SHENG64_SINGLE_UNROLL_ITER(2);
+ SHENG64_SINGLE_UNROLL_ITER(3);
+ SHENG64_SINGLE_UNROLL_ITER(4);
+ SHENG64_SINGLE_UNROLL_ITER(5);
+ SHENG64_SINGLE_UNROLL_ITER(6);
+ SHENG64_SINGLE_UNROLL_ITER(7);
+
+ /* s_max7 is the running maximum over the whole chunk: a single compare
+ * tells us whether any of the 8 steps needs to stop */
+ if (movd512(s_max7) >= sheng_limit_x4) {
+ DEBUG_PRINTF("exit found\n");
+
+ /* Explicitly check the last byte first: it is the most likely
+ * place to stop because it also checks for acceleration. */
+ if (movd512(s_max6) < sheng_limit_x4) {
+ c += SHENG_CHUNK;
+ s_gpr = movq512(s);
+ assert(s_gpr >= sheng_stop_limit);
+ goto exit;
+ }
+
+ /* use shift-xor to create a register containing all of the max
+ * values */
+ m512 blended = rshift64_m512(s_max0, 56);
+ blended = xor512(blended, rshift64_m512(s_max1, 48));
+ blended = xor512(blended, rshift64_m512(s_max2, 40));
+ blended = xor512(blended, rshift64_m512(s_max3, 32));
+ blended = xor512(blended, rshift64_m512(s_max4, 24));
+ blended = xor512(blended, rshift64_m512(s_max5, 16));
+ blended = xor512(blended, rshift64_m512(s_max6, 8));
+ blended = xor512(blended, s);
+ blended = xor512(blended, rshift64_m512(blended, 8));
+ DEBUG_PRINTF("blended %016llx\n", movq512(blended));
+
+ /* locate the earliest byte position whose value reached the stop
+ * limit */
+ m512 final = min_u8_m512(blended, simd_stop_limit);
+ m512 cmp = sub_u8_m512(final, simd_stop_limit);
+ m128 tmp = cast512to128(cmp);
+ u64a stops = ~movemask128(tmp);
+ assert(stops);
+ u32 earliest = ctz32(stops);
+ DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest);
+ assert(earliest < 8);
+ c += earliest + 1;
+ s_gpr = movq512(blended) >> (earliest * 8);
+ assert(s_gpr >= sheng_stop_limit);
+ goto exit;
+ } else {
+ c += SHENG_CHUNK;
+ }
+#else
+ SHENG64_SINGLE_ITER;
+ SHENG64_SINGLE_ITER;
+ SHENG64_SINGLE_ITER;
+ SHENG64_SINGLE_ITER;
+
+ SHENG64_SINGLE_ITER;
+ SHENG64_SINGLE_ITER;
+ SHENG64_SINGLE_ITER;
+ SHENG64_SINGLE_ITER;
+#endif
+ }
+
+ /* tail: fewer than SHENG_CHUNK bytes left, step one byte at a time up to
+ * soft_c_end */
+ assert(c_end - c < SHENG_CHUNK);
+ if (c < soft_c_end) {
+ assert(soft_c_end - c < SHENG_CHUNK);
+ switch (soft_c_end - c) {
+ case 7:
+ SHENG64_SINGLE_ITER; // fallthrough
+ case 6:
+ SHENG64_SINGLE_ITER; // fallthrough
+ case 5:
+ SHENG64_SINGLE_ITER; // fallthrough
+ case 4:
+ SHENG64_SINGLE_ITER; // fallthrough
+ case 3:
+ SHENG64_SINGLE_ITER; // fallthrough
+ case 2:
+ SHENG64_SINGLE_ITER; // fallthrough
+ case 1:
+ SHENG64_SINGLE_ITER; // fallthrough
+ }
+ }
+
+ assert(c >= soft_c_end);
+
+ s_gpr = movq512(s);
+exit:
+ assert(c <= hard_c_end);
+ DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr);
+ assert(c >= soft_c_end || s_gpr >= sheng_stop_limit);
+ /* undo state adjustment to match mcclellan view */
+ if (s_gpr == sheng_limit) {
+ s_gpr = 0;
+ } else if (s_gpr < sheng_limit) {
+ s_gpr++;
+ }
+
+ *c_inout = c;
+ return s_gpr;
+}
+
+/* Returns a pointer to the fixed-size sherman record of state `s`: records
+ * are SHERMAN_FIXED_SIZE bytes each, starting at sherman_base_offset for
+ * state id sherman_base. `m` is only used by the assertions. */
+static really_inline
+const char *findShermanState64(UNUSED const struct mcsheng64 *m,
+                               const char *sherman_base_offset,
+                               u32 sherman_base, u32 s) {
+    const u32 record_index = s - sherman_base;
+    const char *record
+        = sherman_base_offset + SHERMAN_FIXED_SIZE * record_index;
+    assert(record < (const char *)m + m->length - sizeof(struct NFA));
+    UNUSED u8 type = *(const u8 *)(record + SHERMAN_TYPE_OFFSET);
+    assert(type == SHERMAN_STATE);
+    return record;
+}
+
+/* Runs the acceleration scheme attached to state `s` over [c, c_end) and
+ * returns the position run_accel() advanced to.
+ *
+ * *min_accel_offset is updated to the earliest point at which acceleration
+ * should next be attempted: the new position plus BIG_ACCEL_PENALTY when the
+ * skip ended less than BAD_ACCEL_DIST past the previous value, otherwise plus
+ * SMALL_ACCEL_PENALTY. If that lands within ACCEL_MIN_LEN of c_end it is set
+ * to c_end. */
+static really_inline
+const u8 *run_mcsheng_accel64(const struct mcsheng64 *m,
+                              const struct mstate_aux *aux, u32 s,
+                              const u8 **min_accel_offset,
+                              const u8 *c, const u8 *c_end) {
+    DEBUG_PRINTF("skipping\n");
+    u32 accel_offset = aux[s].accel_offset;
+
+    assert(aux[s].accel_offset);
+    assert(accel_offset >= m->aux_offset);
+    assert(!m->sherman_offset || accel_offset < m->sherman_offset);
+
+    const union AccelAux *aaux = (const void *)((const char *)m + accel_offset);
+    const u8 *skipped_to = run_accel(aaux, c, c_end);
+
+    const u8 *next_try;
+    if (skipped_to < *min_accel_offset + BAD_ACCEL_DIST) {
+        next_try = skipped_to + BIG_ACCEL_PENALTY;
+    } else {
+        next_try = skipped_to + SMALL_ACCEL_PENALTY;
+    }
+
+    if (next_try >= c_end - ACCEL_MIN_LEN) {
+        next_try = c_end;
+    }
+    *min_accel_offset = next_try;
+
+    DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
+                 skipped_to - c, *min_accel_offset - skipped_to,
+                 c_end - skipped_to);
+
+    return skipped_to;
+}
+
+/* Steps the 16-bit table-driven part of the engine over [*c_inout, end) while
+ * the state id stays at or above m->sheng_end.
+ *
+ * States below m->sherman_limit use the plain successor table; the others are
+ * resolved through their sherman record. The loop stops early when the new
+ * state carries ACCEL_FLAG (only if do_accel) or ACCEPT_FLAG (unless mode is
+ * NO_MATCHES); in those cases the returned state still has its flag bits set.
+ * *c_inout is advanced past the bytes consumed. */
+static really_inline
+u32 doNormal64_16(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end,
+                  u32 s, char do_accel, enum MatchMode mode) {
+    const u8 *cur = *c_inout;
+    const u16 *succ_table
+        = (const u16 *)((const char *)m + sizeof(struct mcsheng64));
+    assert(ISALIGNED_N(succ_table, 2));
+    const u32 sheng_end = m->sheng_end;
+    const u32 sherman_base = m->sherman_limit;
+    const char *sherman_base_offset
+        = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
+    const u32 as = m->alphaShift;
+
+    /* Adjust start of succ table so we can index into using state id (rather
+     * than adjust to normal id). As we will not be processing states with low
+     * state ids, we will not be accessing data before the succ table. Note: due
+     * to the size of the sheng tables, the succ_table pointer will still be
+     * inside the engine.*/
+    succ_table -= sheng_end << as;
+    s &= STATE_MASK;
+    for (; cur < end && s >= sheng_end; s &= STATE_MASK) {
+        u8 cprime = m->remap[*cur];
+        DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *cur,
+                     ourisprint(*cur) ? *cur : '?', cprime, s);
+        if (s >= sherman_base) {
+            const char *sherman_state
+                = findShermanState64(m, sherman_base_offset, sherman_base, s);
+            DEBUG_PRINTF("doing sherman (%u)\n", s);
+            s = doSherman16(sherman_state, cprime, succ_table, as);
+        } else {
+            DEBUG_PRINTF("doing normal\n");
+            assert(s < m->state_count);
+            s = succ_table[(s << as) + cprime];
+        }
+
+        DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
+        cur++;
+
+        /* leave with the flag bits intact so the caller can see why */
+        const int hit_accel = do_accel && (s & ACCEL_FLAG);
+        const int hit_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
+        if (hit_accel || hit_accept) {
+            break;
+        }
+    }
+
+    *c_inout = cur;
+    return s;
+}
+
+/* Core scanner for the 16-bit-state engine: runs `len` bytes of `buf`
+ * starting from *state.
+ *
+ * Two loops cooperate. `without_accel` scans until min_accel_offset with
+ * acceleration disabled; `with_accel` scans to the end of the buffer and, on
+ * reaching an accelerable state, calls run_mcsheng_accel64() and then drops
+ * back to `without_accel` until the returned penalty window has passed.
+ * States below m->sheng_end are stepped by doSheng64(), the rest by
+ * doNormal64_16().
+ *
+ * offAdj is added to the buffer-relative position to form the offset passed
+ * to the callback.
+ *
+ * Returns:
+ *   MO_DEAD             the callback returned MO_HALT_MATCHING;
+ *   MO_MATCHES_PENDING  mode is STOP_AT_MATCH and an accept state was
+ *                       reached; *state holds it and *c_final points at the
+ *                       byte that produced it;
+ *   MO_ALIVE            otherwise; *state holds the final state (flags
+ *                       masked off) and, in STOP_AT_MATCH mode, *c_final is
+ *                       the end of the buffer. */
+static really_inline
+char mcsheng64Exec16_i(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **c_final, enum MatchMode mode) {
+ assert(ISALIGNED_N(state, 2));
+ if (!len) {
+ if (mode == STOP_AT_MATCH) {
+ *c_final = buf;
+ }
+ return MO_ALIVE;
+ }
+
+ u32 s = *state;
+ const u8 *c = buf;
+ const u8 *c_end = buf + len;
+ const u8 sheng_end = m->sheng_end;
+ const struct mstate_aux *aux
+ = (const struct mstate_aux *)((const char *)m + m->aux_offset
+ - sizeof(struct NFA));
+
+ s &= STATE_MASK;
+
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ DEBUG_PRINTF("s: %u, len %zu\n", s, len);
+
+ /* with no acceleration available, or too little data for it to pay off,
+ * the whole buffer is handled by the without_accel loop */
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
+ do {
+ assert(c < min_accel_offset);
+ int do_accept;
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ s = doSheng64(m, &c, min_accel_offset, c_end, s, 0);
+ do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept;
+ } else {
+ s = doNormal64_16(m, &c, min_accel_offset, s, 0, mode);
+
+ do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
+ }
+
+ if (do_accept) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ /* c already points past the matching byte */
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD; /* termination requested */
+ }
+ } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0,
+ &cached_accept_state,
+ &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
+ } while (c < min_accel_offset);
+
+ if (c == c_end) {
+ goto exit;
+ }
+
+with_accel:
+ do {
+ assert(c < c_end);
+ int do_accept;
+
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ /* sheng states above sheng_accel_limit are accelerable */
+ if (s > m->sheng_accel_limit) {
+ c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+ s = doSheng64(m, &c, c_end, c_end, s, 1);
+ do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept;
+ } else {
+ if (s & ACCEL_FLAG) {
+ DEBUG_PRINTF("skipping\n");
+ s &= STATE_MASK;
+ c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+
+ s = doNormal64_16(m, &c, c_end, s, 1, mode);
+ do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
+ }
+
+ if (do_accept) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ /* c already points past the matching byte */
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD; /* termination requested */
+ }
+ } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0,
+ &cached_accept_state,
+ &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end);
+ } while (c < c_end);
+
+exit:
+ s &= STATE_MASK;
+
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ *state = s;
+
+ return MO_ALIVE;
+}
+
+/* never_inline instantiation of mcsheng64Exec16_i() for CALLBACK_OUTPUT. */
+static never_inline
+char mcsheng64Exec16_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                          size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                          char single, const u8 **final_point) {
+    const enum MatchMode fixed_mode = CALLBACK_OUTPUT;
+    return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
+                             final_point, fixed_mode);
+}
+
+/* never_inline instantiation of mcsheng64Exec16_i() for STOP_AT_MATCH. */
+static never_inline
+char mcsheng64Exec16_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                           size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                           char single, const u8 **final_point) {
+    const enum MatchMode fixed_mode = STOP_AT_MATCH;
+    return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
+                             final_point, fixed_mode);
+}
+
+/* never_inline instantiation of mcsheng64Exec16_i() for NO_MATCHES. */
+static never_inline
+char mcsheng64Exec16_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                          size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                          char single, const u8 **final_point) {
+    const enum MatchMode fixed_mode = NO_MATCHES;
+    return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
+                             final_point, fixed_mode);
+}
+
+/* Dispatches to the never_inline 16-bit executor specialised for `mode`. */
+static really_inline
+char mcsheng64Exec16_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                          size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                          char single, const u8 **final_point,
+                          enum MatchMode mode) {
+    switch (mode) {
+    case CALLBACK_OUTPUT:
+        return mcsheng64Exec16_i_cb(m, state, buf, len, offAdj, cb, ctxt,
+                                    single, final_point);
+    case STOP_AT_MATCH:
+        return mcsheng64Exec16_i_sam(m, state, buf, len, offAdj, cb, ctxt,
+                                     single, final_point);
+    default:
+        assert (mode == NO_MATCHES);
+        return mcsheng64Exec16_i_nm(m, state, buf, len, offAdj, cb, ctxt,
+                                    single, final_point);
+    }
+}
+
+/* Steps the 8-bit table-driven part of the engine over [*c_inout, end) while
+ * the state id stays at or above m->sheng_end.
+ *
+ * With do_accel set the loop stops once the state reaches m->accel_limit_8;
+ * otherwise it stops once the state reaches m->accept_limit_8, unless mode is
+ * NO_MATCHES. *c_inout is advanced past the bytes consumed and the resulting
+ * state is returned. */
+static really_inline
+u32 doNormal64_8(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end, u32 s,
+                 char do_accel, enum MatchMode mode) {
+    const u8 *cur = *c_inout;
+    const u32 sheng_end = m->sheng_end;
+    const u32 accel_limit = m->accel_limit_8;
+    const u32 accept_limit = m->accept_limit_8;
+    const u32 as = m->alphaShift;
+
+    const u8 *succ_table = (const u8 *)((const char *)m
+                                        + sizeof(struct mcsheng64));
+    /* Adjust start of succ table so we can index into using state id (rather
+     * than adjust to normal id). As we will not be processing states with low
+     * state ids, we will not be accessing data before the succ table. Note: due
+     * to the size of the sheng tables, the succ_table pointer will still be
+     * inside the engine.*/
+    succ_table -= sheng_end << as;
+
+    assert(s >= sheng_end);
+    while (cur < end && s >= sheng_end) {
+        u8 cprime = m->remap[*cur];
+        DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *cur,
+                     ourisprint(*cur) ? *cur : '?', cprime);
+        s = succ_table[(s << as) + cprime];
+
+        DEBUG_PRINTF("s: %u\n", s);
+        cur++;
+
+        const int must_stop = do_accel
+                                  ? (s >= accel_limit)
+                                  : (mode != NO_MATCHES && s >= accept_limit);
+        if (must_stop) {
+            break;
+        }
+    }
+    *c_inout = cur;
+    return s;
+}
+
+/* Core scanner for the 8-bit-state engine: runs `len` bytes of `buf` starting
+ * from *state.
+ *
+ * Structured like the 16-bit executor: `without_accel` scans until
+ * min_accel_offset with acceleration disabled, `with_accel` scans to the end
+ * of the buffer and hands accelerable states to run_mcsheng_accel64() before
+ * dropping back to `without_accel`. States below m->sheng_end are stepped by
+ * doSheng64(), the rest by doNormal64_8(). A state is treated as accepting
+ * when it is at or above m->accept_limit_8.
+ *
+ * Returns:
+ *   MO_DEAD             the callback returned MO_HALT_MATCHING;
+ *   MO_MATCHES_PENDING  mode is STOP_AT_MATCH and an accept state was
+ *                       reached; *state holds it and *c_final points at the
+ *                       byte that produced it;
+ *   MO_ALIVE            otherwise; *state holds the final state and, in
+ *                       STOP_AT_MATCH mode, *c_final is the end of the
+ *                       buffer.
+ *
+ * Note: for len == 0, *c_final is written regardless of mode. */
+static really_inline
+char mcsheng64Exec8_i(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **c_final, enum MatchMode mode) {
+ if (!len) {
+ *c_final = buf;
+ return MO_ALIVE;
+ }
+ u32 s = *state;
+ const u8 *c = buf;
+ const u8 *c_end = buf + len;
+ const u8 sheng_end = m->sheng_end;
+
+ const struct mstate_aux *aux
+ = (const struct mstate_aux *)((const char *)m + m->aux_offset
+ - sizeof(struct NFA));
+ u32 accept_limit = m->accept_limit_8;
+
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
+
+ DEBUG_PRINTF("s: %u, len %zu\n", s, len);
+
+ /* with no acceleration available, or too little data for it to pay off,
+ * the whole buffer is handled by the without_accel loop */
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
+ do {
+ assert(c < min_accel_offset);
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ s = doSheng64(m, &c, min_accel_offset, c_end, s, 0);
+ } else {
+ s = doNormal64_8(m, &c, min_accel_offset, s, 0, mode);
+ assert(c <= min_accel_offset);
+ }
+
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ /* c already points past the matching byte */
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ } else if (doComplexReport64(cb, ctxt, m, s, loc, 0,
+ &cached_accept_state,
+ &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
+ } while (c < min_accel_offset);
+
+ if (c == c_end) {
+ goto exit;
+ }
+
+with_accel:
+ do {
+ u32 accel_limit = m->accel_limit_8;
+
+ assert(c < c_end);
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ /* sheng states above sheng_accel_limit are accelerable */
+ if (s > m->sheng_accel_limit) {
+ c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+ s = doSheng64(m, &c, c_end, c_end, s, 1);
+ } else {
+ /* only states at or above accel_limit that actually carry an accel
+ * scheme are accelerated */
+ if (s >= accel_limit && aux[s].accel_offset) {
+ c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+ s = doNormal64_8(m, &c, c_end, s, 1, mode);
+ }
+
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ /* c already points past the matching byte */
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ } else if (doComplexReport64(cb, ctxt, m, s, loc, 0,
+ &cached_accept_state,
+ &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end);
+ } while (c < c_end);
+
+exit:
+ *state = s;
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ return MO_ALIVE;
+}
+
+/* never_inline instantiation of mcsheng64Exec8_i() for CALLBACK_OUTPUT. */
+static never_inline
+char mcsheng64Exec8_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                         size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                         char single, const u8 **final_point) {
+    const enum MatchMode fixed_mode = CALLBACK_OUTPUT;
+    return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+                            final_point, fixed_mode);
+}
+
+/* never_inline instantiation of mcsheng64Exec8_i() for STOP_AT_MATCH. */
+static never_inline
+char mcsheng64Exec8_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                          size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                          char single, const u8 **final_point) {
+    const enum MatchMode fixed_mode = STOP_AT_MATCH;
+    return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+                            final_point, fixed_mode);
+}
+
+/* never_inline instantiation of mcsheng64Exec8_i() for NO_MATCHES. */
+static never_inline
+char mcsheng64Exec8_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                         size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                         char single, const u8 **final_point) {
+    const enum MatchMode fixed_mode = NO_MATCHES;
+    return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+                            final_point, fixed_mode);
+}
+
+/* Dispatches to the never_inline 8-bit executor specialised for `mode`. */
+static really_inline
+char mcsheng64Exec8_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf,
+                         size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+                         char single, const u8 **final_point,
+                         enum MatchMode mode) {
+    switch (mode) {
+    case CALLBACK_OUTPUT:
+        return mcsheng64Exec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single,
+                                   final_point);
+    case STOP_AT_MATCH:
+        return mcsheng64Exec8_i_sam(m, state, buf, len, offAdj, cb, ctxt,
+                                    single, final_point);
+    default:
+        assert(mode == NO_MATCHES);
+        return mcsheng64Exec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single,
+                                   final_point);
+    }
+}
+
+/* Fires the end-of-data reports of state `s`, if it has any. Returns
+ * MO_CONTINUE_MATCHING when the state has no accept_eod list, otherwise the
+ * result of doComplexReport64(). No accept cache is used for EOD reports. */
+static really_inline
+char mcshengCheckEOD64(const struct NFA *nfa, u32 s, u64a offset,
+                       NfaCallback cb, void *ctxt) {
+    const struct mcsheng64 *m = getImplNfa(nfa);
+    const struct mstate_aux *state_aux = get_aux64(m, s);
+
+    if (state_aux->accept_eod) {
+        return doComplexReport64(cb, ctxt, m, s, offset, 1, NULL, NULL);
+    }
+    return MO_CONTINUE_MATCHING;
+}
+
+/* Core queue runner for the 16-bit-state McSheng64 engine.
+ *
+ * Starting from the state stored in q->state, scans the data between
+ * successive queue items and applies each item when its location is reached:
+ * MQE_TOP switches to m->start_anchored at stream offset 0 and to the
+ * current state's `top` successor otherwise; MQE_END stores the state and
+ * finishes. Negative locations index backwards from `hend`, non-negative
+ * ones index into `buffer`. Unless mode is NO_MATCHES, scanning is capped at
+ * location `end` and the queue is rewound to an MQE_START item there.
+ *
+ * Returns:
+ *   MO_DEAD             a callback halted matching, or the state at MQE_END
+ *                       is 0;
+ *   MO_MATCHES_PENDING  STOP_AT_MATCH mode hit an accept; the queue is
+ *                       rewound to an MQE_START item just past the match;
+ *   MO_ALIVE            otherwise. */
+static really_inline
+char nfaExecMcSheng64_16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+ const u8 *hend, NfaCallback cb, void *context,
+ struct mq *q, char single, s64a end,
+ enum MatchMode mode) {
+ assert(n->type == MCSHENG_64_NFA_16);
+ const struct mcsheng64 *m = getImplNfa(n);
+ s64a sp;
+
+ assert(ISALIGNED_N(q->state, 2));
+ u32 s = *(u16 *)q->state;
+
+ /* a report left pending for the current state is raised first */
+ if (q->report_current) {
+ assert(s);
+ assert(get_aux64(m, s)->accept);
+
+ int rv;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ rv = cb(0, q_cur_offset(q), m->arb_report, context);
+ } else {
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id);
+ }
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ /* consume the leading queue item; its location is where scanning starts */
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+
+ /* a single scan never crosses location 0, where the source switches
+ * from history to the main buffer */
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ /* do main buffer region */
+ const u8 *final_look;
+ char rv = mcsheng64Exec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
+ offset + sp, cb, context, single,
+ &final_look, mode);
+ if (rv == MO_DEAD) {
+ *(u16 *)q->state = 0;
+ return MO_DEAD;
+ }
+ if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
+
+ assert(q->cur);
+ assert(final_look != cur_buf + local_ep);
+
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u16 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
+ assert(rv == MO_ALIVE);
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ /* the scan stopped at the history/buffer boundary: keep going before
+ * handling the queue item */
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(sp + offset || !s);
+ if (sp + offset == 0) {
+ s = m->start_anchored;
+ break;
+ }
+ s = mcshengEnableStarts64(m, s);
+ break;
+ case MQE_END:
+ *(u16 *)q->state = s;
+ q->cur++;
+ return s ? MO_ALIVE : MO_DEAD;
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
+/* Core queue runner for the 8-bit-state McSheng64 engine.
+ *
+ * Starting from the state byte stored in q->state, scans the data between
+ * successive queue items and applies each item when its location is reached:
+ * MQE_TOP switches to m->start_anchored at stream offset 0 and to the
+ * current state's `top` successor otherwise; MQE_END stores the state and
+ * finishes. Negative locations index backwards from `hend`, non-negative
+ * ones index into `buffer`. Unless mode is NO_MATCHES, scanning is capped at
+ * location `end` and the queue is rewound to an MQE_START item there.
+ *
+ * Returns:
+ *   MO_DEAD             a callback halted matching, or the state at MQE_END
+ *                       is 0;
+ *   MO_MATCHES_PENDING  STOP_AT_MATCH mode hit an accept; the queue is
+ *                       rewound to an MQE_START item just past the match;
+ *   MO_ALIVE            otherwise. */
+static really_inline
+char nfaExecMcSheng64_8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+                            const u8 *hend, NfaCallback cb, void *context,
+                            struct mq *q, char single, s64a end,
+                            enum MatchMode mode) {
+    assert(n->type == MCSHENG_64_NFA_8);
+    const struct mcsheng64 *m = getImplNfa(n);
+    s64a sp;
+
+    u32 s = *(u8 *)q->state;
+
+    /* a report left pending for the current state is raised first */
+    if (q->report_current) {
+        assert(s);
+        assert(s >= m->accept_limit_8);
+
+        int rv;
+        if (single) {
+            DEBUG_PRINTF("reporting %u\n", m->arb_report);
+
+            rv = cb(0, q_cur_offset(q), m->arb_report, context);
+        } else {
+            u32 cached_accept_id = 0;
+            u32 cached_accept_state = 0;
+
+            rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0,
+                                   &cached_accept_state, &cached_accept_id);
+        }
+
+        q->report_current = 0;
+
+        if (rv == MO_HALT_MATCHING) {
+            return MO_DEAD;
+        }
+    }
+
+    /* consume the leading queue item; its location is where scanning starts */
+    sp = q_cur_loc(q);
+    q->cur++;
+
+    const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+    if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+        DEBUG_PRINTF("this is as far as we go\n");
+        q->cur--;
+        q->items[q->cur].type = MQE_START;
+        q->items[q->cur].location = end;
+        *(u8 *)q->state = s;
+        return MO_ALIVE;
+    }
+
+    while (1) {
+        /* bounds-check the queue cursor before the debug print reads the
+         * item it refers to */
+        assert(q->cur < q->end);
+        DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :
+                     q->items[q->cur].type == MQE_END ? "END" : "???",
+                     q->items[q->cur].location + offset);
+        s64a ep = q->items[q->cur].location;
+        if (mode != NO_MATCHES) {
+            ep = MIN(ep, end);
+        }
+
+        assert(ep >= sp);
+
+        /* a single scan never crosses location 0, where the source switches
+         * from history to the main buffer */
+        s64a local_ep = ep;
+        if (sp < 0) {
+            local_ep = MIN(0, ep);
+        }
+
+        const u8 *final_look;
+        char rv = mcsheng64Exec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
+                                      offset + sp, cb, context, single,
+                                      &final_look, mode);
+        /* the executor reports a halted scan as MO_DEAD (this is what the
+         * 16-bit runner tests for as well) */
+        if (rv == MO_DEAD) {
+            *(u8 *)q->state = 0;
+            return MO_DEAD;
+        }
+        if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
+            DEBUG_PRINTF("this is as far as we go\n");
+            DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
+
+            assert(q->cur);
+            assert(final_look != cur_buf + local_ep);
+
+            q->cur--;
+            q->items[q->cur].type = MQE_START;
+            q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+                                                                   * early -1 */
+            *(u8 *)q->state = s;
+            return MO_MATCHES_PENDING;
+        }
+
+        assert(rv == MO_ALIVE);
+        assert(q->cur);
+        if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+            DEBUG_PRINTF("this is as far as we go\n");
+            assert(q->cur);
+            q->cur--;
+            q->items[q->cur].type = MQE_START;
+            q->items[q->cur].location = end;
+            *(u8 *)q->state = s;
+            return MO_ALIVE;
+        }
+
+        sp = local_ep;
+
+        if (sp == 0) {
+            cur_buf = buffer;
+        }
+
+        /* the scan stopped at the history/buffer boundary: keep going before
+         * handling the queue item */
+        if (sp != ep) {
+            continue;
+        }
+
+        switch (q->items[q->cur].type) {
+        case MQE_TOP:
+            assert(sp + offset || !s);
+            if (sp + offset == 0) {
+                s = (u8)m->start_anchored;
+                break;
+            }
+            s = mcshengEnableStarts64(m, s);
+            break;
+        case MQE_END:
+            *(u8 *)q->state = s;
+            q->cur++;
+            return s ? MO_ALIVE : MO_DEAD;
+        default:
+            assert(!"invalid queue event");
+        }
+
+        q->cur++;
+    }
+}
+
+/* Queue entry point for the 64-8 engine: forwards the queue's buffers,
+ * callback and limit 'end' to nfaExecMcSheng64_8_Q2i() in CALLBACK_OUTPUT mode
+ * and returns its result. */
+char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end) {
+    assert(n->type == MCSHENG_64_NFA_8);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 *history_end = q->history + q->hlength;
+
+    return nfaExecMcSheng64_8_Q2i(n, q->offset, q->buffer, history_end, q->cb,
+                                  q->context, q,
+                                  impl->flags & MCSHENG_FLAG_SINGLE, end,
+                                  CALLBACK_OUTPUT);
+}
+
+/* Queue entry point for the 64-16 engine: forwards the queue's buffers,
+ * callback and limit 'end' to nfaExecMcSheng64_16_Q2i() in CALLBACK_OUTPUT
+ * mode and returns its result. */
+char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end) {
+    assert(n->type == MCSHENG_64_NFA_16);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 *history_end = q->history + q->hlength;
+
+    return nfaExecMcSheng64_16_Q2i(n, q->offset, q->buffer, history_end, q->cb,
+                                   q->context, q,
+                                   impl->flags & MCSHENG_FLAG_SINGLE, end,
+                                   CALLBACK_OUTPUT);
+}
+
+/* Fires the report(s) for the state byte saved in q->state at the current
+ * queue offset, provided that state is at or above m->accept_limit_8.
+ * The callback's return value is ignored; always returns 0. */
+char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q) {
+    const struct mcsheng64 *impl = getImplNfa(n);
+    NfaCallback report_cb = q->cb;
+    void *cb_arg = q->context;
+    const u32 state = *(u8 *)q->state;
+    const u8 one_report = impl->flags & MCSHENG_FLAG_SINGLE;
+    const u64a report_at = q_cur_offset(q);
+
+    assert(q_cur_type(q) == MQE_START);
+    assert(state);
+
+    if (state < impl->accept_limit_8) {
+        return 0; /* below the accept limit: nothing to report */
+    }
+
+    if (one_report) {
+        DEBUG_PRINTF("reporting %u\n", impl->arb_report);
+        report_cb(0, report_at, impl->arb_report, cb_arg);
+        return 0;
+    }
+
+    u32 cached_accept_id = 0;
+    u32 cached_accept_state = 0;
+    doComplexReport64(report_cb, cb_arg, impl, state, report_at, 0,
+                      &cached_accept_state, &cached_accept_id);
+    return 0;
+}
+
+/* Fires the report(s) for the 16-bit state saved in q->state at the current
+ * queue offset, provided that state's aux entry has a non-zero accept field.
+ * The callback's return value is ignored; always returns 0. */
+char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q) {
+    const struct mcsheng64 *impl = getImplNfa(n);
+    NfaCallback report_cb = q->cb;
+    void *cb_arg = q->context;
+    const u32 state = *(u16 *)q->state;
+    const struct mstate_aux *state_aux = get_aux64(impl, state);
+    const u8 one_report = impl->flags & MCSHENG_FLAG_SINGLE;
+    const u64a report_at = q_cur_offset(q);
+
+    assert(q_cur_type(q) == MQE_START);
+    DEBUG_PRINTF("state %u\n", state);
+    assert(state);
+
+    if (!state_aux->accept) {
+        return 0; /* not an accept state: nothing to report */
+    }
+
+    if (one_report) {
+        DEBUG_PRINTF("reporting %u\n", impl->arb_report);
+        report_cb(0, report_at, impl->arb_report, cb_arg);
+        return 0;
+    }
+
+    u32 cached_accept_id = 0;
+    u32 cached_accept_state = 0;
+    doComplexReport64(report_cb, cb_arg, impl, state, report_at, 0,
+                      &cached_accept_state, &cached_accept_id);
+    return 0;
+}
+
+/* Returns 1 if the report list referenced by aux->accept contains 'report'.
+ * Returns 0 if it does not, or if aux->accept is 0 (no report list). */
+static
+char mcshengHasAccept64(const struct mcsheng64 *m, const struct mstate_aux *aux,
+                        ReportID report) {
+    assert(m && aux);
+
+    if (aux->accept == 0) {
+        return 0;
+    }
+
+    /* aux->accept is reduced by sizeof(struct NFA) before being applied to m */
+    const char *list_addr = (const char *)m + aux->accept - sizeof(struct NFA);
+    const struct report_list *list = (const struct report_list *)list_addr;
+    assert(ISALIGNED_N(list, 4));
+
+    DEBUG_PRINTF("report list has %u entries\n", list->count);
+
+    u32 idx = 0;
+    while (idx < list->count) {
+        if (list->report[idx] == report) {
+            return 1;
+        }
+        idx++;
+    }
+
+    return 0;
+}
+
+/* Reports whether the state byte saved in q->state has 'report' in its report
+ * list (see mcshengHasAccept64()). */
+char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report,
+                                 struct mq *q) {
+    assert(n && q);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 cur_state = *(u8 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hhu\n", cur_state);
+
+    const struct mstate_aux *state_aux = get_aux64(impl, cur_state);
+    return mcshengHasAccept64(impl, state_aux, report);
+}
+
+/* Returns 1 if the aux entry of the state byte saved in q->state has a
+ * non-zero accept field, 0 otherwise. */
+char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 cur_state = *(u8 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hhu\n", cur_state);
+
+    const struct mstate_aux *state_aux = get_aux64(impl, cur_state);
+    return state_aux->accept != 0;
+}
+
+/* Reports whether the 16-bit state saved in q->state has 'report' in its
+ * report list (see mcshengHasAccept64()). */
+char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report,
+                                  struct mq *q) {
+    assert(n && q);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u16 cur_state = *(u16 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hu\n", cur_state);
+
+    const struct mstate_aux *state_aux = get_aux64(impl, cur_state);
+    return mcshengHasAccept64(impl, state_aux, report);
+}
+
+/* Returns 1 if the aux entry of the 16-bit state saved in q->state has a
+ * non-zero accept field, 0 otherwise. */
+char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u16 cur_state = *(u16 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hu\n", cur_state);
+
+    const struct mstate_aux *state_aux = get_aux64(impl, cur_state);
+    return state_aux->accept != 0;
+}
+
+/* Queue entry point for the 64-8 engine in STOP_AT_MATCH mode: forwards the
+ * queue's buffers, callback and limit 'end' to nfaExecMcSheng64_8_Q2i() and
+ * returns its result. */
+char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end) {
+    assert(n->type == MCSHENG_64_NFA_8);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 *history_end = q->history + q->hlength;
+
+    return nfaExecMcSheng64_8_Q2i(n, q->offset, q->buffer, history_end, q->cb,
+                                  q->context, q,
+                                  impl->flags & MCSHENG_FLAG_SINGLE, end,
+                                  STOP_AT_MATCH);
+}
+
+/* Queue entry point for the 64-16 engine in STOP_AT_MATCH mode: forwards the
+ * queue's buffers, callback and limit 'end' to nfaExecMcSheng64_16_Q2i() and
+ * returns its result. */
+char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end) {
+    assert(n->type == MCSHENG_64_NFA_16);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 *history_end = q->history + q->hlength;
+
+    return nfaExecMcSheng64_16_Q2i(n, q->offset, q->buffer, history_end, q->cb,
+                                   q->context, q,
+                                   impl->flags & MCSHENG_FLAG_SINGLE, end,
+                                   STOP_AT_MATCH);
+}
+
+/* Runs the queue through nfaExecMcSheng64_8_Q2i() in NO_MATCHES mode (with an
+ * end value of 0), then checks the resulting state against 'report'.
+ * Returns MO_MATCHES_PENDING if the run result is non-zero and the final state
+ * accepts 'report'; otherwise returns the run result unchanged. */
+char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report) {
+    assert(n->type == MCSHENG_64_NFA_8);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 *history_end = q->history + q->hlength;
+
+    const char status = nfaExecMcSheng64_8_Q2i(n, q->offset, q->buffer,
+                                               history_end, q->cb, q->context,
+                                               q,
+                                               impl->flags & MCSHENG_FLAG_SINGLE,
+                                               0 /* end */, NO_MATCHES);
+    if (!status) {
+        return status;
+    }
+    if (nfaExecMcSheng64_8_inAccept(n, report, q)) {
+        return MO_MATCHES_PENDING;
+    }
+    return status;
+}
+
+/* Runs the queue through nfaExecMcSheng64_16_Q2i() in NO_MATCHES mode (with an
+ * end value of 0), then checks the resulting state against 'report'.
+ * Returns MO_MATCHES_PENDING if the run result is non-zero and the final state
+ * accepts 'report'; otherwise returns the run result unchanged. */
+char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report) {
+    assert(n->type == MCSHENG_64_NFA_16);
+
+    const struct mcsheng64 *impl = getImplNfa(n);
+    const u8 *history_end = q->history + q->hlength;
+
+    const char status = nfaExecMcSheng64_16_Q2i(n, q->offset, q->buffer,
+                                                history_end, q->cb, q->context,
+                                                q,
+                                                impl->flags & MCSHENG_FLAG_SINGLE,
+                                                0 /* end */, NO_MATCHES);
+    if (!status) {
+        return status;
+    }
+    if (nfaExecMcSheng64_16_inAccept(n, report, q)) {
+        return MO_MATCHES_PENDING;
+    }
+    return status;
+}
+
+/* Writes the initial 8-bit state into 'state': m->start_floating when offset
+ * is non-zero, m->start_anchored when it is zero.
+ * Returns 1 if a non-zero start state was stored; returns 0 and leaves 'state'
+ * untouched if the chosen start state is 0. */
+char nfaExecMcSheng64_8_initCompressedState(const struct NFA *nfa, u64a offset,
+                                            void *state, UNUSED u8 key) {
+    const struct mcsheng64 *impl = getImplNfa(nfa);
+
+    u8 start;
+    if (offset) {
+        start = impl->start_floating;
+    } else {
+        start = impl->start_anchored;
+    }
+
+    if (!start) {
+        return 0;
+    }
+
+    *(u8 *)state = start;
+    return 1;
+}
+
+/* Writes the initial 16-bit state into 'state' with unaligned_store_u16():
+ * m->start_floating when offset is non-zero, m->start_anchored when it is zero.
+ * Returns 1 if a non-zero start state was stored; returns 0 and leaves 'state'
+ * untouched if the chosen start state is 0. */
+char nfaExecMcSheng64_16_initCompressedState(const struct NFA *nfa, u64a offset,
+                                             void *state, UNUSED u8 key) {
+    const struct mcsheng64 *impl = getImplNfa(nfa);
+
+    u16 start;
+    if (offset) {
+        start = impl->start_floating;
+    } else {
+        start = impl->start_anchored;
+    }
+
+    if (!start) {
+        return 0;
+    }
+
+    unaligned_store_u16(state, start);
+    return 1;
+}
+
+/* Reads the 8-bit state from 'state' and hands it to mcshengCheckEOD64();
+ * returns that function's result. streamState is not used. */
+char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state,
+                                UNUSED const char *streamState, u64a offset,
+                                NfaCallback callback, void *context) {
+    const u8 final_state = *(const u8 *)state;
+
+    return mcshengCheckEOD64(nfa, final_state, offset, callback, context);
+}
+
+/* Reads the 16-bit state from 'state' (asserted to be 2-byte aligned) and
+ * hands it to mcshengCheckEOD64(); returns that function's result.
+ * streamState is not used. */
+char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state,
+                                 UNUSED const char *streamState, u64a offset,
+                                 NfaCallback callback, void *context) {
+    assert(ISALIGNED_N(state, 2));
+
+    const u16 final_state = *(const u16 *)state;
+
+    return mcshengCheckEOD64(nfa, final_state, offset, callback, context);
+}
+
+/* Resets the queue's one-byte state to 0. Always returns 0. */
+char nfaExecMcSheng64_8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
+    assert(nfa->scratchStateSize == 1);
+
+    u8 *state_slot = (u8 *)q->state;
+    *state_slot = 0;
+
+    return 0;
+}
+
+/* Resets the queue's two-byte state (asserted to be 2-byte aligned) to 0.
+ * Always returns 0. */
+char nfaExecMcSheng64_16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
+    assert(nfa->scratchStateSize == 2);
+    assert(ISALIGNED_N(q->state, 2));
+
+    u16 *state_slot = (u16 *)q->state;
+    *state_slot = 0;
+
+    return 0;
+}
+
+/* Copies the one-byte state from q->state to q->streamState; 'loc' is not
+ * used. Always returns 0. */
+char nfaExecMcSheng64_8_queueCompressState(UNUSED const struct NFA *nfa,
+                                           const struct mq *q, UNUSED s64a loc) {
+    assert(nfa->scratchStateSize == 1);
+    assert(nfa->streamStateSize == 1);
+
+    const u8 live_state = *(const u8 *)q->state;
+    *(u8 *)q->streamState = live_state;
+
+    return 0;
+}
+
+/* Copies the one-byte state from 'src' to 'dest'; 'offset' and 'key' are not
+ * used. Always returns 0. */
+char nfaExecMcSheng64_8_expandState(UNUSED const struct NFA *nfa, void *dest,
+                                    const void *src, UNUSED u64a offset,
+                                    UNUSED u8 key) {
+    assert(nfa->scratchStateSize == 1);
+    assert(nfa->streamStateSize == 1);
+
+    const u8 saved_state = *(const u8 *)src;
+    *(u8 *)dest = saved_state;
+
+    return 0;
+}
+
+/* Copies the two-byte state from q->state (asserted to be 2-byte aligned) to
+ * q->streamState using an unaligned store; 'loc' is not used.
+ * Always returns 0. */
+char nfaExecMcSheng64_16_queueCompressState(UNUSED const struct NFA *nfa,
+                                            const struct mq *q,
+                                            UNUSED s64a loc) {
+    assert(nfa->scratchStateSize == 2);
+    assert(nfa->streamStateSize == 2);
+    assert(ISALIGNED_N(q->state, 2));
+
+    const u16 live_state = *(const u16 *)(q->state);
+    unaligned_store_u16(q->streamState, live_state);
+
+    return 0;
+}
+
+/* Loads the two-byte state from 'src' with an unaligned load and stores it to
+ * 'dest' (asserted to be 2-byte aligned); 'offset' and 'key' are not used.
+ * Always returns 0. */
+char nfaExecMcSheng64_16_expandState(UNUSED const struct NFA *nfa, void *dest,
+                                     const void *src, UNUSED u64a offset,
+                                     UNUSED u8 key) {
+    assert(nfa->scratchStateSize == 2);
+    assert(nfa->streamStateSize == 2);
+    assert(ISALIGNED_N(dest, 2));
+
+    const u16 saved_state = unaligned_load_u16(src);
+    *(u16 *)dest = saved_state;
+
+    return 0;
+}
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng.h b/contrib/libs/hyperscan/src/nfa/mcsheng.h
index 91872779cd..0329e12128 100644
--- a/contrib/libs/hyperscan/src/nfa/mcsheng.h
+++ b/contrib/libs/hyperscan/src/nfa/mcsheng.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -80,78 +80,78 @@ char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
-#if defined(HAVE_AVX512VBMI)
-/* 64-8 bit Sheng-McClellan hybrid */
-char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
- NfaCallback callback, void *context);
-char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
-char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecMcSheng64_8_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecMcSheng64_8_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
+#if defined(HAVE_AVX512VBMI) /* prototypes below are declared only when HAVE_AVX512VBMI is defined */
+/* 64-8 bit Sheng-McClellan hybrid */
+char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
+ NfaCallback callback, void *context);
+char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
+char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng64_8_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng64_8_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL /* aliased to the NO_IMPL placeholder in both branches */
+#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+/* 64-16 bit Sheng-McClellan hybrid */
+char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
+ NfaCallback callback, void *context);
+char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
+char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng64_16_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng64_16_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecMcSheng64_16_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL /* aliased to the NO_IMPL placeholder in both branches */
+#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
+#else // !HAVE_AVX512VBMI
+#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL /* in this branch every 64-8 entry point name is a placeholder macro */
+#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
+#define nfaExecMcSheng64_8_Q NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_Q2 NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_QR NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_inAccept NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_inAnyAccept NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_queueInitState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_queueCompressState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_expandState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_initCompressedState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_testEOD NFA_API_NO_IMPL
+#define nfaExecMcSheng64_8_reportCurrent NFA_API_NO_IMPL
+
+#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL /* likewise for every 64-16 entry point name */
+#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
+#define nfaExecMcSheng64_16_Q NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_Q2 NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_QR NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_inAccept NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_inAnyAccept NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_queueInitState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_queueCompressState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_expandState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_initCompressedState NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_testEOD NFA_API_NO_IMPL
+#define nfaExecMcSheng64_16_reportCurrent NFA_API_NO_IMPL
+
+#endif //end of HAVE_AVX512VBMI
-#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-/* 64-16 bit Sheng-McClellan hybrid */
-char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
- NfaCallback callback, void *context);
-char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
-char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecMcSheng64_16_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecMcSheng64_16_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecMcSheng64_16_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
-#else // !HAVE_AVX512VBMI
-#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
-#define nfaExecMcSheng64_8_Q NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_Q2 NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_QR NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_inAccept NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_inAnyAccept NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_queueInitState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_queueCompressState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_expandState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_initCompressedState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_testEOD NFA_API_NO_IMPL
-#define nfaExecMcSheng64_8_reportCurrent NFA_API_NO_IMPL
-
-#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
-#define nfaExecMcSheng64_16_Q NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_Q2 NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_QR NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_inAccept NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_inAnyAccept NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_queueInitState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_queueCompressState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_expandState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_initCompressedState NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_testEOD NFA_API_NO_IMPL
-#define nfaExecMcSheng64_16_reportCurrent NFA_API_NO_IMPL
-
-#endif //end of HAVE_AVX512VBMI
-
#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp
index ffe630c554..fb75e49a35 100644
--- a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -244,106 +244,106 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
}
static
-mstate_aux *getAux64(NFA *n, dstate_id_t i) {
- mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n);
- mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
-
- mstate_aux *aux = aux_base + i;
- assert((const char *)aux < (const char *)n + m->length);
- return aux;
-}
-
-static
-void createShuffleMasks64(mcsheng64 *m, const dfa_info &info,
- dstate_id_t sheng_end,
- const map<dstate_id_t, AccelScheme> &accel_escape_info) {
- DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
- assert(sheng_end > DEAD_STATE + 1);
- assert(sheng_end <= sizeof(m512) + 1);
- vector<array<u8, sizeof(m512)>> masks;
- masks.resize(info.alpha_size);
- /* -1 to avoid wasting a slot as we do not include dead state */
- vector<dstate_id_t> raw_ids;
- raw_ids.resize(sheng_end - 1);
- for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
- assert(info.implId(s)); /* should not map to DEAD_STATE */
- if (info.is_sheng(s)) {
- raw_ids[info.extra[s].sheng_id] = s;
- }
- }
- for (u32 i = 0; i < info.alpha_size; i++) {
- if (i == info.alpha_remap[TOP]) {
- continue;
- }
- auto &mask = masks[i];
- assert(sizeof(mask) == sizeof(m512));
- mask.fill(0);
-
- for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
- dstate_id_t raw_id = raw_ids[sheng_id];
- dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
- if (next_id == DEAD_STATE) {
- next_id = sheng_end - 1;
- } else if (next_id < sheng_end) {
- next_id--;
- }
- DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
- mask[sheng_id] = verify_u8(next_id);
- }
- }
- for (u32 i = 0; i < N_CHARS; i++) {
- assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
- memcpy((u8 *)&m->sheng_succ_masks[i],
- (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512));
- }
- m->sheng_end = sheng_end;
- m->sheng_accel_limit = sheng_end - 1;
-
- for (dstate_id_t s : raw_ids) {
- if (contains(accel_escape_info, s)) {
- LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
- }
- }
-}
-
-static
-void populateBasicInfo64(size_t state_size, const dfa_info &info,
- u32 total_size, u32 aux_offset, u32 accel_offset,
- u32 accel_count, ReportID arb, bool single, NFA *nfa) {
- assert(state_size == sizeof(u16) || state_size == sizeof(u8));
-
- nfa->length = total_size;
- nfa->nPositions = info.states.size();
-
- nfa->scratchStateSize = verify_u32(state_size);
- nfa->streamStateSize = verify_u32(state_size);
-
- if (state_size == sizeof(u8)) {
- nfa->type = MCSHENG_64_NFA_8;
- } else {
- nfa->type = MCSHENG_64_NFA_16;
- }
-
- mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
- for (u32 i = 0; i < 256; i++) {
- m->remap[i] = verify_u8(info.alpha_remap[i]);
- }
- m->alphaShift = info.getAlphaShift();
- m->length = total_size;
- m->aux_offset = aux_offset;
- m->accel_offset = accel_offset;
- m->arb_report = arb;
- m->state_count = verify_u16(info.size());
- m->start_anchored = info.implId(info.raw.start_anchored);
- m->start_floating = info.implId(info.raw.start_floating);
- m->has_accel = accel_count ? 1 : 0;
-
- if (single) {
- m->flags |= MCSHENG_FLAG_SINGLE;
- }
-}
-
-static
+mstate_aux *getAux64(NFA *n, dstate_id_t i) {
+    /* Returns the i-th mstate_aux entry of the table that starts
+     * m->aux_offset bytes into the NFA; asserts that the entry starts before
+     * byte m->length of the NFA. */
+    char *const nfa_bytes = (char *)n;
+    const mcsheng64 *impl = (mcsheng64 *)getMutableImplNfa(n);
+
+    mstate_aux *entry = (mstate_aux *)(nfa_bytes + impl->aux_offset) + i;
+    assert((const char *)entry < (const char *)nfa_bytes + impl->length);
+    return entry;
+}
+/* Builds the per-byte successor masks for the sheng region of a mcsheng64.
+ *
+ * For every alphabet symbol except the one TOP maps to, a mask of
+ * sizeof(m512) bytes is filled so that byte [sheng_id] holds the encoded
+ * successor of that sheng state. The masks are then copied into
+ * m->sheng_succ_masks, one per input byte value, via info.alpha_remap.
+ *
+ * Also sets m->sheng_end and m->sheng_accel_limit. The limit starts at
+ * sheng_end - 1 and is passed through LIMIT_TO_AT_MOST with the sheng_id of
+ * each sheng state found in accel_escape_info (presumably lowering it to the
+ * smallest such id -- confirm against the macro definition).
+ */
+static
+void createShuffleMasks64(mcsheng64 *m, const dfa_info &info,
+ dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info) {
+ DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
+ assert(sheng_end > DEAD_STATE + 1);
+ assert(sheng_end <= sizeof(m512) + 1);
+ vector<array<u8, sizeof(m512)>> masks;
+ masks.resize(info.alpha_size);
+ /* -1 to avoid wasting a slot as we do not include dead state */
+ vector<dstate_id_t> raw_ids;
+ raw_ids.resize(sheng_end - 1);
+ for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
+ assert(info.implId(s)); /* should not map to DEAD_STATE */
+ if (info.is_sheng(s)) {
+ raw_ids[info.extra[s].sheng_id] = s; /* inverse map: sheng_id -> raw state id */
+ }
+ }
+ for (u32 i = 0; i < info.alpha_size; i++) {
+ if (i == info.alpha_remap[TOP]) {
+ continue;
+ }
+ auto &mask = masks[i];
+ assert(sizeof(mask) == sizeof(m512));
+ mask.fill(0);
+
+ for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
+ dstate_id_t raw_id = raw_ids[sheng_id];
+ dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
+ if (next_id == DEAD_STATE) {
+ next_id = sheng_end - 1; /* dead successor is encoded as sheng_end - 1 */
+ } else if (next_id < sheng_end) {
+ next_id--; /* sheng successors are stored as impl id - 1 */
+ }
+ DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
+ mask[sheng_id] = verify_u8(next_id);
+ }
+ }
+ for (u32 i = 0; i < N_CHARS; i++) {
+ assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
+ memcpy((u8 *)&m->sheng_succ_masks[i],
+ (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512));
+ }
+ m->sheng_end = sheng_end;
+ m->sheng_accel_limit = sheng_end - 1;
+
+ for (dstate_id_t s : raw_ids) {
+ if (contains(accel_escape_info, s)) {
+ LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
+ }
+ }
+}
+
+/* Fills in the generic NFA header and the fixed mcsheng64 fields.
+ *
+ * state_size must be sizeof(u8) or sizeof(u16); it selects the engine type
+ * (MCSHENG_64_NFA_8 or MCSHENG_64_NFA_16) and is used for both the scratch
+ * and stream state sizes. 'single' sets MCSHENG_FLAG_SINGLE in m->flags. */
+static
+void populateBasicInfo64(size_t state_size, const dfa_info &info,
+                         u32 total_size, u32 aux_offset, u32 accel_offset,
+                         u32 accel_count, ReportID arb, bool single, NFA *nfa) {
+    assert(state_size == sizeof(u16) || state_size == sizeof(u8));
+    const bool narrow_state = state_size == sizeof(u8);
+
+    /* generic NFA header */
+    nfa->length = total_size;
+    nfa->nPositions = info.states.size();
+
+    nfa->scratchStateSize = verify_u32(state_size);
+    nfa->streamStateSize = verify_u32(state_size);
+
+    nfa->type = narrow_state ? MCSHENG_64_NFA_8 : MCSHENG_64_NFA_16;
+
+    /* engine-specific header */
+    mcsheng64 *impl = (mcsheng64 *)getMutableImplNfa(nfa);
+    for (u32 byte = 0; byte < 256; byte++) {
+        impl->remap[byte] = verify_u8(info.alpha_remap[byte]);
+    }
+    impl->alphaShift = info.getAlphaShift();
+    impl->length = total_size;
+    impl->aux_offset = aux_offset;
+    impl->accel_offset = accel_offset;
+    impl->arb_report = arb;
+    impl->state_count = verify_u16(info.size());
+    impl->start_anchored = info.implId(info.raw.start_anchored);
+    impl->start_floating = info.implId(info.raw.start_floating);
+    impl->has_accel = accel_count ? 1 : 0;
+
+    if (single) {
+        impl->flags |= MCSHENG_FLAG_SINGLE;
+    }
+}
+
+static
size_t calcShermanRegionSize(const dfa_info &info) {
size_t rv = 0;
@@ -371,7 +371,7 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
/* returns false on error */
static
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
- dstate_id_t *sherman_base) {
+ dstate_id_t *sherman_base) {
info.states[0].impl_id = 0; /* dead is always 0 */
vector<dstate_id_t> norm;
@@ -481,7 +481,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
}
#define MAX_SHENG_STATES 16
-#define MAX_SHENG64_STATES 64
+#define MAX_SHENG64_STATES 64
#define MAX_SHENG_LEAKINESS 0.05
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
@@ -535,8 +535,8 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
static
dstate_id_t find_sheng_states(dfa_info &info,
- map<dstate_id_t, AccelScheme> &accel_escape_info,
- size_t max_sheng_states) {
+ map<dstate_id_t, AccelScheme> &accel_escape_info,
+ size_t max_sheng_states) {
RdfaGraph g(info.raw);
auto cyclics = find_vertices_in_cycles(g);
@@ -571,7 +571,7 @@ dstate_id_t find_sheng_states(dfa_info &info,
flat_set<dstate_id_t> considered = { DEAD_STATE };
bool seen_back_edge = false;
while (!to_consider.empty()
- && sheng_states.size() < max_sheng_states) {
+ && sheng_states.size() < max_sheng_states) {
auto v = to_consider.front();
to_consider.pop_front();
if (!considered.insert(g[v].index).second) {
@@ -717,80 +717,80 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
}
}
-static
-void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
- const map<dstate_id_t, AccelScheme> &accel_escape_info,
- u32 accel_offset, UNUSED u32 accel_end_offset,
- const vector<u32> &reports,
- const vector<u32> &reports_eod,
- u32 report_base_offset,
- const raw_report_info &ri) {
- mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
-
- vector<u32> reportOffsets;
-
- ri.fillReportLists(nfa, report_base_offset, reportOffsets);
-
- for (u32 i = 0; i < info.size(); i++) {
- u16 impl_id = info.implId(i);
- mstate_aux *this_aux = getAux64(nfa, impl_id);
-
- fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
- if (contains(accel_escape_info, i)) {
- this_aux->accel_offset = accel_offset;
- accel_offset += info.strat.accelSize();
- assert(accel_offset <= accel_end_offset);
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
- info.strat.buildAccel(i, accel_escape_info.at(i),
- (void *)((char *)m + this_aux->accel_offset));
- }
- }
-}
-
-static
-u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
- mstate_aux *aux = getAux64(nfa, target_impl_id);
- u16 flags = 0;
-
- if (aux->accept) {
- flags |= ACCEPT_FLAG;
- }
-
- if (aux->accel_offset) {
- flags |= ACCEL_FLAG;
- }
-
- return flags;
-}
-
-static
-void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
- dstate_id_t sheng_end,
- UNUSED dstate_id_t sherman_base) {
- u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64));
-
- u8 alphaShift = info.getAlphaShift();
- assert(alphaShift <= 8);
-
- for (size_t i = 0; i < info.size(); i++) {
- if (!info.is_normal(i)) {
- assert(info.implId(i) < sheng_end || info.is_sherman(i));
- continue;
- }
-
- assert(info.implId(i) < sherman_base);
- u16 normal_id = verify_u16(info.implId(i) - sheng_end);
-
- for (size_t s = 0; s < info.impl_alpha_size; s++) {
- dstate_id_t raw_succ = info.states[i].next[s];
- u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];
-
- entry = info.implId(raw_succ);
- entry |= get_edge_flags64(nfa, entry);
- }
- }
-}
-
+static /* Fills the mstate_aux entry of every state and builds accel structures.
+ *
+ * Report lists are written first via ri.fillReportLists(). Then, for each raw
+ * state i, the aux entry at its impl id is filled by fillInAux(). States
+ * present in accel_escape_info additionally get the next accel slot:
+ * accel_offset is recorded in the aux entry and advanced by
+ * info.strat.accelSize(). accel_end_offset is only used in an assert.
+ */
+void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info,
+ u32 accel_offset, UNUSED u32 accel_end_offset,
+ const vector<u32> &reports,
+ const vector<u32> &reports_eod,
+ u32 report_base_offset,
+ const raw_report_info &ri) {
+ mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
+
+ vector<u32> reportOffsets;
+
+ ri.fillReportLists(nfa, report_base_offset, reportOffsets);
+
+ for (u32 i = 0; i < info.size(); i++) {
+ u16 impl_id = info.implId(i);
+ mstate_aux *this_aux = getAux64(nfa, impl_id);
+
+ fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
+ if (contains(accel_escape_info, i)) {
+ this_aux->accel_offset = accel_offset; // slot for this state
+ accel_offset += info.strat.accelSize(); // advance to the next free slot
+ assert(accel_offset <= accel_end_offset);
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+ info.strat.buildAccel(i, accel_escape_info.at(i),
+ (void *)((char *)m + this_aux->accel_offset)); // offset is applied to m, not to nfa
+ }
+ }
+}
+
+/* Returns the flag bits for a transition into target_impl_id: ACCEPT_FLAG if
+ * the target's aux entry has a non-zero accept field, ACCEL_FLAG if it has a
+ * non-zero accel_offset. */
+static
+u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
+    const mstate_aux *target = getAux64(nfa, target_impl_id);
+
+    const u16 accept_bit = target->accept ? ACCEPT_FLAG : 0;
+    const u16 accel_bit = target->accel_offset ? ACCEL_FLAG : 0;
+
+    return (u16)(accept_bit | accel_bit);
+}
+/* Fills the 16-bit successor table that follows the NFA header and the
+ * mcsheng64 struct.
+ *
+ * Only states for which info.is_normal() holds get a row; the row index is
+ * the state's impl id minus sheng_end and rows are (1 << alphaShift) entries
+ * apart. Each entry holds the successor's impl id OR'd with the flags from
+ * get_edge_flags64(). sherman_base is only used in an assert.
+ */
+static
+void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
+ dstate_id_t sheng_end,
+ UNUSED dstate_id_t sherman_base) {
+ u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64));
+
+ u8 alphaShift = info.getAlphaShift();
+ assert(alphaShift <= 8);
+
+ for (size_t i = 0; i < info.size(); i++) {
+ if (!info.is_normal(i)) {
+ assert(info.implId(i) < sheng_end || info.is_sherman(i));
+ continue;
+ }
+
+ assert(info.implId(i) < sherman_base);
+ u16 normal_id = verify_u16(info.implId(i) - sheng_end); // row index within the table
+
+ for (size_t s = 0; s < info.impl_alpha_size; s++) {
+ dstate_id_t raw_succ = info.states[i].next[s];
+ u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];
+
+ entry = info.implId(raw_succ);
+ entry |= get_edge_flags64(nfa, entry); // entry still holds the bare impl id here
+ }
+ }
+}
+
#define MAX_SHERMAN_LIST_LEN 8
static
@@ -1017,19 +1017,19 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
assert(info.getAlphaShift() <= 8);
- // Sherman optimization
- if (info.impl_alpha_size > 16) {
- u16 total_daddy = 0;
- for (u32 i = 0; i < info.size(); i++) {
- find_better_daddy(info, i,
- is_cyclic_near(info.raw, info.raw.start_anchored),
- grey);
- total_daddy += info.extra[i].daddytaken;
- }
-
- DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
- info.size() * info.impl_alpha_size, info.size(),
- info.impl_alpha_size);
+ // Sherman optimization
+ if (info.impl_alpha_size > 16) {
+ u16 total_daddy = 0;
+ for (u32 i = 0; i < info.size(); i++) {
+ find_better_daddy(info, i,
+ is_cyclic_near(info.raw, info.raw.start_anchored),
+ grey);
+ total_daddy += info.extra[i].daddytaken;
+ }
+
+ DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
+ info.size() * info.impl_alpha_size, info.size(),
+ info.impl_alpha_size);
}
u16 sherman_limit;
@@ -1110,160 +1110,160 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
}
static
-void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
- char *nfa_base = (char *)nfa;
- mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
- char *sherman_table = nfa_base + m->sherman_offset;
-
- assert(ISALIGNED_16(sherman_table));
- for (size_t i = 0; i < info.size(); i++) {
- if (!info.is_sherman(i)) {
- continue;
- }
- u16 fs = verify_u16(info.implId(i));
- DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);
-
- assert(fs >= sherman_limit);
-
- char *curr_sherman_entry
- = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
- assert(curr_sherman_entry <= nfa_base + m->length);
-
- u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
- assert(len <= 9);
- dstate_id_t d = info.states[i].daddy;
-
- *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
- *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
- *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
- u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
-
- for (u16 s = 0; s < info.impl_alpha_size; s++) {
- if (info.states[i].next[s] != info.states[d].next[s]) {
- *(chars++) = (u8)s;
- }
- }
-
- u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
- for (u16 s = 0; s < info.impl_alpha_size; s++) {
- if (info.states[i].next[s] != info.states[d].next[s]) {
- DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
- info.implId(d),
- info.implId(info.states[i].next[s]));
- u16 entry_val = info.implId(info.states[i].next[s]);
- entry_val |= get_edge_flags64(nfa, entry_val);
- unaligned_store_u16((u8 *)states++, entry_val);
- }
- }
- }
-}
-
-static
-bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
- const map<dstate_id_t, AccelScheme>&accel_escape_info,
- const Grey &grey) {
- DEBUG_PRINTF("building mcsheng 64-16\n");
-
- vector<u32> reports; /* index in ri for the appropriate report list */
- vector<u32> reports_eod; /* as above */
- ReportID arb;
- u8 single;
-
- assert(info.getAlphaShift() <= 8);
-
- // Sherman optimization
- if (info.impl_alpha_size > 16) {
- u16 total_daddy = 0;
- for (u32 i = 0; i < info.size(); i++) {
- find_better_daddy(info, i,
- is_cyclic_near(info.raw, info.raw.start_anchored),
- grey);
- total_daddy += info.extra[i].daddytaken;
- }
-
- DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
- info.size() * info.impl_alpha_size, info.size(),
- info.impl_alpha_size);
- }
-
- u16 sherman_limit;
- if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
- DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
- info.size());
- return nullptr;
- }
- u16 count_real_states = sherman_limit - sheng_end;
-
- auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
-
- size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
- * count_real_states;
-
- size_t aux_size = sizeof(mstate_aux) * info.size();
-
- size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
- size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
- size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
- + ri->getReportListSize(), 32);
- size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
- size_t sherman_size = calcShermanRegionSize(info);
-
- size_t total_size = sherman_offset + sherman_size;
-
- accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
-
- auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
- mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
-
- populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
- accel_escape_info.size(), arb, single, nfa.get());
- createShuffleMasks64(m, info, sheng_end, accel_escape_info);
-
- /* copy in the mc header information */
- m->sherman_offset = sherman_offset;
- m->sherman_end = total_size;
- m->sherman_limit = sherman_limit;
-
- DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
- count_real_states, info.size());
-
- fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
- sherman_offset - sizeof(NFA), reports, reports_eod,
- aux_offset + aux_size, *ri);
-
- fill_in_succ_table_64_16(nfa.get(), info, sheng_end, sherman_limit);
-
- fill_in_sherman64(nfa.get(), info, sherman_limit);
-
- return nfa;
-}
-
-static
-void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
- dstate_id_t sheng_end) {
- u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64);
-
- u8 alphaShift = info.getAlphaShift();
- assert(alphaShift <= 8);
-
- for (size_t i = 0; i < info.size(); i++) {
- assert(!info.is_sherman(i));
- if (!info.is_normal(i)) {
- assert(info.implId(i) < sheng_end);
- continue;
- }
- u8 normal_id = verify_u8(info.implId(i) - sheng_end);
-
- for (size_t s = 0; s < info.impl_alpha_size; s++) {
- dstate_id_t raw_succ = info.states[i].next[s];
- succ_table[((size_t)normal_id << alphaShift) + s]
- = info.implId(raw_succ);
- }
- }
-}
-
-static
+void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
+ char *nfa_base = (char *)nfa;
+ mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
+ char *sherman_table = nfa_base + m->sherman_offset;
+
+ assert(ISALIGNED_16(sherman_table));
+ for (size_t i = 0; i < info.size(); i++) {
+ if (!info.is_sherman(i)) {
+ continue;
+ }
+ u16 fs = verify_u16(info.implId(i));
+ DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);
+
+ assert(fs >= sherman_limit);
+
+ char *curr_sherman_entry
+ = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
+ assert(curr_sherman_entry <= nfa_base + m->length);
+
+ u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
+ assert(len <= 9);
+ dstate_id_t d = info.states[i].daddy;
+
+ *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
+ *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
+ *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
+ u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
+
+ for (u16 s = 0; s < info.impl_alpha_size; s++) {
+ if (info.states[i].next[s] != info.states[d].next[s]) {
+ *(chars++) = (u8)s;
+ }
+ }
+
+ u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
+ for (u16 s = 0; s < info.impl_alpha_size; s++) {
+ if (info.states[i].next[s] != info.states[d].next[s]) {
+ DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
+ info.implId(d),
+ info.implId(info.states[i].next[s]));
+ u16 entry_val = info.implId(info.states[i].next[s]);
+ entry_val |= get_edge_flags64(nfa, entry_val);
+ unaligned_store_u16((u8 *)states++, entry_val);
+ }
+ }
+ }
+}
+
+static
+bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme>&accel_escape_info,
+ const Grey &grey) {
+ DEBUG_PRINTF("building mcsheng 64-16\n");
+
+ vector<u32> reports; /* index in ri for the appropriate report list */
+ vector<u32> reports_eod; /* as above */
+ ReportID arb;
+ u8 single;
+
+ assert(info.getAlphaShift() <= 8);
+
+ // Sherman optimization
+ if (info.impl_alpha_size > 16) {
+ u16 total_daddy = 0;
+ for (u32 i = 0; i < info.size(); i++) {
+ find_better_daddy(info, i,
+ is_cyclic_near(info.raw, info.raw.start_anchored),
+ grey);
+ total_daddy += info.extra[i].daddytaken;
+ }
+
+ DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
+ info.size() * info.impl_alpha_size, info.size(),
+ info.impl_alpha_size);
+ }
+
+ u16 sherman_limit;
+ if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
+ DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
+ info.size());
+ return nullptr;
+ }
+ u16 count_real_states = sherman_limit - sheng_end;
+
+ auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
+
+ size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
+ * count_real_states;
+
+ size_t aux_size = sizeof(mstate_aux) * info.size();
+
+ size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
+ size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
+ size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ + ri->getReportListSize(), 32);
+ size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
+ size_t sherman_size = calcShermanRegionSize(info);
+
+ size_t total_size = sherman_offset + sherman_size;
+
+ accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
+ auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
+ mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
+
+ populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
+ accel_escape_info.size(), arb, single, nfa.get());
+ createShuffleMasks64(m, info, sheng_end, accel_escape_info);
+
+ /* copy in the mc header information */
+ m->sherman_offset = sherman_offset;
+ m->sherman_end = total_size;
+ m->sherman_limit = sherman_limit;
+
+ DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
+ count_real_states, info.size());
+
+ fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
+ sherman_offset - sizeof(NFA), reports, reports_eod,
+ aux_offset + aux_size, *ri);
+
+ fill_in_succ_table_64_16(nfa.get(), info, sheng_end, sherman_limit);
+
+ fill_in_sherman64(nfa.get(), info, sherman_limit);
+
+ return nfa;
+}
+
+static
+void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
+ dstate_id_t sheng_end) {
+ u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64);
+
+ u8 alphaShift = info.getAlphaShift();
+ assert(alphaShift <= 8);
+
+ for (size_t i = 0; i < info.size(); i++) {
+ assert(!info.is_sherman(i));
+ if (!info.is_normal(i)) {
+ assert(info.implId(i) < sheng_end);
+ continue;
+ }
+ u8 normal_id = verify_u8(info.implId(i) - sheng_end);
+
+ for (size_t s = 0; s < info.impl_alpha_size; s++) {
+ dstate_id_t raw_succ = info.states[i].next[s];
+ succ_table[((size_t)normal_id << alphaShift) + s]
+ = info.implId(raw_succ);
+ }
+ }
+}
+
+static
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
const map<dstate_id_t, AccelScheme> &accel_escape_info,
u16 *accel_limit, u16 *accept_limit) {
@@ -1360,58 +1360,58 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
return nfa;
}
-static
-bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
- const map<dstate_id_t, AccelScheme> &accel_escape_info) {
- DEBUG_PRINTF("building mcsheng 64-8\n");
-
- vector<u32> reports;
- vector<u32> reports_eod;
- ReportID arb;
- u8 single;
-
- auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
-
- size_t normal_count = info.size() - sheng_end;
-
- size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
- size_t aux_size = sizeof(mstate_aux) * info.size();
- size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
- size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
- size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
- + ri->getReportListSize(), 32);
- size_t total_size = accel_offset + accel_size;
-
- DEBUG_PRINTF("aux_size %zu\n", aux_size);
- DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
- DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
- DEBUG_PRINTF("accel_size %zu\n", accel_size);
- DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
- DEBUG_PRINTF("total_size %zu\n", total_size);
-
- accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
-
- auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
- mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
-
- allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
- &m->accept_limit_8);
-
- populateBasicInfo64(sizeof(u8), info, total_size, aux_offset, accel_offset,
- accel_escape_info.size(), arb, single, nfa.get());
- createShuffleMasks64(m, info, sheng_end, accel_escape_info);
-
- fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
- total_size - sizeof(NFA), reports, reports_eod,
- aux_offset + aux_size, *ri);
-
- fill_in_succ_table_64_8(nfa.get(), info, sheng_end);
- DEBUG_PRINTF("rl size %zu\n", ri->size());
-
- return nfa;
-}
-
+static
+bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info) {
+ DEBUG_PRINTF("building mcsheng 64-8\n");
+
+ vector<u32> reports;
+ vector<u32> reports_eod;
+ ReportID arb;
+ u8 single;
+
+ auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
+
+ size_t normal_count = info.size() - sheng_end;
+
+ size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
+ size_t aux_size = sizeof(mstate_aux) * info.size();
+ size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
+ size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
+ size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ + ri->getReportListSize(), 32);
+ size_t total_size = accel_offset + accel_size;
+
+ DEBUG_PRINTF("aux_size %zu\n", aux_size);
+ DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
+ DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
+ DEBUG_PRINTF("accel_size %zu\n", accel_size);
+ DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
+ DEBUG_PRINTF("total_size %zu\n", total_size);
+
+ accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
+ auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
+ mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
+
+ allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
+ &m->accept_limit_8);
+
+ populateBasicInfo64(sizeof(u8), info, total_size, aux_offset, accel_offset,
+ accel_escape_info.size(), arb, single, nfa.get());
+ createShuffleMasks64(m, info, sheng_end, accel_escape_info);
+
+ fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
+ total_size - sizeof(NFA), reports, reports_eod,
+ aux_offset + aux_size, *ri);
+
+ fill_in_succ_table_64_8(nfa.get(), info, sheng_end);
+ DEBUG_PRINTF("rl size %zu\n", ri->size());
+
+ return nfa;
+}
+
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm) {
if (!cc.grey.allowMcSheng) {
@@ -1431,16 +1431,16 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
map<dstate_id_t, AccelScheme> accel_escape_info
= info.strat.getAccelInfo(cc.grey);
- auto old_states = info.states;
- dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
+ auto old_states = info.states;
+ dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
if (sheng_end <= DEAD_STATE + 1) {
- info.states = old_states;
+ info.states = old_states;
return nullptr;
}
bytecode_ptr<NFA> nfa;
-
+
if (!using8bit) {
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
} else {
@@ -1448,7 +1448,67 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
}
if (!nfa) {
- info.states = old_states;
+ info.states = old_states;
+ return nfa;
+ }
+
+ if (has_eod_reports) {
+ nfa->flags |= NFA_ACCEPTS_EOD;
+ }
+
+ DEBUG_PRINTF("compile done\n");
+ return nfa;
+}
+
+bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm) {
+ if (!cc.grey.allowMcSheng) {
+ return nullptr;
+ }
+
+ if (!cc.target_info.has_avx512vbmi()) {
+ DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
+ return nullptr;
+ }
+
+ mcclellan_build_strat mbs(raw, rm, false);
+ dfa_info info(mbs);
+ bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
+
+ if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
+ * mode with our semantics */
+ raw.stripExtraEodReports();
+ }
+
+ bool has_eod_reports = raw.hasEodReports();
+
+ map<dstate_id_t, AccelScheme> accel_escape_info
+ = info.strat.getAccelInfo(cc.grey);
+ bool using64state = false; /*default flag*/
+ dstate_id_t sheng_end64;
+ sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
+
+ if (sheng_end64 <= DEAD_STATE + 1) {
+ return nullptr;
+ } else {
+ using64state = true;
+ }
+
+ bytecode_ptr<NFA> nfa;
+
+ if (using64state) {
+ assert((sheng_end64 > 17) && (sheng_end64 <= 65));
+ if (!using8bit) {
+ nfa = mcsheng64Compile16(info, sheng_end64, accel_escape_info, cc.grey);
+ } else {
+ assert(using8bit);
+ nfa = mcsheng64Compile8(info, sheng_end64, accel_escape_info);
+ assert(nfa);
+ assert(nfa->type == MCSHENG_64_NFA_8);
+ }
+ }
+
+ if (!nfa) {
return nfa;
}
@@ -1460,66 +1520,6 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
return nfa;
}
-bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm) {
- if (!cc.grey.allowMcSheng) {
- return nullptr;
- }
-
- if (!cc.target_info.has_avx512vbmi()) {
- DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
- return nullptr;
- }
-
- mcclellan_build_strat mbs(raw, rm, false);
- dfa_info info(mbs);
- bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
-
- if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
- * mode with our semantics */
- raw.stripExtraEodReports();
- }
-
- bool has_eod_reports = raw.hasEodReports();
-
- map<dstate_id_t, AccelScheme> accel_escape_info
- = info.strat.getAccelInfo(cc.grey);
- bool using64state = false; /*default flag*/
- dstate_id_t sheng_end64;
- sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
-
- if (sheng_end64 <= DEAD_STATE + 1) {
- return nullptr;
- } else {
- using64state = true;
- }
-
- bytecode_ptr<NFA> nfa;
-
- if (using64state) {
- assert((sheng_end64 > 17) && (sheng_end64 <= 65));
- if (!using8bit) {
- nfa = mcsheng64Compile16(info, sheng_end64, accel_escape_info, cc.grey);
- } else {
- assert(using8bit);
- nfa = mcsheng64Compile8(info, sheng_end64, accel_escape_info);
- assert(nfa);
- assert(nfa->type == MCSHENG_64_NFA_8);
- }
- }
-
- if (!nfa) {
- return nfa;
- }
-
- if (has_eod_reports) {
- nfa->flags |= NFA_ACCEPTS_EOD;
- }
-
- DEBUG_PRINTF("compile done\n");
- return nfa;
-}
-
bool has_accel_mcsheng(const NFA *) {
return true; /* consider the sheng region as accelerated */
}
diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h
index 7de7c14568..3a79b46a23 100644
--- a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h
+++ b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h
@@ -42,8 +42,8 @@ struct raw_dfa;
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm);
-bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm);
+bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm);
bool has_accel_mcsheng(const NFA *nfa);
} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_data.c b/contrib/libs/hyperscan/src/nfa/mcsheng_data.c
index 304e383736..0701b4b313 100644
--- a/contrib/libs/hyperscan/src/nfa/mcsheng_data.c
+++ b/contrib/libs/hyperscan/src/nfa/mcsheng_data.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -41,15 +41,15 @@ const u64a mcsheng_pext_mask[8] = {
0x00ff00000000000f,
0xff0000000000000f,
};
-#if defined(HAVE_AVX512VBMI)
-const u64a mcsheng64_pext_mask[8] = {
- 0, /* dummy */
- 0x000000000000ff3f,
- 0x0000000000ff003f,
- 0x00000000ff00003f,
- 0x000000ff0000003f,
- 0x0000ff000000003f,
- 0x00ff00000000003f,
- 0xff0000000000003f,
-};
-#endif
+#if defined(HAVE_AVX512VBMI)
+const u64a mcsheng64_pext_mask[8] = {
+ 0, /* dummy */
+ 0x000000000000ff3f,
+ 0x0000000000ff003f,
+ 0x00000000ff00003f,
+ 0x000000ff0000003f,
+ 0x0000ff000000003f,
+ 0x00ff00000000003f,
+ 0xff0000000000003f,
+};
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h b/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h
index 646229709d..d985574624 100644
--- a/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -84,7 +84,7 @@ struct mcsheng {
u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
- u32 accel_offset; /**< offset of accel structures from start of McClellan */
+ u32 accel_offset; /**< offset of accel structures from start of McClellan */
m128 sheng_masks[N_CHARS];
};
@@ -92,33 +92,33 @@ struct mcsheng {
* representing the data from a u64a. */
extern const u64a mcsheng_pext_mask[8];
-struct mcsheng64 {
- u16 state_count; /**< total number of states */
- u32 length; /**< length of dfa in bytes */
- u16 start_anchored; /**< anchored start state */
- u16 start_floating; /**< floating start state */
- u32 aux_offset; /**< offset of the aux structures relative to the start of
- * the nfa structure */
- u32 sherman_offset; /**< offset of array of sherman state offsets the
- * state_info structures relative to the start of the
- * nfa structure */
- u32 sherman_end; /**< offset of the end of the state_info structures
- * relative to the start of the nfa structure */
- u16 sheng_end; /**< first non-sheng state */
- u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
- * internal sheng ids */
- u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
- u16 accept_limit_8; /**< 8 bit, lowest accept state */
- u16 sherman_limit; /**< lowest sherman state */
- u8 alphaShift;
- u8 flags;
- u8 has_accel; /**< 1 iff there are any accel plans */
- u8 remap[256]; /**< remaps characters to a smaller alphabet */
- ReportID arb_report; /**< one of the accepts that this dfa may raise */
- u32 accel_offset; /**< offset of accel structures from start of McClellan */
- m512 sheng_succ_masks[N_CHARS];
-};
-
-extern const u64a mcsheng64_pext_mask[8];
-
+struct mcsheng64 {
+ u16 state_count; /**< total number of states */
+ u32 length; /**< length of dfa in bytes */
+ u16 start_anchored; /**< anchored start state */
+ u16 start_floating; /**< floating start state */
+ u32 aux_offset; /**< offset of the aux structures relative to the start of
+ * the nfa structure */
+ u32 sherman_offset; /**< offset of array of sherman state offsets the
+ * state_info structures relative to the start of the
+ * nfa structure */
+ u32 sherman_end; /**< offset of the end of the state_info structures
+ * relative to the start of the nfa structure */
+ u16 sheng_end; /**< first non-sheng state */
+ u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
+ * internal sheng ids */
+ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
+ u16 accept_limit_8; /**< 8 bit, lowest accept state */
+ u16 sherman_limit; /**< lowest sherman state */
+ u8 alphaShift;
+ u8 flags;
+ u8 has_accel; /**< 1 iff there are any accel plans */
+ u8 remap[256]; /**< remaps characters to a smaller alphabet */
+ ReportID arb_report; /**< one of the accepts that this dfa may raise */
+ u32 accel_offset; /**< offset of accel structures from start of McClellan */
+ m512 sheng_succ_masks[N_CHARS];
+};
+
+extern const u64a mcsheng64_pext_mask[8];
+
#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c
index 7de11f3e97..75cac4b481 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c
+++ b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -76,10 +76,10 @@
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
- DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
- DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
- DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \
- DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
+ DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
+ DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
+ DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \
+ DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
default: \
assert(0); \
}
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp
index 2645cdefab..47153163e9 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp
+++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -430,65 +430,65 @@ const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts =
const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
#endif
-template<> struct NFATraits<SHENG_NFA_32> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 1;
- static const nfa_dispatch_fn has_accel;
- static const nfa_dispatch_fn has_repeats;
- static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
-const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_accel = has_accel_sheng;
-const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<SHENG_NFA_32>::name = "Sheng 32";
-#endif
-
-template<> struct NFATraits<SHENG_NFA_64> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 1;
- static const nfa_dispatch_fn has_accel;
- static const nfa_dispatch_fn has_repeats;
- static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
-const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_accel = has_accel_sheng;
-const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64";
-#endif
-
-template<> struct NFATraits<MCSHENG_64_NFA_8> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 1;
- static const nfa_dispatch_fn has_accel;
- static const nfa_dispatch_fn has_repeats;
- static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
-const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_accel = has_accel_mcsheng;
-const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<MCSHENG_64_NFA_8>::name = "Shengy64 McShengFace 8";
-#endif
-
-template<> struct NFATraits<MCSHENG_64_NFA_16> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 2;
- static const nfa_dispatch_fn has_accel;
- static const nfa_dispatch_fn has_repeats;
- static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
-const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_accel = has_accel_mcsheng;
-const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats = dispatch_false;
-const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16";
-#endif
+template<> struct NFATraits<SHENG_NFA_32> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 1;
+ static const nfa_dispatch_fn has_accel;
+ static const nfa_dispatch_fn has_repeats;
+ static const nfa_dispatch_fn has_repeats_other_than_firsts;
+};
+const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_accel = has_accel_sheng;
+const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats_other_than_firsts = dispatch_false;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<SHENG_NFA_32>::name = "Sheng 32";
+#endif
+
+template<> struct NFATraits<SHENG_NFA_64> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 1;
+ static const nfa_dispatch_fn has_accel;
+ static const nfa_dispatch_fn has_repeats;
+ static const nfa_dispatch_fn has_repeats_other_than_firsts;
+};
+const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_accel = has_accel_sheng;
+const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats_other_than_firsts = dispatch_false;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64";
+#endif
+
+template<> struct NFATraits<MCSHENG_64_NFA_8> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 1;
+ static const nfa_dispatch_fn has_accel;
+ static const nfa_dispatch_fn has_repeats;
+ static const nfa_dispatch_fn has_repeats_other_than_firsts;
+};
+const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_accel = has_accel_mcsheng;
+const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<MCSHENG_64_NFA_8>::name = "Shengy64 McShengFace 8";
+#endif
+
+template<> struct NFATraits<MCSHENG_64_NFA_16> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 2;
+ static const nfa_dispatch_fn has_accel;
+ static const nfa_dispatch_fn has_repeats;
+ static const nfa_dispatch_fn has_repeats_other_than_firsts;
+};
+const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_accel = has_accel_mcsheng;
+const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16";
+#endif
} // namespace
#if defined(DUMP_SUPPORT)
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h
index 15a30becc9..ee7a309494 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_internal.h b/contrib/libs/hyperscan/src/nfa/nfa_internal.h
index 46dbbecacc..ad27e28b14 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -72,10 +72,10 @@ enum NFAEngineType {
TAMARAMA_NFA, /**< magic nfa container */
MCSHENG_NFA_8, /**< magic pseudo nfa */
MCSHENG_NFA_16, /**< magic pseudo nfa */
- SHENG_NFA_32, /**< magic pseudo nfa */
- SHENG_NFA_64, /**< magic pseudo nfa */
- MCSHENG_64_NFA_8, /**< magic pseudo nfa */
- MCSHENG_64_NFA_16, /**< magic pseudo nfa */
+ SHENG_NFA_32, /**< magic pseudo nfa */
+ SHENG_NFA_64, /**< magic pseudo nfa */
+ MCSHENG_64_NFA_8, /**< magic pseudo nfa */
+ MCSHENG_64_NFA_16, /**< magic pseudo nfa */
/** \brief bogus NFA - not used */
INVALID_NFA
};
@@ -152,8 +152,8 @@ static really_inline int isMcClellanType(u8 t) {
/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
* DFA. */
static really_inline int isShengMcClellanType(u8 t) {
- return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 ||
- t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16;
+ return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 ||
+ t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16;
}
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
@@ -162,25 +162,25 @@ static really_inline int isGoughType(u8 t) {
}
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
-static really_inline int isSheng16Type(u8 t) {
+static really_inline int isSheng16Type(u8 t) {
return t == SHENG_NFA;
}
-/** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */
-static really_inline int isSheng32Type(u8 t) {
- return t == SHENG_NFA_32;
-}
-
-/** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */
-static really_inline int isSheng64Type(u8 t) {
- return t == SHENG_NFA_64;
-}
-
-/** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */
-static really_inline int isShengType(u8 t) {
- return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64;
-}
-
+/** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */
+static really_inline int isSheng32Type(u8 t) {
+ return t == SHENG_NFA_32;
+}
+
+/** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */
+static really_inline int isSheng64Type(u8 t) {
+ return t == SHENG_NFA_64;
+}
+
+/** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */
+static really_inline int isShengType(u8 t) {
+ return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64;
+}
+
/**
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
* Sheng DFA.
diff --git a/contrib/libs/hyperscan/src/nfa/sheng.c b/contrib/libs/hyperscan/src/nfa/sheng.c
index 7673131501..3f36e21891 100644
--- a/contrib/libs/hyperscan/src/nfa/sheng.c
+++ b/contrib/libs/hyperscan/src/nfa/sheng.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -154,205 +154,205 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING; /* continue execution */
}
-#if defined(HAVE_AVX512VBMI)
-// Sheng32
-static really_inline
-const struct sheng32 *get_sheng32(const struct NFA *n) {
- return (const struct sheng32 *)getImplNfa(n);
-}
-
-static really_inline
-const struct sstate_aux *get_aux32(const struct sheng32 *sh, u8 id) {
- u32 offset = sh->aux_offset - sizeof(struct NFA) +
- (id & SHENG32_STATE_MASK) * sizeof(struct sstate_aux);
- DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
- id & SHENG32_STATE_MASK, (u64a)offset + sizeof(struct NFA));
- return (const struct sstate_aux *)((const char *) sh + offset);
-}
-
-static really_inline
-const union AccelAux *get_accel32(const struct sheng32 *sh, u8 id) {
- const struct sstate_aux *saux = get_aux32(sh, id);
- DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
- const union AccelAux *aux = (const union AccelAux *)
- ((const char *)sh + saux->accel - sizeof(struct NFA));
- return aux;
-}
-
-static really_inline
-const struct report_list *get_rl32(const struct sheng32 *sh,
- const struct sstate_aux *aux) {
- DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
- return (const struct report_list *)
- ((const char *)sh + aux->accept - sizeof(struct NFA));
-}
-
-static really_inline
-const struct report_list *get_eod_rl32(const struct sheng32 *sh,
- const struct sstate_aux *aux) {
- DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
- return (const struct report_list *)
- ((const char *)sh + aux->accept_eod - sizeof(struct NFA));
-}
-
-static really_inline
-char sheng32HasAccept(const struct sheng32 *sh, const struct sstate_aux *aux,
- ReportID report) {
- assert(sh && aux);
-
- const struct report_list *rl = get_rl32(sh, aux);
- assert(ISALIGNED_N(rl, 4));
-
- DEBUG_PRINTF("report list has %u entries\n", rl->count);
-
- for (u32 i = 0; i < rl->count; i++) {
- if (rl->report[i] == report) {
- DEBUG_PRINTF("reporting %u\n", rl->report[i]);
- return 1;
- }
- }
-
- return 0;
-}
-
-static really_inline
-char fireReports32(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
- const u8 state, u64a loc, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, char eod) {
- DEBUG_PRINTF("reporting matches @ %llu\n", loc);
-
- if (!eod && state == *cached_accept_state) {
- DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
- if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
- const struct sstate_aux *aux = get_aux32(sh, state);
- const struct report_list *rl = eod ? get_eod_rl32(sh, aux) :
- get_rl32(sh, aux);
- assert(ISALIGNED(rl));
-
- DEBUG_PRINTF("report list has %u entries\n", rl->count);
- u32 count = rl->count;
-
- if (!eod && count == 1) {
- *cached_accept_state = state;
- *cached_accept_id = rl->report[0];
-
- DEBUG_PRINTF("reporting %u\n", rl->report[0]);
- if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- for (u32 i = 0; i < count; i++) {
- DEBUG_PRINTF("reporting %u\n", rl->report[i]);
- if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
- }
- return MO_CONTINUE_MATCHING; /* continue execution */
-}
-
-// Sheng64
-static really_inline
-const struct sheng64 *get_sheng64(const struct NFA *n) {
- return (const struct sheng64 *)getImplNfa(n);
-}
-
-static really_inline
-const struct sstate_aux *get_aux64(const struct sheng64 *sh, u8 id) {
- u32 offset = sh->aux_offset - sizeof(struct NFA) +
- (id & SHENG64_STATE_MASK) * sizeof(struct sstate_aux);
- DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
- id & SHENG64_STATE_MASK, (u64a)offset + sizeof(struct NFA));
- return (const struct sstate_aux *)((const char *) sh + offset);
-}
-
-static really_inline
-const struct report_list *get_rl64(const struct sheng64 *sh,
- const struct sstate_aux *aux) {
- DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
- return (const struct report_list *)
- ((const char *)sh + aux->accept - sizeof(struct NFA));
-}
-
-static really_inline
-const struct report_list *get_eod_rl64(const struct sheng64 *sh,
- const struct sstate_aux *aux) {
- DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
- return (const struct report_list *)
- ((const char *)sh + aux->accept_eod - sizeof(struct NFA));
-}
-
-static really_inline
-char sheng64HasAccept(const struct sheng64 *sh, const struct sstate_aux *aux,
- ReportID report) {
- assert(sh && aux);
-
- const struct report_list *rl = get_rl64(sh, aux);
- assert(ISALIGNED_N(rl, 4));
-
- DEBUG_PRINTF("report list has %u entries\n", rl->count);
-
- for (u32 i = 0; i < rl->count; i++) {
- if (rl->report[i] == report) {
- DEBUG_PRINTF("reporting %u\n", rl->report[i]);
- return 1;
- }
- }
-
- return 0;
-}
-
-static really_inline
-char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
- const u8 state, u64a loc, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, char eod) {
- DEBUG_PRINTF("reporting matches @ %llu\n", loc);
-
- if (!eod && state == *cached_accept_state) {
- DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
- if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
- const struct sstate_aux *aux = get_aux64(sh, state);
- const struct report_list *rl = eod ? get_eod_rl64(sh, aux) :
- get_rl64(sh, aux);
- assert(ISALIGNED(rl));
-
- DEBUG_PRINTF("report list has %u entries\n", rl->count);
- u32 count = rl->count;
-
- if (!eod && count == 1) {
- *cached_accept_state = state;
- *cached_accept_id = rl->report[0];
-
- DEBUG_PRINTF("reporting %u\n", rl->report[0]);
- if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- for (u32 i = 0; i < count; i++) {
- DEBUG_PRINTF("reporting %u\n", rl->report[i]);
- if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
- }
- return MO_CONTINUE_MATCHING; /* continue execution */
-}
-#endif // end of HAVE_AVX512VBMI
-
+#if defined(HAVE_AVX512VBMI)
+// Sheng32
+static really_inline
+const struct sheng32 *get_sheng32(const struct NFA *n) {
+ return (const struct sheng32 *)getImplNfa(n);
+}
+
+static really_inline
+const struct sstate_aux *get_aux32(const struct sheng32 *sh, u8 id) {
+ u32 offset = sh->aux_offset - sizeof(struct NFA) +
+ (id & SHENG32_STATE_MASK) * sizeof(struct sstate_aux);
+ DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
+ id & SHENG32_STATE_MASK, (u64a)offset + sizeof(struct NFA));
+ return (const struct sstate_aux *)((const char *) sh + offset);
+}
+
+static really_inline
+const union AccelAux *get_accel32(const struct sheng32 *sh, u8 id) {
+ const struct sstate_aux *saux = get_aux32(sh, id);
+ DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
+ const union AccelAux *aux = (const union AccelAux *)
+ ((const char *)sh + saux->accel - sizeof(struct NFA));
+ return aux;
+}
+
+static really_inline
+const struct report_list *get_rl32(const struct sheng32 *sh,
+ const struct sstate_aux *aux) {
+ DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
+ return (const struct report_list *)
+ ((const char *)sh + aux->accept - sizeof(struct NFA));
+}
+
+static really_inline
+const struct report_list *get_eod_rl32(const struct sheng32 *sh,
+ const struct sstate_aux *aux) {
+ DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
+ return (const struct report_list *)
+ ((const char *)sh + aux->accept_eod - sizeof(struct NFA));
+}
+
+static really_inline
+char sheng32HasAccept(const struct sheng32 *sh, const struct sstate_aux *aux,
+ ReportID report) {
+ assert(sh && aux);
+
+ const struct report_list *rl = get_rl32(sh, aux);
+ assert(ISALIGNED_N(rl, 4));
+
+ DEBUG_PRINTF("report list has %u entries\n", rl->count);
+
+ for (u32 i = 0; i < rl->count; i++) {
+ if (rl->report[i] == report) {
+ DEBUG_PRINTF("reporting %u\n", rl->report[i]);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static really_inline
+char fireReports32(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
+ const u8 state, u64a loc, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, char eod) {
+ DEBUG_PRINTF("reporting matches @ %llu\n", loc);
+
+ if (!eod && state == *cached_accept_state) {
+ DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
+ if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+ const struct sstate_aux *aux = get_aux32(sh, state);
+ const struct report_list *rl = eod ? get_eod_rl32(sh, aux) :
+ get_rl32(sh, aux);
+ assert(ISALIGNED(rl));
+
+ DEBUG_PRINTF("report list has %u entries\n", rl->count);
+ u32 count = rl->count;
+
+ if (!eod && count == 1) {
+ *cached_accept_state = state;
+ *cached_accept_id = rl->report[0];
+
+ DEBUG_PRINTF("reporting %u\n", rl->report[0]);
+ if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ for (u32 i = 0; i < count; i++) {
+ DEBUG_PRINTF("reporting %u\n", rl->report[i]);
+ if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+ }
+ return MO_CONTINUE_MATCHING; /* continue execution */
+}
+
+// Sheng64
+static really_inline
+const struct sheng64 *get_sheng64(const struct NFA *n) {
+ return (const struct sheng64 *)getImplNfa(n);
+}
+
+static really_inline
+const struct sstate_aux *get_aux64(const struct sheng64 *sh, u8 id) {
+ u32 offset = sh->aux_offset - sizeof(struct NFA) +
+ (id & SHENG64_STATE_MASK) * sizeof(struct sstate_aux);
+ DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
+ id & SHENG64_STATE_MASK, (u64a)offset + sizeof(struct NFA));
+ return (const struct sstate_aux *)((const char *) sh + offset);
+}
+
+static really_inline
+const struct report_list *get_rl64(const struct sheng64 *sh,
+ const struct sstate_aux *aux) {
+ DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
+ return (const struct report_list *)
+ ((const char *)sh + aux->accept - sizeof(struct NFA));
+}
+
+static really_inline
+const struct report_list *get_eod_rl64(const struct sheng64 *sh,
+ const struct sstate_aux *aux) {
+ DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
+ return (const struct report_list *)
+ ((const char *)sh + aux->accept_eod - sizeof(struct NFA));
+}
+
+static really_inline
+char sheng64HasAccept(const struct sheng64 *sh, const struct sstate_aux *aux,
+ ReportID report) {
+ assert(sh && aux);
+
+ const struct report_list *rl = get_rl64(sh, aux);
+ assert(ISALIGNED_N(rl, 4));
+
+ DEBUG_PRINTF("report list has %u entries\n", rl->count);
+
+ for (u32 i = 0; i < rl->count; i++) {
+ if (rl->report[i] == report) {
+ DEBUG_PRINTF("reporting %u\n", rl->report[i]);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static really_inline
+char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
+ const u8 state, u64a loc, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, char eod) {
+ DEBUG_PRINTF("reporting matches @ %llu\n", loc);
+
+ if (!eod && state == *cached_accept_state) {
+ DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
+ if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+ const struct sstate_aux *aux = get_aux64(sh, state);
+ const struct report_list *rl = eod ? get_eod_rl64(sh, aux) :
+ get_rl64(sh, aux);
+ assert(ISALIGNED(rl));
+
+ DEBUG_PRINTF("report list has %u entries\n", rl->count);
+ u32 count = rl->count;
+
+ if (!eod && count == 1) {
+ *cached_accept_state = state;
+ *cached_accept_id = rl->report[0];
+
+ DEBUG_PRINTF("reporting %u\n", rl->report[0]);
+ if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ for (u32 i = 0; i < count; i++) {
+ DEBUG_PRINTF("reporting %u\n", rl->report[i]);
+ if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+ }
+ return MO_CONTINUE_MATCHING; /* continue execution */
+}
+#endif // end of HAVE_AVX512VBMI
+
/* include Sheng function definitions */
#include "sheng_defs.h"
@@ -827,7 +827,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
fireSingleReport(cb, ctxt, sh->report, offset);
} else {
fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
- &cached_report_id, 0);
+ &cached_report_id, 0);
}
}
@@ -870,1008 +870,1008 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
*(u8 *)dest = *(const u8 *)src;
return 0;
}
-
-#if defined(HAVE_AVX512VBMI)
-// Sheng32
-static really_inline
-char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
- u64a offset, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, const u8 *cur_buf,
- const u8 *start, const u8 *end, u8 can_die,
- u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
- DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
- (u64a)(end - start), offset);
- DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
- (s64a)(end - cur_buf));
- DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
- !!has_accel, !!single);
- int rv;
- /* scan and report all matches */
- if (can_die) {
- if (has_accel) {
- rv = sheng32_4_coda(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- } else {
- rv = sheng32_4_cod(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- }
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- rv = sheng32_cod(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned);
- } else {
- if (has_accel) {
- rv = sheng32_4_coa(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- } else {
- rv = sheng32_4_co(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- }
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- rv = sheng32_co(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned);
- }
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- return MO_ALIVE;
-}
-
-static really_inline
-void runSheng32Nm(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
- u64a offset, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, const u8 *cur_buf,
- const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
- u8 single, const u8 **scanned, u8 *state) {
- DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
- (u64a)(end - start), offset);
- DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
- (s64a)(end - cur_buf));
- DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
- !!has_accel, !!single);
- /* just scan the buffer */
- if (can_die) {
- if (has_accel) {
- sheng32_4_nmda(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- } else {
- sheng32_4_nmd(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- }
- sheng32_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
- single, offset, cur_buf, *scanned, end, scanned);
- } else {
- sheng32_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
- single, offset, cur_buf, start, end, scanned);
- sheng32_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
- single, offset, cur_buf, *scanned, end, scanned);
- }
-}
-
-static really_inline
-char runSheng32Sam(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
- u64a offset, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, const u8 *cur_buf,
- const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
- u8 single, const u8 **scanned, u8 *state) {
- DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
- (u64a)(end - start), offset);
- DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
- (s64a)(end - cur_buf));
- DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
- !!has_accel, !!single);
- int rv;
- /* scan until first match */
- if (can_die) {
- if (has_accel) {
- rv = sheng32_4_samda(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- } else {
- rv = sheng32_4_samd(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- }
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- /* if we stopped before we expected, we found a match */
- if (rv == MO_MATCHES_PENDING) {
- return MO_MATCHES_PENDING;
- }
-
- rv = sheng32_samd(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned);
- } else {
- if (has_accel) {
- rv = sheng32_4_sama(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- } else {
- rv = sheng32_4_sam(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- }
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- /* if we stopped before we expected, we found a match */
- if (rv == MO_MATCHES_PENDING) {
- return MO_MATCHES_PENDING;
- }
-
- rv = sheng32_sam(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned);
- }
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- /* if we stopped before we expected, we found a match */
- if (rv == MO_MATCHES_PENDING) {
- return MO_MATCHES_PENDING;
- }
- return MO_ALIVE;
-}
-
-static never_inline
-char runSheng32(const struct sheng32 *sh, struct mq *q, s64a b_end,
- enum MatchMode mode) {
- u8 state = *(u8 *)q->state;
- u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
- u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
- u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
-
- u8 cached_accept_state = 0;
- ReportID cached_accept_id = 0;
-
- DEBUG_PRINTF("starting Sheng32 execution in state %u\n",
- state & SHENG32_STATE_MASK);
-
- if (q->report_current) {
- DEBUG_PRINTF("reporting current pending matches\n");
- assert(sh);
-
- q->report_current = 0;
-
- int rv;
- if (single) {
- rv = fireSingleReport(q->cb, q->context, sh->report,
- q_cur_offset(q));
- } else {
- rv = fireReports32(sh, q->cb, q->context, state, q_cur_offset(q),
- &cached_accept_state, &cached_accept_id, 0);
- }
- if (rv == MO_HALT_MATCHING) {
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
- return MO_DEAD;
- }
-
- DEBUG_PRINTF("proceeding with matching\n");
- }
-
- assert(q_cur_type(q) == MQE_START);
- s64a start = q_cur_loc(q);
-
- DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
- mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
- mode == NO_MATCHES ? "NO MATCHES" :
- mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
-
- DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
- q_cur_type(q) == MQE_START ? "START" :
- q_cur_type(q) == MQE_TOP ? "TOP" :
- q_cur_type(q) == MQE_END ? "END" : "???");
-
- const u8* cur_buf;
- if (start < 0) {
- DEBUG_PRINTF("negative location, scanning history\n");
- DEBUG_PRINTF("min location: %zd\n", -q->hlength);
- cur_buf = q->history + q->hlength;
- } else {
- DEBUG_PRINTF("positive location, scanning buffer\n");
- DEBUG_PRINTF("max location: %lli\n", b_end);
- cur_buf = q->buffer;
- }
-
- /* if we our queue event is past our end */
- if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
- DEBUG_PRINTF("current location past buffer end\n");
- DEBUG_PRINTF("setting q location to %llu\n", b_end);
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
- q->items[q->cur].location = b_end;
- return MO_ALIVE;
- }
-
- q->cur++;
-
- s64a cur_start = start;
-
- while (1) {
- DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
- q_cur_type(q) == MQE_START ? "START" :
- q_cur_type(q) == MQE_TOP ? "TOP" :
- q_cur_type(q) == MQE_END ? "END" : "???");
- s64a end = q_cur_loc(q);
- if (mode != NO_MATCHES) {
- end = MIN(end, b_end);
- }
- assert(end <= (s64a) q->length);
- s64a cur_end = end;
-
- /* we may cross the border between history and current buffer */
- if (cur_start < 0) {
- cur_end = MIN(0, cur_end);
- }
-
- DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
-
- /* don't scan zero length buffer */
- if (cur_start != cur_end) {
- const u8 * scanned = cur_buf;
- char rv;
-
- if (mode == NO_MATCHES) {
- runSheng32Nm(sh, q->cb, q->context, q->offset,
- &cached_accept_state, &cached_accept_id, cur_buf,
- cur_buf + cur_start, cur_buf + cur_end, can_die,
- has_accel, single, &scanned, &state);
- } else if (mode == CALLBACK_OUTPUT) {
- rv = runSheng32Cb(sh, q->cb, q->context, q->offset,
- &cached_accept_state, &cached_accept_id,
- cur_buf, cur_buf + cur_start, cur_buf + cur_end,
- can_die, has_accel, single, &scanned, &state);
- if (rv == MO_DEAD) {
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG32_STATE_MASK);
- return MO_DEAD;
- }
- } else if (mode == STOP_AT_MATCH) {
- rv = runSheng32Sam(sh, q->cb, q->context, q->offset,
- &cached_accept_state, &cached_accept_id,
- cur_buf, cur_buf + cur_start,
- cur_buf + cur_end, can_die, has_accel, single,
- &scanned, &state);
- if (rv == MO_DEAD) {
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG32_STATE_MASK);
- return rv;
- } else if (rv == MO_MATCHES_PENDING) {
- assert(q->cur);
- DEBUG_PRINTF("found a match, setting q location to %zd\n",
- scanned - cur_buf + 1);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location =
- scanned - cur_buf + 1; /* due to exiting early */
- *(u8 *)q->state = state;
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG32_STATE_MASK);
- return rv;
- }
- } else {
- assert(!"invalid scanning mode!");
- }
- assert(scanned == cur_buf + cur_end);
-
- cur_start = cur_end;
- }
-
- /* if we our queue event is past our end */
- if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
- DEBUG_PRINTF("current location past buffer end\n");
- DEBUG_PRINTF("setting q location to %llu\n", b_end);
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = b_end;
- *(u8 *)q->state = state;
- return MO_ALIVE;
- }
-
- /* crossing over into actual buffer */
- if (cur_start == 0) {
- DEBUG_PRINTF("positive location, scanning buffer\n");
- DEBUG_PRINTF("max offset: %lli\n", b_end);
- cur_buf = q->buffer;
- }
-
- /* continue scanning the same buffer */
- if (end != cur_end) {
- continue;
- }
-
- switch (q_cur_type(q)) {
- case MQE_END:
- *(u8 *)q->state = state;
- q->cur++;
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
- if (can_die) {
- return (state & SHENG32_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
- }
- return MO_ALIVE;
- case MQE_TOP:
- if (q->offset + cur_start == 0) {
- DEBUG_PRINTF("Anchored start, going to state %u\n",
- sh->anchored);
- state = sh->anchored;
- } else {
- u8 new_state = get_aux32(sh, state)->top;
- DEBUG_PRINTF("Top event %u->%u\n", state & SHENG32_STATE_MASK,
- new_state & SHENG32_STATE_MASK);
- state = new_state;
- }
- break;
- default:
- assert(!"invalid queue event");
- break;
- }
- q->cur++;
- }
-}
-
-char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context) {
- DEBUG_PRINTF("smallwrite Sheng32\n");
- assert(n->type == SHENG_NFA_32);
- const struct sheng32 *sh = getImplNfa(n);
- u8 state = sh->anchored;
- u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
- u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
- u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
- u8 cached_accept_state = 0;
- ReportID cached_accept_id = 0;
-
- /* scan and report all matches */
- int rv;
- s64a end = length;
- const u8 *scanned;
-
- rv = runSheng32Cb(sh, cb, context, offset, &cached_accept_state,
- &cached_accept_id, buffer, buffer, buffer + end, can_die,
- has_accel, single, &scanned, &state);
- if (rv == MO_DEAD) {
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG32_STATE_MASK);
- return MO_DEAD;
- }
-
- DEBUG_PRINTF("%u\n", state & SHENG32_STATE_MASK);
-
- const struct sstate_aux *aux = get_aux32(sh, state);
-
- if (aux->accept_eod) {
- DEBUG_PRINTF("Reporting EOD matches\n");
- fireReports32(sh, cb, context, state, end + offset,
- &cached_accept_state, &cached_accept_id, 1);
- }
-
- return state & SHENG32_STATE_DEAD ? MO_DEAD : MO_ALIVE;
-}
-
-char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end) {
- const struct sheng32 *sh = get_sheng32(n);
- char rv = runSheng32(sh, q, end, CALLBACK_OUTPUT);
- return rv;
-}
-
-char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end) {
- const struct sheng32 *sh = get_sheng32(n);
- char rv = runSheng32(sh, q, end, STOP_AT_MATCH);
- return rv;
-}
-
-char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report) {
- assert(q_cur_type(q) == MQE_START);
-
- const struct sheng32 *sh = get_sheng32(n);
- char rv = runSheng32(sh, q, 0 /* end */, NO_MATCHES);
-
- if (rv && nfaExecSheng32_inAccept(n, report, q)) {
- return MO_MATCHES_PENDING;
- }
- return rv;
-}
-
-char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report,
- struct mq *q) {
- assert(n && q);
-
- const struct sheng32 *sh = get_sheng32(n);
- u8 s = *(const u8 *)q->state;
- DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK));
-
- const struct sstate_aux *aux = get_aux32(sh, s);
-
- if (!aux->accept) {
- return 0;
- }
-
- return sheng32HasAccept(sh, aux, report);
-}
-
-char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q) {
- assert(n && q);
-
- const struct sheng32 *sh = get_sheng32(n);
- u8 s = *(const u8 *)q->state;
- DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK));
-
- const struct sstate_aux *aux = get_aux32(sh, s);
- return !!aux->accept;
-}
-
-char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state,
- UNUSED const char *streamState, u64a offset,
- NfaCallback cb, void *ctxt) {
- assert(nfa);
-
- const struct sheng32 *sh = get_sheng32(nfa);
- u8 s = *(const u8 *)state;
- DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG32_STATE_MASK));
-
- const struct sstate_aux *aux = get_aux32(sh, s);
-
- if (!aux->accept_eod) {
- return MO_CONTINUE_MATCHING;
- }
-
- return fireReports32(sh, cb, ctxt, s, offset, NULL, NULL, 1);
-}
-
-char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q) {
- const struct sheng32 *sh = (const struct sheng32 *)getImplNfa(n);
- NfaCallback cb = q->cb;
- void *ctxt = q->context;
- u8 s = *(u8 *)q->state;
- const struct sstate_aux *aux = get_aux32(sh, s);
- u64a offset = q_cur_offset(q);
- u8 cached_state_id = 0;
- ReportID cached_report_id = 0;
- assert(q_cur_type(q) == MQE_START);
-
- if (aux->accept) {
- if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
- fireSingleReport(cb, ctxt, sh->report, offset);
- } else {
- fireReports32(sh, cb, ctxt, s, offset, &cached_state_id,
- &cached_report_id, 0);
- }
- }
-
- return 0;
-}
-
-char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, UNUSED u8 key) { /* Writes the initial one-byte state into
-  * `state`: sh->floating when offset is nonzero, sh->anchored when it is 0.
-  * Returns nonzero if the chosen state does not have SHENG32_STATE_DEAD set,
-  * 0 if it does. `key` is not used. */
- const struct sheng32 *sh = get_sheng32(nfa);
- u8 *s = (u8 *)state;
- *s = offset ? sh->floating: sh->anchored;
- return !(*s & SHENG32_STATE_DEAD);
-}
-
-char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q) { /*
-  * Initialises the queue's one-byte state (q->state) to sh->floating.
-  * Always returns 0. */
- assert(nfa->scratchStateSize == 1);
-
- /* starting in floating state */
- const struct sheng32 *sh = get_sheng32(nfa);
- *(u8 *)q->state = sh->floating;
- DEBUG_PRINTF("starting in floating state\n");
- return 0;
-}
-
-char nfaExecSheng32_queueCompressState(UNUSED const struct NFA *nfa,
- const struct mq *q, UNUSED s64a loc) { /* Copies the single state byte from
-  * q->state to q->streamState. Always returns 0. `nfa` is referenced only by
-  * the assertions below; `loc` is not used. */
- void *dest = q->streamState;
- const void *src = q->state;
- assert(nfa->scratchStateSize == 1);
- assert(nfa->streamStateSize == 1);
- *(u8 *)dest = *(const u8 *)src;
- return 0;
-}
-
-char nfaExecSheng32_expandState(UNUSED const struct NFA *nfa, void *dest,
- const void *src, UNUSED u64a offset,
- UNUSED u8 key) { /* Copies the single state byte from `src` to `dest`.
-  * Always returns 0. `nfa` is referenced only by the assertions below;
-  * `offset` and `key` are not used. */
- assert(nfa->scratchStateSize == 1);
- assert(nfa->streamStateSize == 1);
- *(u8 *)dest = *(const u8 *)src;
- return 0;
-}
-
-// Sheng64
-static really_inline /* Callback-mode scan of [start, end) for a sheng64 engine.
-  * Picks the *_cod helpers when can_die is set and the *_co helpers otherwise.
-  * In each branch the sheng64_4_* helper runs first and the plain helper then
-  * continues from *scanned (assumed to be the position stored by the first
-  * helper; the helpers are defined outside this view).
-  * Returns MO_DEAD as soon as either helper returns MO_HALT_MATCHING,
-  * MO_ALIVE otherwise. */
-char runSheng64Cb(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
- u64a offset, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, const u8 *cur_buf,
- const u8 *start, const u8 *end, u8 can_die,
- u8 single, const u8 **scanned, u8 *state) {
- DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
- (u64a)(end - start), offset);
- DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
- (s64a)(end - cur_buf));
- DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single);
- int rv;
- /* scan and report all matches */
- if (can_die) {
- rv = sheng64_4_cod(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- rv = sheng64_cod(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned); /* resume from *scanned */
- } else {
- rv = sheng64_4_co(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- rv = sheng64_co(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned); /* resume from *scanned */
- }
- if (rv == MO_HALT_MATCHING) { /* result of the second helper */
- return MO_DEAD;
- }
- return MO_ALIVE;
-}
-
-static really_inline /* No-match-mode scan of [start, end) for a sheng64 engine.
-  * Picks the *_nmd helpers when can_die is set and the *_nm helpers otherwise;
-  * the sheng64_4_* helper runs first and the plain helper continues from
-  * *scanned. The helpers' return values are ignored and nothing is returned;
-  * results are observable only through *state and *scanned. */
-void runSheng64Nm(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
- u64a offset, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, const u8 *cur_buf,
- const u8 *start, const u8 *end, u8 can_die,
- u8 single, const u8 **scanned, u8 *state) {
- DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
- (u64a)(end - start), offset);
- DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
- (s64a)(end - cur_buf));
- DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single);
- /* just scan the buffer */
- if (can_die) {
- sheng64_4_nmd(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- sheng64_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
- single, offset, cur_buf, *scanned, end, scanned);
- } else {
- sheng64_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
- single, offset, cur_buf, start, end, scanned);
- sheng64_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
- single, offset, cur_buf, *scanned, end, scanned);
- }
-}
-
-static really_inline /* Stop-at-match scan of [start, end) for a sheng64 engine.
-  * Picks the *_samd helpers when can_die is set and the *_sam helpers
-  * otherwise; the sheng64_4_* helper runs first and the plain helper continues
-  * from *scanned.
-  * Returns MO_DEAD if a helper returns MO_HALT_MATCHING, MO_MATCHES_PENDING if
-  * a helper returns MO_MATCHES_PENDING, and MO_ALIVE otherwise. */
-char runSheng64Sam(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
- u64a offset, u8 *const cached_accept_state,
- ReportID *const cached_accept_id, const u8 *cur_buf,
- const u8 *start, const u8 *end, u8 can_die,
- u8 single, const u8 **scanned, u8 *state) {
- DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
- (u64a)(end - start), offset);
- DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
- (s64a)(end - cur_buf));
- DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single);
- int rv;
- /* scan until first match */
- if (can_die) {
- rv = sheng64_4_samd(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- /* if we stopped before we expected, we found a match */
- if (rv == MO_MATCHES_PENDING) {
- return MO_MATCHES_PENDING;
- }
-
- rv = sheng64_samd(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned); /* resume from *scanned */
- } else {
- rv = sheng64_4_sam(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- start, end, scanned);
- if (rv == MO_HALT_MATCHING) {
- return MO_DEAD;
- }
- /* if we stopped before we expected, we found a match */
- if (rv == MO_MATCHES_PENDING) {
- return MO_MATCHES_PENDING;
- }
-
- rv = sheng64_sam(state, cb, ctxt, sh, cached_accept_state,
- cached_accept_id, single, offset, cur_buf,
- *scanned, end, scanned); /* resume from *scanned */
- }
- if (rv == MO_HALT_MATCHING) { /* result of the second helper */
- return MO_DEAD;
- }
- /* if we stopped before we expected, we found a match */
- if (rv == MO_MATCHES_PENDING) {
- return MO_MATCHES_PENDING;
- }
- return MO_ALIVE;
-}
-
-static never_inline /* Queue driver for a sheng64 engine.
-  * Starting from the MQE_START item at q->cur, scans up to each following
-  * queue item's location and then applies that item (MQE_TOP or MQE_END).
-  * `mode` selects the scan helper: NO_MATCHES -> runSheng64Nm,
-  * CALLBACK_OUTPUT -> runSheng64Cb, STOP_AT_MATCH -> runSheng64Sam.
-  * Unless mode is NO_MATCHES, scanning is capped at b_end; when an item lies
-  * beyond b_end an MQE_START item at b_end is written back into the queue so
-  * that a later call can resume.
-  * Returns MO_DEAD, MO_ALIVE or (STOP_AT_MATCH only) MO_MATCHES_PENDING.
-  * q->state is written back on the MQE_END, b_end-cut and match-pending exits;
-  * the MO_DEAD exits taken after a scan helper fails do not write it. */
-char runSheng64(const struct sheng64 *sh, struct mq *q, s64a b_end,
- enum MatchMode mode) {
- u8 state = *(u8 *)q->state;
- u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
- u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
-
- u8 cached_accept_state = 0;
- ReportID cached_accept_id = 0;
-
- DEBUG_PRINTF("starting Sheng64 execution in state %u\n",
- state & SHENG64_STATE_MASK);
-
- if (q->report_current) { /* caller asked for the current state's reports first */
- DEBUG_PRINTF("reporting current pending matches\n");
- assert(sh);
-
- q->report_current = 0;
-
- int rv;
- if (single) {
- rv = fireSingleReport(q->cb, q->context, sh->report,
- q_cur_offset(q));
- } else {
- rv = fireReports64(sh, q->cb, q->context, state, q_cur_offset(q),
- &cached_accept_state, &cached_accept_id, 0);
- }
- if (rv == MO_HALT_MATCHING) {
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
- return MO_DEAD;
- }
-
- DEBUG_PRINTF("proceeding with matching\n");
- }
-
- assert(q_cur_type(q) == MQE_START);
- s64a start = q_cur_loc(q);
-
- DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
- mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
- mode == NO_MATCHES ? "NO MATCHES" :
- mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
-
- DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
- q_cur_type(q) == MQE_START ? "START" :
- q_cur_type(q) == MQE_TOP ? "TOP" :
- q_cur_type(q) == MQE_END ? "END" : "???");
-
- const u8* cur_buf;
- if (start < 0) {
- DEBUG_PRINTF("negative location, scanning history\n");
- DEBUG_PRINTF("min location: %zd\n", -q->hlength);
- cur_buf = q->history + q->hlength; /* negative locations index back from here */
- } else {
- DEBUG_PRINTF("positive location, scanning buffer\n");
- DEBUG_PRINTF("max location: %lli\n", b_end);
- cur_buf = q->buffer;
- }
-
- /* if our queue event is past our end */
- if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
- DEBUG_PRINTF("current location past buffer end\n");
- DEBUG_PRINTF("setting q location to %llu\n", b_end);
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
- q->items[q->cur].location = b_end; /* START item is kept, only moved */
- return MO_ALIVE;
- }
-
- q->cur++; /* consume the START item */
-
- s64a cur_start = start;
-
- while (1) {
- DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
- q_cur_type(q) == MQE_START ? "START" :
- q_cur_type(q) == MQE_TOP ? "TOP" :
- q_cur_type(q) == MQE_END ? "END" : "???");
- s64a end = q_cur_loc(q);
- if (mode != NO_MATCHES) {
- end = MIN(end, b_end);
- }
- assert(end <= (s64a) q->length);
- s64a cur_end = end;
-
- /* we may cross the border between history and current buffer */
- if (cur_start < 0) {
- cur_end = MIN(0, cur_end);
- }
-
- DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
-
- /* don't scan zero length buffer */
- if (cur_start != cur_end) {
- const u8 * scanned = cur_buf;
- char rv;
-
- if (mode == NO_MATCHES) {
- runSheng64Nm(sh, q->cb, q->context, q->offset,
- &cached_accept_state, &cached_accept_id, cur_buf,
- cur_buf + cur_start, cur_buf + cur_end, can_die,
- single, &scanned, &state);
- } else if (mode == CALLBACK_OUTPUT) {
- rv = runSheng64Cb(sh, q->cb, q->context, q->offset,
- &cached_accept_state, &cached_accept_id,
- cur_buf, cur_buf + cur_start, cur_buf + cur_end,
- can_die, single, &scanned, &state);
- if (rv == MO_DEAD) {
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG64_STATE_MASK);
- return MO_DEAD;
- }
- } else if (mode == STOP_AT_MATCH) {
- rv = runSheng64Sam(sh, q->cb, q->context, q->offset,
- &cached_accept_state, &cached_accept_id,
- cur_buf, cur_buf + cur_start,
- cur_buf + cur_end, can_die, single,
- &scanned, &state);
- if (rv == MO_DEAD) {
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG64_STATE_MASK);
- return rv;
- } else if (rv == MO_MATCHES_PENDING) {
- assert(q->cur);
- DEBUG_PRINTF("found a match, setting q location to %zd\n",
- scanned - cur_buf + 1);
- q->cur--; /* step back and overwrite that slot with a START item */
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location =
- scanned - cur_buf + 1; /* due to exiting early */
- *(u8 *)q->state = state;
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG64_STATE_MASK);
- return rv;
- }
- } else {
- assert(!"invalid scanning mode!");
- }
- assert(scanned == cur_buf + cur_end);
-
- cur_start = cur_end;
- }
-
- /* if our queue event is past our end */
- if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
- DEBUG_PRINTF("current location past buffer end\n");
- DEBUG_PRINTF("setting q location to %llu\n", b_end);
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
- q->cur--; /* step back and overwrite that slot with a START item at b_end */
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = b_end;
- *(u8 *)q->state = state;
- return MO_ALIVE;
- }
-
- /* crossing over into actual buffer */
- if (cur_start == 0) {
- DEBUG_PRINTF("positive location, scanning buffer\n");
- DEBUG_PRINTF("max offset: %lli\n", b_end);
- cur_buf = q->buffer;
- }
-
- /* continue scanning the same buffer */
- if (end != cur_end) { /* scan was clipped at 0; rest of this span is still pending */
- continue;
- }
-
- switch (q_cur_type(q)) {
- case MQE_END:
- *(u8 *)q->state = state;
- q->cur++;
- DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
- if (can_die) {
- return (state & SHENG64_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
- }
- return MO_ALIVE;
- case MQE_TOP:
- if (q->offset + cur_start == 0) { /* top at absolute offset 0 */
- DEBUG_PRINTF("Anchored start, going to state %u\n",
- sh->anchored);
- state = sh->anchored;
- } else {
- u8 new_state = get_aux64(sh, state)->top; /* per-state top successor */
- DEBUG_PRINTF("Top event %u->%u\n", state & SHENG64_STATE_MASK,
- new_state & SHENG64_STATE_MASK);
- state = new_state;
- }
- break;
- default:
- assert(!"invalid queue event");
- break;
- }
- q->cur++;
- }
-}
-
-char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context) { /* Block-mode entry point:
-  * scans buffer[0, length) in callback mode starting from sh->anchored, then
-  * fires the final state's reports at end + offset if its aux record has
-  * accept_eod set (return value of that call is ignored).
-  * Returns MO_DEAD if the scan reported MO_DEAD or the final state has
-  * SHENG64_STATE_DEAD set, MO_ALIVE otherwise. */
- DEBUG_PRINTF("smallwrite Sheng64\n");
- assert(n->type == SHENG_NFA_64);
- const struct sheng64 *sh = getImplNfa(n);
- u8 state = sh->anchored;
- u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
- u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
- u8 cached_accept_state = 0;
- ReportID cached_accept_id = 0;
-
- /* scan and report all matches */
- int rv;
- s64a end = length;
- const u8 *scanned; /* out-parameter for runSheng64Cb; not read afterwards here */
-
- rv = runSheng64Cb(sh, cb, context, offset, &cached_accept_state,
- &cached_accept_id, buffer, buffer, buffer + end, can_die,
- single, &scanned, &state);
- if (rv == MO_DEAD) {
- DEBUG_PRINTF("exiting in state %u\n",
- state & SHENG64_STATE_MASK);
- return MO_DEAD;
- }
-
- DEBUG_PRINTF("%u\n", state & SHENG64_STATE_MASK);
-
- const struct sstate_aux *aux = get_aux64(sh, state);
-
- if (aux->accept_eod) {
- DEBUG_PRINTF("Reporting EOD matches\n");
- fireReports64(sh, cb, context, state, end + offset,
- &cached_accept_state, &cached_accept_id, 1);
- }
-
- return state & SHENG64_STATE_DEAD ? MO_DEAD : MO_ALIVE;
-}
-
-char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end) { /*
-  * Runs the queue up to `end` in CALLBACK_OUTPUT mode and returns
-  * runSheng64()'s result unchanged. */
- const struct sheng64 *sh = get_sheng64(n);
- char rv = runSheng64(sh, q, end, CALLBACK_OUTPUT);
- return rv;
-}
-
-char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end) { /*
-  * Runs the queue up to `end` in STOP_AT_MATCH mode and returns
-  * runSheng64()'s result unchanged. */
- const struct sheng64 *sh = get_sheng64(n);
- char rv = runSheng64(sh, q, end, STOP_AT_MATCH);
- return rv;
-}
-
-char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report) { /*
-  * Runs the queue in NO_MATCHES mode (the end argument 0 is not used as a
-  * cap in that mode), then checks the resulting state for `report`.
-  * Returns MO_MATCHES_PENDING when the run result is nonzero and
-  * nfaExecSheng64_inAccept() is nonzero; otherwise returns the run result. */
- assert(q_cur_type(q) == MQE_START);
-
- const struct sheng64 *sh = get_sheng64(n);
- char rv = runSheng64(sh, q, 0 /* end */, NO_MATCHES);
-
- if (rv && nfaExecSheng64_inAccept(n, report, q)) {
- return MO_MATCHES_PENDING;
- }
- return rv;
-}
-
-char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report,
- struct mq *q) { /* Tests whether the state held in q->state accepts `report`.
-  * Returns 0 when the state's aux record has no accept set; otherwise
-  * returns whatever sheng64HasAccept() yields for (sh, aux, report). */
- assert(n && q);
-
- const struct sheng64 *sh = get_sheng64(n);
- u8 s = *(const u8 *)q->state; /* state is read as a single byte */
- DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK));
-
- const struct sstate_aux *aux = get_aux64(sh, s);
-
- if (!aux->accept) { /* no accept recorded for this state */
- return 0;
- }
-
- return sheng64HasAccept(sh, aux, report);
-}
-
-char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q) { /*
-  * Returns 1 if the aux record of the state held in q->state has a nonzero
-  * accept field, 0 otherwise. No specific report ID is examined. */
- assert(n && q);
-
- const struct sheng64 *sh = get_sheng64(n);
- u8 s = *(const u8 *)q->state; /* state is read as a single byte */
- DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK));
-
- const struct sstate_aux *aux = get_aux64(sh, s);
- return !!aux->accept; /* normalise to 0/1 */
-}
-
-char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state,
- UNUSED const char *streamState, u64a offset,
- NfaCallback cb, void *ctxt) { /* End-of-data check for the state byte at
-  * `state`. Returns MO_CONTINUE_MATCHING when the state's aux record has no
-  * accept_eod set; otherwise returns the result of fireReports64() called
-  * with NULL cache pointers and a final argument of 1 (presumably the EOD
-  * selector -- fireReports64 is defined outside this view). streamState is
-  * not used. */
- assert(nfa);
-
- const struct sheng64 *sh = get_sheng64(nfa);
- u8 s = *(const u8 *)state;
- DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG64_STATE_MASK));
-
- const struct sstate_aux *aux = get_aux64(sh, s);
-
- if (!aux->accept_eod) { /* nothing to report at EOD for this state */
- return MO_CONTINUE_MATCHING;
- }
-
- return fireReports64(sh, cb, ctxt, s, offset, NULL, NULL, 1);
-}
-
-char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q) { /*
-  * Fires the reports of the state held in q->state at q_cur_offset(q), using
-  * the callback and context stored in the queue. Nothing is fired when the
-  * state's aux record has no accept set. Always returns 0; the return values
-  * of the fire helpers are ignored here. */
- const struct sheng64 *sh = (const struct sheng64 *)getImplNfa(n);
- NfaCallback cb = q->cb;
- void *ctxt = q->context;
- u8 s = *(u8 *)q->state;
- const struct sstate_aux *aux = get_aux64(sh, s);
- u64a offset = q_cur_offset(q);
- u8 cached_state_id = 0; /* local cache storage handed to fireReports64 */
- ReportID cached_report_id = 0;
- assert(q_cur_type(q) == MQE_START);
-
- if (aux->accept) {
- if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { /* engine-wide single report */
- fireSingleReport(cb, ctxt, sh->report, offset);
- } else {
- fireReports64(sh, cb, ctxt, s, offset, &cached_state_id,
- &cached_report_id, 0);
- }
- }
-
- return 0;
-}
-
-char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, UNUSED u8 key) { /* Writes the initial one-byte state into
-  * `state`: sh->floating when offset is nonzero, sh->anchored when it is 0.
-  * Returns nonzero if the chosen state does not have SHENG64_STATE_DEAD set,
-  * 0 if it does. `key` is not used. */
- const struct sheng64 *sh = get_sheng64(nfa);
- u8 *s = (u8 *)state;
- *s = offset ? sh->floating: sh->anchored;
- return !(*s & SHENG64_STATE_DEAD);
-}
-
-char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q) { /*
-  * Initialises the queue's one-byte state (q->state) to sh->floating.
-  * Always returns 0. */
- assert(nfa->scratchStateSize == 1);
-
- /* starting in floating state */
- const struct sheng64 *sh = get_sheng64(nfa);
- *(u8 *)q->state = sh->floating;
- DEBUG_PRINTF("starting in floating state\n");
- return 0;
-}
-
-char nfaExecSheng64_queueCompressState(UNUSED const struct NFA *nfa,
- const struct mq *q, UNUSED s64a loc) { /* Copies the single state byte from
-  * q->state to q->streamState. Always returns 0. `nfa` is referenced only by
-  * the assertions below; `loc` is not used. */
- void *dest = q->streamState;
- const void *src = q->state;
- assert(nfa->scratchStateSize == 1);
- assert(nfa->streamStateSize == 1);
- *(u8 *)dest = *(const u8 *)src;
- return 0;
-}
-
-char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
- const void *src, UNUSED u64a offset,
- UNUSED u8 key) { /* Copies the single state byte from `src` to `dest`.
-  * Always returns 0. `nfa` is referenced only by the assertions below;
-  * `offset` and `key` are not used. */
- assert(nfa->scratchStateSize == 1);
- assert(nfa->streamStateSize == 1);
- *(u8 *)dest = *(const u8 *)src;
- return 0;
-}
-#endif // end of HAVE_AVX512VBMI
+
+#if defined(HAVE_AVX512VBMI)
+// Sheng32
+static really_inline /* Callback-mode scan of [start, end) for a sheng32 engine.
+  * The first-pass helper is chosen from can_die and has_accel:
+  *   can_die && has_accel -> sheng32_4_coda,  can_die only -> sheng32_4_cod,
+  *   has_accel only       -> sheng32_4_coa,   neither      -> sheng32_4_co.
+  * The plain helper (sheng32_cod or sheng32_co) then continues from *scanned
+  * (assumed to be the position stored by the first helper; the helpers are
+  * defined outside this view).
+  * Returns MO_DEAD as soon as either helper returns MO_HALT_MATCHING,
+  * MO_ALIVE otherwise. */
+char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
+ u64a offset, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, const u8 *cur_buf,
+ const u8 *start, const u8 *end, u8 can_die,
+ u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
+ DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
+ (u64a)(end - start), offset);
+ DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
+ (s64a)(end - cur_buf));
+ DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
+ !!has_accel, !!single);
+ int rv;
+ /* scan and report all matches */
+ if (can_die) {
+ if (has_accel) {
+ rv = sheng32_4_coda(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ } else {
+ rv = sheng32_4_cod(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ }
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ rv = sheng32_cod(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned); /* resume from *scanned */
+ } else {
+ if (has_accel) {
+ rv = sheng32_4_coa(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ } else {
+ rv = sheng32_4_co(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ }
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ rv = sheng32_co(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned); /* resume from *scanned */
+ }
+ if (rv == MO_HALT_MATCHING) { /* result of the second helper */
+ return MO_DEAD;
+ }
+ return MO_ALIVE;
+}
+
+static really_inline /* No-match-mode scan of [start, end) for a sheng32 engine.
+  * With can_die set, the first pass is sheng32_4_nmda (has_accel) or
+  * sheng32_4_nmd, followed by sheng32_nmd from *scanned. Without can_die the
+  * pair is sheng32_4_nm / sheng32_nm and has_accel is not consulted.
+  * The helpers' return values are ignored and nothing is returned; results
+  * are observable only through *state and *scanned. */
+void runSheng32Nm(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
+ u64a offset, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, const u8 *cur_buf,
+ const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
+ u8 single, const u8 **scanned, u8 *state) {
+ DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
+ (u64a)(end - start), offset);
+ DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
+ (s64a)(end - cur_buf));
+ DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
+ !!has_accel, !!single);
+ /* just scan the buffer */
+ if (can_die) {
+ if (has_accel) {
+ sheng32_4_nmda(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ } else {
+ sheng32_4_nmd(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ }
+ sheng32_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
+ single, offset, cur_buf, *scanned, end, scanned);
+ } else {
+ sheng32_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
+ single, offset, cur_buf, start, end, scanned);
+ sheng32_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
+ single, offset, cur_buf, *scanned, end, scanned);
+ }
+}
+
+static really_inline /* Stop-at-match scan of [start, end) for a sheng32 engine.
+  * The first-pass helper is chosen from can_die and has_accel:
+  *   can_die && has_accel -> sheng32_4_samda, can_die only -> sheng32_4_samd,
+  *   has_accel only       -> sheng32_4_sama,  neither      -> sheng32_4_sam.
+  * The plain helper (sheng32_samd or sheng32_sam) then continues from
+  * *scanned.
+  * Returns MO_DEAD if a helper returns MO_HALT_MATCHING, MO_MATCHES_PENDING if
+  * a helper returns MO_MATCHES_PENDING, and MO_ALIVE otherwise. */
+char runSheng32Sam(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
+ u64a offset, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, const u8 *cur_buf,
+ const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
+ u8 single, const u8 **scanned, u8 *state) {
+ DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
+ (u64a)(end - start), offset);
+ DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
+ (s64a)(end - cur_buf));
+ DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
+ !!has_accel, !!single);
+ int rv;
+ /* scan until first match */
+ if (can_die) {
+ if (has_accel) {
+ rv = sheng32_4_samda(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ } else {
+ rv = sheng32_4_samd(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ }
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ /* if we stopped before we expected, we found a match */
+ if (rv == MO_MATCHES_PENDING) {
+ return MO_MATCHES_PENDING;
+ }
+
+ rv = sheng32_samd(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned); /* resume from *scanned */
+ } else {
+ if (has_accel) {
+ rv = sheng32_4_sama(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ } else {
+ rv = sheng32_4_sam(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ }
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ /* if we stopped before we expected, we found a match */
+ if (rv == MO_MATCHES_PENDING) {
+ return MO_MATCHES_PENDING;
+ }
+
+ rv = sheng32_sam(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned); /* resume from *scanned */
+ }
+ if (rv == MO_HALT_MATCHING) { /* result of the second helper */
+ return MO_DEAD;
+ }
+ /* if we stopped before we expected, we found a match */
+ if (rv == MO_MATCHES_PENDING) {
+ return MO_MATCHES_PENDING;
+ }
+ return MO_ALIVE;
+}
+
+static never_inline /* Queue driver for a sheng32 engine.
+  * Starting from the MQE_START item at q->cur, scans up to each following
+  * queue item's location and then applies that item (MQE_TOP or MQE_END).
+  * `mode` selects the scan helper: NO_MATCHES -> runSheng32Nm,
+  * CALLBACK_OUTPUT -> runSheng32Cb, STOP_AT_MATCH -> runSheng32Sam.
+  * Unless mode is NO_MATCHES, scanning is capped at b_end; when an item lies
+  * beyond b_end an MQE_START item at b_end is written back into the queue so
+  * that a later call can resume.
+  * Returns MO_DEAD, MO_ALIVE or (STOP_AT_MATCH only) MO_MATCHES_PENDING.
+  * q->state is written back on the MQE_END, b_end-cut and match-pending exits;
+  * the MO_DEAD exits taken after a scan helper fails do not write it. */
+char runSheng32(const struct sheng32 *sh, struct mq *q, s64a b_end,
+ enum MatchMode mode) {
+ u8 state = *(u8 *)q->state;
+ u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
+ u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
+ u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
+
+ u8 cached_accept_state = 0;
+ ReportID cached_accept_id = 0;
+
+ DEBUG_PRINTF("starting Sheng32 execution in state %u\n",
+ state & SHENG32_STATE_MASK);
+
+ if (q->report_current) { /* caller asked for the current state's reports first */
+ DEBUG_PRINTF("reporting current pending matches\n");
+ assert(sh);
+
+ q->report_current = 0;
+
+ int rv;
+ if (single) {
+ rv = fireSingleReport(q->cb, q->context, sh->report,
+ q_cur_offset(q));
+ } else {
+ rv = fireReports32(sh, q->cb, q->context, state, q_cur_offset(q),
+ &cached_accept_state, &cached_accept_id, 0);
+ }
+ if (rv == MO_HALT_MATCHING) {
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
+ return MO_DEAD;
+ }
+
+ DEBUG_PRINTF("proceeding with matching\n");
+ }
+
+ assert(q_cur_type(q) == MQE_START);
+ s64a start = q_cur_loc(q);
+
+ DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
+ mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
+ mode == NO_MATCHES ? "NO MATCHES" :
+ mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
+
+ DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
+ q_cur_type(q) == MQE_START ? "START" :
+ q_cur_type(q) == MQE_TOP ? "TOP" :
+ q_cur_type(q) == MQE_END ? "END" : "???");
+
+ const u8* cur_buf;
+ if (start < 0) {
+ DEBUG_PRINTF("negative location, scanning history\n");
+ DEBUG_PRINTF("min location: %zd\n", -q->hlength);
+ cur_buf = q->history + q->hlength; /* negative locations index back from here */
+ } else {
+ DEBUG_PRINTF("positive location, scanning buffer\n");
+ DEBUG_PRINTF("max location: %lli\n", b_end);
+ cur_buf = q->buffer;
+ }
+
+ /* if our queue event is past our end */
+ if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
+ DEBUG_PRINTF("current location past buffer end\n");
+ DEBUG_PRINTF("setting q location to %llu\n", b_end);
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
+ q->items[q->cur].location = b_end; /* START item is kept, only moved */
+ return MO_ALIVE;
+ }
+
+ q->cur++; /* consume the START item */
+
+ s64a cur_start = start;
+
+ while (1) {
+ DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
+ q_cur_type(q) == MQE_START ? "START" :
+ q_cur_type(q) == MQE_TOP ? "TOP" :
+ q_cur_type(q) == MQE_END ? "END" : "???");
+ s64a end = q_cur_loc(q);
+ if (mode != NO_MATCHES) {
+ end = MIN(end, b_end);
+ }
+ assert(end <= (s64a) q->length);
+ s64a cur_end = end;
+
+ /* we may cross the border between history and current buffer */
+ if (cur_start < 0) {
+ cur_end = MIN(0, cur_end);
+ }
+
+ DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
+
+ /* don't scan zero length buffer */
+ if (cur_start != cur_end) {
+ const u8 * scanned = cur_buf;
+ char rv;
+
+ if (mode == NO_MATCHES) {
+ runSheng32Nm(sh, q->cb, q->context, q->offset,
+ &cached_accept_state, &cached_accept_id, cur_buf,
+ cur_buf + cur_start, cur_buf + cur_end, can_die,
+ has_accel, single, &scanned, &state);
+ } else if (mode == CALLBACK_OUTPUT) {
+ rv = runSheng32Cb(sh, q->cb, q->context, q->offset,
+ &cached_accept_state, &cached_accept_id,
+ cur_buf, cur_buf + cur_start, cur_buf + cur_end,
+ can_die, has_accel, single, &scanned, &state);
+ if (rv == MO_DEAD) {
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG32_STATE_MASK);
+ return MO_DEAD;
+ }
+ } else if (mode == STOP_AT_MATCH) {
+ rv = runSheng32Sam(sh, q->cb, q->context, q->offset,
+ &cached_accept_state, &cached_accept_id,
+ cur_buf, cur_buf + cur_start,
+ cur_buf + cur_end, can_die, has_accel, single,
+ &scanned, &state);
+ if (rv == MO_DEAD) {
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG32_STATE_MASK);
+ return rv;
+ } else if (rv == MO_MATCHES_PENDING) {
+ assert(q->cur);
+ DEBUG_PRINTF("found a match, setting q location to %zd\n",
+ scanned - cur_buf + 1);
+ q->cur--; /* step back and overwrite that slot with a START item */
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location =
+ scanned - cur_buf + 1; /* due to exiting early */
+ *(u8 *)q->state = state;
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG32_STATE_MASK);
+ return rv;
+ }
+ } else {
+ assert(!"invalid scanning mode!");
+ }
+ assert(scanned == cur_buf + cur_end);
+
+ cur_start = cur_end;
+ }
+
+ /* if our queue event is past our end */
+ if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
+ DEBUG_PRINTF("current location past buffer end\n");
+ DEBUG_PRINTF("setting q location to %llu\n", b_end);
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
+ q->cur--; /* step back and overwrite that slot with a START item at b_end */
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = b_end;
+ *(u8 *)q->state = state;
+ return MO_ALIVE;
+ }
+
+ /* crossing over into actual buffer */
+ if (cur_start == 0) {
+ DEBUG_PRINTF("positive location, scanning buffer\n");
+ DEBUG_PRINTF("max offset: %lli\n", b_end);
+ cur_buf = q->buffer;
+ }
+
+ /* continue scanning the same buffer */
+ if (end != cur_end) { /* scan was clipped at 0; rest of this span is still pending */
+ continue;
+ }
+
+ switch (q_cur_type(q)) {
+ case MQE_END:
+ *(u8 *)q->state = state;
+ q->cur++;
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK);
+ if (can_die) {
+ return (state & SHENG32_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
+ }
+ return MO_ALIVE;
+ case MQE_TOP:
+ if (q->offset + cur_start == 0) { /* top at absolute offset 0 */
+ DEBUG_PRINTF("Anchored start, going to state %u\n",
+ sh->anchored);
+ state = sh->anchored;
+ } else {
+ u8 new_state = get_aux32(sh, state)->top; /* per-state top successor */
+ DEBUG_PRINTF("Top event %u->%u\n", state & SHENG32_STATE_MASK,
+ new_state & SHENG32_STATE_MASK);
+ state = new_state;
+ }
+ break;
+ default:
+ assert(!"invalid queue event");
+ break;
+ }
+ q->cur++;
+ }
+}
+
+char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context) { /* Block-mode entry point:
+  * scans buffer[0, length) in callback mode starting from sh->anchored, then
+  * fires the final state's reports at end + offset if its aux record has
+  * accept_eod set (return value of that call is ignored).
+  * Returns MO_DEAD if the scan reported MO_DEAD or the final state has
+  * SHENG32_STATE_DEAD set, MO_ALIVE otherwise. */
+ DEBUG_PRINTF("smallwrite Sheng32\n");
+ assert(n->type == SHENG_NFA_32);
+ const struct sheng32 *sh = getImplNfa(n);
+ u8 state = sh->anchored;
+ u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
+ u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
+ u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
+ u8 cached_accept_state = 0;
+ ReportID cached_accept_id = 0;
+
+ /* scan and report all matches */
+ int rv;
+ s64a end = length;
+ const u8 *scanned; /* out-parameter for runSheng32Cb; not read afterwards here */
+
+ rv = runSheng32Cb(sh, cb, context, offset, &cached_accept_state,
+ &cached_accept_id, buffer, buffer, buffer + end, can_die,
+ has_accel, single, &scanned, &state);
+ if (rv == MO_DEAD) {
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG32_STATE_MASK);
+ return MO_DEAD;
+ }
+
+ DEBUG_PRINTF("%u\n", state & SHENG32_STATE_MASK);
+
+ const struct sstate_aux *aux = get_aux32(sh, state);
+
+ if (aux->accept_eod) {
+ DEBUG_PRINTF("Reporting EOD matches\n");
+ fireReports32(sh, cb, context, state, end + offset,
+ &cached_accept_state, &cached_accept_id, 1);
+ }
+
+ return state & SHENG32_STATE_DEAD ? MO_DEAD : MO_ALIVE;
+}
+
+char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end) { /*
+  * Runs the queue up to `end` in CALLBACK_OUTPUT mode and returns
+  * runSheng32()'s result unchanged. */
+ const struct sheng32 *sh = get_sheng32(n);
+ char rv = runSheng32(sh, q, end, CALLBACK_OUTPUT);
+ return rv;
+}
+
+char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end) { /*
+  * Runs the queue up to `end` in STOP_AT_MATCH mode and returns
+  * runSheng32()'s result unchanged. */
+ const struct sheng32 *sh = get_sheng32(n);
+ char rv = runSheng32(sh, q, end, STOP_AT_MATCH);
+ return rv;
+}
+
+char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report) { /*
+  * Runs the queue in NO_MATCHES mode (the end argument 0 is not used as a
+  * cap in that mode), then checks the resulting state for `report`.
+  * Returns MO_MATCHES_PENDING when the run result is nonzero and
+  * nfaExecSheng32_inAccept() is nonzero; otherwise returns the run result. */
+ assert(q_cur_type(q) == MQE_START);
+
+ const struct sheng32 *sh = get_sheng32(n);
+ char rv = runSheng32(sh, q, 0 /* end */, NO_MATCHES);
+
+ if (rv && nfaExecSheng32_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ }
+ return rv;
+}
+
+char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) { /* Tests whether the state held in q->state accepts `report`.
+  * Returns 0 when the state's aux record has no accept set; otherwise
+  * returns whatever sheng32HasAccept() yields for (sh, aux, report). */
+ assert(n && q);
+
+ const struct sheng32 *sh = get_sheng32(n);
+ u8 s = *(const u8 *)q->state; /* state is read as a single byte */
+ DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK));
+
+ const struct sstate_aux *aux = get_aux32(sh, s);
+
+ if (!aux->accept) { /* no accept recorded for this state */
+ return 0;
+ }
+
+ return sheng32HasAccept(sh, aux, report);
+}
+
+char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q) { /*
+  * Returns 1 if the aux record of the state held in q->state has a nonzero
+  * accept field, 0 otherwise. No specific report ID is examined. */
+ assert(n && q);
+
+ const struct sheng32 *sh = get_sheng32(n);
+ u8 s = *(const u8 *)q->state; /* state is read as a single byte */
+ DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK));
+
+ const struct sstate_aux *aux = get_aux32(sh, s);
+ return !!aux->accept; /* normalise to 0/1 */
+}
+
+char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state,
+ UNUSED const char *streamState, u64a offset,
+ NfaCallback cb, void *ctxt) { /* End-of-data check for the state byte at
+  * `state`. Returns MO_CONTINUE_MATCHING when the state's aux record has no
+  * accept_eod set; otherwise returns the result of fireReports32() called
+  * with NULL cache pointers and a final argument of 1 (presumably the EOD
+  * selector -- fireReports32 is defined outside this view). streamState is
+  * not used. */
+ assert(nfa);
+
+ const struct sheng32 *sh = get_sheng32(nfa);
+ u8 s = *(const u8 *)state;
+ DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG32_STATE_MASK));
+
+ const struct sstate_aux *aux = get_aux32(sh, s);
+
+ if (!aux->accept_eod) { /* nothing to report at EOD for this state */
+ return MO_CONTINUE_MATCHING;
+ }
+
+ return fireReports32(sh, cb, ctxt, s, offset, NULL, NULL, 1);
+}
+
+char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q) { /*
+  * Fires the reports of the state held in q->state at q_cur_offset(q), using
+  * the callback and context stored in the queue. Nothing is fired when the
+  * state's aux record has no accept set. Always returns 0; the return values
+  * of the fire helpers are ignored here. */
+ const struct sheng32 *sh = (const struct sheng32 *)getImplNfa(n);
+ NfaCallback cb = q->cb;
+ void *ctxt = q->context;
+ u8 s = *(u8 *)q->state;
+ const struct sstate_aux *aux = get_aux32(sh, s);
+ u64a offset = q_cur_offset(q);
+ u8 cached_state_id = 0; /* local cache storage handed to fireReports32 */
+ ReportID cached_report_id = 0;
+ assert(q_cur_type(q) == MQE_START);
+
+ if (aux->accept) {
+ if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { /* engine-wide single report */
+ fireSingleReport(cb, ctxt, sh->report, offset);
+ } else {
+ fireReports32(sh, cb, ctxt, s, offset, &cached_state_id,
+ &cached_report_id, 0);
+ }
+ }
+
+ return 0;
+}
+
+char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) { /* Writes the initial one-byte state into
+  * `state`: sh->floating when offset is nonzero, sh->anchored when it is 0.
+  * Returns nonzero if the chosen state does not have SHENG32_STATE_DEAD set,
+  * 0 if it does. `key` is not used. */
+ const struct sheng32 *sh = get_sheng32(nfa);
+ u8 *s = (u8 *)state;
+ *s = offset ? sh->floating: sh->anchored;
+ return !(*s & SHENG32_STATE_DEAD);
+}
+
+char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q) { /*
+  * Initialises the queue's one-byte state (q->state) to sh->floating.
+  * Always returns 0. */
+ assert(nfa->scratchStateSize == 1);
+
+ /* starting in floating state */
+ const struct sheng32 *sh = get_sheng32(nfa);
+ *(u8 *)q->state = sh->floating;
+ DEBUG_PRINTF("starting in floating state\n");
+ return 0;
+}
+
+char nfaExecSheng32_queueCompressState(UNUSED const struct NFA *nfa,
+ const struct mq *q, UNUSED s64a loc) { /* Copies the single state byte from
+  * q->state to q->streamState. Always returns 0. `nfa` is referenced only by
+  * the assertions below; `loc` is not used. */
+ void *dest = q->streamState;
+ const void *src = q->state;
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src;
+ return 0;
+}
+
+char nfaExecSheng32_expandState(UNUSED const struct NFA *nfa, void *dest,
+ const void *src, UNUSED u64a offset,
+ UNUSED u8 key) { /* Copies the single state byte from `src` to `dest`.
+  * Always returns 0. `nfa` is referenced only by the assertions below;
+  * `offset` and `key` are not used. */
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src;
+ return 0;
+}
+
+// Sheng64
+static really_inline /* Callback-mode scan of [start, end) for a sheng64 engine.
+  * Picks the *_cod helpers when can_die is set and the *_co helpers otherwise.
+  * In each branch the sheng64_4_* helper runs first and the plain helper then
+  * continues from *scanned (assumed to be the position stored by the first
+  * helper; the helpers are defined outside this view).
+  * Returns MO_DEAD as soon as either helper returns MO_HALT_MATCHING,
+  * MO_ALIVE otherwise. */
+char runSheng64Cb(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
+ u64a offset, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, const u8 *cur_buf,
+ const u8 *start, const u8 *end, u8 can_die,
+ u8 single, const u8 **scanned, u8 *state) {
+ DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
+ (u64a)(end - start), offset);
+ DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
+ (s64a)(end - cur_buf));
+ DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single);
+ int rv;
+ /* scan and report all matches */
+ if (can_die) {
+ rv = sheng64_4_cod(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ rv = sheng64_cod(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned); /* resume from *scanned */
+ } else {
+ rv = sheng64_4_co(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ rv = sheng64_co(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned); /* resume from *scanned */
+ }
+ if (rv == MO_HALT_MATCHING) { /* result of the second helper */
+ return MO_DEAD;
+ }
+ return MO_ALIVE;
+}
+
+static really_inline /* No-match-mode scan of [start, end): advances *state and *scanned, returns nothing. */
+void runSheng64Nm(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
+ u64a offset, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, const u8 *cur_buf,
+ const u8 *start, const u8 *end, u8 can_die,
+ u8 single, const u8 **scanned, u8 *state) {
+ DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
+ (u64a)(end - start), offset);
+ DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
+ (s64a)(end - cur_buf));
+ DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single);
+ /* just scan the buffer; the helpers' return values are not inspected in this mode */
+ if (can_die) { /* nmd-suffixed helpers when can_die is set, nm-suffixed ones otherwise */
+ sheng64_4_nmd(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ sheng64_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, /* second pass resumes from *scanned */
+ single, offset, cur_buf, *scanned, end, scanned);
+ } else {
+ sheng64_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
+ single, offset, cur_buf, start, end, scanned);
+ sheng64_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, /* second pass resumes from *scanned */
+ single, offset, cur_buf, *scanned, end, scanned);
+ }
+}
+
+static really_inline /* Stop-at-match scan of [start, end); returns MO_DEAD, MO_MATCHES_PENDING or MO_ALIVE. */
+char runSheng64Sam(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
+ u64a offset, u8 *const cached_accept_state,
+ ReportID *const cached_accept_id, const u8 *cur_buf,
+ const u8 *start, const u8 *end, u8 can_die,
+ u8 single, const u8 **scanned, u8 *state) {
+ DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
+ (u64a)(end - start), offset);
+ DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
+ (s64a)(end - cur_buf));
+ DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single);
+ int rv;
+ /* scan until first match: a sheng64_4 helper runs first, then its non-4 counterpart */
+ if (can_die) { /* samd-suffixed helpers when can_die is set, sam-suffixed ones otherwise */
+ rv = sheng64_4_samd(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ if (rv == MO_HALT_MATCHING) { /* halt is reported to the caller as MO_DEAD */
+ return MO_DEAD;
+ }
+ /* if we stopped before we expected, we found a match */
+ if (rv == MO_MATCHES_PENDING) { /* return at once; the second pass is not run */
+ return MO_MATCHES_PENDING;
+ }
+
+ rv = sheng64_samd(state, cb, ctxt, sh, cached_accept_state, /* second pass resumes from *scanned */
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned);
+ } else {
+ rv = sheng64_4_sam(state, cb, ctxt, sh, cached_accept_state,
+ cached_accept_id, single, offset, cur_buf,
+ start, end, scanned);
+ if (rv == MO_HALT_MATCHING) { /* halt is reported to the caller as MO_DEAD */
+ return MO_DEAD;
+ }
+ /* if we stopped before we expected, we found a match */
+ if (rv == MO_MATCHES_PENDING) { /* return at once; the second pass is not run */
+ return MO_MATCHES_PENDING;
+ }
+
+ rv = sheng64_sam(state, cb, ctxt, sh, cached_accept_state, /* second pass resumes from *scanned */
+ cached_accept_id, single, offset, cur_buf,
+ *scanned, end, scanned);
+ }
+ if (rv == MO_HALT_MATCHING) { /* result of whichever second pass ran */
+ return MO_DEAD;
+ }
+ /* if we stopped before we expected, we found a match */
+ if (rv == MO_MATCHES_PENDING) {
+ return MO_MATCHES_PENDING;
+ }
+ return MO_ALIVE;
+}
+
+static never_inline /* Drives a sheng64 over the events in queue q, scanning history and/or buffer in the given mode. */
+char runSheng64(const struct sheng64 *sh, struct mq *q, s64a b_end,
+ enum MatchMode mode) {
+ u8 state = *(u8 *)q->state; /* work on a local copy of the one-byte state */
+ u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
+ u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
+
+ u8 cached_accept_state = 0;
+ ReportID cached_accept_id = 0;
+
+ DEBUG_PRINTF("starting Sheng64 execution in state %u\n",
+ state & SHENG64_STATE_MASK);
+
+ if (q->report_current) { /* fire the reports for the current state before any scanning */
+ DEBUG_PRINTF("reporting current pending matches\n");
+ assert(sh);
+
+ q->report_current = 0; /* the request is cleared before the reports are fired */
+
+ int rv;
+ if (single) {
+ rv = fireSingleReport(q->cb, q->context, sh->report,
+ q_cur_offset(q));
+ } else {
+ rv = fireReports64(sh, q->cb, q->context, state, q_cur_offset(q),
+ &cached_accept_state, &cached_accept_id, 0);
+ }
+ if (rv == MO_HALT_MATCHING) {
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
+ return MO_DEAD;
+ }
+
+ DEBUG_PRINTF("proceeding with matching\n");
+ }
+
+ assert(q_cur_type(q) == MQE_START); /* the queue is expected to begin with a START event */
+ s64a start = q_cur_loc(q);
+
+ DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
+ mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
+ mode == NO_MATCHES ? "NO MATCHES" :
+ mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
+
+ DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
+ q_cur_type(q) == MQE_START ? "START" :
+ q_cur_type(q) == MQE_TOP ? "TOP" :
+ q_cur_type(q) == MQE_END ? "END" : "???");
+
+ const u8* cur_buf; /* base pointer that queue locations are added to */
+ if (start < 0) {
+ DEBUG_PRINTF("negative location, scanning history\n");
+ DEBUG_PRINTF("min location: %zd\n", -q->hlength);
+ cur_buf = q->history + q->hlength; /* one past the end of history, so negative locations index backwards into it */
+ } else {
+ DEBUG_PRINTF("positive location, scanning buffer\n");
+ DEBUG_PRINTF("max location: %lli\n", b_end);
+ cur_buf = q->buffer;
+ }
+
+ /* if our queue event is past our end: clamp it to b_end and return without consuming it */
+ if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
+ DEBUG_PRINTF("current location past buffer end\n");
+ DEBUG_PRINTF("setting q location to %llu\n", b_end);
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
+ q->items[q->cur].location = b_end;
+ return MO_ALIVE;
+ }
+
+ q->cur++; /* consume the START event; q->cur now indexes the next event */
+
+ s64a cur_start = start;
+
+ while (1) { /* one iteration per scan segment; exits only through the returns below */
+ DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
+ q_cur_type(q) == MQE_START ? "START" :
+ q_cur_type(q) == MQE_TOP ? "TOP" :
+ q_cur_type(q) == MQE_END ? "END" : "???");
+ s64a end = q_cur_loc(q); /* scan up to the location of the next queued event */
+ if (mode != NO_MATCHES) {
+ end = MIN(end, b_end); /* b_end only limits the scan outside NO_MATCHES mode */
+ }
+ assert(end <= (s64a) q->length);
+ s64a cur_end = end;
+
+ /* we may cross the border between history and current buffer */
+ if (cur_start < 0) {
+ cur_end = MIN(0, cur_end); /* stop this segment at location 0, the end of history */
+ }
+
+ DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
+
+ /* don't scan zero length buffer */
+ if (cur_start != cur_end) {
+ const u8 * scanned = cur_buf;
+ char rv;
+
+ if (mode == NO_MATCHES) {
+ runSheng64Nm(sh, q->cb, q->context, q->offset,
+ &cached_accept_state, &cached_accept_id, cur_buf,
+ cur_buf + cur_start, cur_buf + cur_end, can_die,
+ single, &scanned, &state);
+ } else if (mode == CALLBACK_OUTPUT) {
+ rv = runSheng64Cb(sh, q->cb, q->context, q->offset,
+ &cached_accept_state, &cached_accept_id,
+ cur_buf, cur_buf + cur_start, cur_buf + cur_end,
+ can_die, single, &scanned, &state);
+ if (rv == MO_DEAD) { /* note: q->state is not written back on this exit */
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG64_STATE_MASK);
+ return MO_DEAD;
+ }
+ } else if (mode == STOP_AT_MATCH) {
+ rv = runSheng64Sam(sh, q->cb, q->context, q->offset,
+ &cached_accept_state, &cached_accept_id,
+ cur_buf, cur_buf + cur_start,
+ cur_buf + cur_end, can_die, single,
+ &scanned, &state);
+ if (rv == MO_DEAD) { /* note: q->state is not written back on this exit */
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG64_STATE_MASK);
+ return rv;
+ } else if (rv == MO_MATCHES_PENDING) { /* match found: re-queue a START event and save the state */
+ assert(q->cur);
+ DEBUG_PRINTF("found a match, setting q location to %zd\n",
+ scanned - cur_buf + 1);
+ q->cur--; /* step back onto the slot consumed above and reuse it */
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location =
+ scanned - cur_buf + 1; /* due to exiting early */
+ *(u8 *)q->state = state;
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG64_STATE_MASK);
+ return rv;
+ }
+ } else {
+ assert(!"invalid scanning mode!");
+ }
+ assert(scanned == cur_buf + cur_end); /* reaching here means the whole segment was scanned */
+
+ cur_start = cur_end;
+ }
+
+ /* if our queue event is past our end: re-queue a START at b_end, save the state and return */
+ if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
+ DEBUG_PRINTF("current location past buffer end\n");
+ DEBUG_PRINTF("setting q location to %llu\n", b_end);
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
+ q->cur--; /* step back one slot and overwrite it with the new START event */
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = b_end;
+ *(u8 *)q->state = state;
+ return MO_ALIVE;
+ }
+
+ /* crossing over into actual buffer */
+ if (cur_start == 0) {
+ DEBUG_PRINTF("positive location, scanning buffer\n");
+ DEBUG_PRINTF("max offset: %lli\n", b_end);
+ cur_buf = q->buffer; /* locations from 0 upwards are relative to the main buffer */
+ }
+
+ /* continue scanning the same buffer */
+ if (end != cur_end) { /* the segment was cut at location 0; loop again for the remainder up to `end` */
+ continue;
+ }
+
+ switch (q_cur_type(q)) { /* the scan has reached the event's location: act on the event */
+ case MQE_END:
+ *(u8 *)q->state = state; /* save the state and consume the END event */
+ q->cur++;
+ DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK);
+ if (can_die) { /* the DEAD flag is only consulted when SHENG_FLAG_CAN_DIE is set */
+ return (state & SHENG64_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
+ }
+ return MO_ALIVE;
+ case MQE_TOP:
+ if (q->offset + cur_start == 0) { /* TOP at q->offset + location 0 restarts from the anchored state */
+ DEBUG_PRINTF("Anchored start, going to state %u\n",
+ sh->anchored);
+ state = sh->anchored;
+ } else {
+ u8 new_state = get_aux64(sh, state)->top; /* otherwise follow the current state's `top` entry */
+ DEBUG_PRINTF("Top event %u->%u\n", state & SHENG64_STATE_MASK,
+ new_state & SHENG64_STATE_MASK);
+ state = new_state;
+ }
+ break;
+ default:
+ assert(!"invalid queue event");
+ break;
+ }
+ q->cur++; /* move on to the next event */
+ }
+}
+
+char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context) { /* Scans one whole buffer in callback mode, then fires EOD reports. */
+ DEBUG_PRINTF("smallwrite Sheng64\n");
+ assert(n->type == SHENG_NFA_64);
+ const struct sheng64 *sh = getImplNfa(n);
+ u8 state = sh->anchored; /* this entry point always starts from the anchored state */
+ u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
+ u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
+ u8 cached_accept_state = 0;
+ ReportID cached_accept_id = 0;
+
+ /* scan and report all matches */
+ int rv;
+ s64a end = length;
+ const u8 *scanned; /* passed to runSheng64Cb as an out-parameter; not read afterwards here */
+
+ rv = runSheng64Cb(sh, cb, context, offset, &cached_accept_state,
+ &cached_accept_id, buffer, buffer, buffer + end, can_die,
+ single, &scanned, &state);
+ if (rv == MO_DEAD) { /* halted during the scan: the EOD reports below are skipped */
+ DEBUG_PRINTF("exiting in state %u\n",
+ state & SHENG64_STATE_MASK);
+ return MO_DEAD;
+ }
+
+ DEBUG_PRINTF("%u\n", state & SHENG64_STATE_MASK);
+
+ const struct sstate_aux *aux = get_aux64(sh, state);
+
+ if (aux->accept_eod) {
+ DEBUG_PRINTF("Reporting EOD matches\n");
+ fireReports64(sh, cb, context, state, end + offset, /* reported at end + offset; the return value is ignored */
+ &cached_accept_state, &cached_accept_id, 1);
+ }
+
+ return state & SHENG64_STATE_DEAD ? MO_DEAD : MO_ALIVE; /* DEAD flag is tested here regardless of can_die */
+}
+
+char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end) { /* Runs the queue up to `end` in CALLBACK_OUTPUT mode. */
+ const struct sheng64 *sh = get_sheng64(n);
+ char rv = runSheng64(sh, q, end, CALLBACK_OUTPUT);
+ return rv; /* runSheng64's result is passed through unchanged */
+}
+
+char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end) { /* Runs the queue up to `end` in STOP_AT_MATCH mode. */
+ const struct sheng64 *sh = get_sheng64(n);
+ char rv = runSheng64(sh, q, end, STOP_AT_MATCH);
+ return rv; /* runSheng64's result is passed through unchanged */
+}
+
+char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report) { /* Runs the queue without reporting, then checks `report`. */
+ assert(q_cur_type(q) == MQE_START);
+
+ const struct sheng64 *sh = get_sheng64(n);
+ char rv = runSheng64(sh, q, 0 /* end */, NO_MATCHES); /* in NO_MATCHES mode runSheng64 does not apply its b_end limit */
+
+ if (rv && nfaExecSheng64_inAccept(n, report, q)) { /* non-zero rv and the final state accepts `report` */
+ return MO_MATCHES_PENDING;
+ }
+ return rv;
+}
+
+char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) { /* Tests whether the state stored in q->state accepts `report`. */
+ assert(n && q);
+
+ const struct sheng64 *sh = get_sheng64(n);
+ u8 s = *(const u8 *)q->state;
+ DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK));
+
+ const struct sstate_aux *aux = get_aux64(sh, s);
+
+ if (!aux->accept) { /* no accept entry for this state: nothing can match */
+ return 0;
+ }
+
+ return sheng64HasAccept(sh, aux, report); /* otherwise defer to sheng64HasAccept for the specific report */
+}
+
+char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q) { /* Tests whether the state stored in q->state has any accept. */
+ assert(n && q);
+
+ const struct sheng64 *sh = get_sheng64(n);
+ u8 s = *(const u8 *)q->state;
+ DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK));
+
+ const struct sstate_aux *aux = get_aux64(sh, s);
+ return !!aux->accept; /* normalised to 0 or 1 */
+}
+
+char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state,
+ UNUSED const char *streamState, u64a offset,
+ NfaCallback cb, void *ctxt) { /* Fires the EOD reports, if any, for the state byte at `state`. */
+ assert(nfa);
+
+ const struct sheng64 *sh = get_sheng64(nfa);
+ u8 s = *(const u8 *)state;
+ DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG64_STATE_MASK));
+
+ const struct sstate_aux *aux = get_aux64(sh, s);
+
+ if (!aux->accept_eod) { /* this state has no EOD accepts: nothing to fire */
+ return MO_CONTINUE_MATCHING;
+ }
+
+ return fireReports64(sh, cb, ctxt, s, offset, NULL, NULL, 1); /* no report cache is supplied (NULL, NULL) on this path */
+}
+
+char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q) { /* Fires the accepts of the state in q->state; always returns 0. */
+ const struct sheng64 *sh = (const struct sheng64 *)getImplNfa(n);
+ NfaCallback cb = q->cb;
+ void *ctxt = q->context;
+ u8 s = *(u8 *)q->state;
+ const struct sstate_aux *aux = get_aux64(sh, s);
+ u64a offset = q_cur_offset(q);
+ u8 cached_state_id = 0; /* throwaway report cache, local to this call */
+ ReportID cached_report_id = 0;
+ assert(q_cur_type(q) == MQE_START);
+
+ if (aux->accept) {
+ if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { /* single-report engines fire sh->report directly */
+ fireSingleReport(cb, ctxt, sh->report, offset);
+ } else {
+ fireReports64(sh, cb, ctxt, s, offset, &cached_state_id,
+ &cached_report_id, 0);
+ }
+ }
+
+ return 0; /* the callbacks' return values are not propagated */
+}
+
+char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) { /* Seeds the one-byte state at `state` with a start state. */
+ const struct sheng64 *sh = get_sheng64(nfa);
+ u8 *s = (u8 *)state;
+ *s = offset ? sh->floating: sh->anchored; /* offset 0 takes the anchored start state, any other offset the floating one */
+ return !(*s & SHENG64_STATE_DEAD); /* 0 if the chosen start state carries the DEAD flag, 1 otherwise */
+}
+
+char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q) { /* Resets the queue's state byte; always returns 0. */
+ assert(nfa->scratchStateSize == 1); /* the scratch state is expected to be a single byte */
+
+ /* the queue always starts in the floating state; no offset is consulted here */
+ const struct sheng64 *sh = get_sheng64(nfa);
+ *(u8 *)q->state = sh->floating;
+ DEBUG_PRINTF("starting in floating state\n");
+ return 0;
+}
+
+char nfaExecSheng64_queueCompressState(UNUSED const struct NFA *nfa, /* nfa is referenced only by the asserts below */
+ const struct mq *q, UNUSED s64a loc) { /* Copies q->state into q->streamState; always returns 0. */
+ void *dest = q->streamState;
+ const void *src = q->state;
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src; /* both forms are asserted to be one byte, so this is a plain byte copy */
+ return 0;
+}
+
+char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest, /* nfa is referenced only by the asserts below */
+ const void *src, UNUSED u64a offset,
+ UNUSED u8 key) { /* Copies the state byte at src to dest; always returns 0. */
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src; /* both forms are asserted to be one byte, so this is a plain byte copy */
+ return 0;
+}
+#endif // end of HAVE_AVX512VBMI
diff --git a/contrib/libs/hyperscan/src/nfa/sheng.h b/contrib/libs/hyperscan/src/nfa/sheng.h
index 6111c6dec5..7b90e3034f 100644
--- a/contrib/libs/hyperscan/src/nfa/sheng.h
+++ b/contrib/libs/hyperscan/src/nfa/sheng.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -58,86 +58,86 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);
-#if defined(HAVE_AVX512VBMI)
-#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
-#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
-char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q);
-char nfaExecSheng32_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecSheng32_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, u8 key);
-char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
- NfaCallback callback, void *context);
-char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q);
-
-char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context);
-
-#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL
-#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
-char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q);
-char nfaExecSheng64_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecSheng64_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, u8 key);
-char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
- NfaCallback callback, void *context);
-char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
-
-char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context);
-
-#else // !HAVE_AVX512VBMI
-
-#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
-#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
-#define nfaExecSheng32_Q NFA_API_NO_IMPL
-#define nfaExecSheng32_Q2 NFA_API_NO_IMPL
-#define nfaExecSheng32_QR NFA_API_NO_IMPL
-#define nfaExecSheng32_inAccept NFA_API_NO_IMPL
-#define nfaExecSheng32_inAnyAccept NFA_API_NO_IMPL
-#define nfaExecSheng32_queueInitState NFA_API_NO_IMPL
-#define nfaExecSheng32_queueCompressState NFA_API_NO_IMPL
-#define nfaExecSheng32_expandState NFA_API_NO_IMPL
-#define nfaExecSheng32_initCompressedState NFA_API_NO_IMPL
-#define nfaExecSheng32_testEOD NFA_API_NO_IMPL
-#define nfaExecSheng32_reportCurrent NFA_API_NO_IMPL
-#define nfaExecSheng32_B NFA_API_NO_IMPL
-
-#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL
-#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL
-#define nfaExecSheng64_Q NFA_API_NO_IMPL
-#define nfaExecSheng64_Q2 NFA_API_NO_IMPL
-#define nfaExecSheng64_QR NFA_API_NO_IMPL
-#define nfaExecSheng64_inAccept NFA_API_NO_IMPL
-#define nfaExecSheng64_inAnyAccept NFA_API_NO_IMPL
-#define nfaExecSheng64_queueInitState NFA_API_NO_IMPL
-#define nfaExecSheng64_queueCompressState NFA_API_NO_IMPL
-#define nfaExecSheng64_expandState NFA_API_NO_IMPL
-#define nfaExecSheng64_initCompressedState NFA_API_NO_IMPL
-#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
-#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
-#define nfaExecSheng64_B NFA_API_NO_IMPL
-#endif // end of HAVE_AVX512VBMI
-
+#if defined(HAVE_AVX512VBMI)
+#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
+#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
+char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q);
+char nfaExecSheng32_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecSheng32_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, u8 key);
+char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
+ NfaCallback callback, void *context);
+char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q);
+
+char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context);
+
+#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL
+#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
+char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q);
+char nfaExecSheng64_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecSheng64_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, u8 key);
+char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
+ NfaCallback callback, void *context);
+char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
+
+char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context);
+
+#else // !HAVE_AVX512VBMI
+
+#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
+#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
+#define nfaExecSheng32_Q NFA_API_NO_IMPL
+#define nfaExecSheng32_Q2 NFA_API_NO_IMPL
+#define nfaExecSheng32_QR NFA_API_NO_IMPL
+#define nfaExecSheng32_inAccept NFA_API_NO_IMPL
+#define nfaExecSheng32_inAnyAccept NFA_API_NO_IMPL
+#define nfaExecSheng32_queueInitState NFA_API_NO_IMPL
+#define nfaExecSheng32_queueCompressState NFA_API_NO_IMPL
+#define nfaExecSheng32_expandState NFA_API_NO_IMPL
+#define nfaExecSheng32_initCompressedState NFA_API_NO_IMPL
+#define nfaExecSheng32_testEOD NFA_API_NO_IMPL
+#define nfaExecSheng32_reportCurrent NFA_API_NO_IMPL
+#define nfaExecSheng32_B NFA_API_NO_IMPL
+
+#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL
+#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL
+#define nfaExecSheng64_Q NFA_API_NO_IMPL
+#define nfaExecSheng64_Q2 NFA_API_NO_IMPL
+#define nfaExecSheng64_QR NFA_API_NO_IMPL
+#define nfaExecSheng64_inAccept NFA_API_NO_IMPL
+#define nfaExecSheng64_inAnyAccept NFA_API_NO_IMPL
+#define nfaExecSheng64_queueInitState NFA_API_NO_IMPL
+#define nfaExecSheng64_queueCompressState NFA_API_NO_IMPL
+#define nfaExecSheng64_expandState NFA_API_NO_IMPL
+#define nfaExecSheng64_initCompressedState NFA_API_NO_IMPL
+#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
+#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
+#define nfaExecSheng64_B NFA_API_NO_IMPL
+#endif // end of HAVE_AVX512VBMI
+
#endif /* SHENG_H_ */
diff --git a/contrib/libs/hyperscan/src/nfa/sheng_defs.h b/contrib/libs/hyperscan/src/nfa/sheng_defs.h
index d14018829c..390af75221 100644
--- a/contrib/libs/hyperscan/src/nfa/sheng_defs.h
+++ b/contrib/libs/hyperscan/src/nfa/sheng_defs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -52,43 +52,43 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
}
-#if defined(HAVE_AVX512VBMI)
-static really_inline
-u8 isDeadState32(const u8 a) {
- return a & SHENG32_STATE_DEAD;
-}
-
-static really_inline
-u8 isAcceptState32(const u8 a) {
- return a & SHENG32_STATE_ACCEPT;
-}
-
-static really_inline
-u8 isAccelState32(const u8 a) {
- return a & SHENG32_STATE_ACCEL;
-}
-
-static really_inline
-u8 hasInterestingStates32(const u8 a, const u8 b, const u8 c, const u8 d) {
- return (a | b | c | d) & (SHENG32_STATE_FLAG_MASK);
-}
-
-static really_inline
-u8 isDeadState64(const u8 a) {
- return a & SHENG64_STATE_DEAD;
-}
-
-static really_inline
-u8 isAcceptState64(const u8 a) {
- return a & SHENG64_STATE_ACCEPT;
-}
-
-static really_inline
-u8 hasInterestingStates64(const u8 a, const u8 b, const u8 c, const u8 d) {
- return (a | b | c | d) & (SHENG64_STATE_FLAG_MASK);
-}
-#endif
-
+#if defined(HAVE_AVX512VBMI)
+static really_inline
+u8 isDeadState32(const u8 a) { /* Tests the DEAD flag of a sheng32 state byte. */
+ return a & SHENG32_STATE_DEAD; /* the masked bit(s), not 0/1: non-zero iff the flag is set */
+}
+
+static really_inline
+u8 isAcceptState32(const u8 a) { /* Tests the ACCEPT flag of a sheng32 state byte. */
+ return a & SHENG32_STATE_ACCEPT; /* the masked bit(s), not 0/1: non-zero iff the flag is set */
+}
+
+static really_inline
+u8 isAccelState32(const u8 a) { /* Tests the ACCEL flag of a sheng32 state byte. */
+ return a & SHENG32_STATE_ACCEL; /* the masked bit(s), not 0/1: non-zero iff the flag is set */
+}
+
+static really_inline
+u8 hasInterestingStates32(const u8 a, const u8 b, const u8 c, const u8 d) { /* Tests four sheng32 state bytes for any flag bit at once. */
+ return (a | b | c | d) & (SHENG32_STATE_FLAG_MASK); /* OR first so one mask test covers all four; non-zero if any has a flag bit set */
+}
+
+static really_inline
+u8 isDeadState64(const u8 a) { /* Tests the DEAD flag of a sheng64 state byte. */
+ return a & SHENG64_STATE_DEAD; /* the masked bit(s), not 0/1: non-zero iff the flag is set */
+}
+
+static really_inline
+u8 isAcceptState64(const u8 a) { /* Tests the ACCEPT flag of a sheng64 state byte. */
+ return a & SHENG64_STATE_ACCEPT; /* the masked bit(s), not 0/1: non-zero iff the flag is set */
+}
+
+static really_inline
+u8 hasInterestingStates64(const u8 a, const u8 b, const u8 c, const u8 d) { /* Tests four sheng64 state bytes for any flag bit at once. */
+ return (a | b | c | d) & (SHENG64_STATE_FLAG_MASK); /* OR first so one mask test covers all four; non-zero if any has a flag bit set */
+}
+#endif
+
/* these functions should be optimized out, used by NO_MATCHES mode */
static really_inline
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
@@ -108,162 +108,162 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_cod
-#define DEAD_FUNC32 isDeadState32
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_cod
-#define DEAD_FUNC64 isDeadState64
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_cod
+#define DEAD_FUNC32 isDeadState32
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_cod
+#define DEAD_FUNC64 isDeadState64
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef DEAD_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef DEAD_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* callback output, can't die */
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_co
-#define DEAD_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_co
-#define DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_co
+#define DEAD_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_co
+#define DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef DEAD_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef DEAD_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* stop at match, can die */
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_samd
-#define DEAD_FUNC32 isDeadState32
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_samd
-#define DEAD_FUNC64 isDeadState64
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_samd
+#define DEAD_FUNC32 isDeadState32
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_samd
+#define DEAD_FUNC64 isDeadState64
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef DEAD_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef DEAD_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* stop at match, can't die */
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_sam
-#define DEAD_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_sam
-#define DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_sam
+#define DEAD_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_sam
+#define DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef DEAD_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef DEAD_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* no match, can die */
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_nmd
-#define DEAD_FUNC32 isDeadState32
-#define ACCEPT_FUNC32 dummyFunc
-#define SHENG64_IMPL sheng64_nmd
-#define DEAD_FUNC64 isDeadState64
-#define ACCEPT_FUNC64 dummyFunc
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_nmd
+#define DEAD_FUNC32 isDeadState32
+#define ACCEPT_FUNC32 dummyFunc
+#define SHENG64_IMPL sheng64_nmd
+#define DEAD_FUNC64 isDeadState64
+#define ACCEPT_FUNC64 dummyFunc
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef DEAD_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef DEAD_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* no match, can't die */
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_nm
-#define DEAD_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 dummyFunc
-#define SHENG64_IMPL sheng64_nm
-#define DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 dummyFunc
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_nm
+#define DEAD_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 dummyFunc
+#define SHENG64_IMPL sheng64_nm
+#define DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 dummyFunc
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef DEAD_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef DEAD_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/*
@@ -277,16 +277,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_coda
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 isDeadState32
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 isAccelState32
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_coda
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 isDeadState32
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 isAccelState32
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define NO_SHENG64_IMPL
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -296,16 +296,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef NO_SHENG64_IMPL
+#endif
#undef STOP_AT_MATCH
/* callback output, can die, not accelerated */
@@ -316,20 +316,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_cod
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 isDeadState32
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_4_cod
-#define INTERESTING_FUNC64 hasInterestingStates64
-#define INNER_DEAD_FUNC64 isDeadState64
-#define OUTER_DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_cod
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 isDeadState32
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_4_cod
+#define INTERESTING_FUNC64 hasInterestingStates64
+#define INNER_DEAD_FUNC64 isDeadState64
+#define OUTER_DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -339,20 +339,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef INTERESTING_FUNC64
-#undef INNER_DEAD_FUNC64
-#undef OUTER_DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef INTERESTING_FUNC64
+#undef INNER_DEAD_FUNC64
+#undef OUTER_DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* callback output, can't die, accelerated */
@@ -363,16 +363,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_coa
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 isAccelState32
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_coa
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 isAccelState32
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define NO_SHENG64_IMPL
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -382,16 +382,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef NO_SHENG64_IMPL
+#endif
#undef STOP_AT_MATCH
/* callback output, can't die, not accelerated */
@@ -402,20 +402,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_co
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_4_co
-#define INTERESTING_FUNC64 hasInterestingStates64
-#define INNER_DEAD_FUNC64 dummyFunc
-#define OUTER_DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_co
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_4_co
+#define INTERESTING_FUNC64 hasInterestingStates64
+#define INNER_DEAD_FUNC64 dummyFunc
+#define OUTER_DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -425,20 +425,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef INTERESTING_FUNC64
-#undef INNER_DEAD_FUNC64
-#undef OUTER_DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef INTERESTING_FUNC64
+#undef INNER_DEAD_FUNC64
+#undef OUTER_DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* stop at match, can die, accelerated */
@@ -449,16 +449,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_samda
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 isDeadState32
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 isAccelState32
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_samda
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 isDeadState32
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 isAccelState32
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define NO_SHENG64_IMPL
+#endif
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -468,16 +468,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef NO_SHENG64_IMPL
+#endif
#undef STOP_AT_MATCH
/* stop at match, can die, not accelerated */
@@ -488,20 +488,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_samd
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 isDeadState32
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_4_samd
-#define INTERESTING_FUNC64 hasInterestingStates64
-#define INNER_DEAD_FUNC64 isDeadState64
-#define OUTER_DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_samd
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 isDeadState32
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_4_samd
+#define INTERESTING_FUNC64 hasInterestingStates64
+#define INNER_DEAD_FUNC64 isDeadState64
+#define OUTER_DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -511,20 +511,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef INTERESTING_FUNC64
-#undef INNER_DEAD_FUNC64
-#undef OUTER_DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef INTERESTING_FUNC64
+#undef INNER_DEAD_FUNC64
+#undef OUTER_DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* stop at match, can't die, accelerated */
@@ -535,16 +535,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_sama
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 isAccelState32
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_sama
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 isAccelState32
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define NO_SHENG64_IMPL
+#endif
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -554,16 +554,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef NO_SHENG64_IMPL
+#endif
#undef STOP_AT_MATCH
/* stop at match, can't die, not accelerated */
@@ -574,20 +574,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_sam
-#define INTERESTING_FUNC32 hasInterestingStates32
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 isAcceptState32
-#define SHENG64_IMPL sheng64_4_sam
-#define INTERESTING_FUNC64 hasInterestingStates64
-#define INNER_DEAD_FUNC64 dummyFunc
-#define OUTER_DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 isAcceptState64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_sam
+#define INTERESTING_FUNC32 hasInterestingStates32
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 isAcceptState32
+#define SHENG64_IMPL sheng64_4_sam
+#define INTERESTING_FUNC64 hasInterestingStates64
+#define INNER_DEAD_FUNC64 dummyFunc
+#define OUTER_DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 isAcceptState64
+#endif
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -597,20 +597,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef INTERESTING_FUNC64
-#undef INNER_DEAD_FUNC64
-#undef OUTER_DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef INTERESTING_FUNC64
+#undef INNER_DEAD_FUNC64
+#undef OUTER_DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* no-match have interesting func as dummy, and die/accel checks are outer */
@@ -623,16 +623,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_nmda
-#define INTERESTING_FUNC32 dummyFunc4
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 isDeadState32
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 isAccelState32
-#define ACCEPT_FUNC32 dummyFunc
-#define NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_nmda
+#define INTERESTING_FUNC32 dummyFunc4
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 isDeadState32
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 isAccelState32
+#define ACCEPT_FUNC32 dummyFunc
+#define NO_SHENG64_IMPL
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -642,16 +642,16 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef NO_SHENG64_IMPL
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef NO_SHENG64_IMPL
+#endif
#undef STOP_AT_MATCH
/* no match, can die, not accelerated */
@@ -662,20 +662,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_nmd
-#define INTERESTING_FUNC32 dummyFunc4
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 isDeadState32
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 dummyFunc
-#define SHENG64_IMPL sheng64_4_nmd
-#define INTERESTING_FUNC64 dummyFunc4
-#define INNER_DEAD_FUNC64 dummyFunc
-#define OUTER_DEAD_FUNC64 isDeadState64
-#define ACCEPT_FUNC64 dummyFunc
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_nmd
+#define INTERESTING_FUNC32 dummyFunc4
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 isDeadState32
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 dummyFunc
+#define SHENG64_IMPL sheng64_4_nmd
+#define INTERESTING_FUNC64 dummyFunc4
+#define INNER_DEAD_FUNC64 dummyFunc
+#define OUTER_DEAD_FUNC64 isDeadState64
+#define ACCEPT_FUNC64 dummyFunc
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -685,20 +685,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef INTERESTING_FUNC64
-#undef INNER_DEAD_FUNC64
-#undef OUTER_DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef INTERESTING_FUNC64
+#undef INNER_DEAD_FUNC64
+#undef OUTER_DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
/* there is no performance benefit in accelerating a no-match case that can't
@@ -712,20 +712,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
-#define SHENG32_IMPL sheng32_4_nm
-#define INTERESTING_FUNC32 dummyFunc4
-#define INNER_DEAD_FUNC32 dummyFunc
-#define OUTER_DEAD_FUNC32 dummyFunc
-#define INNER_ACCEL_FUNC32 dummyFunc
-#define OUTER_ACCEL_FUNC32 dummyFunc
-#define ACCEPT_FUNC32 dummyFunc
-#define SHENG64_IMPL sheng64_4_nm
-#define INTERESTING_FUNC64 dummyFunc4
-#define INNER_DEAD_FUNC64 dummyFunc
-#define OUTER_DEAD_FUNC64 dummyFunc
-#define ACCEPT_FUNC64 dummyFunc
-#endif
+#if defined(HAVE_AVX512VBMI)
+#define SHENG32_IMPL sheng32_4_nm
+#define INTERESTING_FUNC32 dummyFunc4
+#define INNER_DEAD_FUNC32 dummyFunc
+#define OUTER_DEAD_FUNC32 dummyFunc
+#define INNER_ACCEL_FUNC32 dummyFunc
+#define OUTER_ACCEL_FUNC32 dummyFunc
+#define ACCEPT_FUNC32 dummyFunc
+#define SHENG64_IMPL sheng64_4_nm
+#define INTERESTING_FUNC64 dummyFunc4
+#define INNER_DEAD_FUNC64 dummyFunc
+#define OUTER_DEAD_FUNC64 dummyFunc
+#define ACCEPT_FUNC64 dummyFunc
+#endif
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -735,20 +735,20 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
-#undef SHENG32_IMPL
-#undef INTERESTING_FUNC32
-#undef INNER_DEAD_FUNC32
-#undef OUTER_DEAD_FUNC32
-#undef INNER_ACCEL_FUNC32
-#undef OUTER_ACCEL_FUNC32
-#undef ACCEPT_FUNC32
-#undef SHENG64_IMPL
-#undef INTERESTING_FUNC64
-#undef INNER_DEAD_FUNC64
-#undef OUTER_DEAD_FUNC64
-#undef ACCEPT_FUNC64
-#endif
+#if defined(HAVE_AVX512VBMI)
+#undef SHENG32_IMPL
+#undef INTERESTING_FUNC32
+#undef INNER_DEAD_FUNC32
+#undef OUTER_DEAD_FUNC32
+#undef INNER_ACCEL_FUNC32
+#undef OUTER_ACCEL_FUNC32
+#undef ACCEPT_FUNC32
+#undef SHENG64_IMPL
+#undef INTERESTING_FUNC64
+#undef INNER_DEAD_FUNC64
+#undef OUTER_DEAD_FUNC64
+#undef ACCEPT_FUNC64
+#endif
#undef STOP_AT_MATCH
#endif // SHENG_DEFS_H
diff --git a/contrib/libs/hyperscan/src/nfa/sheng_impl.h b/contrib/libs/hyperscan/src/nfa/sheng_impl.h
index 924296699f..fb8ee16834 100644
--- a/contrib/libs/hyperscan/src/nfa/sheng_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/sheng_impl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -95,127 +95,127 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
-
-#if defined(HAVE_AVX512VBMI)
-static really_inline
-char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
- const struct sheng32 *s,
- u8 *const cached_accept_state,
- ReportID *const cached_accept_id,
- u8 single, u64a base_offset, const u8 *buf, const u8 *start,
- const u8 *end, const u8 **scan_end) {
- DEBUG_PRINTF("Starting DFA execution in state %u\n",
- *state & SHENG32_STATE_MASK);
- const u8 *cur_buf = start;
- if (DEAD_FUNC32(*state)) {
- DEBUG_PRINTF("Dead on arrival\n");
- *scan_end = end;
- return MO_CONTINUE_MATCHING;
- }
- DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
-
- m512 cur_state = set64x8(*state);
- const m512 *masks = s->succ_masks;
-
- while (likely(cur_buf != end)) {
- const u8 c = *cur_buf;
- const m512 succ_mask = masks[c];
- cur_state = vpermb512(cur_state, succ_mask);
- const u8 tmp = movd512(cur_state);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
- tmp & SHENG32_STATE_FLAG_MASK);
-
- if (unlikely(ACCEPT_FUNC32(tmp))) {
- DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG32_STATE_MASK);
- u64a match_offset = base_offset + (cur_buf - buf) + 1;
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (u64a)(cur_buf - start));
- *state = tmp;
- *scan_end = cur_buf;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports32(s, cb, ctxt, tmp, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- cur_buf++;
- }
- *state = movd512(cur_state);
- *scan_end = cur_buf;
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
- const struct sheng64 *s,
- u8 *const cached_accept_state,
- ReportID *const cached_accept_id,
- u8 single, u64a base_offset, const u8 *buf, const u8 *start,
- const u8 *end, const u8 **scan_end) {
- DEBUG_PRINTF("Starting DFA execution in state %u\n",
- *state & SHENG64_STATE_MASK);
- const u8 *cur_buf = start;
- if (DEAD_FUNC64(*state)) {
- DEBUG_PRINTF("Dead on arrival\n");
- *scan_end = end;
- return MO_CONTINUE_MATCHING;
- }
- DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
-
- m512 cur_state = set64x8(*state);
- const m512 *masks = s->succ_masks;
-
- while (likely(cur_buf != end)) {
- const u8 c = *cur_buf;
- const m512 succ_mask = masks[c];
- cur_state = vpermb512(cur_state, succ_mask);
- const u8 tmp = movd512(cur_state);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
- tmp & SHENG64_STATE_FLAG_MASK);
-
- if (unlikely(ACCEPT_FUNC64(tmp))) {
- DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG64_STATE_MASK);
- u64a match_offset = base_offset + (cur_buf - buf) + 1;
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (u64a)(cur_buf - start));
- *state = tmp;
- *scan_end = cur_buf;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports64(s, cb, ctxt, tmp, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- cur_buf++;
- }
- *state = movd512(cur_state);
- *scan_end = cur_buf;
- return MO_CONTINUE_MATCHING;
-}
-#endif
+
+#if defined(HAVE_AVX512VBMI)
+static really_inline
+char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
+ const struct sheng32 *s,
+ u8 *const cached_accept_state,
+ ReportID *const cached_accept_id,
+ u8 single, u64a base_offset, const u8 *buf, const u8 *start,
+ const u8 *end, const u8 **scan_end) {
+ DEBUG_PRINTF("Starting DFA execution in state %u\n",
+ *state & SHENG32_STATE_MASK);
+ const u8 *cur_buf = start;
+ if (DEAD_FUNC32(*state)) {
+ DEBUG_PRINTF("Dead on arrival\n");
+ *scan_end = end;
+ return MO_CONTINUE_MATCHING;
+ }
+ DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
+
+ m512 cur_state = set64x8(*state);
+ const m512 *masks = s->succ_masks;
+
+ while (likely(cur_buf != end)) {
+ const u8 c = *cur_buf;
+ const m512 succ_mask = masks[c];
+ cur_state = vpermb512(cur_state, succ_mask);
+ const u8 tmp = movd512(cur_state);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
+ tmp & SHENG32_STATE_FLAG_MASK);
+
+ if (unlikely(ACCEPT_FUNC32(tmp))) {
+ DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG32_STATE_MASK);
+ u64a match_offset = base_offset + (cur_buf - buf) + 1;
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (u64a)(cur_buf - start));
+ *state = tmp;
+ *scan_end = cur_buf;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports32(s, cb, ctxt, tmp, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ cur_buf++;
+ }
+ *state = movd512(cur_state);
+ *scan_end = cur_buf;
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
+ const struct sheng64 *s,
+ u8 *const cached_accept_state,
+ ReportID *const cached_accept_id,
+ u8 single, u64a base_offset, const u8 *buf, const u8 *start,
+ const u8 *end, const u8 **scan_end) {
+ DEBUG_PRINTF("Starting DFA execution in state %u\n",
+ *state & SHENG64_STATE_MASK);
+ const u8 *cur_buf = start;
+ if (DEAD_FUNC64(*state)) {
+ DEBUG_PRINTF("Dead on arrival\n");
+ *scan_end = end;
+ return MO_CONTINUE_MATCHING;
+ }
+ DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
+
+ m512 cur_state = set64x8(*state);
+ const m512 *masks = s->succ_masks;
+
+ while (likely(cur_buf != end)) {
+ const u8 c = *cur_buf;
+ const m512 succ_mask = masks[c];
+ cur_state = vpermb512(cur_state, succ_mask);
+ const u8 tmp = movd512(cur_state);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
+ tmp & SHENG64_STATE_FLAG_MASK);
+
+ if (unlikely(ACCEPT_FUNC64(tmp))) {
+ DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG64_STATE_MASK);
+ u64a match_offset = base_offset + (cur_buf - buf) + 1;
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (u64a)(cur_buf - start));
+ *state = tmp;
+ *scan_end = cur_buf;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports64(s, cb, ctxt, tmp, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ cur_buf++;
+ }
+ *state = movd512(cur_state);
+ *scan_end = cur_buf;
+ return MO_CONTINUE_MATCHING;
+}
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/sheng_impl4.h b/contrib/libs/hyperscan/src/nfa/sheng_impl4.h
index e033cdadf0..440e7396e2 100644
--- a/contrib/libs/hyperscan/src/nfa/sheng_impl4.h
+++ b/contrib/libs/hyperscan/src/nfa/sheng_impl4.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -282,430 +282,430 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
-
-#if defined(HAVE_AVX512VBMI)
-static really_inline
-char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
- const struct sheng32 *s,
- u8 *const cached_accept_state,
- ReportID *const cached_accept_id,
- u8 single, u64a base_offset, const u8 *buf, const u8 *start,
- const u8 *end, const u8 **scan_end) {
- DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
- *state & SHENG32_STATE_MASK);
- const u8 *cur_buf = start;
- const u8 *min_accel_dist = start;
- base_offset++;
- DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
-
- if (INNER_ACCEL_FUNC32(*state) || OUTER_ACCEL_FUNC32(*state)) {
- DEBUG_PRINTF("Accel state reached @ 0\n");
- const union AccelAux *aaux =
- get_accel32(s, *state & SHENG32_STATE_MASK);
- const u8 *new_offset = run_accel(aaux, cur_buf, end);
- if (new_offset < cur_buf + BAD_ACCEL_DIST) {
- min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
- } else {
- min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
- }
- DEBUG_PRINTF("Next accel chance: %llu\n",
- (u64a)(min_accel_dist - start));
- DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
- cur_buf = new_offset;
- DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
- }
- if (INNER_DEAD_FUNC32(*state) || OUTER_DEAD_FUNC32(*state)) {
- DEBUG_PRINTF("Dead on arrival\n");
- *scan_end = end;
- return MO_CONTINUE_MATCHING;
- }
-
- m512 cur_state = set64x8(*state);
- const m512 *masks = s->succ_masks;
-
- while (likely(end - cur_buf >= 4)) {
- const u8 *b1 = cur_buf;
- const u8 *b2 = cur_buf + 1;
- const u8 *b3 = cur_buf + 2;
- const u8 *b4 = cur_buf + 3;
- const u8 c1 = *b1;
- const u8 c2 = *b2;
- const u8 c3 = *b3;
- const u8 c4 = *b4;
-
- const m512 succ_mask1 = masks[c1];
- cur_state = vpermb512(cur_state, succ_mask1);
- const u8 a1 = movd512(cur_state);
-
- const m512 succ_mask2 = masks[c2];
- cur_state = vpermb512(cur_state, succ_mask2);
- const u8 a2 = movd512(cur_state);
-
- const m512 succ_mask3 = masks[c3];
- cur_state = vpermb512(cur_state, succ_mask3);
- const u8 a3 = movd512(cur_state);
-
- const m512 succ_mask4 = masks[c4];
- cur_state = vpermb512(cur_state, succ_mask4);
- const u8 a4 = movd512(cur_state);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
- a1 & SHENG32_STATE_FLAG_MASK);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG32_STATE_MASK,
- a2 & SHENG32_STATE_FLAG_MASK);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG32_STATE_MASK,
- a3 & SHENG32_STATE_FLAG_MASK);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG32_STATE_MASK,
- a4 & SHENG32_STATE_FLAG_MASK);
-
- if (unlikely(INTERESTING_FUNC32(a1, a2, a3, a4))) {
- if (ACCEPT_FUNC32(a1)) {
- u64a match_offset = base_offset + b1 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a1 & SHENG32_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b1 - start));
- *scan_end = b1;
- *state = a1;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports32(s, cb, ctxt, a1, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (ACCEPT_FUNC32(a2)) {
- u64a match_offset = base_offset + b2 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a2 & SHENG32_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b2 - start));
- *scan_end = b2;
- *state = a2;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports32(s, cb, ctxt, a2, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (ACCEPT_FUNC32(a3)) {
- u64a match_offset = base_offset + b3 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a3 & SHENG32_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b3 - start));
- *scan_end = b3;
- *state = a3;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports32(s, cb, ctxt, a3, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (ACCEPT_FUNC32(a4)) {
- u64a match_offset = base_offset + b4 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a4 & SHENG32_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b4 - start));
- *scan_end = b4;
- *state = a4;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports32(s, cb, ctxt, a4, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (INNER_DEAD_FUNC32(a4)) {
- DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
- *scan_end = end;
- *state = a4;
- return MO_CONTINUE_MATCHING;
- }
- if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC32(a4)) {
- DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
- const union AccelAux *aaux =
- get_accel32(s, a4 & SHENG32_STATE_MASK);
- const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
- if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
- min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
- } else {
- min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
- }
- DEBUG_PRINTF("Next accel chance: %llu\n",
- (u64a)(min_accel_dist - start));
- DEBUG_PRINTF("Accel scanned %llu bytes\n",
- (u64a)(new_offset - cur_buf - 4));
- cur_buf = new_offset;
- DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
- continue;
- }
- }
- if (OUTER_DEAD_FUNC32(a4)) {
- DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
- *scan_end = end;
- *state = a4;
- return MO_CONTINUE_MATCHING;
- };
- if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC32(a4)) {
- DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
- const union AccelAux *aaux =
- get_accel32(s, a4 & SHENG32_STATE_MASK);
- const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
- if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
- min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
- } else {
- min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
- }
- DEBUG_PRINTF("Next accel chance: %llu\n",
- (u64a)(min_accel_dist - start));
- DEBUG_PRINTF("Accel scanned %llu bytes\n",
- (u64a)(new_offset - cur_buf - 4));
- cur_buf = new_offset;
- DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
- continue;
- };
- cur_buf += 4;
- }
- *state = movd512(cur_state);
- *scan_end = cur_buf;
- return MO_CONTINUE_MATCHING;
-}
-
-#ifndef NO_SHENG64_IMPL
-static really_inline
-char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
- const struct sheng64 *s,
- u8 *const cached_accept_state,
- ReportID *const cached_accept_id,
- u8 single, u64a base_offset, const u8 *buf, const u8 *start,
- const u8 *end, const u8 **scan_end) {
- DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
- *state & SHENG64_STATE_MASK);
- const u8 *cur_buf = start;
- base_offset++;
- DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
-
- if (INNER_DEAD_FUNC64(*state) || OUTER_DEAD_FUNC64(*state)) {
- DEBUG_PRINTF("Dead on arrival\n");
- *scan_end = end;
- return MO_CONTINUE_MATCHING;
- }
-
- m512 cur_state = set64x8(*state);
- const m512 *masks = s->succ_masks;
-
- while (likely(end - cur_buf >= 4)) {
- const u8 *b1 = cur_buf;
- const u8 *b2 = cur_buf + 1;
- const u8 *b3 = cur_buf + 2;
- const u8 *b4 = cur_buf + 3;
- const u8 c1 = *b1;
- const u8 c2 = *b2;
- const u8 c3 = *b3;
- const u8 c4 = *b4;
-
- const m512 succ_mask1 = masks[c1];
- cur_state = vpermb512(cur_state, succ_mask1);
- const u8 a1 = movd512(cur_state);
-
- const m512 succ_mask2 = masks[c2];
- cur_state = vpermb512(cur_state, succ_mask2);
- const u8 a2 = movd512(cur_state);
-
- const m512 succ_mask3 = masks[c3];
- cur_state = vpermb512(cur_state, succ_mask3);
- const u8 a3 = movd512(cur_state);
-
- const m512 succ_mask4 = masks[c4];
- cur_state = vpermb512(cur_state, succ_mask4);
- const u8 a4 = movd512(cur_state);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
- a1 & SHENG64_STATE_FLAG_MASK);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG64_STATE_MASK,
- a2 & SHENG64_STATE_FLAG_MASK);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG64_STATE_MASK,
- a3 & SHENG64_STATE_FLAG_MASK);
-
- DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
- DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG64_STATE_MASK,
- a4 & SHENG64_STATE_FLAG_MASK);
-
- if (unlikely(INTERESTING_FUNC64(a1, a2, a3, a4))) {
- if (ACCEPT_FUNC64(a1)) {
- u64a match_offset = base_offset + b1 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a1 & SHENG64_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b1 - start));
- *scan_end = b1;
- *state = a1;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports64(s, cb, ctxt, a1, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (ACCEPT_FUNC64(a2)) {
- u64a match_offset = base_offset + b2 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a2 & SHENG64_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b2 - start));
- *scan_end = b2;
- *state = a2;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports64(s, cb, ctxt, a2, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (ACCEPT_FUNC64(a3)) {
- u64a match_offset = base_offset + b3 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a3 & SHENG64_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b3 - start));
- *scan_end = b3;
- *state = a3;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports64(s, cb, ctxt, a3, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (ACCEPT_FUNC64(a4)) {
- u64a match_offset = base_offset + b4 - buf;
- DEBUG_PRINTF("Accept state %u reached\n",
- a4 & SHENG64_STATE_MASK);
- DEBUG_PRINTF("Match @ %llu\n", match_offset);
- if (STOP_AT_MATCH) {
- DEBUG_PRINTF("Stopping at match @ %lli\n",
- (s64a)(b4 - start));
- *scan_end = b4;
- *state = a4;
- return MO_MATCHES_PENDING;
- }
- if (single) {
- if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
- MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else {
- if (fireReports64(s, cb, ctxt, a4, match_offset,
- cached_accept_state, cached_accept_id,
- 0) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (INNER_DEAD_FUNC64(a4)) {
- DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
- *scan_end = end;
- *state = a4;
- return MO_CONTINUE_MATCHING;
- }
- }
- if (OUTER_DEAD_FUNC64(a4)) {
- DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
- *scan_end = end;
- *state = a4;
- return MO_CONTINUE_MATCHING;
- }
- cur_buf += 4;
- }
- *state = movd512(cur_state);
- *scan_end = cur_buf;
- return MO_CONTINUE_MATCHING;
-}
-#endif // !NO_SHENG64_IMPL
-#endif
+
+#if defined(HAVE_AVX512VBMI)
+static really_inline
+char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
+ const struct sheng32 *s,
+ u8 *const cached_accept_state,
+ ReportID *const cached_accept_id,
+ u8 single, u64a base_offset, const u8 *buf, const u8 *start,
+ const u8 *end, const u8 **scan_end) {
+ DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
+ *state & SHENG32_STATE_MASK);
+ const u8 *cur_buf = start;
+ const u8 *min_accel_dist = start;
+ base_offset++;
+ DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
+
+ if (INNER_ACCEL_FUNC32(*state) || OUTER_ACCEL_FUNC32(*state)) {
+ DEBUG_PRINTF("Accel state reached @ 0\n");
+ const union AccelAux *aaux =
+ get_accel32(s, *state & SHENG32_STATE_MASK);
+ const u8 *new_offset = run_accel(aaux, cur_buf, end);
+ if (new_offset < cur_buf + BAD_ACCEL_DIST) {
+ min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
+ } else {
+ min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
+ }
+ DEBUG_PRINTF("Next accel chance: %llu\n",
+ (u64a)(min_accel_dist - start));
+ DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
+ cur_buf = new_offset;
+ DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
+ }
+ if (INNER_DEAD_FUNC32(*state) || OUTER_DEAD_FUNC32(*state)) {
+ DEBUG_PRINTF("Dead on arrival\n");
+ *scan_end = end;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ m512 cur_state = set64x8(*state);
+ const m512 *masks = s->succ_masks;
+
+ while (likely(end - cur_buf >= 4)) {
+ const u8 *b1 = cur_buf;
+ const u8 *b2 = cur_buf + 1;
+ const u8 *b3 = cur_buf + 2;
+ const u8 *b4 = cur_buf + 3;
+ const u8 c1 = *b1;
+ const u8 c2 = *b2;
+ const u8 c3 = *b3;
+ const u8 c4 = *b4;
+
+ const m512 succ_mask1 = masks[c1];
+ cur_state = vpermb512(cur_state, succ_mask1);
+ const u8 a1 = movd512(cur_state);
+
+ const m512 succ_mask2 = masks[c2];
+ cur_state = vpermb512(cur_state, succ_mask2);
+ const u8 a2 = movd512(cur_state);
+
+ const m512 succ_mask3 = masks[c3];
+ cur_state = vpermb512(cur_state, succ_mask3);
+ const u8 a3 = movd512(cur_state);
+
+ const m512 succ_mask4 = masks[c4];
+ cur_state = vpermb512(cur_state, succ_mask4);
+ const u8 a4 = movd512(cur_state);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
+ a1 & SHENG32_STATE_FLAG_MASK);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG32_STATE_MASK,
+ a2 & SHENG32_STATE_FLAG_MASK);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG32_STATE_MASK,
+ a3 & SHENG32_STATE_FLAG_MASK);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG32_STATE_MASK,
+ a4 & SHENG32_STATE_FLAG_MASK);
+
+ if (unlikely(INTERESTING_FUNC32(a1, a2, a3, a4))) {
+ if (ACCEPT_FUNC32(a1)) {
+ u64a match_offset = base_offset + b1 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a1 & SHENG32_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b1 - start));
+ *scan_end = b1;
+ *state = a1;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports32(s, cb, ctxt, a1, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (ACCEPT_FUNC32(a2)) {
+ u64a match_offset = base_offset + b2 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a2 & SHENG32_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b2 - start));
+ *scan_end = b2;
+ *state = a2;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports32(s, cb, ctxt, a2, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (ACCEPT_FUNC32(a3)) {
+ u64a match_offset = base_offset + b3 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a3 & SHENG32_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b3 - start));
+ *scan_end = b3;
+ *state = a3;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports32(s, cb, ctxt, a3, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (ACCEPT_FUNC32(a4)) {
+ u64a match_offset = base_offset + b4 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a4 & SHENG32_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b4 - start));
+ *scan_end = b4;
+ *state = a4;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports32(s, cb, ctxt, a4, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (INNER_DEAD_FUNC32(a4)) {
+ DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
+ *scan_end = end;
+ *state = a4;
+ return MO_CONTINUE_MATCHING;
+ }
+ if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC32(a4)) {
+ DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
+ const union AccelAux *aaux =
+ get_accel32(s, a4 & SHENG32_STATE_MASK);
+ const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
+ if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
+ min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
+ } else {
+ min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
+ }
+ DEBUG_PRINTF("Next accel chance: %llu\n",
+ (u64a)(min_accel_dist - start));
+ DEBUG_PRINTF("Accel scanned %llu bytes\n",
+ (u64a)(new_offset - cur_buf - 4));
+ cur_buf = new_offset;
+ DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
+ continue;
+ }
+ }
+ if (OUTER_DEAD_FUNC32(a4)) {
+ DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
+ *scan_end = end;
+ *state = a4;
+ return MO_CONTINUE_MATCHING;
+ };
+ if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC32(a4)) {
+ DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
+ const union AccelAux *aaux =
+ get_accel32(s, a4 & SHENG32_STATE_MASK);
+ const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
+ if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
+ min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
+ } else {
+ min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
+ }
+ DEBUG_PRINTF("Next accel chance: %llu\n",
+ (u64a)(min_accel_dist - start));
+ DEBUG_PRINTF("Accel scanned %llu bytes\n",
+ (u64a)(new_offset - cur_buf - 4));
+ cur_buf = new_offset;
+ DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
+ continue;
+ };
+ cur_buf += 4;
+ }
+ *state = movd512(cur_state);
+ *scan_end = cur_buf;
+ return MO_CONTINUE_MATCHING;
+}
+
+#ifndef NO_SHENG64_IMPL
+static really_inline
+char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
+ const struct sheng64 *s,
+ u8 *const cached_accept_state,
+ ReportID *const cached_accept_id,
+ u8 single, u64a base_offset, const u8 *buf, const u8 *start,
+ const u8 *end, const u8 **scan_end) {
+ DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
+ *state & SHENG64_STATE_MASK);
+ const u8 *cur_buf = start;
+ base_offset++;
+ DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
+
+ if (INNER_DEAD_FUNC64(*state) || OUTER_DEAD_FUNC64(*state)) {
+ DEBUG_PRINTF("Dead on arrival\n");
+ *scan_end = end;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ m512 cur_state = set64x8(*state);
+ const m512 *masks = s->succ_masks;
+
+ while (likely(end - cur_buf >= 4)) {
+ const u8 *b1 = cur_buf;
+ const u8 *b2 = cur_buf + 1;
+ const u8 *b3 = cur_buf + 2;
+ const u8 *b4 = cur_buf + 3;
+ const u8 c1 = *b1;
+ const u8 c2 = *b2;
+ const u8 c3 = *b3;
+ const u8 c4 = *b4;
+
+ const m512 succ_mask1 = masks[c1];
+ cur_state = vpermb512(cur_state, succ_mask1);
+ const u8 a1 = movd512(cur_state);
+
+ const m512 succ_mask2 = masks[c2];
+ cur_state = vpermb512(cur_state, succ_mask2);
+ const u8 a2 = movd512(cur_state);
+
+ const m512 succ_mask3 = masks[c3];
+ cur_state = vpermb512(cur_state, succ_mask3);
+ const u8 a3 = movd512(cur_state);
+
+ const m512 succ_mask4 = masks[c4];
+ cur_state = vpermb512(cur_state, succ_mask4);
+ const u8 a4 = movd512(cur_state);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
+ a1 & SHENG64_STATE_FLAG_MASK);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG64_STATE_MASK,
+ a2 & SHENG64_STATE_FLAG_MASK);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG64_STATE_MASK,
+ a3 & SHENG64_STATE_FLAG_MASK);
+
+ DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
+ DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG64_STATE_MASK,
+ a4 & SHENG64_STATE_FLAG_MASK);
+
+ if (unlikely(INTERESTING_FUNC64(a1, a2, a3, a4))) {
+ if (ACCEPT_FUNC64(a1)) {
+ u64a match_offset = base_offset + b1 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a1 & SHENG64_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b1 - start));
+ *scan_end = b1;
+ *state = a1;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports64(s, cb, ctxt, a1, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (ACCEPT_FUNC64(a2)) {
+ u64a match_offset = base_offset + b2 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a2 & SHENG64_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b2 - start));
+ *scan_end = b2;
+ *state = a2;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports64(s, cb, ctxt, a2, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (ACCEPT_FUNC64(a3)) {
+ u64a match_offset = base_offset + b3 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a3 & SHENG64_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b3 - start));
+ *scan_end = b3;
+ *state = a3;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports64(s, cb, ctxt, a3, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (ACCEPT_FUNC64(a4)) {
+ u64a match_offset = base_offset + b4 - buf;
+ DEBUG_PRINTF("Accept state %u reached\n",
+ a4 & SHENG64_STATE_MASK);
+ DEBUG_PRINTF("Match @ %llu\n", match_offset);
+ if (STOP_AT_MATCH) {
+ DEBUG_PRINTF("Stopping at match @ %lli\n",
+ (s64a)(b4 - start));
+ *scan_end = b4;
+ *state = a4;
+ return MO_MATCHES_PENDING;
+ }
+ if (single) {
+ if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
+ MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else {
+ if (fireReports64(s, cb, ctxt, a4, match_offset,
+ cached_accept_state, cached_accept_id,
+ 0) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (INNER_DEAD_FUNC64(a4)) {
+ DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
+ *scan_end = end;
+ *state = a4;
+ return MO_CONTINUE_MATCHING;
+ }
+ }
+ if (OUTER_DEAD_FUNC64(a4)) {
+ DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
+ *scan_end = end;
+ *state = a4;
+ return MO_CONTINUE_MATCHING;
+ }
+ cur_buf += 4;
+ }
+ *state = movd512(cur_state);
+ *scan_end = cur_buf;
+ return MO_CONTINUE_MATCHING;
+}
+#endif // !NO_SHENG64_IMPL
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/sheng_internal.h b/contrib/libs/hyperscan/src/nfa/sheng_internal.h
index e133d32f5b..98536886c5 100644
--- a/contrib/libs/hyperscan/src/nfa/sheng_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/sheng_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -38,17 +38,17 @@
#define SHENG_STATE_MASK 0xF
#define SHENG_STATE_FLAG_MASK 0x70
-#define SHENG32_STATE_ACCEPT 0x20
-#define SHENG32_STATE_DEAD 0x40
-#define SHENG32_STATE_ACCEL 0x80
-#define SHENG32_STATE_MASK 0x1F
-#define SHENG32_STATE_FLAG_MASK 0xE0
-
-#define SHENG64_STATE_ACCEPT 0x40
-#define SHENG64_STATE_DEAD 0x80
-#define SHENG64_STATE_MASK 0x3F
-#define SHENG64_STATE_FLAG_MASK 0xC0
-
+#define SHENG32_STATE_ACCEPT 0x20
+#define SHENG32_STATE_DEAD 0x40
+#define SHENG32_STATE_ACCEL 0x80
+#define SHENG32_STATE_MASK 0x1F
+#define SHENG32_STATE_FLAG_MASK 0xE0
+
+#define SHENG64_STATE_ACCEPT 0x40
+#define SHENG64_STATE_DEAD 0x80
+#define SHENG64_STATE_MASK 0x3F
+#define SHENG64_STATE_FLAG_MASK 0xC0
+
#define SHENG_FLAG_SINGLE_REPORT 0x1
#define SHENG_FLAG_CAN_DIE 0x2
#define SHENG_FLAG_HAS_ACCEL 0x4
@@ -78,30 +78,30 @@ struct sheng {
ReportID report;
};
-struct sheng32 {
- m512 succ_masks[256];
- u32 length;
- u32 aux_offset;
- u32 report_offset;
- u32 accel_offset;
- u8 n_states;
- u8 anchored;
- u8 floating;
- u8 flags;
- ReportID report;
-};
-
-struct sheng64 {
- m512 succ_masks[256];
- u32 length;
- u32 aux_offset;
- u32 report_offset;
- u32 accel_offset;
- u8 n_states;
- u8 anchored;
- u8 floating;
- u8 flags;
- ReportID report;
-};
-
+struct sheng32 {
+ m512 succ_masks[256];
+ u32 length;
+ u32 aux_offset;
+ u32 report_offset;
+ u32 accel_offset;
+ u8 n_states;
+ u8 anchored;
+ u8 floating;
+ u8 flags;
+ ReportID report;
+};
+
+struct sheng64 {
+ m512 succ_masks[256];
+ u32 length;
+ u32 aux_offset;
+ u32 report_offset;
+ u32 accel_offset;
+ u8 n_states;
+ u8 anchored;
+ u8 floating;
+ u8 flags;
+ ReportID report;
+};
+
#endif /* SHENG_INTERNAL_H_ */
diff --git a/contrib/libs/hyperscan/src/nfa/shengcompile.cpp b/contrib/libs/hyperscan/src/nfa/shengcompile.cpp
index f4ab79ce70..aa3faeb09d 100644
--- a/contrib/libs/hyperscan/src/nfa/shengcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/shengcompile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -301,28 +301,28 @@ void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
}
DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
}
-
-static really_inline
-void dumpShuffleMask32(const u8 chr, const u8 *buf, unsigned sz) {
- stringstream o;
-
- for (unsigned i = 0; i < sz; i++) {
- o.width(2);
- o << (buf[i] & SHENG32_STATE_MASK) << " ";
- }
- DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
-}
-
-static really_inline
-void dumpShuffleMask64(const u8 chr, const u8 *buf, unsigned sz) {
- stringstream o;
-
- for (unsigned i = 0; i < sz; i++) {
- o.width(2);
- o << (buf[i] & SHENG64_STATE_MASK) << " ";
- }
- DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
-}
+
+static really_inline
+void dumpShuffleMask32(const u8 chr, const u8 *buf, unsigned sz) {
+ stringstream o;
+
+ for (unsigned i = 0; i < sz; i++) {
+ o.width(2);
+ o << (buf[i] & SHENG32_STATE_MASK) << " ";
+ }
+ DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
+}
+
+static really_inline
+void dumpShuffleMask64(const u8 chr, const u8 *buf, unsigned sz) {
+ stringstream o;
+
+ for (unsigned i = 0; i < sz; i++) {
+ o.width(2);
+ o << (buf[i] & SHENG64_STATE_MASK) << " ";
+ }
+ DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
+}
#endif
static
@@ -333,16 +333,16 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
}
}
-template <typename T>
+template <typename T>
static
-u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info,
- UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
- return 0;
-}
-
-template <>
-u8 getShengState<sheng>(dstate &state, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo) {
+u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info,
+ UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
+ return 0;
+}
+
+template <>
+u8 getShengState<sheng>(dstate &state, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo) {
u8 s = state.impl_id;
if (!state.reports.empty()) {
s |= SHENG_STATE_ACCEPT;
@@ -356,41 +356,41 @@ u8 getShengState<sheng>(dstate &state, dfa_info &info,
return s;
}
-template <>
-u8 getShengState<sheng32>(dstate &state, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo) {
- u8 s = state.impl_id;
- if (!state.reports.empty()) {
- s |= SHENG32_STATE_ACCEPT;
- }
- if (info.isDead(state)) {
- s |= SHENG32_STATE_DEAD;
- }
- if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
- s |= SHENG32_STATE_ACCEL;
- }
- return s;
-}
-
-template <>
-u8 getShengState<sheng64>(dstate &state, dfa_info &info,
- UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
- u8 s = state.impl_id;
- if (!state.reports.empty()) {
- s |= SHENG64_STATE_ACCEPT;
- }
- if (info.isDead(state)) {
- s |= SHENG64_STATE_DEAD;
- }
- return s;
-}
-
-template <typename T>
+template <>
+u8 getShengState<sheng32>(dstate &state, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo) {
+ u8 s = state.impl_id;
+ if (!state.reports.empty()) {
+ s |= SHENG32_STATE_ACCEPT;
+ }
+ if (info.isDead(state)) {
+ s |= SHENG32_STATE_DEAD;
+ }
+ if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
+ s |= SHENG32_STATE_ACCEL;
+ }
+ return s;
+}
+
+template <>
+u8 getShengState<sheng64>(dstate &state, dfa_info &info,
+ UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
+ u8 s = state.impl_id;
+ if (!state.reports.empty()) {
+ s |= SHENG64_STATE_ACCEPT;
+ }
+ if (info.isDead(state)) {
+ s |= SHENG64_STATE_DEAD;
+ }
+ return s;
+}
+
+template <typename T>
static
void fillAccelAux(struct NFA *n, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) {
DEBUG_PRINTF("Filling accel aux structures\n");
- T *s = (T *)getMutableImplNfa(n);
+ T *s = (T *)getMutableImplNfa(n);
u32 offset = s->accel_offset;
for (dstate_id_t i = 0; i < info.size(); i++) {
@@ -408,21 +408,21 @@ void fillAccelAux(struct NFA *n, dfa_info &info,
}
}
-template <typename T>
+template <typename T>
static
-void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
- UNUSED map<dstate_id_t, AccelScheme> &accelInfo,
- UNUSED u32 aux_offset, UNUSED u32 report_offset,
- UNUSED u32 accel_offset, UNUSED u32 total_size,
- UNUSED u32 dfa_size) {
-}
-
-template <>
-void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo,
- u32 aux_offset, u32 report_offset,
- u32 accel_offset, u32 total_size,
- u32 dfa_size) {
+void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
+ UNUSED map<dstate_id_t, AccelScheme> &accelInfo,
+ UNUSED u32 aux_offset, UNUSED u32 report_offset,
+ UNUSED u32 accel_offset, UNUSED u32 total_size,
+ UNUSED u32 dfa_size) {
+}
+
+template <>
+void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo,
+ u32 aux_offset, u32 report_offset,
+ u32 accel_offset, u32 total_size,
+ u32 dfa_size) {
n->length = total_size;
n->scratchStateSize = 1;
n->streamStateSize = 1;
@@ -438,65 +438,65 @@ void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
s->length = dfa_size;
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
- s->anchored = getShengState<sheng>(info.anchored, info, accelInfo);
- s->floating = getShengState<sheng>(info.floating, info, accelInfo);
-}
-
-template <>
-void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo,
- u32 aux_offset, u32 report_offset,
- u32 accel_offset, u32 total_size,
- u32 dfa_size) {
- n->length = total_size;
- n->scratchStateSize = 1;
- n->streamStateSize = 1;
- n->nPositions = info.size();
- n->type = SHENG_NFA_32;
- n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
-
- sheng32 *s = (sheng32 *)getMutableImplNfa(n);
- s->aux_offset = aux_offset;
- s->report_offset = report_offset;
- s->accel_offset = accel_offset;
- s->n_states = info.size();
- s->length = dfa_size;
- s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
-
- s->anchored = getShengState<sheng32>(info.anchored, info, accelInfo);
- s->floating = getShengState<sheng32>(info.floating, info, accelInfo);
-}
-
-template <>
-void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo,
- u32 aux_offset, u32 report_offset,
- u32 accel_offset, u32 total_size,
- u32 dfa_size) {
- n->length = total_size;
- n->scratchStateSize = 1;
- n->streamStateSize = 1;
- n->nPositions = info.size();
- n->type = SHENG_NFA_64;
- n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
-
- sheng64 *s = (sheng64 *)getMutableImplNfa(n);
- s->aux_offset = aux_offset;
- s->report_offset = report_offset;
- s->accel_offset = accel_offset;
- s->n_states = info.size();
- s->length = dfa_size;
- s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
-
- s->anchored = getShengState<sheng64>(info.anchored, info, accelInfo);
- s->floating = getShengState<sheng64>(info.floating, info, accelInfo);
-}
-
-template <typename T>
+ s->anchored = getShengState<sheng>(info.anchored, info, accelInfo);
+ s->floating = getShengState<sheng>(info.floating, info, accelInfo);
+}
+
+template <>
+void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo,
+ u32 aux_offset, u32 report_offset,
+ u32 accel_offset, u32 total_size,
+ u32 dfa_size) {
+ n->length = total_size;
+ n->scratchStateSize = 1;
+ n->streamStateSize = 1;
+ n->nPositions = info.size();
+ n->type = SHENG_NFA_32;
+ n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
+
+ sheng32 *s = (sheng32 *)getMutableImplNfa(n);
+ s->aux_offset = aux_offset;
+ s->report_offset = report_offset;
+ s->accel_offset = accel_offset;
+ s->n_states = info.size();
+ s->length = dfa_size;
+ s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
+
+ s->anchored = getShengState<sheng32>(info.anchored, info, accelInfo);
+ s->floating = getShengState<sheng32>(info.floating, info, accelInfo);
+}
+
+template <>
+void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo,
+ u32 aux_offset, u32 report_offset,
+ u32 accel_offset, u32 total_size,
+ u32 dfa_size) {
+ n->length = total_size;
+ n->scratchStateSize = 1;
+ n->streamStateSize = 1;
+ n->nPositions = info.size();
+ n->type = SHENG_NFA_64;
+ n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
+
+ sheng64 *s = (sheng64 *)getMutableImplNfa(n);
+ s->aux_offset = aux_offset;
+ s->report_offset = report_offset;
+ s->accel_offset = accel_offset;
+ s->n_states = info.size();
+ s->length = dfa_size;
+ s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
+
+ s->anchored = getShengState<sheng64>(info.anchored, info, accelInfo);
+ s->floating = getShengState<sheng64>(info.floating, info, accelInfo);
+}
+
+template <typename T>
static
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
map<dstate_id_t, AccelScheme> &accelInfo) {
- T *s = (T *)getMutableImplNfa(n);
+ T *s = (T *)getMutableImplNfa(n);
u32 aux_base = s->aux_offset;
DEBUG_PRINTF("Filling tops for state %u\n", id);
@@ -513,14 +513,14 @@ void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
- aux->top = getShengState<T>(top_state, info, accelInfo);
+ aux->top = getShengState<T>(top_state, info, accelInfo);
}
-template <typename T>
+template <typename T>
static
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
vector<u32> &reports_eod, vector<u32> &report_offsets) {
- T *s = (T *)getMutableImplNfa(n);
+ T *s = (T *)getMutableImplNfa(n);
u32 aux_base = s->aux_offset;
auto raw_id = info.raw_id(id);
@@ -540,97 +540,97 @@ void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
}
-template <typename T>
+template <typename T>
static
void fillSingleReport(NFA *n, ReportID r_id) {
- T *s = (T *)getMutableImplNfa(n);
+ T *s = (T *)getMutableImplNfa(n);
DEBUG_PRINTF("Single report ID: %u\n", r_id);
s->report = r_id;
s->flags |= SHENG_FLAG_SINGLE_REPORT;
}
-template <typename T>
+template <typename T>
static
-bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info,
- UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
- return true;
-}
-
-template <>
-bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo) {
+bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info,
+ UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
+ return true;
+}
+
+template <>
+bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo) {
for (u16 chr = 0; chr < 256; chr++) {
u8 buf[16] = {0};
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
auto &succ_state = info.next(idx, chr);
- buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
+ buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
}
#ifdef DEBUG
dumpShuffleMask(chr, buf, sizeof(buf));
#endif
memcpy(&s->shuffle_masks[chr], buf, sizeof(m128));
}
- return true;
-}
-
-template <>
-bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo) {
- for (u16 chr = 0; chr < 256; chr++) {
- u8 buf[64] = {0};
-
- assert(info.size() <= 32);
- for (dstate_id_t idx = 0; idx < info.size(); idx++) {
- auto &succ_state = info.next(idx, chr);
-
- buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
- buf[32 + idx] = buf[idx];
- }
-#ifdef DEBUG
- dumpShuffleMask32(chr, buf, sizeof(buf));
-#endif
- memcpy(&s->succ_masks[chr], buf, sizeof(m512));
- }
- return true;
-}
-
-template <>
-bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
- map<dstate_id_t, AccelScheme> &accelInfo) {
- for (u16 chr = 0; chr < 256; chr++) {
- u8 buf[64] = {0};
-
- assert(info.size() <= 64);
- for (dstate_id_t idx = 0; idx < info.size(); idx++) {
- auto &succ_state = info.next(idx, chr);
-
- if (accelInfo.find(info.raw_id(succ_state.impl_id))
- != accelInfo.end()) {
- return false;
- }
- buf[idx] = getShengState<sheng64>(succ_state, info, accelInfo);
- }
-#ifdef DEBUG
- dumpShuffleMask64(chr, buf, sizeof(buf));
-#endif
- memcpy(&s->succ_masks[chr], buf, sizeof(m512));
- }
- return true;
-}
-
-bool has_accel_sheng(const NFA *) {
- return true; /* consider the sheng region as accelerated */
-}
-
-template <typename T>
-static
-bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
- set<dstate_id_t> *accel_states,
- sheng_build_strat &strat,
- dfa_info &info) {
+ return true;
+}
+
+template <>
+bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo) {
+ for (u16 chr = 0; chr < 256; chr++) {
+ u8 buf[64] = {0};
+
+ assert(info.size() <= 32);
+ for (dstate_id_t idx = 0; idx < info.size(); idx++) {
+ auto &succ_state = info.next(idx, chr);
+
+ buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
+ buf[32 + idx] = buf[idx];
+ }
+#ifdef DEBUG
+ dumpShuffleMask32(chr, buf, sizeof(buf));
+#endif
+ memcpy(&s->succ_masks[chr], buf, sizeof(m512));
+ }
+ return true;
+}
+
+template <>
+bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accelInfo) {
+ for (u16 chr = 0; chr < 256; chr++) {
+ u8 buf[64] = {0};
+
+ assert(info.size() <= 64);
+ for (dstate_id_t idx = 0; idx < info.size(); idx++) {
+ auto &succ_state = info.next(idx, chr);
+
+ if (accelInfo.find(info.raw_id(succ_state.impl_id))
+ != accelInfo.end()) {
+ return false;
+ }
+ buf[idx] = getShengState<sheng64>(succ_state, info, accelInfo);
+ }
+#ifdef DEBUG
+ dumpShuffleMask64(chr, buf, sizeof(buf));
+#endif
+ memcpy(&s->succ_masks[chr], buf, sizeof(m512));
+ }
+ return true;
+}
+
+bool has_accel_sheng(const NFA *) {
+ return true; /* consider the sheng region as accelerated */
+}
+
+template <typename T>
+static
+bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
+ set<dstate_id_t> *accel_states,
+ sheng_build_strat &strat,
+ dfa_info &info) {
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
* mode with our semantics */
raw.stripExtraEodReports();
@@ -645,7 +645,7 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
info.anchored.impl_id, info.floating.impl_id);
- u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(T));
+ u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(T));
vector<u32> reports, eod_reports, report_offsets;
u8 isSingle = 0;
ReportID single_report = 0;
@@ -667,129 +667,129 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
- populateBasicInfo<T>(nfa.get(), info, accelInfo, nfa_size,
- reports_offset, accel_offset, total_size,
- total_size - sizeof(NFA));
+ populateBasicInfo<T>(nfa.get(), info, accelInfo, nfa_size,
+ reports_offset, accel_offset, total_size,
+ total_size - sizeof(NFA));
DEBUG_PRINTF("Setting up aux and report structures\n");
ri->fillReportLists(nfa.get(), reports_offset, report_offsets);
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
- fillTops<T>(nfa.get(), info, idx, accelInfo);
- fillAux<T>(nfa.get(), info, idx, reports, eod_reports,
- report_offsets);
+ fillTops<T>(nfa.get(), info, idx, accelInfo);
+ fillAux<T>(nfa.get(), info, idx, reports, eod_reports,
+ report_offsets);
}
if (isSingle) {
- fillSingleReport<T>(nfa.get(), single_report);
+ fillSingleReport<T>(nfa.get(), single_report);
}
- fillAccelAux<T>(nfa.get(), info, accelInfo);
+ fillAccelAux<T>(nfa.get(), info, accelInfo);
if (accel_states) {
fillAccelOut(accelInfo, accel_states);
}
- if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
- return nullptr;
- }
+ if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
+ return nullptr;
+ }
+
+ return nfa;
+}
+
+bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm, bool only_accel_init,
+ set<dstate_id_t> *accel_states) {
+ if (!cc.grey.allowSheng) {
+ DEBUG_PRINTF("Sheng is not allowed!\n");
+ return nullptr;
+ }
+
+ sheng_build_strat strat(raw, rm, only_accel_init);
+ dfa_info info(strat);
+
+ DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
+
+ DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
+ raw.start_anchored, raw.start_floating);
+
+ DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
+ info.can_die ? "can" : "cannot", info.size());
+ if (info.size() > 16) {
+ DEBUG_PRINTF("Too many states\n");
+ return nullptr;
+ }
+
+ return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
+}
+
+bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm, bool only_accel_init,
+ set<dstate_id_t> *accel_states) {
+ if (!cc.grey.allowSheng) {
+ DEBUG_PRINTF("Sheng is not allowed!\n");
+ return nullptr;
+ }
+
+ if (!cc.target_info.has_avx512vbmi()) {
+ DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
+ return nullptr;
+ }
+ sheng_build_strat strat(raw, rm, only_accel_init);
+ dfa_info info(strat);
+
+ DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
+
+ DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
+ raw.start_anchored, raw.start_floating);
+
+ DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
+ info.can_die ? "can" : "cannot", info.size());
+ assert(info.size() > 16);
+ if (info.size() > 32) {
+ DEBUG_PRINTF("Too many states\n");
+ return nullptr;
+ }
+
+ return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
+}
+
+bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm, bool only_accel_init,
+ set<dstate_id_t> *accel_states) {
+ if (!cc.grey.allowSheng) {
+ DEBUG_PRINTF("Sheng is not allowed!\n");
+ return nullptr;
+ }
+
+ if (!cc.target_info.has_avx512vbmi()) {
+ DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
+ return nullptr;
+ }
+
+ sheng_build_strat strat(raw, rm, only_accel_init);
+ dfa_info info(strat);
+
+ DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
+
+ DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
+ raw.start_anchored, raw.start_floating);
+
+ DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
+ info.can_die ? "can" : "cannot", info.size());
+ assert(info.size() > 32);
+ if (info.size() > 64) {
+ DEBUG_PRINTF("Too many states\n");
+ return nullptr;
+ }
+ vector<dstate> old_states;
+ old_states = info.states;
+ auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
+ if (!nfa) {
+ info.states = old_states;
+ }
return nfa;
}
-bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm, bool only_accel_init,
- set<dstate_id_t> *accel_states) {
- if (!cc.grey.allowSheng) {
- DEBUG_PRINTF("Sheng is not allowed!\n");
- return nullptr;
- }
-
- sheng_build_strat strat(raw, rm, only_accel_init);
- dfa_info info(strat);
-
- DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
-
- DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
- raw.start_anchored, raw.start_floating);
-
- DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
- info.can_die ? "can" : "cannot", info.size());
- if (info.size() > 16) {
- DEBUG_PRINTF("Too many states\n");
- return nullptr;
- }
-
- return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
-}
-
-bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm, bool only_accel_init,
- set<dstate_id_t> *accel_states) {
- if (!cc.grey.allowSheng) {
- DEBUG_PRINTF("Sheng is not allowed!\n");
- return nullptr;
- }
-
- if (!cc.target_info.has_avx512vbmi()) {
- DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
- return nullptr;
- }
-
- sheng_build_strat strat(raw, rm, only_accel_init);
- dfa_info info(strat);
-
- DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
-
- DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
- raw.start_anchored, raw.start_floating);
-
- DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
- info.can_die ? "can" : "cannot", info.size());
- assert(info.size() > 16);
- if (info.size() > 32) {
- DEBUG_PRINTF("Too many states\n");
- return nullptr;
- }
-
- return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
-}
-
-bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm, bool only_accel_init,
- set<dstate_id_t> *accel_states) {
- if (!cc.grey.allowSheng) {
- DEBUG_PRINTF("Sheng is not allowed!\n");
- return nullptr;
- }
-
- if (!cc.target_info.has_avx512vbmi()) {
- DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
- return nullptr;
- }
-
- sheng_build_strat strat(raw, rm, only_accel_init);
- dfa_info info(strat);
-
- DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
-
- DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
- raw.start_anchored, raw.start_floating);
-
- DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
- info.can_die ? "can" : "cannot", info.size());
- assert(info.size() > 32);
- if (info.size() > 64) {
- DEBUG_PRINTF("Too many states\n");
- return nullptr;
- }
- vector<dstate> old_states;
- old_states = info.states;
- auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
- if (!nfa) {
- info.states = old_states;
- }
- return nfa;
-}
-
} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/shengcompile.h b/contrib/libs/hyperscan/src/nfa/shengcompile.h
index 175bf6a86f..256f4a4e50 100644
--- a/contrib/libs/hyperscan/src/nfa/shengcompile.h
+++ b/contrib/libs/hyperscan/src/nfa/shengcompile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -61,7 +61,7 @@ public:
u32 max_allowed_offset_accel() const override;
u32 max_stop_char() const override;
u32 max_floating_stop_char() const override;
- DfaType getType() const override { return Sheng; }
+ DfaType getType() const override { return Sheng; }
private:
raw_dfa &rdfa;
@@ -71,14 +71,14 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm, bool only_accel_init,
std::set<dstate_id_t> *accel_states = nullptr);
-bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm, bool only_accel_init,
- std::set<dstate_id_t> *accel_states = nullptr);
-
-bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
- const ReportManager &rm, bool only_accel_init,
- std::set<dstate_id_t> *accel_states = nullptr);
-
+bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm, bool only_accel_init,
+ std::set<dstate_id_t> *accel_states = nullptr);
+
+bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm, bool only_accel_init,
+ std::set<dstate_id_t> *accel_states = nullptr);
+
struct sheng_escape_info {
CharReach outs;
CharReach outs2_single;
diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli.h b/contrib/libs/hyperscan/src/nfa/vermicelli.h
index 7b35deb8d6..ed797d83f9 100644
--- a/contrib/libs/hyperscan/src/nfa/vermicelli.h
+++ b/contrib/libs/hyperscan/src/nfa/vermicelli.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -46,20 +46,20 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
- VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
-
+ VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
+
// Handle small scans.
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = nocase
- ? vermMiniNocase(chars, buf, buf_end, 0)
- : vermMini(chars, buf, buf_end, 0);
- if (ptr) {
- return ptr;
- }
- return buf_end;
- }
-#else
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = nocase
+ ? vermMiniNocase(chars, buf, buf_end, 0)
+ : vermMini(chars, buf, buf_end, 0);
+ if (ptr) {
+ return ptr;
+ }
+ return buf_end;
+ }
+#else
if (buf_end - buf < VERM_BOUNDARY) {
for (; buf < buf_end; buf++) {
char cur = (char)*buf;
@@ -72,7 +72,7 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
}
return buf;
}
-#endif
+#endif
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) {
@@ -112,20 +112,20 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
- VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
-
+ VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
+
// Handle small scans.
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = nocase
- ? vermMiniNocase(chars, buf, buf_end, 1)
- : vermMini(chars, buf, buf_end, 1);
- if (ptr) {
- return ptr;
- }
- return buf_end;
- }
-#else
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = nocase
+ ? vermMiniNocase(chars, buf, buf_end, 1)
+ : vermMini(chars, buf, buf_end, 1);
+ if (ptr) {
+ return ptr;
+ }
+ return buf_end;
+ }
+#else
if (buf_end - buf < VERM_BOUNDARY) {
for (; buf < buf_end; buf++) {
char cur = (char)*buf;
@@ -138,7 +138,7 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
}
return buf;
}
-#endif
+#endif
size_t min = (size_t)buf % VERM_BOUNDARY;
if (min) {
@@ -179,28 +179,28 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = nocase
- ? dvermMiniNocase(chars1, chars2, buf, buf_end)
- : dvermMini(chars1, chars2, buf, buf_end);
- if (ptr) {
- return ptr;
- }
-
- /* check for partial match at end */
- u8 mask = nocase ? CASE_CLEAR : 0xff;
- if ((buf_end[-1] & mask) == (u8)c1) {
- DEBUG_PRINTF("partial!!!\n");
- return buf_end - 1;
- }
-
- return buf_end;
- }
-#endif
-
- assert((buf_end - buf) >= VERM_BOUNDARY);
- uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = nocase
+ ? dvermMiniNocase(chars1, chars2, buf, buf_end)
+ : dvermMini(chars1, chars2, buf, buf_end);
+ if (ptr) {
+ return ptr;
+ }
+
+ /* check for partial match at end */
+ u8 mask = nocase ? CASE_CLEAR : 0xff;
+ if ((buf_end[-1] & mask) == (u8)c1) {
+ DEBUG_PRINTF("partial!!!\n");
+ return buf_end - 1;
+ }
+
+ return buf_end;
+ }
+#endif
+
+ assert((buf_end - buf) >= VERM_BOUNDARY);
+ uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) {
// Input isn't aligned, so we need to run one iteration with an
// unaligned load, then skip buf forward to the next aligned address.
@@ -257,26 +257,26 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
VERM_TYPE mask1 = VERM_SET_FN(m1);
VERM_TYPE mask2 = VERM_SET_FN(m2);
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf,
- buf_end);
- if (ptr) {
- return ptr;
- }
-
- /* check for partial match at end */
- if ((buf_end[-1] & m1) == (u8)c1) {
- DEBUG_PRINTF("partial!!!\n");
- return buf_end - 1;
- }
-
- return buf_end;
- }
-#endif
-
- assert((buf_end - buf) >= VERM_BOUNDARY);
- uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf,
+ buf_end);
+ if (ptr) {
+ return ptr;
+ }
+
+ /* check for partial match at end */
+ if ((buf_end[-1] & m1) == (u8)c1) {
+ DEBUG_PRINTF("partial!!!\n");
+ return buf_end - 1;
+ }
+
+ return buf_end;
+ }
+#endif
+
+ assert((buf_end - buf) >= VERM_BOUNDARY);
+ uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) {
// Input isn't aligned, so we need to run one iteration with an
// unaligned load, then skip buf forward to the next aligned address.
@@ -308,7 +308,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
/* check for partial match at end */
if ((buf_end[-1] & m1) == (u8)c1) {
- DEBUG_PRINTF("partial!!!\n");
+ DEBUG_PRINTF("partial!!!\n");
return buf_end - 1;
}
@@ -324,20 +324,20 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
- VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
-
+ VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
+
// Handle small scans.
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = nocase
- ? rvermMiniNocase(chars, buf, buf_end, 0)
- : rvermMini(chars, buf, buf_end, 0);
- if (ptr) {
- return ptr;
- }
- return buf - 1;
- }
-#else
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = nocase
+ ? rvermMiniNocase(chars, buf, buf_end, 0)
+ : rvermMini(chars, buf, buf_end, 0);
+ if (ptr) {
+ return ptr;
+ }
+ return buf - 1;
+ }
+#else
if (buf_end - buf < VERM_BOUNDARY) {
for (buf_end--; buf_end >= buf; buf_end--) {
char cur = (char)*buf_end;
@@ -350,7 +350,7 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
}
return buf_end;
}
-#endif
+#endif
size_t min = (size_t)buf_end % VERM_BOUNDARY;
if (min) {
@@ -358,14 +358,14 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
// unaligned load, then skip buf backward to the next aligned address.
// There's some small overlap here, but we don't mind scanning it twice
// if we can do it quickly, do we?
- const u8 *ptr = nocase ? rvermUnalignNocase(chars,
- buf_end - VERM_BOUNDARY,
- 0)
- : rvermUnalign(chars, buf_end - VERM_BOUNDARY,
- 0);
-
- if (ptr) {
- return ptr;
+ const u8 *ptr = nocase ? rvermUnalignNocase(chars,
+ buf_end - VERM_BOUNDARY,
+ 0)
+ : rvermUnalign(chars, buf_end - VERM_BOUNDARY,
+ 0);
+
+ if (ptr) {
+ return ptr;
}
buf_end -= min;
@@ -396,20 +396,20 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
- VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
-
+ VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
+
// Handle small scans.
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = nocase
- ? rvermMiniNocase(chars, buf, buf_end, 1)
- : rvermMini(chars, buf, buf_end, 1);
- if (ptr) {
- return ptr;
- }
- return buf - 1;
- }
-#else
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = nocase
+ ? rvermMiniNocase(chars, buf, buf_end, 1)
+ : rvermMini(chars, buf, buf_end, 1);
+ if (ptr) {
+ return ptr;
+ }
+ return buf - 1;
+ }
+#else
if (buf_end - buf < VERM_BOUNDARY) {
for (buf_end--; buf_end >= buf; buf_end--) {
char cur = (char)*buf_end;
@@ -422,7 +422,7 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
}
return buf_end;
}
-#endif
+#endif
size_t min = (size_t)buf_end % VERM_BOUNDARY;
if (min) {
@@ -430,14 +430,14 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
// unaligned load, then skip buf backward to the next aligned address.
// There's some small overlap here, but we don't mind scanning it twice
// if we can do it quickly, do we?
- const u8 *ptr = nocase ? rvermUnalignNocase(chars,
- buf_end - VERM_BOUNDARY,
- 1)
- : rvermUnalign(chars, buf_end - VERM_BOUNDARY,
- 1);
-
- if (ptr) {
- return ptr;
+ const u8 *ptr = nocase ? rvermUnalignNocase(chars,
+ buf_end - VERM_BOUNDARY,
+ 1)
+ : rvermUnalign(chars, buf_end - VERM_BOUNDARY,
+ 1);
+
+ if (ptr) {
+ return ptr;
}
buf_end -= min;
@@ -470,32 +470,32 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
-#ifdef HAVE_AVX512
- if (buf_end - buf <= VERM_BOUNDARY) {
- const u8 *ptr = nocase
- ? rdvermMiniNocase(chars1, chars2, buf, buf_end)
- : rdvermMini(chars1, chars2, buf, buf_end);
-
- if (ptr) {
- return ptr;
- }
-
- // check for partial match at end ???
- return buf - 1;
- }
-#endif
-
- assert((buf_end - buf) >= VERM_BOUNDARY);
- size_t min = (size_t)buf_end % VERM_BOUNDARY;
+#ifdef HAVE_AVX512
+ if (buf_end - buf <= VERM_BOUNDARY) {
+ const u8 *ptr = nocase
+ ? rdvermMiniNocase(chars1, chars2, buf, buf_end)
+ : rdvermMini(chars1, chars2, buf, buf_end);
+
+ if (ptr) {
+ return ptr;
+ }
+
+ // check for partial match at end ???
+ return buf - 1;
+ }
+#endif
+
+ assert((buf_end - buf) >= VERM_BOUNDARY);
+ size_t min = (size_t)buf_end % VERM_BOUNDARY;
if (min) {
// input not aligned, so we need to run one iteration with an unaligned
// load, then skip buf forward to the next aligned address. There's
// some small overlap here, but we don't mind scanning it twice if we
// can do it quickly, do we?
- const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
- buf_end - VERM_BOUNDARY)
- : rdvermPrecondition(chars1, chars2,
- buf_end - VERM_BOUNDARY);
+ const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
+ buf_end - VERM_BOUNDARY)
+ : rdvermPrecondition(chars1, chars2,
+ buf_end - VERM_BOUNDARY);
if (ptr) {
return ptr;
diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h
index 3ec28dbf77..3307486cff 100644
--- a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h
+++ b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2020, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,8 +32,8 @@
* (users should include vermicelli.h)
*/
-#if !defined(HAVE_AVX512)
-
+#if !defined(HAVE_AVX512)
+
#define VERM_BOUNDARY 16
#define VERM_TYPE m128
#define VERM_SET_FN set16x8
@@ -393,497 +393,497 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
return NULL;
}
-
-#else // HAVE_AVX512
-
-#define VERM_BOUNDARY 64
-#define VERM_TYPE m512
-#define VERM_SET_FN set64x8
-
-static really_inline
-const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
-
- u64a z = eq512mask(chars, data);
-
- if (negate) {
- z = ~z & mask;
- }
- z &= mask;
- if (unlikely(z)) {
- return buf + ctz64(z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
- char negate) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
- m512 casemask = set64x8(CASE_CLEAR);
- m512 v = and512(casemask, data);
-
- u64a z = eq512mask(chars, v);
-
- if (negate) {
- z = ~z & mask;
- }
- z &= mask;
- if (unlikely(z)) {
- return buf + ctz64(z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
- char negate) {
- assert((size_t)buf % 64 == 0);
- for (; buf + 63 < buf_end; buf += 64) {
- m512 data = load512(buf);
- u64a z = eq512mask(chars, data);
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- }
- return NULL;
-}
-
-static really_inline
-const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf,
- const u8 *buf_end, char negate) {
- assert((size_t)buf % 64 == 0);
- m512 casemask = set64x8(CASE_CLEAR);
-
- for (; buf + 63 < buf_end; buf += 64) {
- m512 data = load512(buf);
- u64a z = eq512mask(chars, and512(casemask, data));
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) {
- m512 data = loadu512(buf); // unaligned
- u64a z = eq512mask(chars, data);
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- return buf + ctz64(z);
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) {
- m512 casemask = set64x8(CASE_CLEAR);
- m512 data = loadu512(buf); // unaligned
- u64a z = eq512mask(chars, and512(casemask, data));
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- return buf + ctz64(z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf,
- const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
-
- u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
-
- z &= mask;
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- return NULL;
-}
-
-static really_inline
-const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
- const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
- m512 casemask = set64x8(CASE_CLEAR);
- m512 v = and512(casemask, data);
-
- u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
-
- z &= mask;
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- return NULL;
-}
-
-static really_inline
-const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2,
- const u8 *buf, const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
- m512 v1 = and512(data, mask1);
- m512 v2 = and512(data, mask2);
-
- u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
-
- z &= mask;
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- return NULL;
-}
-
-static really_inline
-const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- for (; buf + 64 < buf_end; buf += 64) {
- m512 data = load512(buf);
- u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
- if (buf[63] == c1 && buf[64] == c2) {
- z |= (1ULL << 63);
- }
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- }
-
- return NULL;
-}
-
-static really_inline
-const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf % 64 == 0);
- m512 casemask = set64x8(CASE_CLEAR);
-
- for (; buf + 64 < buf_end; buf += 64) {
- m512 data = load512(buf);
- m512 v = and512(casemask, data);
- u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
- if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) {
- z |= (1ULL << 63);
- }
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- }
-
- return NULL;
-}
-
-static really_inline
-const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2,
- m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1,
- u8 m2, const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf % 64 == 0);
-
- for (; buf + 64 < buf_end; buf += 64) {
- m512 data = load512(buf);
- m512 v1 = and512(data, mask1);
- m512 v2 = and512(data, mask2);
- u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
-
- if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) {
- z |= (1ULL << 63);
- }
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- }
-
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
- m512 data = loadu512(buf); // unaligned
- u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
-
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
- /* due to laziness, nonalphas and nocase having interesting behaviour */
- m512 casemask = set64x8(CASE_CLEAR);
- m512 data = loadu512(buf); // unaligned
- m512 v = and512(casemask, data);
- u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
-
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2,
- m512 mask1, m512 mask2, const u8 *buf) {
- m512 data = loadu512(buf); // unaligned
- m512 v1 = and512(data, mask1);
- m512 v2 = and512(data, mask2);
- u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
-
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- u64a pos = ctz64(z);
- return buf + pos;
- }
- return NULL;
-}
-
-static really_inline
-const u8 *lastMatchOffset(const u8 *buf_end, u64a z) {
- assert(z);
- return buf_end - 64 + 63 - clz64(z);
-}
-
-static really_inline
-const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
-
- u64a z = eq512mask(chars, data);
-
- if (negate) {
- z = ~z & mask;
- }
- z &= mask;
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
- char negate) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
- m512 casemask = set64x8(CASE_CLEAR);
- m512 v = and512(casemask, data);
-
- u64a z = eq512mask(chars, v);
-
- if (negate) {
- z = ~z & mask;
- }
- z &= mask;
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
- char negate) {
- assert((size_t)buf_end % 64 == 0);
- for (; buf + 63 < buf_end; buf_end -= 64) {
- m512 data = load512(buf_end - 64);
- u64a z = eq512mask(chars, data);
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf,
- const u8 *buf_end, char negate) {
- assert((size_t)buf_end % 64 == 0);
- m512 casemask = set64x8(CASE_CLEAR);
-
- for (; buf + 63 < buf_end; buf_end -= 64) {
- m512 data = load512(buf_end - 64);
- u64a z = eq512mask(chars, and512(casemask, data));
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) {
- m512 data = loadu512(buf); // unaligned
- u64a z = eq512mask(chars, data);
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) {
- m512 casemask = set64x8(CASE_CLEAR);
- m512 data = loadu512(buf); // unaligned
- u64a z = eq512mask(chars, and512(casemask, data));
- if (negate) {
- z = ~z & ~0ULL;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf,
- const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
-
- u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
-
- z &= mask;
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
- const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- __mmask64 mask = (~0ULL) >> (64 - len);
- m512 data = loadu_maskz_m512(mask, buf);
- m512 casemask = set64x8(CASE_CLEAR);
- m512 v = and512(casemask, data);
-
- u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
-
- z &= mask;
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf_end % 64 == 0);
-
- for (; buf + 64 < buf_end; buf_end -= 64) {
- m512 data = load512(buf_end - 64);
- u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
- if (buf_end[-65] == c1 && buf_end[-64] == c2) {
- z |= 1;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return buf_end;
-}
-
-static really_inline
-const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf_end % 64 == 0);
- m512 casemask = set64x8(CASE_CLEAR);
-
- for (; buf + 64 < buf_end; buf_end -= 64) {
- m512 data = load512(buf_end - 64);
- m512 v = and512(casemask, data);
- u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
- if ((buf_end[-65] & CASE_CLEAR) == c1
- && (buf_end[-64] & CASE_CLEAR) == c2) {
- z |= 1;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return buf_end;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
- m512 data = loadu512(buf);
- u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
-
- // no fixup of the boundary required - the aligned run will pick it up
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
-
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
- // due to laziness, nonalphas and nocase having interesting behaviour
- m512 casemask = set64x8(CASE_CLEAR);
- m512 data = loadu512(buf);
- m512 v = and512(casemask, data);
- u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
- // no fixup of the boundary required - the aligned run will pick it up
- if (unlikely(z)) {
- return lastMatchOffset(buf + 64, z);
- }
-
- return NULL;
-}
-
-#endif // HAVE_AVX512
+
+#else // HAVE_AVX512
+
+#define VERM_BOUNDARY 64
+#define VERM_TYPE m512
+#define VERM_SET_FN set64x8
+
+static really_inline
+const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+
+ u64a z = eq512mask(chars, data);
+
+ if (negate) {
+ z = ~z & mask;
+ }
+ z &= mask;
+ if (unlikely(z)) {
+ return buf + ctz64(z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
+ char negate) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 v = and512(casemask, data);
+
+ u64a z = eq512mask(chars, v);
+
+ if (negate) {
+ z = ~z & mask;
+ }
+ z &= mask;
+ if (unlikely(z)) {
+ return buf + ctz64(z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
+ char negate) {
+ assert((size_t)buf % 64 == 0);
+ for (; buf + 63 < buf_end; buf += 64) {
+ m512 data = load512(buf);
+ u64a z = eq512mask(chars, data);
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf,
+ const u8 *buf_end, char negate) {
+ assert((size_t)buf % 64 == 0);
+ m512 casemask = set64x8(CASE_CLEAR);
+
+ for (; buf + 63 < buf_end; buf += 64) {
+ m512 data = load512(buf);
+ u64a z = eq512mask(chars, and512(casemask, data));
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) {
+ m512 data = loadu512(buf); // unaligned
+ u64a z = eq512mask(chars, data);
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ return buf + ctz64(z);
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) {
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 data = loadu512(buf); // unaligned
+ u64a z = eq512mask(chars, and512(casemask, data));
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ return buf + ctz64(z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf,
+ const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+
+ u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
+
+ z &= mask;
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
+ const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 v = and512(casemask, data);
+
+ u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
+
+ z &= mask;
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2,
+ const u8 *buf, const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+ m512 v1 = and512(data, mask1);
+ m512 v2 = and512(data, mask2);
+
+ u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
+
+ z &= mask;
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ for (; buf + 64 < buf_end; buf += 64) {
+ m512 data = load512(buf);
+ u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
+ if (buf[63] == c1 && buf[64] == c2) {
+ z |= (1ULL << 63);
+ }
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ }
+
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf % 64 == 0);
+ m512 casemask = set64x8(CASE_CLEAR);
+
+ for (; buf + 64 < buf_end; buf += 64) {
+ m512 data = load512(buf);
+ m512 v = and512(casemask, data);
+ u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
+ if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) {
+ z |= (1ULL << 63);
+ }
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ }
+
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2,
+ m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1,
+ u8 m2, const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf % 64 == 0);
+
+ for (; buf + 64 < buf_end; buf += 64) {
+ m512 data = load512(buf);
+ m512 v1 = and512(data, mask1);
+ m512 v2 = and512(data, mask2);
+ u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
+
+ if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) {
+ z |= (1ULL << 63);
+ }
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ }
+
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
+ m512 data = loadu512(buf); // unaligned
+ u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
+
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
+ /* due to laziness, nonalphas and nocase having interesting behaviour */
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 data = loadu512(buf); // unaligned
+ m512 v = and512(casemask, data);
+ u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
+
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2,
+ m512 mask1, m512 mask2, const u8 *buf) {
+ m512 data = loadu512(buf); // unaligned
+ m512 v1 = and512(data, mask1);
+ m512 v2 = and512(data, mask2);
+ u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
+
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ u64a pos = ctz64(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *lastMatchOffset(const u8 *buf_end, u64a z) {
+ assert(z);
+ return buf_end - 64 + 63 - clz64(z);
+}
+
+static really_inline
+const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+
+ u64a z = eq512mask(chars, data);
+
+ if (negate) {
+ z = ~z & mask;
+ }
+ z &= mask;
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
+ char negate) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 v = and512(casemask, data);
+
+ u64a z = eq512mask(chars, v);
+
+ if (negate) {
+ z = ~z & mask;
+ }
+ z &= mask;
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
+ char negate) {
+ assert((size_t)buf_end % 64 == 0);
+ for (; buf + 63 < buf_end; buf_end -= 64) {
+ m512 data = load512(buf_end - 64);
+ u64a z = eq512mask(chars, data);
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf,
+ const u8 *buf_end, char negate) {
+ assert((size_t)buf_end % 64 == 0);
+ m512 casemask = set64x8(CASE_CLEAR);
+
+ for (; buf + 63 < buf_end; buf_end -= 64) {
+ m512 data = load512(buf_end - 64);
+ u64a z = eq512mask(chars, and512(casemask, data));
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) {
+ m512 data = loadu512(buf); // unaligned
+ u64a z = eq512mask(chars, data);
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) {
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 data = loadu512(buf); // unaligned
+ u64a z = eq512mask(chars, and512(casemask, data));
+ if (negate) {
+ z = ~z & ~0ULL;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf,
+ const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+
+ u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
+
+ z &= mask;
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
+ const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ __mmask64 mask = (~0ULL) >> (64 - len);
+ m512 data = loadu_maskz_m512(mask, buf);
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 v = and512(casemask, data);
+
+ u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
+
+ z &= mask;
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf_end % 64 == 0);
+
+ for (; buf + 64 < buf_end; buf_end -= 64) {
+ m512 data = load512(buf_end - 64);
+ u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
+ if (buf_end[-65] == c1 && buf_end[-64] == c2) {
+ z |= 1;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return buf_end;
+}
+
+static really_inline
+const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf_end % 64 == 0);
+ m512 casemask = set64x8(CASE_CLEAR);
+
+ for (; buf + 64 < buf_end; buf_end -= 64) {
+ m512 data = load512(buf_end - 64);
+ m512 v = and512(casemask, data);
+ u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
+ if ((buf_end[-65] & CASE_CLEAR) == c1
+ && (buf_end[-64] & CASE_CLEAR) == c2) {
+ z |= 1;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return buf_end;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
+ m512 data = loadu512(buf);
+ u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
+
+ // no fixup of the boundary required - the aligned run will pick it up
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
+ // due to laziness, nonalphas and nocase having interesting behaviour
+ m512 casemask = set64x8(CASE_CLEAR);
+ m512 data = loadu512(buf);
+ m512 v = and512(casemask, data);
+ u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
+ // no fixup of the boundary required - the aligned run will pick it up
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 64, z);
+ }
+
+ return NULL;
+}
+
+#endif // HAVE_AVX512